don't rename certain zip/exe subtypes. fixes #1

This commit is contained in:
Lynne Megido 2022-05-02 17:55:48 +10:00
parent 4b84b01adc
commit 1b0505aad1
Signed by: lynnesbian
GPG key ID: F0A184B5213D9F90
3 changed files with 122 additions and 55 deletions

View file

@ -4,6 +4,14 @@ Dates are given in YYYY-MM-DD format - for example, the 15th of October 2021 is
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to
[Semantic Versioning](https://semver.org/spec/v2.0.0.html). [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
### Changed
- Some extensions are considered to be always valid - these are:
- "filepart", "part", "crdownload": Partially downloaded files, renaming could break download
- "bak", "backup": Backup copies are a common idiom (e.g. "game.exe.bak") and should be respected
### Fixed
- Support for many file types that are subcategories of others (e.g., fif will no longer rename apk files to zip)
## v0.5.1 - 2022-04-12 ## v0.5.1 - 2022-04-12
### Added ### Added
- When using the [`infer`] backend, fif is now able to detect [Mach-O](https://en.wikipedia.org/wiki/Mach-O) binaries - When using the [`infer`] backend, fif is now able to detect [Mach-O](https://en.wikipedia.org/wiki/Mach-O) binaries

87
Cargo.lock generated
View file

@ -80,16 +80,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]] [[package]]
name = "clap" name = "clap"
version = "3.1.8" version = "3.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71c47df61d9e16dc010b55dba1952a57d8c215dbb533fd13cdd13369aac73b1c" checksum = "535434c063ced786eb04aaf529308092c5ab60889e8fe24275d15de07b01fa97"
dependencies = [ dependencies = [
"atty", "atty",
"bitflags", "bitflags",
"clap_derive", "clap_derive",
"clap_lex",
"indexmap", "indexmap",
"lazy_static", "lazy_static",
"os_str_bytes",
"termcolor", "termcolor",
"terminal_size", "terminal_size",
"textwrap", "textwrap",
@ -109,6 +109,15 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "clap_lex"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a37c35f1112dad5e6e0b1adaff798507497a18fceeb30cceb3bae7d1427b9213"
dependencies = [
"os_str_bytes",
]
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.5.4" version = "0.5.4"
@ -351,9 +360,9 @@ dependencies = [
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.122" version = "0.2.125"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259" checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b"
[[package]] [[package]]
name = "lock_api" name = "lock_api"
@ -382,9 +391,9 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.4.1" version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]] [[package]]
name = "memoffset" name = "memoffset"
@ -445,9 +454,6 @@ name = "os_str_bytes"
version = "6.0.0" version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64" checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "parking_lot" name = "parking_lot"
@ -461,9 +467,9 @@ dependencies = [
[[package]] [[package]]
name = "parking_lot_core" name = "parking_lot_core"
version = "0.9.2" version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "995f667a6c822200b0433ac218e05582f0e2efa1b922a3fd2fbaadc5f87bab37" checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"libc", "libc",
@ -618,9 +624,9 @@ dependencies = [
[[package]] [[package]]
name = "rayon" name = "rayon"
version = "1.5.1" version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" checksum = "fd249e82c21598a9a426a4e00dd7adc1d640b22445ec8545feef801d1a74c221"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"crossbeam-deque", "crossbeam-deque",
@ -630,14 +636,13 @@ dependencies = [
[[package]] [[package]]
name = "rayon-core" name = "rayon-core"
version = "1.9.1" version = "1.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" checksum = "9f51245e1e62e1f1629cbfec37b5793bbabcaeb90f30e94d2ba03564687353e4"
dependencies = [ dependencies = [
"crossbeam-channel", "crossbeam-channel",
"crossbeam-deque", "crossbeam-deque",
"crossbeam-utils", "crossbeam-utils",
"lazy_static",
"num_cpus", "num_cpus",
] ]
@ -714,18 +719,18 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.136" version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.136" version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -734,9 +739,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.79" version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" checksum = "f972498cf015f7c0746cac89ebe1d6ef10c293b94175a243a2d9442c163d9944"
dependencies = [ dependencies = [
"itoa", "itoa",
"ryu", "ryu",
@ -782,9 +787,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.91" version = "1.0.92"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b683b2b825c8eef438b77c36a06dc262294da3d5a5813fac20da149241dcd44d" checksum = "7ff7c592601f11445996a06f8ad0c27f094a58857c2f89e97974ab9235b92c52"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -842,18 +847,18 @@ dependencies = [
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.30" version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a"
dependencies = [ dependencies = [
"thiserror-impl", "thiserror-impl",
] ]
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "1.0.30" version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -958,9 +963,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.34.0" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5acdd78cb4ba54c0045ac14f62d8f94a03d10047904ae2a40afa1e99d8f70825" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
dependencies = [ dependencies = [
"windows_aarch64_msvc", "windows_aarch64_msvc",
"windows_i686_gnu", "windows_i686_gnu",
@ -971,33 +976,33 @@ dependencies = [
[[package]] [[package]]
name = "windows_aarch64_msvc" name = "windows_aarch64_msvc"
version = "0.34.0" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17cffbe740121affb56fad0fc0e421804adf0ae00891205213b5cecd30db881d" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
[[package]] [[package]]
name = "windows_i686_gnu" name = "windows_i686_gnu"
version = "0.34.0" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2564fde759adb79129d9b4f54be42b32c89970c18ebf93124ca8870a498688ed" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
[[package]] [[package]]
name = "windows_i686_msvc" name = "windows_i686_msvc"
version = "0.34.0" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cd9d32ba70453522332c14d38814bceeb747d80b3958676007acadd7e166956" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
[[package]] [[package]]
name = "windows_x86_64_gnu" name = "windows_x86_64_gnu"
version = "0.34.0" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfce6deae227ee8d356d19effc141a509cc503dfd1f850622ec4b0f84428e1f4" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
[[package]] [[package]]
name = "windows_x86_64_msvc" name = "windows_x86_64_msvc"
version = "0.34.0" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d19538ccc21819d01deaf88d6a17eae6596a12e9aafdbb97916fb49896d89de9" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
[[package]] [[package]]
name = "xdg-mime" name = "xdg-mime"

View file

@ -127,16 +127,39 @@ pub fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, Sc
Ok(Some(result)) => result, Ok(Some(result)) => result,
}; };
// set of known extensions for the given MIME type
let known_exts = mime_extension_lookup(result.essence_str().into());
// file extension for this particular file
let entry_ext = path.extension();
let valid = match known_exts { // determine whether or not the file's current extension is valid
// there is a known set of extensions for this MIME type, and the file has an extension let valid = if let Some(entry_ext) = path.extension() {
Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_string_lossy().to_lowercase().into()), // discard invalid UTF-8 and convert to lowercase. all extensions in both backend's databases are lowercase
// either this file has no extension, or there is no known set of extensions for this MIME type :( // ascii, so this assumption is fine.
Some(_) | None => false, let entry_ext = entry_ext.to_string_lossy().to_lowercase();
// if the file has any of these extensions, it is probably either:
// - a copy of another file renamed for backup purposes (e.g. a word processor might save by renaming "my.doc" to
// "my.doc.bak", then creating "my.doc", leaving the backup for safekeeping), which shouldn't be renamed so as
// not to break the backup program
// - a partially downloaded file, which shouldn't be renamed to avoid corrupting it and blocking the downloader
// from resuming
if ["bak", "backup", "filepart", "part", "crdownload"]
.iter()
.any(|ext| ext == &entry_ext)
{
true
} else {
// otherwise, check to see whether there's a known extension for this file type
// retrieve set of known extensions for the given MIME type
let known_exts = mime_extension_lookup(result.essence_str().into());
match known_exts {
// there is a known set of extensions for this MIME type - is entry_ext in the given set?
Some(e) => e.contains(&entry_ext.into()),
// there is no known set of extensions for this MIME type :(
None => false,
}
}
} else {
// this file has no extension
false
}; };
let path = if canonical_paths { let path = if canonical_paths {
@ -350,16 +373,47 @@ pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
// classic office files considered harmful // classic office files considered harmful
vec![String::from("doc"), String::from("xls"), String::from("ppt")] vec![String::from("doc"), String::from("xls"), String::from("ppt")]
} else if essence == "application/zip" { } else if essence == "application/zip" {
// neither xdg-mime nor infer seem to be able to detect office XML files properly... // both backends seem to be unable to consistently detect OOXML files, so they should be considered valid
// extensions for zip files to prevent them being erroneously renamed.
// additionally, there are various popular formats that are just renamed zip files, such as android's apk
// format, that also shouldn't be renamed.
[ [
vec![String::from("zip"), String::from("docx"), String::from("xlsx"), String::from("pptx")], vec![
String::from("zip"),
String::from("docx"),
String::from("xlsx"),
String::from("pptx"),
String::from("apk"),
String::from("ipa"),
String::from("docbook"),
String::from("kdenlive"),
String::from("vcpkg"),
String::from("nupkg"),
String::from("whl"),
String::from("xpi"),
],
possible_exts, possible_exts,
] ]
.concat() .concat()
} else if essence == "application/x-ms-dos-executable" { } else if essence == "application/x-ms-dos-executable" {
// .dll, .exe, and .scr files are given the same MIME type... but you definitely don't want to rename one to the // .dll, .exe, .scr, etc. files are given the same MIME type, and aren't really distinguishable from each other
// other! // ... but you definitely don't want to rename one to the other!
[vec![String::from("dll"), String::from("exe"), String::from("scr")], possible_exts].concat() [
vec![
String::from("exe"),
String::from("dll"),
String::from("scr"),
String::from("com"),
String::from("dll16"),
String::from("drv"),
String::from("drv16"),
String::from("cpl"),
String::from("msstyles"),
String::from("sys"),
],
possible_exts,
]
.concat()
} else { } else {
possible_exts possible_exts
}) })