diff --git a/CHANGELOG.md b/CHANGELOG.md index 31d5afb..3291d0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ Dates are given in YYYY-MM-DD format - for example, the 15th of October 2021 is The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased +### Changed +- Some extensions are considered to be always valid - these are: + - "filepart", "part", "crdownload": Partially downloaded files, renaming could break download + - "bak", "backup": Backup copies are a common idiom (e.g. "game.exe.bak") and should be respected +### Fixed +- Support for many file types that are subcategories of others (e.g., fif will no longer rename apk files to zip) + ## v0.5.1 - 2022-04-12 ### Added - When using the [`infer`] backend, fif is now able to detect [Mach-O](https://en.wikipedia.org/wiki/Mach-O) binaries diff --git a/Cargo.lock b/Cargo.lock index ca30bd5..463e005 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -80,16 +80,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "3.1.8" +version = "3.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c47df61d9e16dc010b55dba1952a57d8c215dbb533fd13cdd13369aac73b1c" +checksum = "535434c063ced786eb04aaf529308092c5ab60889e8fe24275d15de07b01fa97" dependencies = [ "atty", "bitflags", "clap_derive", + "clap_lex", "indexmap", "lazy_static", - "os_str_bytes", "termcolor", "terminal_size", "textwrap", @@ -109,6 +109,15 @@ dependencies = [ "syn", ] +[[package]] +name = "clap_lex" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a37c35f1112dad5e6e0b1adaff798507497a18fceeb30cceb3bae7d1427b9213" +dependencies = [ + "os_str_bytes", +] + [[package]] name = "crossbeam-channel" version = "0.5.4" @@ -351,9 +360,9 @@ dependencies = [ [[package]] name = "libc" -version = 
"0.2.122" +version = "0.2.125" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259" +checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b" [[package]] name = "lock_api" @@ -382,9 +391,9 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" [[package]] name = "memchr" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memoffset" @@ -445,9 +454,6 @@ name = "os_str_bytes" version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64" -dependencies = [ - "memchr", -] [[package]] name = "parking_lot" @@ -461,9 +467,9 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "995f667a6c822200b0433ac218e05582f0e2efa1b922a3fd2fbaadc5f87bab37" +checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" dependencies = [ "cfg-if", "libc", @@ -618,9 +624,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.5.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" +checksum = "fd249e82c21598a9a426a4e00dd7adc1d640b22445ec8545feef801d1a74c221" dependencies = [ "autocfg", "crossbeam-deque", @@ -630,14 +636,13 @@ dependencies = [ [[package]] name = "rayon-core" -version = "1.9.1" +version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" +checksum = 
"9f51245e1e62e1f1629cbfec37b5793bbabcaeb90f30e94d2ba03564687353e4" dependencies = [ "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", - "lazy_static", "num_cpus", ] @@ -714,18 +719,18 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "serde" -version = "1.0.136" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" +checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.136" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" +checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" dependencies = [ "proc-macro2", "quote", @@ -734,9 +739,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.79" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" +checksum = "f972498cf015f7c0746cac89ebe1d6ef10c293b94175a243a2d9442c163d9944" dependencies = [ "itoa", "ryu", @@ -782,9 +787,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" -version = "1.0.91" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b683b2b825c8eef438b77c36a06dc262294da3d5a5813fac20da149241dcd44d" +checksum = "7ff7c592601f11445996a06f8ad0c27f094a58857c2f89e97974ab9235b92c52" dependencies = [ "proc-macro2", "quote", @@ -842,18 +847,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.30" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +checksum = 
"bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.30" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" dependencies = [ "proc-macro2", "quote", @@ -958,9 +963,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" -version = "0.34.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5acdd78cb4ba54c0045ac14f62d8f94a03d10047904ae2a40afa1e99d8f70825" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ "windows_aarch64_msvc", "windows_i686_gnu", @@ -971,33 +976,33 @@ dependencies = [ [[package]] name = "windows_aarch64_msvc" -version = "0.34.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17cffbe740121affb56fad0fc0e421804adf0ae00891205213b5cecd30db881d" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" [[package]] name = "windows_i686_gnu" -version = "0.34.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2564fde759adb79129d9b4f54be42b32c89970c18ebf93124ca8870a498688ed" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" [[package]] name = "windows_i686_msvc" -version = "0.34.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cd9d32ba70453522332c14d38814bceeb747d80b3958676007acadd7e166956" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" [[package]] name = "windows_x86_64_gnu" -version = "0.34.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "cfce6deae227ee8d356d19effc141a509cc503dfd1f850622ec4b0f84428e1f4" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" [[package]] name = "windows_x86_64_msvc" -version = "0.34.0" +version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d19538ccc21819d01deaf88d6a17eae6596a12e9aafdbb97916fb49896d89de9" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" [[package]] name = "xdg-mime" diff --git a/src/files.rs b/src/files.rs index 2be3659..fc7218f 100644 --- a/src/files.rs +++ b/src/files.rs @@ -127,16 +127,39 @@ pub fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result result, }; - // set of known extensions for the given MIME type - let known_exts = mime_extension_lookup(result.essence_str().into()); - // file extension for this particular file - let entry_ext = path.extension(); - let valid = match known_exts { - // there is a known set of extensions for this MIME type, and the file has an extension - Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_string_lossy().to_lowercase().into()), - // either this file has no extension, or there is no known set of extensions for this MIME type :( - Some(_) | None => false, + // determine whether or not the file's current extension is valid + let valid = if let Some(entry_ext) = path.extension() { + // discard invalid UTF-8 and convert to lowercase. all extensions in both backends' databases are lowercase + // ascii, so this assumption is fine. + let entry_ext = entry_ext.to_string_lossy().to_lowercase(); + + // if the file has any of these extensions, it is probably either: + // - a copy of another file renamed for backup purposes (e.g. 
a word processor might save by renaming "my.doc" to + // "my.doc.bak", then creating "my.doc", leaving the backup for safekeeping), which shouldn't be renamed so as + // not to break the backup program + // - a partially downloaded file, which shouldn't be renamed to avoid corrupting it and blocking the downloader + // from resuming + if ["bak", "backup", "filepart", "part", "crdownload"] + .iter() + .any(|ext| ext == &entry_ext) + { + true + } else { + // otherwise, check to see whether there's a known extension for this file type + + // retrieve set of known extensions for the given MIME type + let known_exts = mime_extension_lookup(result.essence_str().into()); + match known_exts { + // there is a known set of extensions for this MIME type - is entry_ext in the given set? + Some(e) => e.contains(&entry_ext.into()), + // there is no known set of extensions for this MIME type :( + None => false, + } + } + } else { + // this file has no extension + false }; let path = if canonical_paths { @@ -350,16 +373,47 @@ pub fn mime_extension_lookup(essence: String) -> Option> { // classic office files considered harmful vec![String::from("doc"), String::from("xls"), String::from("ppt")] } else if essence == "application/zip" { - // neither xdg-mime nor infer seem to be able to detect office XML files properly... + // both backends seem to be unable to consistently detect OOXML files, so they should be considered valid + // extensions for zip files to prevent them being erroneously renamed. + // additionally, there are various popular formats that are just renamed zip files, such as android's apk + // format, that also shouldn't be renamed. 
[ - vec![String::from("zip"), String::from("docx"), String::from("xlsx"), String::from("pptx")], + vec![ + String::from("zip"), + String::from("docx"), + String::from("xlsx"), + String::from("pptx"), + String::from("apk"), + String::from("ipa"), + String::from("docbook"), + String::from("kdenlive"), + String::from("vcpkg"), + String::from("nupkg"), + String::from("whl"), + String::from("xpi"), + ], possible_exts, ] .concat() } else if essence == "application/x-ms-dos-executable" { - // .dll, .exe, and .scr files are given the same MIME type... but you definitely don't want to rename one to the - // other! - [vec![String::from("dll"), String::from("exe"), String::from("scr")], possible_exts].concat() + // .dll, .exe, .scr, etc. files are given the same MIME type, and aren't really distinguishable from each other + // ... but you definitely don't want to rename one to the other! + [ + vec![ + String::from("exe"), + String::from("dll"), + String::from("scr"), + String::from("com"), + String::from("dll16"), + String::from("drv"), + String::from("drv16"), + String::from("cpl"), + String::from("msstyles"), + String::from("sys"), + ], + possible_exts, + ] + .concat() } else { possible_exts })