Compare commits

..

No commits in common. "cabed845ce693c37c6bcccc8cec8128ff11bf7c1" and "4b84b01adc36edfbded84182ad9cf442a492b8b9" have entirely different histories.

6 changed files with 60 additions and 137 deletions

View file

@ -4,16 +4,6 @@ Dates are given in YYYY-MM-DD format - for example, the 15th of October 2021 is
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to
[Semantic Versioning](https://semver.org/spec/v2.0.0.html). [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
### Added
- Output now contains a reminder to use `fif --fix`
### Changed
- Some extensions are considered to be always valid - these are:
- "filepart", "part", "crdownload": Partially downloaded files, renaming could break download
- "bak", "backup": Backup copies are a common idiom (e.g. "game.exe.bak") and should be respected
### Fixed
- Support for many file types that are subcategories of others (e.g., fif will no longer rename apk files to zip) (#1)
## v0.5.1 - 2022-04-12 ## v0.5.1 - 2022-04-12
### Added ### Added
- When using the [`infer`] backend, fif is now able to detect [Mach-O](https://en.wikipedia.org/wiki/Mach-O) binaries - When using the [`infer`] backend, fif is now able to detect [Mach-O](https://en.wikipedia.org/wiki/Mach-O) binaries

89
Cargo.lock generated
View file

@ -80,16 +80,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]] [[package]]
name = "clap" name = "clap"
version = "3.1.14" version = "3.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "535434c063ced786eb04aaf529308092c5ab60889e8fe24275d15de07b01fa97" checksum = "71c47df61d9e16dc010b55dba1952a57d8c215dbb533fd13cdd13369aac73b1c"
dependencies = [ dependencies = [
"atty", "atty",
"bitflags", "bitflags",
"clap_derive", "clap_derive",
"clap_lex",
"indexmap", "indexmap",
"lazy_static", "lazy_static",
"os_str_bytes",
"termcolor", "termcolor",
"terminal_size", "terminal_size",
"textwrap", "textwrap",
@ -109,15 +109,6 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "clap_lex"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a37c35f1112dad5e6e0b1adaff798507497a18fceeb30cceb3bae7d1427b9213"
dependencies = [
"os_str_bytes",
]
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.5.4" version = "0.5.4"
@ -230,7 +221,7 @@ dependencies = [
[[package]] [[package]]
name = "fif" name = "fif"
version = "0.5.2" version = "0.5.1"
dependencies = [ dependencies = [
"assert_cmd", "assert_cmd",
"cfg-if", "cfg-if",
@ -360,9 +351,9 @@ dependencies = [
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.125" version = "0.2.122"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b" checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259"
[[package]] [[package]]
name = "lock_api" name = "lock_api"
@ -391,9 +382,9 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.5.0" version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]] [[package]]
name = "memoffset" name = "memoffset"
@ -454,6 +445,9 @@ name = "os_str_bytes"
version = "6.0.0" version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64" checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "parking_lot" name = "parking_lot"
@ -467,9 +461,9 @@ dependencies = [
[[package]] [[package]]
name = "parking_lot_core" name = "parking_lot_core"
version = "0.9.3" version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" checksum = "995f667a6c822200b0433ac218e05582f0e2efa1b922a3fd2fbaadc5f87bab37"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"libc", "libc",
@ -624,9 +618,9 @@ dependencies = [
[[package]] [[package]]
name = "rayon" name = "rayon"
version = "1.5.2" version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd249e82c21598a9a426a4e00dd7adc1d640b22445ec8545feef801d1a74c221" checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"crossbeam-deque", "crossbeam-deque",
@ -636,13 +630,14 @@ dependencies = [
[[package]] [[package]]
name = "rayon-core" name = "rayon-core"
version = "1.9.2" version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f51245e1e62e1f1629cbfec37b5793bbabcaeb90f30e94d2ba03564687353e4" checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e"
dependencies = [ dependencies = [
"crossbeam-channel", "crossbeam-channel",
"crossbeam-deque", "crossbeam-deque",
"crossbeam-utils", "crossbeam-utils",
"lazy_static",
"num_cpus", "num_cpus",
] ]
@ -719,18 +714,18 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.137" version = "1.0.136"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.137" version = "1.0.136"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -739,9 +734,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.80" version = "1.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f972498cf015f7c0746cac89ebe1d6ef10c293b94175a243a2d9442c163d9944" checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95"
dependencies = [ dependencies = [
"itoa", "itoa",
"ryu", "ryu",
@ -787,9 +782,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.92" version = "1.0.91"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ff7c592601f11445996a06f8ad0c27f094a58857c2f89e97974ab9235b92c52" checksum = "b683b2b825c8eef438b77c36a06dc262294da3d5a5813fac20da149241dcd44d"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -847,18 +842,18 @@ dependencies = [
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.31" version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417"
dependencies = [ dependencies = [
"thiserror-impl", "thiserror-impl",
] ]
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "1.0.31" version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -963,9 +958,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.36.1" version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" checksum = "5acdd78cb4ba54c0045ac14f62d8f94a03d10047904ae2a40afa1e99d8f70825"
dependencies = [ dependencies = [
"windows_aarch64_msvc", "windows_aarch64_msvc",
"windows_i686_gnu", "windows_i686_gnu",
@ -976,33 +971,33 @@ dependencies = [
[[package]] [[package]]
name = "windows_aarch64_msvc" name = "windows_aarch64_msvc"
version = "0.36.1" version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" checksum = "17cffbe740121affb56fad0fc0e421804adf0ae00891205213b5cecd30db881d"
[[package]] [[package]]
name = "windows_i686_gnu" name = "windows_i686_gnu"
version = "0.36.1" version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" checksum = "2564fde759adb79129d9b4f54be42b32c89970c18ebf93124ca8870a498688ed"
[[package]] [[package]]
name = "windows_i686_msvc" name = "windows_i686_msvc"
version = "0.36.1" version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" checksum = "9cd9d32ba70453522332c14d38814bceeb747d80b3958676007acadd7e166956"
[[package]] [[package]]
name = "windows_x86_64_gnu" name = "windows_x86_64_gnu"
version = "0.36.1" version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" checksum = "cfce6deae227ee8d356d19effc141a509cc503dfd1f850622ec4b0f84428e1f4"
[[package]] [[package]]
name = "windows_x86_64_msvc" name = "windows_x86_64_msvc"
version = "0.36.1" version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" checksum = "d19538ccc21819d01deaf88d6a17eae6596a12e9aafdbb97916fb49896d89de9"
[[package]] [[package]]
name = "xdg-mime" name = "xdg-mime"

View file

@ -1,7 +1,7 @@
[package] [package]
name = "fif" name = "fif"
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions." description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
version = "0.5.2" version = "0.5.1"
authors = ["Lynnesbian <lynne@bune.city>"] authors = ["Lynnesbian <lynne@bune.city>"]
edition = "2018" edition = "2018"
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"

View file

@ -232,10 +232,6 @@ same line (["OTBS"](https://en.wikipedia.org/wiki/Indentation_style#Variant:_1TB
For more detailed information on the formatting rules used by this project, see the configured options in For more detailed information on the formatting rules used by this project, see the configured options in
[`rustfmt.toml`](https://gitlab.com/Lynnesbian/fif/-/blob/master/rustfmt.toml). [`rustfmt.toml`](https://gitlab.com/Lynnesbian/fif/-/blob/master/rustfmt.toml).
## Additional credits
Some of the code for correctly handling files with multiple valid extensions (particularly in the case of the
Portable Executable format) comes from [Czkawka](https://github.com/qarmin/czkawka)
## License ## License
Copyright (C) 2021 Lynnesbian Copyright (C) 2021 Lynnesbian

View file

@ -127,38 +127,16 @@ pub fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, Sc
Ok(Some(result)) => result, Ok(Some(result)) => result,
}; };
// determine whether or not the file's current extension is valid // set of known extensions for the given MIME type
let valid = if let Some(entry_ext) = path.extension() {
// discard invalid UTF-8 and convert to lowercase. all extensions in both backend's databases are lowercase
// ascii, so this assumption is fine.
let entry_ext = entry_ext.to_string_lossy().to_lowercase();
// if the file has any of these extensions, it is probably either:
// - a copy of another file renamed for backup purposes (e.g. a word processor might save by renaming "my.doc" to
// "my.doc.bak", then creating "my.doc", leaving the backup for safekeeping), which shouldn't be renamed so as
// not to break the backup program
// - a partially downloaded file, which shouldn't be renamed to avoid corrupting it and blocking the downloader
// from resuming
if ["bak", "backup", "filepart", "part", "crdownload"]
.iter()
.any(|ext| ext == &entry_ext)
{
true
} else {
// otherwise, check to see whether there's a known extension for this file type
// retrieve set of known extensions for the given MIME type
let known_exts = mime_extension_lookup(result.essence_str().into()); let known_exts = mime_extension_lookup(result.essence_str().into());
match known_exts { // file extension for this particular file
// there is a known set of extensions for this MIME type - is entry_ext in the given set? let entry_ext = path.extension();
Some(e) => e.contains(&entry_ext.into()),
// there is no known set of extensions for this MIME type :( let valid = match known_exts {
None => false, // there is a known set of extensions for this MIME type, and the file has an extension
} Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_string_lossy().to_lowercase().into()),
} // either this file has no extension, or there is no known set of extensions for this MIME type :(
} else { Some(_) | None => false,
// this file has no extension
false
}; };
let path = if canonical_paths { let path = if canonical_paths {
@ -284,8 +262,9 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
file.seek(SeekFrom::Start(0))?; file.seek(SeekFrom::Start(0))?;
read = file.read(&mut buffer); read = file.read(&mut buffer);
match read { match read {
Ok(_) => break,
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(_) | Ok(_) => break, Err(_) => break,
} }
} }
@ -371,47 +350,16 @@ pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
// classic office files considered harmful // classic office files considered harmful
vec![String::from("doc"), String::from("xls"), String::from("ppt")] vec![String::from("doc"), String::from("xls"), String::from("ppt")]
} else if essence == "application/zip" { } else if essence == "application/zip" {
// both backends seem to be unable to consistently detect OOXML files, so they should be considered valid // neither xdg-mime nor infer seem to be able to detect office XML files properly...
// extensions for zip files to prevent them being erroneously renamed.
// additionally, there are various popular formats that are just renamed zip files, such as android's apk
// format, that also shouldn't be renamed.
[ [
vec![ vec![String::from("zip"), String::from("docx"), String::from("xlsx"), String::from("pptx")],
String::from("zip"),
String::from("docx"),
String::from("xlsx"),
String::from("pptx"),
String::from("apk"),
String::from("ipa"),
String::from("docbook"),
String::from("kdenlive"),
String::from("vcpkg"),
String::from("nupkg"),
String::from("whl"),
String::from("xpi"),
],
possible_exts, possible_exts,
] ]
.concat() .concat()
} else if essence == "application/x-ms-dos-executable" { } else if essence == "application/x-ms-dos-executable" {
// .dll, .exe, .scr, etc. files are given the same MIME type, and aren't really distinguishable from each other // .dll, .exe, and .scr files are given the same MIME type... but you definitely don't want to rename one to the
// ... but you definitely don't want to rename one to the other! // other!
[ [vec![String::from("dll"), String::from("exe"), String::from("scr")], possible_exts].concat()
vec![
String::from("exe"),
String::from("dll"),
String::from("scr"),
String::from("com"),
String::from("dll16"),
String::from("drv"),
String::from("drv16"),
String::from("cpl"),
String::from("msstyles"),
String::from("sys"),
],
possible_exts,
]
.concat()
} else { } else {
possible_exts possible_exts
}) })

View file

@ -197,7 +197,6 @@ impl FormatSteps for Shell {
if let Ok(working_directory) = std::env::current_dir() { if let Ok(working_directory) = std::env::current_dir() {
smart_write(f, writablesln!["# Run from ", (working_directory.as_path())])?; smart_write(f, writablesln!["# Run from ", (working_directory.as_path())])?;
} }
write!(f, "# Happy with these changes? Run `fif --fix` from the same directory!")?;
smart_write(f, writablesln![Newline, "set -e", Newline]) smart_write(f, writablesln![Newline, "set -e", Newline])
} }
@ -254,7 +253,6 @@ impl FormatSteps for PowerShell {
if let Ok(working_directory) = std::env::current_dir() { if let Ok(working_directory) = std::env::current_dir() {
smart_write(f, writablesln!["<# Run from ", (working_directory.as_path()), " #>"])?; smart_write(f, writablesln!["<# Run from ", (working_directory.as_path()), " #>"])?;
} }
write!(f, "<# Happy with these changes? Run `fif --fix` from the same directory! #>")?;
smart_write(f, writables![Newline]) smart_write(f, writables![Newline])
} }
@ -289,11 +287,7 @@ impl FormatSteps for Text {
} }
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> { fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(f, writablesln![(generated_by().as_str()), Newline])?; smart_write(f, writablesln![(generated_by().as_str()), Newline])
if let Ok(working_directory) = std::env::current_dir() {
smart_write(f, writablesln!["Run from ", (working_directory.as_path())])?;
}
write!(f, "Happy with these changes? Run `fif --fix` from the same directory!")
} }
fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> { fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> {