Compare commits

...

5 Commits

Author SHA1 Message Date
Lynne Megido cabed845ce
release v0.5.2 🥳 2022-05-02 18:22:09 +10:00
Lynne Megido 20587fd663
clippy! 2022-05-02 18:13:06 +10:00
Lynne Megido 944360f9ab
documentation 2022-05-02 18:11:28 +10:00
Lynne Megido c9f68d8373
add reminder to use `fif --fix` 2022-05-02 18:07:14 +10:00
Lynne Megido 1b0505aad1
don't rename certain zip/exe subtypes. fixes #1 2022-05-02 18:06:51 +10:00
6 changed files with 137 additions and 60 deletions

View File

@ -4,6 +4,16 @@ Dates are given in YYYY-MM-DD format - for example, the 15th of October 2021 is
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to
[Semantic Versioning](https://semver.org/spec/v2.0.0.html). [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
### Added
- Output now contains a reminder to use `fif --fix`
### Changed
- Some extensions are considered to be always valid - these are:
- "filepart", "part", "crdownload": Partially downloaded files, renaming could break download
- "bak", "backup": Backup copies are a common idiom (e.g. "game.exe.bak") and should be respected
### Fixed
- Support for many file types that are subcategories of others (e.g., fif will no longer rename apk files to zip) (#1)
## v0.5.1 - 2022-04-12 ## v0.5.1 - 2022-04-12
### Added ### Added
- When using the [`infer`] backend, fif is now able to detect [Mach-O](https://en.wikipedia.org/wiki/Mach-O) binaries - When using the [`infer`] backend, fif is now able to detect [Mach-O](https://en.wikipedia.org/wiki/Mach-O) binaries

89
Cargo.lock generated
View File

@ -80,16 +80,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]] [[package]]
name = "clap" name = "clap"
version = "3.1.8" version = "3.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "71c47df61d9e16dc010b55dba1952a57d8c215dbb533fd13cdd13369aac73b1c" checksum = "535434c063ced786eb04aaf529308092c5ab60889e8fe24275d15de07b01fa97"
dependencies = [ dependencies = [
"atty", "atty",
"bitflags", "bitflags",
"clap_derive", "clap_derive",
"clap_lex",
"indexmap", "indexmap",
"lazy_static", "lazy_static",
"os_str_bytes",
"termcolor", "termcolor",
"terminal_size", "terminal_size",
"textwrap", "textwrap",
@ -109,6 +109,15 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "clap_lex"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a37c35f1112dad5e6e0b1adaff798507497a18fceeb30cceb3bae7d1427b9213"
dependencies = [
"os_str_bytes",
]
[[package]] [[package]]
name = "crossbeam-channel" name = "crossbeam-channel"
version = "0.5.4" version = "0.5.4"
@ -221,7 +230,7 @@ dependencies = [
[[package]] [[package]]
name = "fif" name = "fif"
version = "0.5.1" version = "0.5.2"
dependencies = [ dependencies = [
"assert_cmd", "assert_cmd",
"cfg-if", "cfg-if",
@ -351,9 +360,9 @@ dependencies = [
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.122" version = "0.2.125"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec647867e2bf0772e28c8bcde4f0d19a9216916e890543b5a03ed8ef27b8f259" checksum = "5916d2ae698f6de9bfb891ad7a8d65c09d232dc58cc4ac433c7da3b2fd84bc2b"
[[package]] [[package]]
name = "lock_api" name = "lock_api"
@ -382,9 +391,9 @@ checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.4.1" version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
[[package]] [[package]]
name = "memoffset" name = "memoffset"
@ -445,9 +454,6 @@ name = "os_str_bytes"
version = "6.0.0" version = "6.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64" checksum = "8e22443d1643a904602595ba1cd8f7d896afe56d26712531c5ff73a15b2fbf64"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "parking_lot" name = "parking_lot"
@ -461,9 +467,9 @@ dependencies = [
[[package]] [[package]]
name = "parking_lot_core" name = "parking_lot_core"
version = "0.9.2" version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "995f667a6c822200b0433ac218e05582f0e2efa1b922a3fd2fbaadc5f87bab37" checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"libc", "libc",
@ -618,9 +624,9 @@ dependencies = [
[[package]] [[package]]
name = "rayon" name = "rayon"
version = "1.5.1" version = "1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" checksum = "fd249e82c21598a9a426a4e00dd7adc1d640b22445ec8545feef801d1a74c221"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"crossbeam-deque", "crossbeam-deque",
@ -630,14 +636,13 @@ dependencies = [
[[package]] [[package]]
name = "rayon-core" name = "rayon-core"
version = "1.9.1" version = "1.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" checksum = "9f51245e1e62e1f1629cbfec37b5793bbabcaeb90f30e94d2ba03564687353e4"
dependencies = [ dependencies = [
"crossbeam-channel", "crossbeam-channel",
"crossbeam-deque", "crossbeam-deque",
"crossbeam-utils", "crossbeam-utils",
"lazy_static",
"num_cpus", "num_cpus",
] ]
@ -714,18 +719,18 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.136" version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.136" version = "1.0.137"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -734,9 +739,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.79" version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" checksum = "f972498cf015f7c0746cac89ebe1d6ef10c293b94175a243a2d9442c163d9944"
dependencies = [ dependencies = [
"itoa", "itoa",
"ryu", "ryu",
@ -782,9 +787,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.91" version = "1.0.92"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b683b2b825c8eef438b77c36a06dc262294da3d5a5813fac20da149241dcd44d" checksum = "7ff7c592601f11445996a06f8ad0c27f094a58857c2f89e97974ab9235b92c52"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -842,18 +847,18 @@ dependencies = [
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.30" version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a"
dependencies = [ dependencies = [
"thiserror-impl", "thiserror-impl",
] ]
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "1.0.30" version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -958,9 +963,9 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.34.0" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5acdd78cb4ba54c0045ac14f62d8f94a03d10047904ae2a40afa1e99d8f70825" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
dependencies = [ dependencies = [
"windows_aarch64_msvc", "windows_aarch64_msvc",
"windows_i686_gnu", "windows_i686_gnu",
@ -971,33 +976,33 @@ dependencies = [
[[package]] [[package]]
name = "windows_aarch64_msvc" name = "windows_aarch64_msvc"
version = "0.34.0" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17cffbe740121affb56fad0fc0e421804adf0ae00891205213b5cecd30db881d" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
[[package]] [[package]]
name = "windows_i686_gnu" name = "windows_i686_gnu"
version = "0.34.0" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2564fde759adb79129d9b4f54be42b32c89970c18ebf93124ca8870a498688ed" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
[[package]] [[package]]
name = "windows_i686_msvc" name = "windows_i686_msvc"
version = "0.34.0" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9cd9d32ba70453522332c14d38814bceeb747d80b3958676007acadd7e166956" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
[[package]] [[package]]
name = "windows_x86_64_gnu" name = "windows_x86_64_gnu"
version = "0.34.0" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfce6deae227ee8d356d19effc141a509cc503dfd1f850622ec4b0f84428e1f4" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
[[package]] [[package]]
name = "windows_x86_64_msvc" name = "windows_x86_64_msvc"
version = "0.34.0" version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d19538ccc21819d01deaf88d6a17eae6596a12e9aafdbb97916fb49896d89de9" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"
[[package]] [[package]]
name = "xdg-mime" name = "xdg-mime"

View File

@ -1,7 +1,7 @@
[package] [package]
name = "fif" name = "fif"
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions." description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
version = "0.5.1" version = "0.5.2"
authors = ["Lynnesbian <lynne@bune.city>"] authors = ["Lynnesbian <lynne@bune.city>"]
edition = "2018" edition = "2018"
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"

View File

@ -232,6 +232,10 @@ same line (["OTBS"](https://en.wikipedia.org/wiki/Indentation_style#Variant:_1TB
For more detailed information on the formatting rules used by this project, see the configured options in For more detailed information on the formatting rules used by this project, see the configured options in
[`rustfmt.toml`](https://gitlab.com/Lynnesbian/fif/-/blob/master/rustfmt.toml). [`rustfmt.toml`](https://gitlab.com/Lynnesbian/fif/-/blob/master/rustfmt.toml).
## Additional credits
Some of the code for correctly handling files with multiple valid extensions (particularly in the case of the
Portable Executable format) comes from [Czkawka](https://github.com/qarmin/czkawka)
## License ## License
Copyright (C) 2021 Lynnesbian Copyright (C) 2021 Lynnesbian

View File

@ -127,16 +127,38 @@ pub fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, Sc
Ok(Some(result)) => result, Ok(Some(result)) => result,
}; };
// set of known extensions for the given MIME type // determine whether or not the file's current extension is valid
let known_exts = mime_extension_lookup(result.essence_str().into()); let valid = if let Some(entry_ext) = path.extension() {
// file extension for this particular file // discard invalid UTF-8 and convert to lowercase. all extensions in both backend's databases are lowercase
let entry_ext = path.extension(); // ascii, so this assumption is fine.
let entry_ext = entry_ext.to_string_lossy().to_lowercase();
let valid = match known_exts { // if the file has any of these extensions, it is probably either:
// there is a known set of extensions for this MIME type, and the file has an extension // - a copy of another file renamed for backup purposes (e.g. a word processor might save by renaming "my.doc" to
Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_string_lossy().to_lowercase().into()), // "my.doc.bak", then creating "my.doc", leaving the backup for safekeeping), which shouldn't be renamed so as
// either this file has no extension, or there is no known set of extensions for this MIME type :( // not to break the backup program
Some(_) | None => false, // - a partially downloaded file, which shouldn't be renamed to avoid corrupting it and blocking the downloader
// from resuming
if ["bak", "backup", "filepart", "part", "crdownload"]
.iter()
.any(|ext| ext == &entry_ext)
{
true
} else {
// otherwise, check to see whether there's a known extension for this file type
// retrieve set of known extensions for the given MIME type
let known_exts = mime_extension_lookup(result.essence_str().into());
match known_exts {
// there is a known set of extensions for this MIME type - is entry_ext in the given set?
Some(e) => e.contains(&entry_ext.into()),
// there is no known set of extensions for this MIME type :(
None => false,
}
}
} else {
// this file has no extension
false
}; };
let path = if canonical_paths { let path = if canonical_paths {
@ -262,9 +284,8 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
file.seek(SeekFrom::Start(0))?; file.seek(SeekFrom::Start(0))?;
read = file.read(&mut buffer); read = file.read(&mut buffer);
match read { match read {
Ok(_) => break,
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(_) => break, Err(_) | Ok(_) => break,
} }
} }
@ -350,16 +371,47 @@ pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
// classic office files considered harmful // classic office files considered harmful
vec![String::from("doc"), String::from("xls"), String::from("ppt")] vec![String::from("doc"), String::from("xls"), String::from("ppt")]
} else if essence == "application/zip" { } else if essence == "application/zip" {
// neither xdg-mime nor infer seem to be able to detect office XML files properly... // both backends seem to be unable to consistently detect OOXML files, so they should be considered valid
// extensions for zip files to prevent them being erroneously renamed.
// additionally, there are various popular formats that are just renamed zip files, such as android's apk
// format, that also shouldn't be renamed.
[ [
vec![String::from("zip"), String::from("docx"), String::from("xlsx"), String::from("pptx")], vec![
String::from("zip"),
String::from("docx"),
String::from("xlsx"),
String::from("pptx"),
String::from("apk"),
String::from("ipa"),
String::from("docbook"),
String::from("kdenlive"),
String::from("vcpkg"),
String::from("nupkg"),
String::from("whl"),
String::from("xpi"),
],
possible_exts, possible_exts,
] ]
.concat() .concat()
} else if essence == "application/x-ms-dos-executable" { } else if essence == "application/x-ms-dos-executable" {
// .dll, .exe, and .scr files are given the same MIME type... but you definitely don't want to rename one to the // .dll, .exe, .scr, etc. files are given the same MIME type, and aren't really distinguishable from each other
// other! // ... but you definitely don't want to rename one to the other!
[vec![String::from("dll"), String::from("exe"), String::from("scr")], possible_exts].concat() [
vec![
String::from("exe"),
String::from("dll"),
String::from("scr"),
String::from("com"),
String::from("dll16"),
String::from("drv"),
String::from("drv16"),
String::from("cpl"),
String::from("msstyles"),
String::from("sys"),
],
possible_exts,
]
.concat()
} else { } else {
possible_exts possible_exts
}) })

View File

@ -197,6 +197,7 @@ impl FormatSteps for Shell {
if let Ok(working_directory) = std::env::current_dir() { if let Ok(working_directory) = std::env::current_dir() {
smart_write(f, writablesln!["# Run from ", (working_directory.as_path())])?; smart_write(f, writablesln!["# Run from ", (working_directory.as_path())])?;
} }
write!(f, "# Happy with these changes? Run `fif --fix` from the same directory!")?;
smart_write(f, writablesln![Newline, "set -e", Newline]) smart_write(f, writablesln![Newline, "set -e", Newline])
} }
@ -253,6 +254,7 @@ impl FormatSteps for PowerShell {
if let Ok(working_directory) = std::env::current_dir() { if let Ok(working_directory) = std::env::current_dir() {
smart_write(f, writablesln!["<# Run from ", (working_directory.as_path()), " #>"])?; smart_write(f, writablesln!["<# Run from ", (working_directory.as_path()), " #>"])?;
} }
write!(f, "<# Happy with these changes? Run `fif --fix` from the same directory! #>")?;
smart_write(f, writables![Newline]) smart_write(f, writables![Newline])
} }
@ -287,7 +289,11 @@ impl FormatSteps for Text {
} }
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> { fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(f, writablesln![(generated_by().as_str()), Newline]) smart_write(f, writablesln![(generated_by().as_str()), Newline])?;
if let Ok(working_directory) = std::env::current_dir() {
smart_write(f, writablesln!["Run from ", (working_directory.as_path())])?;
}
write!(f, "Happy with these changes? Run `fif --fix` from the same directory!")
} }
fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> { fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> {