Compare commits

...

3 commits

12 changed files with 103 additions and 33 deletions

View file

@ -2,10 +2,16 @@
Dates are given in YYYY-MM-DD format. Dates are given in YYYY-MM-DD format.
## v0.3 ## v0.3
### v0.3.4 (2021-mm-dd) ### v0.3.4 (2021-08-07)
#### Features
- Added `-I`/`--ignore-unknown-exts` flag for ignoring files with unknown extensions - for example, if fif doesn't know
what a ".fake" file is, setting this flag will prevent it from renaming "photo.fake" to "photo.jpg". This is useful
for avoiding the case where fif mislabels a file in an obscure format it isn't aware of as something else.
#### Other #### Other
- Refactored `formats.rs` - Refactored `formats.rs`
- More accurate dependency versions in `Cargo.toml` to ensure that the MSRV stays supported - More accurate dependency versions in `Cargo.toml` to ensure that the MSRV stays supported
- Sister project (?) [`new-mime-guess`] is now v3.0.0: it has been updated to the 2018 edition of Rust, and some new file
and MIME types have been added
### v0.3.3 (2021-07-07) ### v0.3.3 (2021-07-07)
#### Features #### Features

58
Cargo.lock generated
View file

@ -183,7 +183,7 @@ checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193"
[[package]] [[package]]
name = "fif" name = "fif"
version = "0.3.3" version = "0.3.4"
dependencies = [ dependencies = [
"cached", "cached",
"cfg-if", "cfg-if",
@ -193,6 +193,7 @@ dependencies = [
"infer", "infer",
"itertools", "itertools",
"log", "log",
"mime",
"new_mime_guess", "new_mime_guess",
"once_cell", "once_cell",
"rand", "rand",
@ -344,11 +345,13 @@ checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
[[package]] [[package]]
name = "new_mime_guess" name = "new_mime_guess"
version = "2.1.1" version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "991337b97f81dff759c3edabb0bc01ceac92bff6f54852853824bbe1acd969f7" checksum = "c8c8e990db7ba41ce2b4b3a38380a0f3385d61c55b3d4999d6e00389562a79a2"
dependencies = [ dependencies = [
"mime", "mime",
"phf",
"phf_codegen",
"unicase", "unicase",
] ]
@ -385,6 +388,45 @@ version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85" checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85"
[[package]]
name = "phf"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b21b531851d1d86a90bd62dd79be87ce2d90b7bcb6afbae07813921d6156696a"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f8c56073a14f772740bd86231763732f7559635215bf75df9f26f1d713d99e0"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d43f3220d96e0080cc9ea234978ccd80d904eafb17be31bb0f76daaea6493082"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_shared"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a68318426de33640f02be62b4ae8eb1261be2efbc337b60c54d845bf4484e0d9"
dependencies = [
"siphasher",
"unicase",
]
[[package]] [[package]]
name = "ppv-lite86" name = "ppv-lite86"
version = "0.2.10" version = "0.2.10"
@ -500,9 +542,9 @@ dependencies = [
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.2.9" version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee" checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
dependencies = [ dependencies = [
"bitflags", "bitflags",
] ]
@ -578,6 +620,12 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "siphasher"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "729a25c17d72b06c68cb47955d44fda88ad2d3e7d77e025663fdd69b93dd71a1"
[[package]] [[package]]
name = "smartstring" name = "smartstring"
version = "0.2.7" version = "0.2.7"

View file

@ -1,7 +1,7 @@
[package] [package]
name = "fif" name = "fif"
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions." description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
version = "0.3.3" version = "0.3.4"
authors = ["Lynnesbian <lynne@bune.city>"] authors = ["Lynnesbian <lynne@bune.city>"]
edition = "2018" edition = "2018"
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"
@ -27,7 +27,8 @@ json = ["serde", "serde_json"]
[dependencies] [dependencies]
walkdir = "2.3.2" walkdir = "2.3.2"
log = "0.4.14" log = "0.4.14"
mime_guess = { package = "new_mime_guess", version = "2.1.1" } mime = "0.3.16"
mime_guess = { package = "new_mime_guess", features = ["phf-map"], version = "3.0.0" }
snailquote = "0.3.0" snailquote = "0.3.0"
once_cell = "1.8.0" once_cell = "1.8.0"
rayon = { version = "1.5.0", optional = true } rayon = { version = "1.5.0", optional = true }

View file

@ -32,6 +32,8 @@ for backend in "${_backends[@]}"; do
-A clippy::shadow_unrelated \ -A clippy::shadow_unrelated \
-A clippy::option_if_let_else \ -A clippy::option_if_let_else \
-A clippy::multiple-crate-versions \ -A clippy::multiple-crate-versions \
-A clippy::cast-possible-truncation \
-A clippy::cast-possible-wrap \
"$_extra" "$_extra"
done done
@ -41,3 +43,5 @@ done
# shadow_unrelated: sometimes things that seem unrelated are actually related ;) # shadow_unrelated: sometimes things that seem unrelated are actually related ;)
# option_if_let_else: the suggested code is usually harder to read than the original # option_if_let_else: the suggested code is usually harder to read than the original
# multiple_crate_versions: cached uses an old version of hashbrown :c # multiple_crate_versions: cached uses an old version of hashbrown :c
# cast-possible-truncation: only ever used where it would be totally fine
# cast-possible-wrap: ditto

View file

@ -1,6 +1,6 @@
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use mime_guess::Mime; use mime::Mime;
use crate::inspectors::mime_extension_lookup; use crate::inspectors::mime_extension_lookup;
use crate::string_type::String; use crate::string_type::String;

View file

@ -33,10 +33,6 @@ macro_rules! writables {
$crate::formats::Writable::Newline $crate::formats::Writable::Newline
}; };
(@do Space) => {
$crate::formats::Writable::Space
};
(@do $arg:expr) => { (@do $arg:expr) => {
$arg.into() $arg.into()
} }
@ -57,7 +53,6 @@ type Entries<'a> = [Result<Findings, ScanError<'a>>];
pub enum Writable<'a> { pub enum Writable<'a> {
String(&'a str), String(&'a str),
Path(&'a Path), Path(&'a Path),
Space,
Newline, Newline,
} }
@ -80,7 +75,6 @@ fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
// ehhhh // ehhhh
for writeable in writeables { for writeable in writeables {
match writeable { match writeable {
Writable::Space => write!(f, " ")?,
Writable::Newline => { Writable::Newline => {
cfg_if! { cfg_if! {
if #[cfg(windows)] { if #[cfg(windows)] {
@ -189,7 +183,7 @@ impl Format for Shell {
impl FormatSteps for Shell { impl FormatSteps for Shell {
fn rename<W: Write>(&self, f: &mut W, from: &Path, to: &Path) -> io::Result<()> { fn rename<W: Write>(&self, f: &mut W, from: &Path, to: &Path) -> io::Result<()> {
smart_write(f, writablesln!("mv -v -i -- ", from, Space, to)) smart_write(f, writablesln!("mv -v -i -- ", from, "\t", to))
} }
fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> { fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {

View file

@ -7,7 +7,7 @@ use std::path::Path;
use std::str::FromStr; use std::str::FromStr;
use cached::cached; use cached::cached;
use mime_guess::Mime; use mime::Mime;
use crate::mime_db::MimeDb; use crate::mime_db::MimeDb;
use crate::string_type::String; use crate::string_type::String;
@ -33,7 +33,7 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
let r = db.get_type(&buffer).filter(|mime| let r = db.get_type(&buffer).filter(|mime|
// some mime types should be investigated further, reading up to BUF_SIZE even if they've been determined already // some mime types should be investigated further, reading up to BUF_SIZE even if they've been determined already
// one such type is XML - there's many more specific types that can be determined by reading further (such as SVG) // one such type is XML - there's many more specific types that can be determined by reading further (such as SVG)
mime != &mime_guess::mime::TEXT_XML mime != &mime::TEXT_XML
// another is ZIP - many file formats (DOCX, ODT, JAR...) are just ZIP files with particular data structures. // another is ZIP - many file formats (DOCX, ODT, JAR...) are just ZIP files with particular data structures.
// determining that a file is in one of the MS office formats in particular requires looking quite far into the // determining that a file is in one of the MS office formats in particular requires looking quite far into the
// file. // file.
@ -92,13 +92,13 @@ cached! {
Some(exts) => { Some(exts) => {
let possible_exts: Vec<String> = exts.iter().map(|e| String::from(*e)).collect(); let possible_exts: Vec<String> = exts.iter().map(|e| String::from(*e)).collect();
Some(if essence == mime_guess::mime::IMAGE_JPEG.essence_str() { Some(if essence == mime::IMAGE_JPEG.essence_str() {
// possible_exts starts with "jpe", because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg are // possible_exts starts with "jpe", because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg are
// far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can // far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can
// add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif. // add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif.
[vec![String::from("jpg")], possible_exts].concat() [vec![String::from("jpg")], possible_exts].concat()
} else if essence == mime_guess::mime::TEXT_XML.essence_str() || essence == "application/xml" { } else if essence == mime::TEXT_XML.essence_str() || essence == "application/xml" {
// a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should // a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should
// (in my opinion) be "xml". // (in my opinion) be "xml".
// there's also another problem: SVG files can easily be misidentified as XML files, because they usually // there's also another problem: SVG files can easily be misidentified as XML files, because they usually

View file

@ -33,6 +33,7 @@ use crate::formats::Format;
use crate::mime_db::MimeDb; use crate::mime_db::MimeDb;
use crate::parameters::{OutputFormat, ScanOpts}; use crate::parameters::{OutputFormat, ScanOpts};
use crate::utils::{clap_long_version, os_name}; use crate::utils::{clap_long_version, os_name};
use mime_guess::from_ext;
use std::collections::BTreeSet; use std::collections::BTreeSet;
mod findings; mod findings;
@ -217,6 +218,11 @@ fn wanted_file(
let ext = ext.to_string_lossy().to_lowercase(); let ext = ext.to_string_lossy().to_lowercase();
let ext = ext.as_str(); let ext = ext.as_str();
if scan_opts.ignore_unknown_exts && from_ext(ext).is_empty() {
// unknown extension, skip.
return false;
}
if let Some(exts) = exts { if let Some(exts) = exts {
// only scan if the file has one of the specified extensions. // only scan if the file has one of the specified extensions.
exts.contains(&ext) exts.contains(&ext)

View file

@ -1,7 +1,7 @@
//! Backend-neutral Mime database implementation. //! Backend-neutral Mime database implementation.
use cfg_if::cfg_if; use cfg_if::cfg_if;
use mime_guess::Mime; use mime::Mime;
pub trait MimeDb { pub trait MimeDb {
fn init() -> Self; fn init() -> Self;

View file

@ -128,6 +128,11 @@ pub struct Parameters {
/// The directory to process. /// The directory to process.
#[clap(name = "DIR", default_value = ".", parse(from_os_str))] #[clap(name = "DIR", default_value = ".", parse(from_os_str))]
pub dir: PathBuf, pub dir: PathBuf,
/// Don't rename files with extensions unknown to fif.
/// For example, with this option, fif will not rename "image.unknown" to "image.jpg"
#[clap(short = 'I', long)]
pub ignore_unknown_exts: bool,
} }
fn lowercase_exts(exts: &str) -> Result<(), String> { fn lowercase_exts(exts: &str) -> Result<(), String> {
@ -140,6 +145,7 @@ fn lowercase_exts(exts: &str) -> Result<(), String> {
/// Further options relating to scanning. /// Further options relating to scanning.
#[derive(PartialEq, Debug)] #[derive(PartialEq, Debug)]
#[allow(clippy::struct_excessive_bools)]
pub struct ScanOpts { pub struct ScanOpts {
/// Whether hidden files and directories should be scanned. /// Whether hidden files and directories should be scanned.
pub hidden: bool, pub hidden: bool,
@ -147,6 +153,8 @@ pub struct ScanOpts {
pub extensionless: bool, pub extensionless: bool,
/// Should symlinks be followed? /// Should symlinks be followed?
pub follow_symlinks: bool, pub follow_symlinks: bool,
/// Whether to skip files whose extensions are unknown to fif, so that they are never renamed.
pub ignore_unknown_exts: bool,
} }
impl Parameters { impl Parameters {
@ -214,6 +222,7 @@ impl Parameters {
hidden: self.scan_hidden, hidden: self.scan_hidden,
extensionless: self.scan_extensionless, extensionless: self.scan_extensionless,
follow_symlinks: self.follow_symlinks, follow_symlinks: self.follow_symlinks,
ignore_unknown_exts: self.ignore_unknown_exts,
} }
} }

View file

@ -7,8 +7,7 @@ use crate::{scan_directory, scan_from_walkdir};
use crate::parameters::Parameters; use crate::parameters::Parameters;
use clap::Clap; use clap::Clap;
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG}; use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use mime_guess::Mime;
use crate::parameters::ExtensionSet; use crate::parameters::ExtensionSet;
use std::collections::HashMap; use std::collections::HashMap;
@ -101,6 +100,7 @@ fn simple_directory() {
files.insert("test.pdf", PDF_BYTES); files.insert("test.pdf", PDF_BYTES);
files.insert("test.zip", ZIP_BYTES); files.insert("test.zip", ZIP_BYTES);
files.insert("wrong.jpg", PNG_BYTES); files.insert("wrong.jpg", PNG_BYTES);
files.insert("ignore.fake_ext", ZIP_BYTES);
let dir = tempdir().expect("Failed to create temporary directory."); let dir = tempdir().expect("Failed to create temporary directory.");
set_current_dir(dir.path()).expect("Failed to change directory."); set_current_dir(dir.path()).expect("Failed to change directory.");
@ -118,11 +118,13 @@ fn simple_directory() {
hidden: true, hidden: true,
extensionless: false, extensionless: false,
follow_symlinks: false, follow_symlinks: false,
ignore_unknown_exts: true,
}; };
let entries = scan_directory(dir.path(), None, None, &scan_opts).expect("Directory scan failed."); let entries = scan_directory(dir.path(), None, None, &scan_opts).expect("Directory scan failed.");
assert_eq!(entries.len(), files.len()); // there should be one file missing: "ignore.fake_ext"
assert_eq!(entries.len(), files.len() - 1);
// initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present. // initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present.
crate::init_db(); crate::init_db();
@ -210,6 +212,7 @@ fn argument_parsing() {
hidden: false, hidden: false,
extensionless: false, extensionless: false,
follow_symlinks: true, follow_symlinks: true,
ignore_unknown_exts: false,
}, },
"ScanOpts are incorrect" "ScanOpts are incorrect"
); );
@ -330,7 +333,10 @@ fn identify_random_bytes() {
for (mime, count) in &results { for (mime, count) in &results {
println!("{}:\t{} counts", mime, count); println!("{}:\t{} counts", mime, count);
} }
println!("No type found:\t{} counts", 500 - results.values().sum::<i32>()); println!(
"No type found:\t{} counts",
results.values().len() as i32 - results.values().sum::<i32>()
);
} }
#[test] #[test]
@ -364,7 +370,7 @@ fn outputs_move_commands() {
// the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg" // the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg"
assert!( assert!(
contents.contains("misnamed_file.jpg"), contents.contains("misnamed_file.jpg") && contents.contains("misnamed_file.png"),
"{} output doesn't contain move command!\n===\n{}", "{} output doesn't contain move command!\n===\n{}",
format, format,
contents contents
@ -430,13 +436,8 @@ fn writables_is_correct() {
use crate::writables; use crate::writables;
assert_eq!( assert_eq!(
&[ &["henlo".into(), Path::new("henlo").into(), Writable::Newline,],
"henlo".into(), writables!["henlo", (Path::new("henlo")), Newline]
Path::new("henlo").into(),
Writable::Newline,
Writable::Space
],
writables!["henlo", (Path::new("henlo")), Newline, Space]
); );
} }

View file

@ -54,6 +54,7 @@ def main():
sys.exit(2) sys.exit(2)
print("Done! You might want to run cargo clean...") print("Done! You might want to run cargo clean...")
subprocess.run(["du", "-sh", "target"])
if __name__ == "__main__": if __name__ == "__main__":
main() main()