From fb67c11eb6d211fc268c7fdee2f869e8e7de3aae Mon Sep 17 00:00:00 2001 From: Lynnesbian Date: Sun, 4 Apr 2021 23:52:16 +1000 Subject: [PATCH] add option for following symlinks, make extensions optional, new version!! --- CHANGELOG.md | 10 ++++++++-- Cargo.lock | 2 +- Cargo.toml | 6 +++--- clippy.sh | 4 +++- src/formats.rs | 2 ++ src/main.rs | 21 ++++++++++++++------- src/parameters.rs | 28 +++++++++++++++------------- src/string_type.rs | 2 +- src/tests/mod.rs | 41 ++++++++++++++++++++++++++++++++++++----- 9 files changed, 83 insertions(+), 33 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 590cf0e..3c93b92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,17 @@ Dates are given in YYYY-MM-DD format. ## v0.2 -### v0.2.11 (Unreleased) +### v0.2.11 (2021-04-04) +#### Features +- fif can now traverse symlinks with the `-f`/`--follow-symlinks` flag +- Extensions are no longer mandatory - running fif without `-e` or `-E` will scan all files, regardless of extension + (files without extensions are still skipped unless the -S flag is used) +#### Bugfixes - Fixed compilation on big endian 32-bit architectures (see [here](https://github.com/bodil/smartstring/blob/v0.2.6/src/config.rs#L101-L103) for why that was a problem in the first place) -- Fixed broken tests +- Fixed broken tests for the [`infer`] backend +#### Other - Better mime type detection: - Consider "some/x-thing" and "some/thing" to be identical - Use a patched version of mime_guess (which took a while to make 0u0;) with many more extension<->type mappings diff --git a/Cargo.lock b/Cargo.lock index d18d079..a769684 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -177,7 +177,7 @@ dependencies = [ [[package]] name = "fif" -version = "0.2.10" +version = "0.2.11" dependencies = [ "cached", "cfg-if", diff --git a/Cargo.toml b/Cargo.toml index 8e17f49..daca21a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "fif" description = "A command-line tool for detecting and optionally 
correcting files with incorrect extensions." -version = "0.2.10" +version = "0.2.11" authors = ["Lynnesbian "] edition = "2018" license = "GPL-3.0-or-later" @@ -42,9 +42,9 @@ smartstring = "0.2.6" [patch.crates-io] # use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd -xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd" } +xdg-mime = { git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd" } # forked version with many more mime types -mime_guess = {git = "https://github.com/Lynnesbian/mime_guess", version = "2.0.4"} +mime_guess = { git = "https://github.com/Lynnesbian/mime_guess", version = "2.0.4" } [dependencies.clap] version = "3.0.0-beta.2" diff --git a/clippy.sh b/clippy.sh index fb1dd2a..b060c01 100755 --- a/clippy.sh +++ b/clippy.sh @@ -13,10 +13,12 @@ cargo clippy --tests -- \ -W clippy::wrong_pub_self_convention \ -A clippy::unused_io_amount \ -A clippy::redundant_closure_for_method_calls \ - -A clippy::shadow_unrelated + -A clippy::shadow_unrelated \ + -A clippy::option_if_let_else # ALLOWS: # unused_io_amount: there are two places where i want to read up to X bytes and i'm fine with getting less than that # redundant_closure...: the alternative is often much more verbose # shadow_unrelated: sometimes things that seem unrelated are actually related ;) +# option_if_let_else: the suggested code is usually harder to read than the original diff --git a/src/formats.rs b/src/formats.rs index dac6cc3..0bd8b69 100644 --- a/src/formats.rs +++ b/src/formats.rs @@ -167,6 +167,8 @@ impl Format for Script { } } +// PowerShell is a noun, not a type +#[allow(clippy::doc_markdown)] /// PowerShell script. 
pub struct PowerShell {} diff --git a/src/main.rs b/src/main.rs index 6d1b67f..ff9ac99 100644 --- a/src/main.rs +++ b/src/main.rs @@ -81,7 +81,7 @@ fn main() { debug!("Checking files with extensions: {:?}", extensions); - let entries = scan_directory(&args.dirs, &extensions, &args.get_scan_opts()); + let entries = scan_directory(&args.dirs, extensions.as_ref(), &args.get_scan_opts()); if entries.is_none() { // no need to log anything for fatal errors - fif will already have printed something obvious like @@ -112,8 +112,9 @@ fn main() { match result { Ok(r) => { debug!( - "{:?} should have file extension {}", + "{:?} is {}, should have file extension {}", r.file, + r.mime, r.recommended_extension().unwrap_or_else(|| "???".into()) ) } @@ -165,8 +166,8 @@ cfg_if! { } /// Returns `true` if a file matches the given criteria. This means checking whether the file's extension appears in -/// `exts`, potentially skipping over hidden files, and so on. -fn wanted_file(entry: &DirEntry, exts: &[&str], scan_opts: &ScanOpts) -> bool { +/// `exts` (if specified), potentially skipping over hidden files, and so on. +fn wanted_file(entry: &DirEntry, exts: Option<&Vec<&str>>, scan_opts: &ScanOpts) -> bool { if entry.depth() == 0 { // the root directory should always be scanned. return true; @@ -189,7 +190,13 @@ fn wanted_file(entry: &DirEntry, exts: &[&str], scan_opts: &ScanOpts) -> bool { return false; } - exts.contains(&ext.unwrap().to_string_lossy().to_lowercase().as_str()) + if let Some(exts) = exts { + // only scan if the file has one of the specified extensions. + exts.contains(&ext.unwrap().to_string_lossy().to_lowercase().as_str()) + } else { + // no extensions specified - no reason not to scan this file. + true + } } /// Given a file path, returns its extension, using [`std::path::Path::extension`]. 
@@ -264,8 +271,8 @@ fn scan_from_walkdir(entries: &[DirEntry]) -> Vec> { /// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of /// [DirEntry]s. -fn scan_directory(dirs: &Path, exts: &[&str], scan_opts: &ScanOpts) -> Option> { - let stepper = WalkDir::new(dirs).into_iter(); +fn scan_directory(dirs: &Path, exts: Option<&Vec<&str>>, scan_opts: &ScanOpts) -> Option> { + let stepper = WalkDir::new(dirs).follow_links(scan_opts.follow_symlinks).into_iter(); let mut probably_fatal_error = false; let entries: Vec = stepper .filter_entry(|e| wanted_file(e, exts, scan_opts)) // filter out unwanted files diff --git a/src/parameters.rs b/src/parameters.rs index cc8b4da..ee0989d 100644 --- a/src/parameters.rs +++ b/src/parameters.rs @@ -42,17 +42,11 @@ pub enum OutputFormat { )] pub struct Parameters { /// Only examine files with these extensions (Comma-separated list) - #[clap( - short, - long, - use_delimiter = true, - require_delimiter = true, - required_unless_present = "ext-set" - )] + #[clap(short, long, use_delimiter = true, require_delimiter = true, group = "extensions")] pub exts: Option>, /// Use a preset list of extensions as the search filter - #[clap(short = 'E', long, arg_enum, required_unless_present = "exts")] + #[clap(short = 'E', long, arg_enum, group = "extensions")] pub ext_set: Option, /// Don't skip hidden files and directories @@ -67,6 +61,10 @@ pub struct Parameters { #[clap(short, long, default_value = DEFAULT_FORMAT, arg_enum)] pub output_format: OutputFormat, + /// Follow symlinks + #[clap(short, long)] + pub follow_symlinks: bool, + /// Directory to process // TODO: right now this can only take a single directory - should this be improved? #[clap(name = "DIR", default_value = ".", parse(from_os_str))] @@ -74,24 +72,27 @@ pub struct Parameters { } /// Further options relating to scanning. 
+#[derive(PartialEq, Debug)] pub struct ScanOpts { /// Whether hidden files and directories should be scanned. pub hidden: bool, /// Whether files without extensions should be scanned. pub extensionless: bool, + /// Should symlinks be followed? + pub follow_symlinks: bool, } impl Parameters { - pub fn extensions(&self) -> Vec<&str> { + pub fn extensions(&self) -> Option> { if let Some(exts) = &self.exts { // extensions supplied like "-e png,jpg,jpeg" - exts.iter().map(|s| s.as_str()).collect() + Some(exts.iter().map(|s| s.as_str()).collect()) } else if let Some(exts) = &self.ext_set { // extensions supplied like "-E images" - exts.extensions() + Some(exts.extensions()) } else { - // neither -E nor -e was passed - this should be impossible - unreachable!() + // neither -E nor -e was passed + None } } @@ -99,6 +100,7 @@ impl Parameters { ScanOpts { hidden: self.scan_hidden, extensionless: self.scan_extensionless, + follow_symlinks: self.follow_symlinks, } } } diff --git a/src/string_type.rs b/src/string_type.rs index 1561015..9794220 100644 --- a/src/string_type.rs +++ b/src/string_type.rs @@ -5,7 +5,7 @@ cfg_if! 
{ // most architectures pub use smartstring::alias::String; } else { - // powerpc + // powerpc and other big endian 32-bit archs pub use std::string::String; } } diff --git a/src/tests/mod.rs b/src/tests/mod.rs index ade261b..cd90891 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -101,11 +101,12 @@ fn simple_directory() { let scan_opts = ScanOpts { hidden: true, extensionless: false, + follow_symlinks: false, }; let entries = scan_directory( &dir.path().to_path_buf(), - &["jpg", "jpeg", "png", "pdf", "zip"], + Some(&vec!["jpg", "jpeg", "png", "pdf", "zip"]), &scan_opts, ) .expect("Directory scan failed."); @@ -157,21 +158,51 @@ fn simple_directory() { fn argument_parsing() { use clap::Clap; - // check if "jpg" is in the list of extensions to be considered when passing "-E images" - let args: Parameters = Parameters::parse_from(vec!["fif", "-E", "images"]); - assert!(args.extensions().contains(&"jpg")); + // pass `-f`, which enables following symlinks, and `-E images`, which scans files with image extensions + let args: Parameters = Parameters::parse_from(vec!["fif", "-f", "-E", "images"]); + + // check if "jpg" is in the list of extensions to be scanned + assert!(args + .extensions() + .expect("args.extensions() should contain the `images` set!") + .contains(&"jpg")); // make sure "scan_hidden" is false assert!(!args.scan_hidden); // exts should be none assert!(args.exts.is_none()); + + // get the ScanOpts, and make sure they match expectations + assert_eq!( + args.get_scan_opts(), + ScanOpts { + hidden: false, + extensionless: false, + follow_symlinks: true + } + ) } #[test] fn rejects_bad_args() { use clap::Clap; - assert!(Parameters::try_parse_from(vec!["fif", "-abcdefg", "-E", "-e"]).is_err()); + let tests = [ + // Non-existent flags: + vec!["fif", "-abcdefghijklmnopqrstuvwxyz"], + // `-E` without specifying a set: + vec!["fif", "-E"], + // `-E` with an invalid set: + vec!["fif", "-E", "pebis"], + // `-E` and `-e`: + vec!["fif", "-E", "media", "-e", 
"jpg"], + // `-e` with nothing but commas: + vec!["fif", "-e", ",,,,,"], + ]; + + for test in &tests { + assert!(Parameters::try_parse_from(test).is_err(), "Failed to reject {:?}", test); + } } #[test]