add option for following symlinks, make extensions optional, new version!!
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/tag Build is passing

This commit is contained in:
Lynne Megido 2021-04-04 23:52:16 +10:00
parent 12d9001bb8
commit fb67c11eb6
Signed by: lynnesbian
GPG key ID: F0A184B5213D9F90
9 changed files with 83 additions and 33 deletions

View file

@ -2,11 +2,17 @@
Dates are given in YYYY-MM-DD format. Dates are given in YYYY-MM-DD format.
## v0.2 ## v0.2
### v0.2.11 (Unreleased) ### v0.2.11 (2021-04-04)
#### Features
- fif can now traverse symlinks with the `-f`/`--follow-symlinks` flag
- Extensions are no longer mandatory - running fif without `-e` or `-E` will scan all files, regardless of extension
(files without extensions are still skipped unless the -S flag is used)
#### Bugfixes
- Fixed compilation on big endian 32-bit architectures (see - Fixed compilation on big endian 32-bit architectures (see
[here](https://github.com/bodil/smartstring/blob/v0.2.6/src/config.rs#L101-L103) for why that was a problem in the first [here](https://github.com/bodil/smartstring/blob/v0.2.6/src/config.rs#L101-L103) for why that was a problem in the first
place) place)
- Fixed broken tests - Fixed broken tests for the [`infer`] backend
#### Other
- Better mime type detection: - Better mime type detection:
- Consider "some/x-thing" and "some/thing" to be identical - Consider "some/x-thing" and "some/thing" to be identical
- Use a patched version of mime_guess (which took a while to make 0u0;) with many more extension<->type mappings - Use a patched version of mime_guess (which took a while to make 0u0;) with many more extension<->type mappings

2
Cargo.lock generated
View file

@ -177,7 +177,7 @@ dependencies = [
[[package]] [[package]]
name = "fif" name = "fif"
version = "0.2.10" version = "0.2.11"
dependencies = [ dependencies = [
"cached", "cached",
"cfg-if", "cfg-if",

View file

@ -1,7 +1,7 @@
[package] [package]
name = "fif" name = "fif"
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions." description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
version = "0.2.10" version = "0.2.11"
authors = ["Lynnesbian <lynne@bune.city>"] authors = ["Lynnesbian <lynne@bune.city>"]
edition = "2018" edition = "2018"
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"
@ -42,9 +42,9 @@ smartstring = "0.2.6"
[patch.crates-io] [patch.crates-io]
# use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd # use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd
xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd" } xdg-mime = { git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd" }
# forked version with many more mime types # forked version with many more mime types
mime_guess = {git = "https://github.com/Lynnesbian/mime_guess", version = "2.0.4"} mime_guess = { git = "https://github.com/Lynnesbian/mime_guess", version = "2.0.4" }
[dependencies.clap] [dependencies.clap]
version = "3.0.0-beta.2" version = "3.0.0-beta.2"

View file

@ -13,10 +13,12 @@ cargo clippy --tests -- \
-W clippy::wrong_pub_self_convention \ -W clippy::wrong_pub_self_convention \
-A clippy::unused_io_amount \ -A clippy::unused_io_amount \
-A clippy::redundant_closure_for_method_calls \ -A clippy::redundant_closure_for_method_calls \
-A clippy::shadow_unrelated -A clippy::shadow_unrelated \
-A clippy::option_if_let_else
# ALLOWS: # ALLOWS:
# unused_io_amount: there are two places where i want to read up to X bytes and i'm fine with getting less than that # unused_io_amount: there are two places where i want to read up to X bytes and i'm fine with getting less than that
# redundant_closure...: the alternative is often much more verbose # redundant_closure...: the alternative is often much more verbose
# shadow_unrelated: sometimes things that seem unrelated are actually related ;) # shadow_unrelated: sometimes things that seem unrelated are actually related ;)
# option_if_let_else: the suggested code is usually harder to read than the original

View file

@ -167,6 +167,8 @@ impl Format for Script {
} }
} }
// PowerShell is a noun, not a type
#[allow(clippy::doc_markdown)]
/// PowerShell script. /// PowerShell script.
pub struct PowerShell {} pub struct PowerShell {}

View file

@ -81,7 +81,7 @@ fn main() {
debug!("Checking files with extensions: {:?}", extensions); debug!("Checking files with extensions: {:?}", extensions);
let entries = scan_directory(&args.dirs, &extensions, &args.get_scan_opts()); let entries = scan_directory(&args.dirs, extensions.as_ref(), &args.get_scan_opts());
if entries.is_none() { if entries.is_none() {
// no need to log anything for fatal errors - fif will already have printed something obvious like // no need to log anything for fatal errors - fif will already have printed something obvious like
@ -112,8 +112,9 @@ fn main() {
match result { match result {
Ok(r) => { Ok(r) => {
debug!( debug!(
"{:?} should have file extension {}", "{:?} is {}, should have file extension {}",
r.file, r.file,
r.mime,
r.recommended_extension().unwrap_or_else(|| "???".into()) r.recommended_extension().unwrap_or_else(|| "???".into())
) )
} }
@ -165,8 +166,8 @@ cfg_if! {
} }
/// Returns `true` if a file matches the given criteria. This means checking whether the file's extension appears in /// Returns `true` if a file matches the given criteria. This means checking whether the file's extension appears in
/// `exts`, potentially skipping over hidden files, and so on. /// `exts` (if specified), potentially skipping over hidden files, and so on.
fn wanted_file(entry: &DirEntry, exts: &[&str], scan_opts: &ScanOpts) -> bool { fn wanted_file(entry: &DirEntry, exts: Option<&Vec<&str>>, scan_opts: &ScanOpts) -> bool {
if entry.depth() == 0 { if entry.depth() == 0 {
// the root directory should always be scanned. // the root directory should always be scanned.
return true; return true;
@ -189,7 +190,13 @@ fn wanted_file(entry: &DirEntry, exts: &[&str], scan_opts: &ScanOpts) -> bool {
return false; return false;
} }
exts.contains(&ext.unwrap().to_string_lossy().to_lowercase().as_str()) if let Some(exts) = exts {
// only scan if the file has one of the specified extensions.
exts.contains(&ext.unwrap().to_string_lossy().to_lowercase().as_str())
} else {
// no extensions specified - no reason not to scan this file.
true
}
} }
/// Given a file path, returns its extension, using [`std::path::Path::extension`]. /// Given a file path, returns its extension, using [`std::path::Path::extension`].
@ -264,8 +271,8 @@ fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, ScanError>> {
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of /// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
/// [DirEntry]s. /// [DirEntry]s.
fn scan_directory(dirs: &Path, exts: &[&str], scan_opts: &ScanOpts) -> Option<Vec<DirEntry>> { fn scan_directory(dirs: &Path, exts: Option<&Vec<&str>>, scan_opts: &ScanOpts) -> Option<Vec<DirEntry>> {
let stepper = WalkDir::new(dirs).into_iter(); let stepper = WalkDir::new(dirs).follow_links(scan_opts.follow_symlinks).into_iter();
let mut probably_fatal_error = false; let mut probably_fatal_error = false;
let entries: Vec<DirEntry> = stepper let entries: Vec<DirEntry> = stepper
.filter_entry(|e| wanted_file(e, exts, scan_opts)) // filter out unwanted files .filter_entry(|e| wanted_file(e, exts, scan_opts)) // filter out unwanted files

View file

@ -42,17 +42,11 @@ pub enum OutputFormat {
)] )]
pub struct Parameters { pub struct Parameters {
/// Only examine files with these extensions (Comma-separated list) /// Only examine files with these extensions (Comma-separated list)
#[clap( #[clap(short, long, use_delimiter = true, require_delimiter = true, group = "extensions")]
short,
long,
use_delimiter = true,
require_delimiter = true,
required_unless_present = "ext-set"
)]
pub exts: Option<Vec<StringType>>, pub exts: Option<Vec<StringType>>,
/// Use a preset list of extensions as the search filter /// Use a preset list of extensions as the search filter
#[clap(short = 'E', long, arg_enum, required_unless_present = "exts")] #[clap(short = 'E', long, arg_enum, group = "extensions")]
pub ext_set: Option<ExtensionSet>, pub ext_set: Option<ExtensionSet>,
/// Don't skip hidden files and directories /// Don't skip hidden files and directories
@ -67,6 +61,10 @@ pub struct Parameters {
#[clap(short, long, default_value = DEFAULT_FORMAT, arg_enum)] #[clap(short, long, default_value = DEFAULT_FORMAT, arg_enum)]
pub output_format: OutputFormat, pub output_format: OutputFormat,
/// Follow symlinks
#[clap(short, long)]
pub follow_symlinks: bool,
/// Directory to process /// Directory to process
// TODO: right now this can only take a single directory - should this be improved? // TODO: right now this can only take a single directory - should this be improved?
#[clap(name = "DIR", default_value = ".", parse(from_os_str))] #[clap(name = "DIR", default_value = ".", parse(from_os_str))]
@ -74,24 +72,27 @@ pub struct Parameters {
} }
/// Further options relating to scanning. /// Further options relating to scanning.
#[derive(PartialEq, Debug)]
pub struct ScanOpts { pub struct ScanOpts {
/// Whether hidden files and directories should be scanned. /// Whether hidden files and directories should be scanned.
pub hidden: bool, pub hidden: bool,
/// Whether files without extensions should be scanned. /// Whether files without extensions should be scanned.
pub extensionless: bool, pub extensionless: bool,
/// Should symlinks be followed?
pub follow_symlinks: bool,
} }
impl Parameters { impl Parameters {
pub fn extensions(&self) -> Vec<&str> { pub fn extensions(&self) -> Option<Vec<&str>> {
if let Some(exts) = &self.exts { if let Some(exts) = &self.exts {
// extensions supplied like "-e png,jpg,jpeg" // extensions supplied like "-e png,jpg,jpeg"
exts.iter().map(|s| s.as_str()).collect() Some(exts.iter().map(|s| s.as_str()).collect())
} else if let Some(exts) = &self.ext_set { } else if let Some(exts) = &self.ext_set {
// extensions supplied like "-E images" // extensions supplied like "-E images"
exts.extensions() Some(exts.extensions())
} else { } else {
// neither -E nor -e was passed - this should be impossible // neither -E nor -e was passed
unreachable!() None
} }
} }
@ -99,6 +100,7 @@ impl Parameters {
ScanOpts { ScanOpts {
hidden: self.scan_hidden, hidden: self.scan_hidden,
extensionless: self.scan_extensionless, extensionless: self.scan_extensionless,
follow_symlinks: self.follow_symlinks,
} }
} }
} }

View file

@ -5,7 +5,7 @@ cfg_if! {
// most architectures // most architectures
pub use smartstring::alias::String; pub use smartstring::alias::String;
} else { } else {
// powerpc // powerpc and other big endian 32-bit archs
pub use std::string::String; pub use std::string::String;
} }
} }

View file

@ -101,11 +101,12 @@ fn simple_directory() {
let scan_opts = ScanOpts { let scan_opts = ScanOpts {
hidden: true, hidden: true,
extensionless: false, extensionless: false,
follow_symlinks: false,
}; };
let entries = scan_directory( let entries = scan_directory(
&dir.path().to_path_buf(), &dir.path().to_path_buf(),
&["jpg", "jpeg", "png", "pdf", "zip"], Some(&vec!["jpg", "jpeg", "png", "pdf", "zip"]),
&scan_opts, &scan_opts,
) )
.expect("Directory scan failed."); .expect("Directory scan failed.");
@ -157,21 +158,51 @@ fn simple_directory() {
fn argument_parsing() { fn argument_parsing() {
use clap::Clap; use clap::Clap;
// check if "jpg" is in the list of extensions to be considered when passing "-E images" // pass `-f`, which enables following symlinks, and `-E images`, which scans files with image extensions
let args: Parameters = Parameters::parse_from(vec!["fif", "-E", "images"]); let args: Parameters = Parameters::parse_from(vec!["fif", "-f", "-E", "images"]);
assert!(args.extensions().contains(&"jpg"));
// check if "jpg" is in the list of extensions to be scanned
assert!(args
.extensions()
.expect("args.extensions() should contain the `images` set!")
.contains(&"jpg"));
// make sure "scan_hidden" is false // make sure "scan_hidden" is false
assert!(!args.scan_hidden); assert!(!args.scan_hidden);
// exts should be none // exts should be none
assert!(args.exts.is_none()); assert!(args.exts.is_none());
// get the ScanOpts, and make sure they match expectations
assert_eq!(
args.get_scan_opts(),
ScanOpts {
hidden: false,
extensionless: false,
follow_symlinks: true
}
)
} }
#[test] #[test]
fn rejects_bad_args() { fn rejects_bad_args() {
use clap::Clap; use clap::Clap;
assert!(Parameters::try_parse_from(vec!["fif", "-abcdefg", "-E", "-e"]).is_err()); let tests = [
// Non-existent flags:
vec!["fif", "-abcdefghijklmnopqrstuvwxyz"],
// `-E` without specifying a set:
vec!["fif", "-E"],
// `-E` with an invalid set:
vec!["fif", "-E", "pebis"],
// `-E` and `-e`:
vec!["fif", "-E", "media", "-e", "jpg"],
// `-e` with nothing but commas:
vec!["fif", "-e", ",,,,,"],
];
for test in &tests {
assert!(Parameters::try_parse_from(test).is_err(), "Failed to reject {:?}", test);
}
} }
#[test] #[test]