add option for following symlinks, make extensions optional, new version!!
All checks were successful
continuous-integration/drone/push Build is passing
continuous-integration/drone/tag Build is passing

This commit is contained in:
Lynne Megido 2021-04-04 23:52:16 +10:00
parent 12d9001bb8
commit fb67c11eb6
Signed by: lynnesbian
GPG key ID: F0A184B5213D9F90
9 changed files with 83 additions and 33 deletions

View file

@ -2,11 +2,17 @@
Dates are given in YYYY-MM-DD format.
## v0.2
### v0.2.11 (Unreleased)
### v0.2.11 (2021-04-04)
#### Features
- fif can now traverse symlinks with the `-f`/`--follow-symlinks` flag
- Extensions are no longer mandatory - running fif without `-e` or `-E` will scan all files, regardless of extension
(files without extensions are still skipped unless the `-S` flag is used)
#### Bugfixes
- Fixed compilation on big endian 32-bit architectures (see
[here](https://github.com/bodil/smartstring/blob/v0.2.6/src/config.rs#L101-L103) for why that was a problem in the
first place)
- Fixed broken tests
- Fixed broken tests for the [`infer`] backend
#### Other
- Better mime type detection:
- Consider "some/x-thing" and "some/thing" to be identical
- Use a patched version of mime_guess (which took a while to make 0u0;) with many more extension<->type mappings

2
Cargo.lock generated
View file

@ -177,7 +177,7 @@ dependencies = [
[[package]]
name = "fif"
version = "0.2.10"
version = "0.2.11"
dependencies = [
"cached",
"cfg-if",

View file

@ -1,7 +1,7 @@
[package]
name = "fif"
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
version = "0.2.10"
version = "0.2.11"
authors = ["Lynnesbian <lynne@bune.city>"]
edition = "2018"
license = "GPL-3.0-or-later"
@ -42,9 +42,9 @@ smartstring = "0.2.6"
[patch.crates-io]
# use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd
xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd" }
xdg-mime = { git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd" }
# forked version with many more mime types
mime_guess = {git = "https://github.com/Lynnesbian/mime_guess", version = "2.0.4"}
mime_guess = { git = "https://github.com/Lynnesbian/mime_guess", version = "2.0.4" }
[dependencies.clap]
version = "3.0.0-beta.2"

View file

@ -13,10 +13,12 @@ cargo clippy --tests -- \
-W clippy::wrong_pub_self_convention \
-A clippy::unused_io_amount \
-A clippy::redundant_closure_for_method_calls \
-A clippy::shadow_unrelated
-A clippy::shadow_unrelated \
-A clippy::option_if_let_else
# ALLOWS:
# unused_io_amount: there are two places where i want to read up to X bytes and i'm fine with getting less than that
# redundant_closure...: the alternative is often much more verbose
# shadow_unrelated: sometimes things that seem unrelated are actually related ;)
# option_if_let_else: the suggested code is usually harder to read than the original

View file

@ -167,6 +167,8 @@ impl Format for Script {
}
}
// "PowerShell" is used here as a proper noun (the name of the shell), not as a reference to a type, so it is
// deliberately written without backticks; allow clippy's `doc_markdown` lint for this item.
#[allow(clippy::doc_markdown)]
/// PowerShell script.
pub struct PowerShell {}

View file

@ -81,7 +81,7 @@ fn main() {
debug!("Checking files with extensions: {:?}", extensions);
let entries = scan_directory(&args.dirs, &extensions, &args.get_scan_opts());
let entries = scan_directory(&args.dirs, extensions.as_ref(), &args.get_scan_opts());
if entries.is_none() {
// no need to log anything for fatal errors - fif will already have printed something obvious like
@ -112,8 +112,9 @@ fn main() {
match result {
Ok(r) => {
debug!(
"{:?} should have file extension {}",
"{:?} is {}, should have file extension {}",
r.file,
r.mime,
r.recommended_extension().unwrap_or_else(|| "???".into())
)
}
@ -165,8 +166,8 @@ cfg_if! {
}
/// Returns `true` if a file matches the given criteria. This means checking whether the file's extension appears in
/// `exts`, potentially skipping over hidden files, and so on.
fn wanted_file(entry: &DirEntry, exts: &[&str], scan_opts: &ScanOpts) -> bool {
/// `exts` (if specified), potentially skipping over hidden files, and so on.
fn wanted_file(entry: &DirEntry, exts: Option<&Vec<&str>>, scan_opts: &ScanOpts) -> bool {
if entry.depth() == 0 {
// the root directory should always be scanned.
return true;
@ -189,7 +190,13 @@ fn wanted_file(entry: &DirEntry, exts: &[&str], scan_opts: &ScanOpts) -> bool {
return false;
}
if let Some(exts) = exts {
// only scan if the file has one of the specified extensions.
exts.contains(&ext.unwrap().to_string_lossy().to_lowercase().as_str())
} else {
// no extensions specified - no reason not to scan this file.
true
}
}
/// Given a file path, returns its extension, using [`std::path::Path::extension`].
@ -264,8 +271,8 @@ fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, ScanError>> {
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
/// [`DirEntry`]s.
fn scan_directory(dirs: &Path, exts: &[&str], scan_opts: &ScanOpts) -> Option<Vec<DirEntry>> {
let stepper = WalkDir::new(dirs).into_iter();
fn scan_directory(dirs: &Path, exts: Option<&Vec<&str>>, scan_opts: &ScanOpts) -> Option<Vec<DirEntry>> {
let stepper = WalkDir::new(dirs).follow_links(scan_opts.follow_symlinks).into_iter();
let mut probably_fatal_error = false;
let entries: Vec<DirEntry> = stepper
.filter_entry(|e| wanted_file(e, exts, scan_opts)) // filter out unwanted files

View file

@ -42,17 +42,11 @@ pub enum OutputFormat {
)]
pub struct Parameters {
/// Only examine files with these extensions (Comma-separated list)
#[clap(
short,
long,
use_delimiter = true,
require_delimiter = true,
required_unless_present = "ext-set"
)]
#[clap(short, long, use_delimiter = true, require_delimiter = true, group = "extensions")]
pub exts: Option<Vec<StringType>>,
/// Use a preset list of extensions as the search filter
#[clap(short = 'E', long, arg_enum, required_unless_present = "exts")]
#[clap(short = 'E', long, arg_enum, group = "extensions")]
pub ext_set: Option<ExtensionSet>,
/// Don't skip hidden files and directories
@ -67,6 +61,10 @@ pub struct Parameters {
#[clap(short, long, default_value = DEFAULT_FORMAT, arg_enum)]
pub output_format: OutputFormat,
/// Follow symlinks
#[clap(short, long)]
pub follow_symlinks: bool,
/// Directory to process
// TODO: right now this can only take a single directory - should this be improved?
#[clap(name = "DIR", default_value = ".", parse(from_os_str))]
@ -74,24 +72,27 @@ pub struct Parameters {
}
/// Further options relating to scanning.
///
/// Built from the parsed command-line arguments by `Parameters::get_scan_opts`, and passed by reference to
/// `scan_directory` and `wanted_file`.
// `PartialEq` and `Debug` allow whole values to be compared with `assert_eq!` in the tests.
#[derive(PartialEq, Debug)]
pub struct ScanOpts {
/// Whether hidden files and directories should be scanned.
pub hidden: bool,
/// Whether files without extensions should be scanned.
pub extensionless: bool,
/// Whether symlinks should be followed; this value is handed to `WalkDir::follow_links` when scanning.
pub follow_symlinks: bool,
}
impl Parameters {
pub fn extensions(&self) -> Vec<&str> {
pub fn extensions(&self) -> Option<Vec<&str>> {
if let Some(exts) = &self.exts {
// extensions supplied like "-e png,jpg,jpeg"
exts.iter().map(|s| s.as_str()).collect()
Some(exts.iter().map(|s| s.as_str()).collect())
} else if let Some(exts) = &self.ext_set {
// extensions supplied like "-E images"
exts.extensions()
Some(exts.extensions())
} else {
// neither -E nor -e was passed - this should be impossible
unreachable!()
// neither -E nor -e was passed
None
}
}
@ -99,6 +100,7 @@ impl Parameters {
ScanOpts {
hidden: self.scan_hidden,
extensionless: self.scan_extensionless,
follow_symlinks: self.follow_symlinks,
}
}
}

View file

@ -5,7 +5,7 @@ cfg_if! {
// most architectures
pub use smartstring::alias::String;
} else {
// powerpc
// powerpc and other big endian 32-bit archs
pub use std::string::String;
}
}

View file

@ -101,11 +101,12 @@ fn simple_directory() {
let scan_opts = ScanOpts {
hidden: true,
extensionless: false,
follow_symlinks: false,
};
let entries = scan_directory(
&dir.path().to_path_buf(),
&["jpg", "jpeg", "png", "pdf", "zip"],
Some(&vec!["jpg", "jpeg", "png", "pdf", "zip"]),
&scan_opts,
)
.expect("Directory scan failed.");
@ -157,21 +158,51 @@ fn simple_directory() {
fn argument_parsing() {
use clap::Clap;
// check if "jpg" is in the list of extensions to be considered when passing "-E images"
let args: Parameters = Parameters::parse_from(vec!["fif", "-E", "images"]);
assert!(args.extensions().contains(&"jpg"));
// pass `-f`, which enables following symlinks, and `-E images`, which scans files with image extensions
let args: Parameters = Parameters::parse_from(vec!["fif", "-f", "-E", "images"]);
// check if "jpg" is in the list of extensions to be scanned
assert!(args
.extensions()
.expect("args.extensions() should contain the `images` set!")
.contains(&"jpg"));
// make sure "scan_hidden" is false
assert!(!args.scan_hidden);
// exts should be none
assert!(args.exts.is_none());
// get the ScanOpts, and make sure they match expectations
assert_eq!(
args.get_scan_opts(),
ScanOpts {
hidden: false,
extensionless: false,
follow_symlinks: true
}
)
}
#[test]
fn rejects_bad_args() {
use clap::Clap;
assert!(Parameters::try_parse_from(vec!["fif", "-abcdefg", "-E", "-e"]).is_err());
let tests = [
// Non-existent flags:
vec!["fif", "-abcdefghijklmnopqrstuvwxyz"],
// `-E` without specifying a set:
vec!["fif", "-E"],
// `-E` with an invalid set:
vec!["fif", "-E", "pebis"],
// `-E` and `-e`:
vec!["fif", "-E", "media", "-e", "jpg"],
// `-e` with nothing but commas:
vec!["fif", "-e", ",,,,,"],
];
for test in &tests {
assert!(Parameters::try_parse_from(test).is_err(), "Failed to reject {:?}", test);
}
}
#[test]