add option for following symlinks, make extensions optional, new version!!
This commit is contained in:
parent
12d9001bb8
commit
fb67c11eb6
9 changed files with 83 additions and 33 deletions
10
CHANGELOG.md
10
CHANGELOG.md
|
@ -2,11 +2,17 @@
|
||||||
Dates are given in YYYY-MM-DD format.
|
Dates are given in YYYY-MM-DD format.
|
||||||
|
|
||||||
## v0.2
|
## v0.2
|
||||||
### v0.2.11 (Unreleased)
|
### v0.2.11 (0201-)
|
||||||
|
#### Features
|
||||||
|
- fif can now traverse symlinks with the `-f`/`--follow-symlinks` flag
|
||||||
|
- Extensions are no longer mandatory - running fif without `-e` or `-E` will scan all files, regardless of extension
|
||||||
|
(files without extensions are still skipped unless the `-S` flag is used)
|
||||||
|
#### Bugfixes
|
||||||
- Fixed compilation on big endian 32-bit architectures (see
|
- Fixed compilation on big endian 32-bit architectures (see
|
||||||
[here](https://github.com/bodil/smartstring/blob/v0.2.6/src/config.rs#L101-L103) for why that was a problem in the first
|
[here](https://github.com/bodil/smartstring/blob/v0.2.6/src/config.rs#L101-L103) for why that was a problem in the first
|
||||||
place)
|
place)
|
||||||
- Fixed broken tests
|
- Fixed broken tests for the [`infer`] backend
|
||||||
|
#### Other
|
||||||
- Better mime type detection:
|
- Better mime type detection:
|
||||||
- Consider "some/x-thing" and "some/thing" to be identical
|
- Consider "some/x-thing" and "some/thing" to be identical
|
||||||
- Use a patched version of mime_guess (which took a while to make 0u0;) with many more extension<->type mappings
|
- Use a patched version of mime_guess (which took a while to make 0u0;) with many more extension<->type mappings
|
||||||
|
|
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -177,7 +177,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fif"
|
name = "fif"
|
||||||
version = "0.2.10"
|
version = "0.2.11"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cached",
|
"cached",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
[package]
|
[package]
|
||||||
name = "fif"
|
name = "fif"
|
||||||
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
|
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
|
||||||
version = "0.2.10"
|
version = "0.2.11"
|
||||||
authors = ["Lynnesbian <lynne@bune.city>"]
|
authors = ["Lynnesbian <lynne@bune.city>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "GPL-3.0-or-later"
|
license = "GPL-3.0-or-later"
|
||||||
|
|
|
@ -13,10 +13,12 @@ cargo clippy --tests -- \
|
||||||
-W clippy::wrong_pub_self_convention \
|
-W clippy::wrong_pub_self_convention \
|
||||||
-A clippy::unused_io_amount \
|
-A clippy::unused_io_amount \
|
||||||
-A clippy::redundant_closure_for_method_calls \
|
-A clippy::redundant_closure_for_method_calls \
|
||||||
-A clippy::shadow_unrelated
|
-A clippy::shadow_unrelated \
|
||||||
|
-A clippy::option_if_let_else
|
||||||
|
|
||||||
# ALLOWS:
|
# ALLOWS:
|
||||||
# unused_io_amount: there are two places where i want to read up to X bytes and i'm fine with getting less than that
|
# unused_io_amount: there are two places where i want to read up to X bytes and i'm fine with getting less than that
|
||||||
# redundant_closure...: the alternative is often much more verbose
|
# redundant_closure...: the alternative is often much more verbose
|
||||||
# shadow_unrelated: sometimes things that seem unrelated are actually related ;)
|
# shadow_unrelated: sometimes things that seem unrelated are actually related ;)
|
||||||
|
# option_if_let_else: the suggested code is usually harder to read than the original
|
||||||
|
|
||||||
|
|
|
@ -167,6 +167,8 @@ impl Format for Script {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// PowerShell is a noun, not a type
|
||||||
|
#[allow(clippy::doc_markdown)]
|
||||||
/// PowerShell script.
|
/// PowerShell script.
|
||||||
pub struct PowerShell {}
|
pub struct PowerShell {}
|
||||||
|
|
||||||
|
|
19
src/main.rs
19
src/main.rs
|
@ -81,7 +81,7 @@ fn main() {
|
||||||
|
|
||||||
debug!("Checking files with extensions: {:?}", extensions);
|
debug!("Checking files with extensions: {:?}", extensions);
|
||||||
|
|
||||||
let entries = scan_directory(&args.dirs, &extensions, &args.get_scan_opts());
|
let entries = scan_directory(&args.dirs, extensions.as_ref(), &args.get_scan_opts());
|
||||||
|
|
||||||
if entries.is_none() {
|
if entries.is_none() {
|
||||||
// no need to log anything for fatal errors - fif will already have printed something obvious like
|
// no need to log anything for fatal errors - fif will already have printed something obvious like
|
||||||
|
@ -112,8 +112,9 @@ fn main() {
|
||||||
match result {
|
match result {
|
||||||
Ok(r) => {
|
Ok(r) => {
|
||||||
debug!(
|
debug!(
|
||||||
"{:?} should have file extension {}",
|
"{:?} is {}, should have file extension {}",
|
||||||
r.file,
|
r.file,
|
||||||
|
r.mime,
|
||||||
r.recommended_extension().unwrap_or_else(|| "???".into())
|
r.recommended_extension().unwrap_or_else(|| "???".into())
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -165,8 +166,8 @@ cfg_if! {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns `true` if a file matches the given criteria. This means checking whether the file's extension appears in
|
/// Returns `true` if a file matches the given criteria. This means checking whether the file's extension appears in
|
||||||
/// `exts`, potentially skipping over hidden files, and so on.
|
/// `exts` (if specified), potentially skipping over hidden files, and so on.
|
||||||
fn wanted_file(entry: &DirEntry, exts: &[&str], scan_opts: &ScanOpts) -> bool {
|
fn wanted_file(entry: &DirEntry, exts: Option<&Vec<&str>>, scan_opts: &ScanOpts) -> bool {
|
||||||
if entry.depth() == 0 {
|
if entry.depth() == 0 {
|
||||||
// the root directory should always be scanned.
|
// the root directory should always be scanned.
|
||||||
return true;
|
return true;
|
||||||
|
@ -189,7 +190,13 @@ fn wanted_file(entry: &DirEntry, exts: &[&str], scan_opts: &ScanOpts) -> bool {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(exts) = exts {
|
||||||
|
// only scan if the file has one of the specified extensions.
|
||||||
exts.contains(&ext.unwrap().to_string_lossy().to_lowercase().as_str())
|
exts.contains(&ext.unwrap().to_string_lossy().to_lowercase().as_str())
|
||||||
|
} else {
|
||||||
|
// no extensions specified - no reason not to scan this file.
|
||||||
|
true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Given a file path, returns its extension, using [`std::path::Path::extension`].
|
/// Given a file path, returns its extension, using [`std::path::Path::extension`].
|
||||||
|
@ -264,8 +271,8 @@ fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, ScanError>> {
|
||||||
|
|
||||||
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
|
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
|
||||||
/// [DirEntry]s.
|
/// [DirEntry]s.
|
||||||
fn scan_directory(dirs: &Path, exts: &[&str], scan_opts: &ScanOpts) -> Option<Vec<DirEntry>> {
|
fn scan_directory(dirs: &Path, exts: Option<&Vec<&str>>, scan_opts: &ScanOpts) -> Option<Vec<DirEntry>> {
|
||||||
let stepper = WalkDir::new(dirs).into_iter();
|
let stepper = WalkDir::new(dirs).follow_links(scan_opts.follow_symlinks).into_iter();
|
||||||
let mut probably_fatal_error = false;
|
let mut probably_fatal_error = false;
|
||||||
let entries: Vec<DirEntry> = stepper
|
let entries: Vec<DirEntry> = stepper
|
||||||
.filter_entry(|e| wanted_file(e, exts, scan_opts)) // filter out unwanted files
|
.filter_entry(|e| wanted_file(e, exts, scan_opts)) // filter out unwanted files
|
||||||
|
|
|
@ -42,17 +42,11 @@ pub enum OutputFormat {
|
||||||
)]
|
)]
|
||||||
pub struct Parameters {
|
pub struct Parameters {
|
||||||
/// Only examine files with these extensions (Comma-separated list)
|
/// Only examine files with these extensions (Comma-separated list)
|
||||||
#[clap(
|
#[clap(short, long, use_delimiter = true, require_delimiter = true, group = "extensions")]
|
||||||
short,
|
|
||||||
long,
|
|
||||||
use_delimiter = true,
|
|
||||||
require_delimiter = true,
|
|
||||||
required_unless_present = "ext-set"
|
|
||||||
)]
|
|
||||||
pub exts: Option<Vec<StringType>>,
|
pub exts: Option<Vec<StringType>>,
|
||||||
|
|
||||||
/// Use a preset list of extensions as the search filter
|
/// Use a preset list of extensions as the search filter
|
||||||
#[clap(short = 'E', long, arg_enum, required_unless_present = "exts")]
|
#[clap(short = 'E', long, arg_enum, group = "extensions")]
|
||||||
pub ext_set: Option<ExtensionSet>,
|
pub ext_set: Option<ExtensionSet>,
|
||||||
|
|
||||||
/// Don't skip hidden files and directories
|
/// Don't skip hidden files and directories
|
||||||
|
@ -67,6 +61,10 @@ pub struct Parameters {
|
||||||
#[clap(short, long, default_value = DEFAULT_FORMAT, arg_enum)]
|
#[clap(short, long, default_value = DEFAULT_FORMAT, arg_enum)]
|
||||||
pub output_format: OutputFormat,
|
pub output_format: OutputFormat,
|
||||||
|
|
||||||
|
/// Follow symlinks
|
||||||
|
#[clap(short, long)]
|
||||||
|
pub follow_symlinks: bool,
|
||||||
|
|
||||||
/// Directory to process
|
/// Directory to process
|
||||||
// TODO: right now this can only take a single directory - should this be improved?
|
// TODO: right now this can only take a single directory - should this be improved?
|
||||||
#[clap(name = "DIR", default_value = ".", parse(from_os_str))]
|
#[clap(name = "DIR", default_value = ".", parse(from_os_str))]
|
||||||
|
@ -74,24 +72,27 @@ pub struct Parameters {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Further options relating to scanning.
|
/// Further options relating to scanning.
|
||||||
|
#[derive(PartialEq, Debug)]
|
||||||
pub struct ScanOpts {
|
pub struct ScanOpts {
|
||||||
/// Whether hidden files and directories should be scanned.
|
/// Whether hidden files and directories should be scanned.
|
||||||
pub hidden: bool,
|
pub hidden: bool,
|
||||||
/// Whether files without extensions should be scanned.
|
/// Whether files without extensions should be scanned.
|
||||||
pub extensionless: bool,
|
pub extensionless: bool,
|
||||||
|
/// Should symlinks be followed?
|
||||||
|
pub follow_symlinks: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Parameters {
|
impl Parameters {
|
||||||
pub fn extensions(&self) -> Vec<&str> {
|
pub fn extensions(&self) -> Option<Vec<&str>> {
|
||||||
if let Some(exts) = &self.exts {
|
if let Some(exts) = &self.exts {
|
||||||
// extensions supplied like "-e png,jpg,jpeg"
|
// extensions supplied like "-e png,jpg,jpeg"
|
||||||
exts.iter().map(|s| s.as_str()).collect()
|
Some(exts.iter().map(|s| s.as_str()).collect())
|
||||||
} else if let Some(exts) = &self.ext_set {
|
} else if let Some(exts) = &self.ext_set {
|
||||||
// extensions supplied like "-E images"
|
// extensions supplied like "-E images"
|
||||||
exts.extensions()
|
Some(exts.extensions())
|
||||||
} else {
|
} else {
|
||||||
// neither -E nor -e was passed - this should be impossible
|
// neither -E nor -e was passed
|
||||||
unreachable!()
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -99,6 +100,7 @@ impl Parameters {
|
||||||
ScanOpts {
|
ScanOpts {
|
||||||
hidden: self.scan_hidden,
|
hidden: self.scan_hidden,
|
||||||
extensionless: self.scan_extensionless,
|
extensionless: self.scan_extensionless,
|
||||||
|
follow_symlinks: self.follow_symlinks,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,7 +5,7 @@ cfg_if! {
|
||||||
// most architectures
|
// most architectures
|
||||||
pub use smartstring::alias::String;
|
pub use smartstring::alias::String;
|
||||||
} else {
|
} else {
|
||||||
// powerpc
|
// powerpc and other big endian 32-bit archs
|
||||||
pub use std::string::String;
|
pub use std::string::String;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -101,11 +101,12 @@ fn simple_directory() {
|
||||||
let scan_opts = ScanOpts {
|
let scan_opts = ScanOpts {
|
||||||
hidden: true,
|
hidden: true,
|
||||||
extensionless: false,
|
extensionless: false,
|
||||||
|
follow_symlinks: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
let entries = scan_directory(
|
let entries = scan_directory(
|
||||||
&dir.path().to_path_buf(),
|
&dir.path().to_path_buf(),
|
||||||
&["jpg", "jpeg", "png", "pdf", "zip"],
|
Some(&vec!["jpg", "jpeg", "png", "pdf", "zip"]),
|
||||||
&scan_opts,
|
&scan_opts,
|
||||||
)
|
)
|
||||||
.expect("Directory scan failed.");
|
.expect("Directory scan failed.");
|
||||||
|
@ -157,21 +158,51 @@ fn simple_directory() {
|
||||||
fn argument_parsing() {
|
fn argument_parsing() {
|
||||||
use clap::Clap;
|
use clap::Clap;
|
||||||
|
|
||||||
// check if "jpg" is in the list of extensions to be considered when passing "-E images"
|
// pass `-f`, which enables following symlinks, and `-E images`, which scans files with image extensions
|
||||||
let args: Parameters = Parameters::parse_from(vec!["fif", "-E", "images"]);
|
let args: Parameters = Parameters::parse_from(vec!["fif", "-f", "-E", "images"]);
|
||||||
assert!(args.extensions().contains(&"jpg"));
|
|
||||||
|
// check if "jpg" is in the list of extensions to be scanned
|
||||||
|
assert!(args
|
||||||
|
.extensions()
|
||||||
|
.expect("args.extensions() should contain the `images` set!")
|
||||||
|
.contains(&"jpg"));
|
||||||
|
|
||||||
// make sure "scan_hidden" is false
|
// make sure "scan_hidden" is false
|
||||||
assert!(!args.scan_hidden);
|
assert!(!args.scan_hidden);
|
||||||
|
|
||||||
// exts should be none
|
// exts should be none
|
||||||
assert!(args.exts.is_none());
|
assert!(args.exts.is_none());
|
||||||
|
|
||||||
|
// get the ScanOpts, and make sure they match expectations
|
||||||
|
assert_eq!(
|
||||||
|
args.get_scan_opts(),
|
||||||
|
ScanOpts {
|
||||||
|
hidden: false,
|
||||||
|
extensionless: false,
|
||||||
|
follow_symlinks: true
|
||||||
|
}
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn rejects_bad_args() {
|
fn rejects_bad_args() {
|
||||||
use clap::Clap;
|
use clap::Clap;
|
||||||
assert!(Parameters::try_parse_from(vec!["fif", "-abcdefg", "-E", "-e"]).is_err());
|
let tests = [
|
||||||
|
// Non-existent flags:
|
||||||
|
vec!["fif", "-abcdefghijklmnopqrstuvwxyz"],
|
||||||
|
// `-E` without specifying a set:
|
||||||
|
vec!["fif", "-E"],
|
||||||
|
// `-E` with an invalid set:
|
||||||
|
vec!["fif", "-E", "pebis"],
|
||||||
|
// `-E` and `-e`:
|
||||||
|
vec!["fif", "-E", "media", "-e", "jpg"],
|
||||||
|
// `-e` with nothing but commas:
|
||||||
|
vec!["fif", "-e", ",,,,,"],
|
||||||
|
];
|
||||||
|
|
||||||
|
for test in &tests {
|
||||||
|
assert!(Parameters::try_parse_from(test).is_err(), "Failed to reject {:?}", test);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Reference in a new issue