Compare commits
3 commits
741048839c
...
db80955f24
Author | SHA1 | Date | |
---|---|---|---|
db80955f24 | |||
4c6163296c | |||
5e17e4efda |
10 changed files with 94 additions and 25 deletions
|
@ -6,6 +6,7 @@
|
||||||
<option name="requiredFeatures" value="true" />
|
<option name="requiredFeatures" value="true" />
|
||||||
<option name="allFeatures" value="false" />
|
<option name="allFeatures" value="false" />
|
||||||
<option name="emulateTerminal" value="false" />
|
<option name="emulateTerminal" value="false" />
|
||||||
|
<option name="withSudo" value="false" />
|
||||||
<option name="backtrace" value="SHORT" />
|
<option name="backtrace" value="SHORT" />
|
||||||
<envs>
|
<envs>
|
||||||
<env name="RUST_LOG" value="debug" />
|
<env name="RUST_LOG" value="debug" />
|
||||||
|
|
|
@ -2,5 +2,6 @@
|
||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="VcsDirectoryMappings">
|
<component name="VcsDirectoryMappings">
|
||||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
<mapping directory="$PROJECT_DIR$/src/walkdir" vcs="Git" />
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
|
@ -5,9 +5,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||||
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
## Unreleased
|
## Unreleased
|
||||||
|
|
||||||
|
## v0.3.7 - 2021-09-25
|
||||||
### Added
|
### Added
|
||||||
|
- `-j`/`--jobs` flag for specifying the number of threads fif should use for scanning files
|
||||||
- AIFF (Audio Interchange File Format, a PCM audio format like WAV) detection to [`infer`] backend
|
- AIFF (Audio Interchange File Format, a PCM audio format like WAV) detection to [`infer`] backend
|
||||||
- `--version` output now includes the (short) hash of the git commit fif was built from
|
- `--version` output now includes the (short) hash of the git commit fif was built from
|
||||||
|
### Changed
|
||||||
|
- fif will no longer use multithreading when scanning fewer than 32 files - the overhead of spawning threads isn't really
|
||||||
|
worth it
|
||||||
### Other
|
### Other
|
||||||
- Refactoring - split fif into `main.rs` and `lib.rs`, moved file-related functionality (directory scanning, etc.) into
|
- Refactoring - split fif into `main.rs` and `lib.rs`, moved file-related functionality (directory scanning, etc.) into
|
||||||
files module, removed string module, etc.
|
files module, removed string module, etc.
|
||||||
|
|
5
Cargo.lock
generated
5
Cargo.lock
generated
|
@ -1,5 +1,7 @@
|
||||||
# This file is automatically @generated by Cargo.
|
# This file is automatically @generated by Cargo.
|
||||||
# It is not intended for manual editing.
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "arrayvec"
|
name = "arrayvec"
|
||||||
version = "0.5.2"
|
version = "0.5.2"
|
||||||
|
@ -173,7 +175,7 @@ checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fif"
|
name = "fif"
|
||||||
version = "0.3.6"
|
version = "0.3.7"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags",
|
"bitflags",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
|
@ -186,6 +188,7 @@ dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"mime",
|
"mime",
|
||||||
"new_mime_guess",
|
"new_mime_guess",
|
||||||
|
"num_cpus",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"rand",
|
"rand",
|
||||||
"rayon",
|
"rayon",
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
[package]
|
[package]
|
||||||
name = "fif"
|
name = "fif"
|
||||||
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
|
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
|
||||||
version = "0.3.6"
|
version = "0.3.7"
|
||||||
authors = ["Lynnesbian <lynne@bune.city>"]
|
authors = ["Lynnesbian <lynne@bune.city>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "GPL-3.0-or-later"
|
license = "GPL-3.0-or-later"
|
||||||
|
@ -17,7 +17,7 @@ maintenance = { status = "experimental" }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["multi-threaded", "json"]
|
default = ["multi-threaded", "json"]
|
||||||
multi-threaded = ["rayon"]
|
multi-threaded = ["rayon", "num_cpus"]
|
||||||
infer-backend = ["infer"]
|
infer-backend = ["infer"]
|
||||||
xdg-mime-backend = ["xdg-mime"]
|
xdg-mime-backend = ["xdg-mime"]
|
||||||
json = ["serde", "serde_json"]
|
json = ["serde", "serde_json"]
|
||||||
|
@ -36,6 +36,7 @@ itertools = "0.10.0"
|
||||||
serde = { version = "1.0", features = ["derive"], optional = true }
|
serde = { version = "1.0", features = ["derive"], optional = true }
|
||||||
serde_json = { version = "1.0", optional = true }
|
serde_json = { version = "1.0", optional = true }
|
||||||
bitflags = "~1.2.1" # 1.3+ requires Rust >= 1.46
|
bitflags = "~1.2.1" # 1.3+ requires Rust >= 1.46
|
||||||
|
num_cpus = { version = "1.13.0", optional = true }
|
||||||
|
|
||||||
[target.'cfg(not(unix))'.dependencies]
|
[target.'cfg(not(unix))'.dependencies]
|
||||||
xdg-mime = { version = "0.3.3", optional = true }
|
xdg-mime = { version = "0.3.3", optional = true }
|
||||||
|
|
29
src/files.rs
29
src/files.rs
|
@ -136,25 +136,42 @@ pub fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, Sc
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Takes a slice of [`DirEntry`]s and calls [`scan_file`] on each one, returning the results in a vector.
|
/// Takes a slice of [`DirEntry`]s and calls [`scan_file`] on each one, returning the results in a vector.
|
||||||
pub fn scan_from_walkdir(entries: &[DirEntry], canonical_paths: bool) -> Vec<Result<Findings, ScanError>> {
|
pub fn scan_from_walkdir(
|
||||||
|
entries: &[DirEntry],
|
||||||
|
canonical_paths: bool,
|
||||||
|
use_threads: bool,
|
||||||
|
) -> Vec<Result<Findings, ScanError>> {
|
||||||
cfg_if! {
|
cfg_if! {
|
||||||
if #[cfg(feature = "multi-threaded")] {
|
if #[cfg(feature = "multi-threaded")] {
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
|
const CHUNKS: usize = 32;
|
||||||
|
|
||||||
|
if use_threads && entries.len() > CHUNKS {
|
||||||
// split the entries into chunks of 32, and iterate over each chunk of entries in a separate thread
|
// split the entries into chunks of 32, and iterate over each chunk of entries in a separate thread
|
||||||
entries
|
return entries
|
||||||
.par_chunks(32)
|
.par_chunks(CHUNKS)
|
||||||
.flat_map(|chunk| {
|
.flat_map_iter(|chunk| {
|
||||||
chunk
|
chunk
|
||||||
.iter() // iter over the chunk, which is a slice of DirEntry structs
|
.iter() // iter over the chunk, which is a slice of DirEntry structs
|
||||||
.map(|entry| scan_file(entry, canonical_paths))
|
.map(|entry| scan_file(entry, canonical_paths))
|
||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>() // TODO: is there a way to avoid having to collect here?
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
entries.iter().map(|entry: &DirEntry| scan_file(entry, canonical_paths)).collect()
|
// should always be false when multi-threading is disabled at compile time
|
||||||
|
assert!(!use_threads)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// if we end up here, either
|
||||||
|
// - there were no more than CHUNKS files to scan, or
|
||||||
|
// - the user requested that only one thread be used, by passing `-j 1`, or
|
||||||
|
// - fif was compiled without the `multi-threaded` feature
|
||||||
|
entries
|
||||||
|
.iter()
|
||||||
|
.map(|entry: &DirEntry| scan_file(entry, canonical_paths))
|
||||||
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
|
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
|
||||||
|
|
|
@ -7,7 +7,7 @@ use std::os::unix::ffi::OsStrExt;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use cfg_if::cfg_if;
|
use cfg_if::cfg_if;
|
||||||
use itertools::{Either, Itertools};
|
use itertools::Itertools;
|
||||||
use snailquote::escape;
|
use snailquote::escape;
|
||||||
|
|
||||||
use crate::findings::ScanError;
|
use crate::findings::ScanError;
|
||||||
|
@ -338,6 +338,8 @@ pub struct Json;
|
||||||
#[cfg(feature = "json")]
|
#[cfg(feature = "json")]
|
||||||
impl Format for Json {
|
impl Format for Json {
|
||||||
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
|
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
|
||||||
|
use itertools::Either;
|
||||||
|
|
||||||
#[derive(serde::Serialize)]
|
#[derive(serde::Serialize)]
|
||||||
struct SerdeEntries<'a> {
|
struct SerdeEntries<'a> {
|
||||||
errors: &'a Vec<&'a ScanError<'a>>,
|
errors: &'a Vec<&'a ScanError<'a>>,
|
||||||
|
|
28
src/main.rs
28
src/main.rs
|
@ -20,6 +20,7 @@
|
||||||
use std::io::{stdout, BufWriter, Write};
|
use std::io::{stdout, BufWriter, Write};
|
||||||
use std::process::exit;
|
use std::process::exit;
|
||||||
|
|
||||||
|
use cfg_if::cfg_if;
|
||||||
use clap::Clap;
|
use clap::Clap;
|
||||||
use fif::files::{scan_directory, scan_from_walkdir};
|
use fif::files::{scan_directory, scan_from_walkdir};
|
||||||
use fif::formats::Format;
|
use fif::formats::Format;
|
||||||
|
@ -87,7 +88,28 @@ fn main() {
|
||||||
|
|
||||||
trace!("Found {} items to check", entries.len());
|
trace!("Found {} items to check", entries.len());
|
||||||
|
|
||||||
let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths)
|
cfg_if! {
|
||||||
|
if #[cfg(feature = "multi-threaded")] {
|
||||||
|
let use_threads = args.jobs != 1;
|
||||||
|
|
||||||
|
if use_threads {
|
||||||
|
// 0 is a special case - it should be understood to mean "all available host CPUs"
|
||||||
|
let jobs = if args.jobs == 0 { num_cpus::get() } else { args.jobs };
|
||||||
|
|
||||||
|
// set up the global thread pool with the requested number of threads
|
||||||
|
rayon::ThreadPoolBuilder::new().num_threads(jobs).build_global().unwrap();
|
||||||
|
trace!("Multithreading enabled, using {} threads", jobs);
|
||||||
|
} else {
|
||||||
|
trace!("Multithreading disabled at runtime");
|
||||||
|
}
|
||||||
|
|
||||||
|
} else { // `multi-threading` feature disabled
|
||||||
|
let use_threads = false;
|
||||||
|
trace!("Multithreading disabled at compile time");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths, use_threads)
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter(
|
.filter(
|
||||||
|result| result.is_err() || !result.as_ref().unwrap().valid,
|
|result| result.is_err() || !result.as_ref().unwrap().valid,
|
||||||
|
@ -101,6 +123,9 @@ fn main() {
|
||||||
for result in &results {
|
for result in &results {
|
||||||
match result {
|
match result {
|
||||||
Ok(r) => {
|
Ok(r) => {
|
||||||
|
// check to see if debug logging is enabled before invoking debug! macro
|
||||||
|
// https://github.com/rust-lang/log/pull/394#issuecomment-630490343
|
||||||
|
if log::max_level() >= log::Level::Debug {
|
||||||
debug!(
|
debug!(
|
||||||
"{:?} is of type {}, should have extension \"{}\"",
|
"{:?} is of type {}, should have extension \"{}\"",
|
||||||
r.file,
|
r.file,
|
||||||
|
@ -108,6 +133,7 @@ fn main() {
|
||||||
r.recommended_extension().unwrap_or_else(|| "???".into())
|
r.recommended_extension().unwrap_or_else(|| "???".into())
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
Err(f) => warn!("{}", f),
|
Err(f) => warn!("{}", f),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -133,6 +133,14 @@ pub struct Parameters {
|
||||||
/// For example, with this option, fif will not rename "image.unknown" to "image.jpg"
|
/// For example, with this option, fif will not rename "image.unknown" to "image.jpg"
|
||||||
#[clap(short = 'I', long)]
|
#[clap(short = 'I', long)]
|
||||||
pub ignore_unknown_exts: bool,
|
pub ignore_unknown_exts: bool,
|
||||||
|
|
||||||
|
#[cfg(feature = "multi-threaded")]
|
||||||
|
/// Number of jobs (threads) to use when scanning results.
|
||||||
|
/// The default behaviour is to use one thread per CPU thread. This behaviour can be manually requested by setting
|
||||||
|
/// `-j 0`. Using `-j 1` will disable multi-threading behaviour, as if you had compiled fif with the multi-threading
|
||||||
|
/// feature disabled. Setting more jobs than you have CPU threads is not recommended.
|
||||||
|
#[clap(short = 'j', long, default_value = "0")]
|
||||||
|
pub jobs: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lowercase_exts(exts: &str) -> Result<(), String> {
|
fn lowercase_exts(exts: &str) -> Result<(), String> {
|
||||||
|
|
|
@ -114,8 +114,10 @@ fn simple_directory() {
|
||||||
// there should be one file missing: "ignore.fake_ext"
|
// there should be one file missing: "ignore.fake_ext"
|
||||||
assert_eq!(entries.len(), files.len() - 1);
|
assert_eq!(entries.len(), files.len() - 1);
|
||||||
|
|
||||||
let results = scan_from_walkdir(&entries, false);
|
let use_threads = cfg!(feature = "multi-threaded");
|
||||||
let canonical_results = scan_from_walkdir(&entries, true);
|
|
||||||
|
let results = scan_from_walkdir(&entries, false, use_threads);
|
||||||
|
let canonical_results = scan_from_walkdir(&entries, true, use_threads);
|
||||||
assert_eq!(results.len(), canonical_results.len());
|
assert_eq!(results.len(), canonical_results.len());
|
||||||
|
|
||||||
for (result, canonical_result) in results.iter().zip(canonical_results.iter()) {
|
for (result, canonical_result) in results.iter().zip(canonical_results.iter()) {
|
||||||
|
@ -291,6 +293,8 @@ fn rejects_bad_args() {
|
||||||
vec!["fif", "-X", "pebis"],
|
vec!["fif", "-X", "pebis"],
|
||||||
// `-e` with nothing but commas:
|
// `-e` with nothing but commas:
|
||||||
vec!["fif", "-e", ",,,,,"],
|
vec!["fif", "-e", ",,,,,"],
|
||||||
|
// `-j` with a negative value:
|
||||||
|
vec!["fif", "-j", "-1"],
|
||||||
];
|
];
|
||||||
|
|
||||||
for test in &tests {
|
for test in &tests {
|
||||||
|
|
Loading…
Reference in a new issue