From 3b731a7c6161a3c70507029598d4809593f634c3 Mon Sep 17 00:00:00 2001 From: Lynne Date: Fri, 18 Jun 2021 15:17:30 +1000 Subject: [PATCH] added --canonical-paths flag Findings now takes a PathBuf instead of a reference to a Path but there's no noticeable performance change --- CHANGELOG.md | 5 +++++ Cargo.lock | 16 ++++++++-------- src/findings.rs | 10 +++++----- src/formats.rs | 8 ++++---- src/main.rs | 21 +++++++++++++++------ src/parameters.rs | 8 ++++++++ src/tests/mod.rs | 25 +++++++++++++++++-------- 7 files changed, 62 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2de9168..3d0e221 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ Dates are given in YYYY-MM-DD format. ## v0.3 +### v0.3.3 (2021-mm-dd) +#### Features +- Added `--canonical-paths` flag for outputting canonical paths in output - for example, + `mv /home/lynne/file.jpg /home/lynne/file.mp3` instead of the default `mv file.jpg file.mp3` + ### v0.3.2 (2021-06-14) #### Bugfixes - Fixed PowerShell output regression introduced in v0.2.13, whoops diff --git a/Cargo.lock b/Cargo.lock index 02011fb..b18b744 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -429,9 +429,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" +checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" dependencies = [ "libc", "rand_chacha", @@ -451,18 +451,18 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" dependencies = [ "getrandom", ] [[package]] name = "rand_hc" -version = "0.3.0" +version = "0.3.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" +checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" dependencies = [ "rand_core", ] @@ -494,9 +494,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc" +checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee" dependencies = [ "bitflags", ] diff --git a/src/findings.rs b/src/findings.rs index 4c9d8f8..de10c9e 100644 --- a/src/findings.rs +++ b/src/findings.rs @@ -1,4 +1,4 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; use mime_guess::Mime; @@ -11,9 +11,9 @@ use std::fmt::{Display, Formatter}; /// Information about a scanned file. #[derive(Ord, PartialOrd, Eq, PartialEq)] -pub struct Findings<'a> { +pub struct Findings { /// The location of the scanned file. - pub file: &'a Path, + pub file: PathBuf, /// Whether or not the file's extension is valid for its mimetype. pub valid: bool, /// The file's mimetype. @@ -21,7 +21,7 @@ pub struct Findings<'a> { } #[cfg(feature = "json")] -impl<'a> serde::Serialize for Findings<'a> { +impl serde::Serialize for Findings { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -36,7 +36,7 @@ impl<'a> serde::Serialize for Findings<'a> { } } -impl<'a> Findings<'a> { +impl Findings { pub fn recommended_extension(&self) -> Option { mime_extension_lookup(self.mime.essence_str().into()).map(|extensions| extensions[0].clone()) } diff --git a/src/formats.rs b/src/formats.rs index 13429f4..4b41827 100644 --- a/src/formats.rs +++ b/src/formats.rs @@ -51,7 +51,7 @@ macro_rules! 
writablesln { } #[doc(hidden)] -type Entries<'a> = [Result, ScanError<'a>>]; +type Entries<'a> = [Result>]; #[derive(Debug, PartialEq)] pub enum Writable<'a> { @@ -169,9 +169,9 @@ pub trait Format { for finding in findings { if let Some(ext) = finding.recommended_extension() { - self.rename(f, finding.file, &finding.file.with_extension(ext.as_str()))?; + self.rename(f, finding.file.as_path(), &finding.file.with_extension(ext.as_str()))?; } else { - self.no_known_extension(f, finding.file)?; + self.no_known_extension(f, finding.file.as_path())?; } } @@ -322,7 +322,7 @@ impl Format for Json { #[derive(serde::Serialize)] struct SerdeEntries<'a> { errors: &'a Vec<&'a ScanError<'a>>, - findings: &'a Vec<&'a Findings<'a>>, + findings: &'a Vec<&'a Findings>, } let result = serde_json::to_writer_pretty( diff --git a/src/main.rs b/src/main.rs index 908a202..56f9d78 100644 --- a/src/main.rs +++ b/src/main.rs @@ -112,7 +112,7 @@ fn main() { trace!("Found {} items to check", entries.len()); - let results: Vec<_> = scan_from_walkdir(&entries) + let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths) .into_iter() .filter( |result| result.is_err() || !result.as_ref().unwrap().valid, @@ -236,7 +236,7 @@ fn extension_from_path(path: &Path) -> Option<&OsStr> { path.extension() } /// In the event of an IO error, the returned [`ScanError`] will be of type [`ScanError::File`]. Otherwise, a /// [`ScanError::Mime`] will be returned, meaning that the file was scanned successfully, but a mimetype could not be /// determined. 
-fn scan_file(entry: &DirEntry) -> Result { +fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result { // try to determine mimetype for this entry let result = inspectors::mime_type(MIMEDB.get().unwrap(), entry.path()); @@ -265,15 +265,24 @@ fn scan_file(entry: &DirEntry) -> Result { Some(_) | None => false, }; + let path = if canonical_paths { + match std::fs::canonicalize(entry.path()) { + Ok(path) => path, + Err(_) => return Err(ScanError::File(entry.path())) + } + } else { + entry.path().to_path_buf() // :c + }; + Ok(Findings { - file: entry.path(), + file: path, valid, mime: result, }) } /// Takes a slice of [`DirEntry`]s and calls [`scan_file`] on each one, returning the results in a vector. -fn scan_from_walkdir(entries: &[DirEntry]) -> Vec> { +fn scan_from_walkdir(entries: &[DirEntry], canonical_paths: bool) -> Vec> { cfg_if! { if #[cfg(feature = "multi-threaded")] { use rayon::prelude::*; @@ -284,12 +293,12 @@ fn scan_from_walkdir(entries: &[DirEntry]) -> Vec> { .flat_map(|chunk| { chunk .iter() // iter over the chunk, which is a slice of DirEntry structs - .map(|entry| scan_file(entry)) + .map(|entry| scan_file(entry, canonical_paths)) .collect::>() }) .collect() } else { - entries.iter().map(|entry: &DirEntry| scan_file(entry)).collect() + entries.iter().map(|entry: &DirEntry| scan_file(entry, canonical_paths)).collect() } } } diff --git a/src/parameters.rs b/src/parameters.rs index 929f59c..d4cb06c 100644 --- a/src/parameters.rs +++ b/src/parameters.rs @@ -33,6 +33,7 @@ pub enum OutputFormat { // TODO: convert this to macro style?: https://docs.rs/clap/3.0.0-beta.2/clap/index.html#using-macros #[derive(Clap, Debug)] +#[allow(clippy::struct_excessive_bools)] #[clap( version = clap_version(), long_version = clap_long_version(), @@ -116,6 +117,13 @@ pub struct Parameters { #[clap(short, long, parse(from_occurrences), group = "verbosity")] pub quiet: u8, + /// Use canonical (absolute) paths in output. 
+ /// A canonical path is the "one true path" to a given file, and is always an absolute path. While a file may have + /// many absolute paths (for example, on Windows, '\\?\C:\file.txt' and 'C:\file.txt' are both absolute paths to the + /// same file), it has only one canonical path. This does not affect logged output. + #[clap(long)] + pub canonical_paths: bool, + /// The directory to process. #[clap(name = "DIR", default_value = ".", parse(from_os_str))] pub dir: PathBuf, diff --git a/src/tests/mod.rs b/src/tests/mod.rs index cb0275d..895c044 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -87,8 +87,9 @@ fn recommend_ext() { fn simple_directory() { use crate::parameters::ScanOpts; use std::borrow::Borrow; - use std::fs::File; + use std::fs::{File, canonicalize}; use std::io::Write; + use std::env::set_current_dir; use tempfile::tempdir; // set of files to scan. all but the last files have magic numbers corresponding to their extension, except for @@ -102,6 +103,7 @@ fn simple_directory() { files.insert("wrong.jpg", PNG_BYTES); let dir = tempdir().expect("Failed to create temporary directory."); + set_current_dir(dir.path()).expect("Failed to change directory."); for (name, bytes) in &files { let mut file = File::create(dir.path().join(name)).expect(&*format!("Failed to create file: {}", name)); @@ -125,15 +127,22 @@ fn simple_directory() { // initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present. crate::init_db(); - let results = scan_from_walkdir(&entries); - for result in results { + let results = scan_from_walkdir(&entries, false); + let canonical_results = scan_from_walkdir(&entries, true); + assert_eq!(results.len(), canonical_results.len()); + + for (result, canonical_result) in results.iter().zip(canonical_results.iter()) { // there should be no IO errors during this test. any IO errors encountered are outside the scope of this test. 
- let result = result.expect("Error while scanning file"); + let result = result.as_ref().expect("Error while scanning file"); + let canonical_result = canonical_result.as_ref().expect("Error while scanning file"); + + // paths should be canonical + assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file); if !result.valid { // the only invalid file detected should be "wrong.jpg", which is a misnamed png file // 1. ensure detected extension is "jpg" - assert_eq!(extension_from_path(&*result.file).unwrap(), OsStr::new("jpg")); + assert_eq!(extension_from_path(result.file.as_path()).unwrap(), OsStr::new("jpg")); // 2. ensure detected mime type is IMAGE_PNG assert_eq!(result.mime, IMAGE_PNG); // 3. ensure the recommended extension for "wrong.jpg" is "png" @@ -151,7 +160,7 @@ fn simple_directory() { // make sure the guessed mimetype is correct based on the extension of the scanned file // because we already know that the extensions match the mimetype (as we created these files ourselves earlier in // the test), all files with the "jpg" extension should be IMAGE_JPEGs, etc. - let ext = extension_from_path(result.file); + let ext = extension_from_path(result.file.as_path()); assert!(ext.is_some()); assert_eq!( result.mime, @@ -333,7 +342,7 @@ fn outputs_move_commands() { // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file let entries = vec![Ok(Findings { - file: Path::new("misnamed_file.png"), + file: Path::new("misnamed_file.png").to_path_buf(), valid: false, mime: IMAGE_JPEG, })]; @@ -372,7 +381,7 @@ fn test_json() { use std::io::Read; // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file let entries = vec![Ok(Findings { - file: Path::new("misnamed_file.png"), + file: Path::new("misnamed_file.png").to_path_buf(), valid: false, mime: IMAGE_JPEG, })];