From c4fabbc0f4e4c0315d27fbfbfcab67fb06b9f636 Mon Sep 17 00:00:00 2001 From: Lynnesbian Date: Mon, 4 Oct 2021 20:22:15 +1000 Subject: [PATCH] refactoring, initial work on --fix feature --- Cargo.lock | 2 -- src/files.rs | 21 ++++++++---- src/formats.rs | 73 ++++++++++++++++------------------------- src/main.rs | 82 ++++++++++++++++++++++------------------------- src/parameters.rs | 7 ++++ src/tests/mod.rs | 22 ++++++------- 6 files changed, 98 insertions(+), 109 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 032a9bf..44822b6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,7 +1,5 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 - [[package]] name = "arrayvec" version = "0.5.2" diff --git a/src/files.rs b/src/files.rs index b41e897..569e049 100644 --- a/src/files.rs +++ b/src/files.rs @@ -8,6 +8,7 @@ use std::str::FromStr; use std::sync::RwLock; use cfg_if::cfg_if; +use itertools::{Either, Itertools}; use log::{debug, error}; use mime::Mime; use mime_guess::from_ext; @@ -140,7 +141,7 @@ pub fn scan_from_walkdir( entries: &[DirEntry], canonical_paths: bool, use_threads: bool, -) -> Vec> { +) -> (Vec, Vec) { cfg_if! { if #[cfg(feature = "multi-threaded")] { use rayon::prelude::*; @@ -150,13 +151,17 @@ pub fn scan_from_walkdir( // split the entries into chunks of 32, and iterate over each chunk of entries in a separate thread return entries .par_chunks(CHUNKS) - .flat_map_iter(|chunk| { + .map(|chunk| { chunk .iter() // iter over the chunk, which is a slice of DirEntry structs - .map(|entry| scan_file(entry, canonical_paths)) - .collect::>() // TODO: is there a way to avoid having to collect here? + .partition_map::, Vec<_>, _, _, _>(|entry| match scan_file(entry, canonical_paths) { + Ok(f) => Either::Left(f), + Err(e) => Either::Right(e) + } + ) }) - .collect() + .flatten() + .collect() } } else { // should always be false when multi-threading is disabled at compile time @@ -170,8 +175,10 @@ pub fn scan_from_walkdir( // - fif was compiled without the `multi-threading` feature entries .iter() - .map(|entry: &DirEntry| scan_file(entry, canonical_paths)) - .collect() + .partition_map(|entry: &DirEntry| match scan_file(entry, canonical_paths) { + Ok(f) => Either::Left(f), + Err(e) => Either::Right(e), + }) } /// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of diff --git a/src/formats.rs b/src/formats.rs index b50028f..a992ed0 100644 --- a/src/formats.rs +++ b/src/formats.rs @@ -7,7 +7,6 @@ use std::os::unix::ffi::OsStrExt; use std::path::Path; use cfg_if::cfg_if; -use itertools::{Either, Itertools}; use snailquote::escape; use crate::findings::ScanError; @@ -51,24 +50,6 @@ macro_rules! writablesln { }; } -#[doc(hidden)] -type Entries<'a> = [Result>]; - -/// Splits the given [`Entries`] into [`Vec`]s of [`Findings`] and [`ScanError`]s. [`Findings`] are sorted by whether -/// or not they have a known extension (unknown extensions coming first), and then by their filenames. [`ScanError`]s -/// are sorted such that [`ScanError::File`]s come before [`ScanError::Mime`]s. -#[inline] -fn sort_entries<'a>(entries: &'a Entries) -> (Vec<&'a Findings>, Vec<&'a ScanError<'a>>) { - let (mut findings, mut errors): (Vec<_>, Vec<_>) = entries.iter().partition_map(|entry| match entry { - Ok(f) => Either::Left(f), - Err(e) => Either::Right(e), - }); - - findings.sort_unstable(); - errors.sort_unstable(); - (findings, errors) -} - #[derive(Debug, PartialEq)] pub enum Writable<'a> { String(&'a str), @@ -141,12 +122,10 @@ pub trait FormatSteps { fn no_known_extension(&self, _f: &mut W, _path: &Path) -> io::Result<()>; fn unreadable(&self, _f: &mut W, _path: &Path) -> io::Result<()>; fn unknown_type(&self, _f: &mut W, _path: &Path) -> io::Result<()>; - fn header(&self, _f: &mut W, _entries: &Entries) -> io::Result<()>; - fn footer(&self, _f: &mut W, _entries: &Entries) -> io::Result<()>; - fn write_steps(&self, f: &mut W, entries: &Entries) -> io::Result<()> { - self.header(f, entries)?; - - let (findings, errors) = sort_entries(entries); + fn header(&self, _f: &mut W) -> io::Result<()>; + fn footer(&self, _f: &mut W) -> io::Result<()>; + fn write_steps(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> { + self.header(f)?; for error in errors { match error { @@ -157,8 +136,7 @@ pub trait FormatSteps { } } - if findings.len() != entries.len() { - // if these lengths aren't the same, there was at least one error + if !errors.is_empty() { // add a blank line between the errors and commands smart_write(f, writables![Newline])?; } @@ -171,19 +149,21 @@ pub trait FormatSteps { } } - self.footer(f, entries) + self.footer(f) } } pub trait Format { - fn write_all(&self, f: &mut W, entries: &Entries) -> io::Result<()>; + fn write_all(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()>; } /// Bourne-Shell compatible script. pub struct Shell; impl Format for Shell { - fn write_all(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) } + fn write_all(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> { + self.write_steps(f, findings, errors) + } } impl FormatSteps for Shell { @@ -213,7 +193,7 @@ impl FormatSteps for Shell { smart_write(f, writablesln!["# Failed to detect mime type for ", path]) } - fn header(&self, f: &mut W, _: &Entries) -> io::Result<()> { + fn header(&self, f: &mut W) -> io::Result<()> { smart_write( f, writablesln!["#!/usr/bin/env sh", Newline, "# ", (generated_by().as_str())], @@ -226,9 +206,7 @@ impl FormatSteps for Shell { smart_write(f, writablesln![Newline, "set -e", Newline]) } - fn footer(&self, f: &mut W, _: &Entries) -> io::Result<()> { - smart_write(f, writablesln![Newline, "echo 'Done.'"]) - } + fn footer(&self, f: &mut W) -> io::Result<()> { smart_write(f, writablesln![Newline, "echo 'Done.'"]) } } // PowerShell is a noun, not a type @@ -237,7 +215,9 @@ impl FormatSteps for Shell { pub struct PowerShell; impl Format for PowerShell { - fn write_all(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) } + fn write_all(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> { + self.write_steps(f, findings, errors) + } } impl FormatSteps for PowerShell { @@ -281,7 +261,7 @@ impl FormatSteps for PowerShell { smart_write(f, writablesln!["<# Failed to detect mime type for ", path, " #>"]) } - fn header(&self, f: &mut W, _: &Entries) -> io::Result<()> { + fn header(&self, f: &mut W) -> io::Result<()> { smart_write( f, writablesln!["#!/usr/bin/env pwsh", Newline, "<# ", (generated_by().as_str()), " #>"], @@ -294,14 +274,16 @@ impl FormatSteps for PowerShell { smart_write(f, writables![Newline]) } - fn footer(&self, f: &mut W, _: &Entries) -> io::Result<()> { + fn footer(&self, f: &mut W) -> io::Result<()> { smart_write(f, writablesln![Newline, "Write-Output 'Done!'"]) } } pub struct Text; impl Format for Text { - fn write_all(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) } + fn write_all(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> { + self.write_steps(f, findings, errors) + } } impl FormatSteps for Text { @@ -321,14 +303,15 @@ impl FormatSteps for Text { smart_write(f, writablesln!["Couldn't determine type for ", path]) } - fn header(&self, f: &mut W, _entries: &Entries) -> io::Result<()> { + fn header(&self, f: &mut W) -> io::Result<()> { smart_write(f, writablesln![(generated_by().as_str()), Newline]) } - fn footer(&self, f: &mut W, entries: &Entries) -> io::Result<()> { + fn footer(&self, f: &mut W) -> io::Result<()> { smart_write( f, - writablesln![Newline, "Processed ", (entries.len().to_string().as_str()), " files"], + // writablesln![Newline, "Processed ", (entries.len().to_string().as_str()), " files"], + writablesln![Newline, "Done."], ) } } @@ -338,15 +321,13 @@ pub struct Json; #[cfg(feature = "json")] impl Format for Json { - fn write_all(&self, f: &mut W, entries: &Entries) -> io::Result<()> { + fn write_all(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> { #[derive(serde::Serialize)] struct SerdeEntries<'a> { - errors: &'a Vec<&'a ScanError<'a>>, - findings: &'a Vec<&'a Findings>, + errors: &'a [ScanError<'a>], + findings: &'a [Findings], } - let (findings, errors) = &sort_entries(entries); - let result = serde_json::to_writer_pretty(f, &SerdeEntries { errors, findings }); if let Err(err) = result { diff --git a/src/main.rs b/src/main.rs index 5da5c3b..8da26da 100644 --- a/src/main.rs +++ b/src/main.rs @@ -27,6 +27,7 @@ use fif::formats::Format; use fif::parameters::OutputFormat; use fif::utils::{os_name, CLAP_LONG_VERSION}; use fif::{formats, parameters}; +use itertools::Itertools; use log::{debug, error, info, trace, warn, Level}; #[cfg(test)] @@ -109,58 +110,53 @@ fn main() { } } - let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths, use_threads) - .into_iter() - .filter( - |result| result.is_err() || !result.as_ref().unwrap().valid, - // TODO: find a way to trace! the valid files without doing ↓ - // || if result.as_ref().unwrap().valid { trace!("{:?} ok", result.as_ref().unwrap().file); false } else { true } - ) - .collect(); - + let (findings, errors) = scan_from_walkdir(&entries, args.canonical_paths, use_threads); trace!("Scanning complete"); - for result in &results { - match result { - Ok(r) => { - // check to see if debug logging is enabled before invoking debug! macro - // https://github.com/rust-lang/log/pull/394#issuecomment-630490343 - if log::max_level() >= log::Level::Debug { - debug!( - "{:?} is of type {}, should have extension \"{}\"", - r.file, - r.mime, - r.recommended_extension().unwrap_or_else(|| "???".into()) - ); - } - } - Err(f) => warn!("{}", f), - } - } - - if results.is_empty() { + if findings.is_empty() && errors.is_empty() { info!("All files have valid extensions!"); exit(exitcode::OK); } - let mut buffered_stdout = BufWriter::new(stdout()); + // remove files that already have the correct extension, then sort - first by whether or not they have a + // recommended_extension() (with None before Some(ext)), then by filename + let findings = findings + .into_iter() + .filter(|f| !f.valid) + .sorted_unstable() + .collect_vec(); + // sort errors (File errors before Mime errors), then log a warning for each error + let errors = errors + .into_iter() + .sorted_unstable() + .map(|e| { + warn!("{}", &e); + e + }) + .collect_vec(); - let result = match args.output_format { - OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &results), - OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &results), - #[cfg(feature = "json")] - OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &results), - OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &results), - }; + if args.fix { + } else { + let mut buffered_stdout = BufWriter::new(stdout()); - if result.is_err() { - error!("Failed to write to stdout."); - exit(exitcode::IOERR); - } + let result = match args.output_format { + // i want to simplify this to something like formats::write_all(args.output_format, ...) + OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &findings, &errors), + OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &findings, &errors), + #[cfg(feature = "json")] + OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &findings, &errors), + OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &findings, &errors), + }; - if buffered_stdout.flush().is_err() { - error!("Failed to flush stdout."); - exit(exitcode::IOERR); + if result.is_err() { + error!("Failed to write to stdout."); + exit(exitcode::IOERR); + } + + if buffered_stdout.flush().is_err() { + error!("Failed to flush stdout."); + exit(exitcode::IOERR); + } } debug!("Done"); diff --git a/src/parameters.rs b/src/parameters.rs index f9b96bc..9413c74 100644 --- a/src/parameters.rs +++ b/src/parameters.rs @@ -48,6 +48,13 @@ pub enum OutputFormat { max_term_width = 120 )] pub struct Parameters { + /// Automatically rename files to use the correct extension. + #[clap(long)] + pub fix: bool, + + #[clap(long)] + pub noconfirm: bool, + // NOTE: clap's comma-separated argument parser makes it impossible to specify extensions with commas in their name - // `-e sil\,ly` is treated as ["sil", "ly"] rather than as ["silly"], no matter how i escape the comma (in bash, // anyway). is this really an issue? it does technically exclude some perfectly valid extensions, but i've never seen diff --git a/src/tests/mod.rs b/src/tests/mod.rs index ec9ad07..e8982d5 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -116,14 +116,14 @@ fn simple_directory() { let use_threads = cfg!(feature = "multi-threaded"); - let results = scan_from_walkdir(&entries, false, use_threads); - let canonical_results = scan_from_walkdir(&entries, true, use_threads); + let results = scan_from_walkdir(&entries, false, use_threads).0; + let canonical_results = scan_from_walkdir(&entries, true, use_threads).0; assert_eq!(results.len(), canonical_results.len()); for (result, canonical_result) in results.iter().zip(canonical_results.iter()) { // there should be no IO errors during this test. any IO errors encountered are outside the scope of this test. - let result = result.as_ref().expect("Error while scanning file"); - let canonical_result = canonical_result.as_ref().expect("Error while scanning file"); + // let result = result.as_ref().expect("Error while scanning file"); + // let canonical_result = canonical_result.as_ref().expect("Error while scanning file"); // paths should be canonical assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file); @@ -331,19 +331,19 @@ fn outputs_move_commands() { use std::io::Read; // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file - let entries = vec![Ok(Findings { + let findings = vec![Findings { file: Path::new("misnamed_file.png").to_path_buf(), valid: false, mime: IMAGE_JPEG, - })]; + }]; for format in &["Shell", "PowerShell"] { let mut cursor = std::io::Cursor::new(Vec::new()); let mut contents = std::string::String::new(); match *format { - "Shell" => Shell.write_all(&mut cursor, &entries), - "PowerShell" => PowerShell.write_all(&mut cursor, &entries), + "Shell" => Shell.write_all(&mut cursor, &findings, &[]), + "PowerShell" => PowerShell.write_all(&mut cursor, &findings, &[]), _ => unreachable!(), } .expect("Failed to write to cursor"); @@ -371,17 +371,17 @@ fn test_json() { use crate::formats::Json; // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file - let entries = vec![Ok(Findings { + let findings = vec![Findings { file: Path::new("misnamed_file.png").to_path_buf(), valid: false, mime: IMAGE_JPEG, - })]; + }]; let mut cursor = std::io::Cursor::new(Vec::new()); let mut contents = std::string::String::new(); Json - .write_all(&mut cursor, &entries) + .write_all(&mut cursor, &findings, &[]) .expect("Failed to write to cursor"); cursor.set_position(0);