refactoring, initial work on --fix feature

This commit is contained in:
Lynne Megido 2021-10-04 20:22:15 +10:00
parent 451ea3d5d9
commit c4fabbc0f4
Signed by: lynnesbian
GPG key ID: F0A184B5213D9F90
6 changed files with 98 additions and 109 deletions

2
Cargo.lock generated
View file

@ -1,7 +1,5 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3
[[package]] [[package]]
name = "arrayvec" name = "arrayvec"
version = "0.5.2" version = "0.5.2"

View file

@ -8,6 +8,7 @@ use std::str::FromStr;
use std::sync::RwLock; use std::sync::RwLock;
use cfg_if::cfg_if; use cfg_if::cfg_if;
use itertools::{Either, Itertools};
use log::{debug, error}; use log::{debug, error};
use mime::Mime; use mime::Mime;
use mime_guess::from_ext; use mime_guess::from_ext;
@ -140,7 +141,7 @@ pub fn scan_from_walkdir(
entries: &[DirEntry], entries: &[DirEntry],
canonical_paths: bool, canonical_paths: bool,
use_threads: bool, use_threads: bool,
) -> Vec<Result<Findings, ScanError>> { ) -> (Vec<Findings>, Vec<ScanError>) {
cfg_if! { cfg_if! {
if #[cfg(feature = "multi-threaded")] { if #[cfg(feature = "multi-threaded")] {
use rayon::prelude::*; use rayon::prelude::*;
@ -150,13 +151,17 @@ pub fn scan_from_walkdir(
// split the entries into chunks of 32, and iterate over each chunk of entries in a separate thread // split the entries into chunks of 32, and iterate over each chunk of entries in a separate thread
return entries return entries
.par_chunks(CHUNKS) .par_chunks(CHUNKS)
.flat_map_iter(|chunk| { .map(|chunk| {
chunk chunk
.iter() // iter over the chunk, which is a slice of DirEntry structs .iter() // iter over the chunk, which is a slice of DirEntry structs
.map(|entry| scan_file(entry, canonical_paths)) .partition_map::<Vec<_>, Vec<_>, _, _, _>(|entry| match scan_file(entry, canonical_paths) {
.collect::<Vec<_>>() // TODO: is there a way to avoid having to collect here? Ok(f) => Either::Left(f),
Err(e) => Either::Right(e)
}
)
}) })
.collect() .flatten()
.collect()
} }
} else { } else {
// should always be false when multi-threading is disabled at compile time // should always be false when multi-threading is disabled at compile time
@ -170,8 +175,10 @@ pub fn scan_from_walkdir(
// - fif was compiled without the `multi-threading` feature // - fif was compiled without the `multi-threading` feature
entries entries
.iter() .iter()
.map(|entry: &DirEntry| scan_file(entry, canonical_paths)) .partition_map(|entry: &DirEntry| match scan_file(entry, canonical_paths) {
.collect() Ok(f) => Either::Left(f),
Err(e) => Either::Right(e),
})
} }
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of /// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of

View file

@ -7,7 +7,6 @@ use std::os::unix::ffi::OsStrExt;
use std::path::Path; use std::path::Path;
use cfg_if::cfg_if; use cfg_if::cfg_if;
use itertools::{Either, Itertools};
use snailquote::escape; use snailquote::escape;
use crate::findings::ScanError; use crate::findings::ScanError;
@ -51,24 +50,6 @@ macro_rules! writablesln {
}; };
} }
#[doc(hidden)]
type Entries<'a> = [Result<Findings, ScanError<'a>>];
/// Splits the given [`Entries`] into [`Vec`]s of [`Findings`] and [`ScanError`]s. [`Findings`] are sorted by whether
/// or not they have a known extension (unknown extensions coming first), and then by their filenames. [`ScanError`]s
/// are sorted such that [`ScanError::File`]s come before [`ScanError::Mime`]s.
#[inline]
fn sort_entries<'a>(entries: &'a Entries) -> (Vec<&'a Findings>, Vec<&'a ScanError<'a>>) {
let (mut findings, mut errors): (Vec<_>, Vec<_>) = entries.iter().partition_map(|entry| match entry {
Ok(f) => Either::Left(f),
Err(e) => Either::Right(e),
});
findings.sort_unstable();
errors.sort_unstable();
(findings, errors)
}
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Writable<'a> { pub enum Writable<'a> {
String(&'a str), String(&'a str),
@ -141,12 +122,10 @@ pub trait FormatSteps {
fn no_known_extension<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>; fn no_known_extension<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
fn unreadable<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>; fn unreadable<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
fn unknown_type<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>; fn unknown_type<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
fn header<W: Write>(&self, _f: &mut W, _entries: &Entries) -> io::Result<()>; fn header<W: Write>(&self, _f: &mut W) -> io::Result<()>;
fn footer<W: Write>(&self, _f: &mut W, _entries: &Entries) -> io::Result<()>; fn footer<W: Write>(&self, _f: &mut W) -> io::Result<()>;
fn write_steps<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { fn write_steps<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
self.header(f, entries)?; self.header(f)?;
let (findings, errors) = sort_entries(entries);
for error in errors { for error in errors {
match error { match error {
@ -157,8 +136,7 @@ pub trait FormatSteps {
} }
} }
if findings.len() != entries.len() { if !errors.is_empty() {
// if these lengths aren't the same, there was at least one error
// add a blank line between the errors and commands // add a blank line between the errors and commands
smart_write(f, writables![Newline])?; smart_write(f, writables![Newline])?;
} }
@ -171,19 +149,21 @@ pub trait FormatSteps {
} }
} }
self.footer(f, entries) self.footer(f)
} }
} }
pub trait Format { pub trait Format {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()>; fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()>;
} }
/// Bourne-Shell compatible script. /// Bourne-Shell compatible script.
pub struct Shell; pub struct Shell;
impl Format for Shell { impl Format for Shell {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) } fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
self.write_steps(f, findings, errors)
}
} }
impl FormatSteps for Shell { impl FormatSteps for Shell {
@ -213,7 +193,7 @@ impl FormatSteps for Shell {
smart_write(f, writablesln!["# Failed to detect mime type for ", path]) smart_write(f, writablesln!["# Failed to detect mime type for ", path])
} }
fn header<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> { fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write( smart_write(
f, f,
writablesln!["#!/usr/bin/env sh", Newline, "# ", (generated_by().as_str())], writablesln!["#!/usr/bin/env sh", Newline, "# ", (generated_by().as_str())],
@ -226,9 +206,7 @@ impl FormatSteps for Shell {
smart_write(f, writablesln![Newline, "set -e", Newline]) smart_write(f, writablesln![Newline, "set -e", Newline])
} }
fn footer<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> { fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> { smart_write(f, writablesln![Newline, "echo 'Done.'"]) }
smart_write(f, writablesln![Newline, "echo 'Done.'"])
}
} }
// PowerShell is a noun, not a type // PowerShell is a noun, not a type
@ -237,7 +215,9 @@ impl FormatSteps for Shell {
pub struct PowerShell; pub struct PowerShell;
impl Format for PowerShell { impl Format for PowerShell {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) } fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
self.write_steps(f, findings, errors)
}
} }
impl FormatSteps for PowerShell { impl FormatSteps for PowerShell {
@ -281,7 +261,7 @@ impl FormatSteps for PowerShell {
smart_write(f, writablesln!["<# Failed to detect mime type for ", path, " #>"]) smart_write(f, writablesln!["<# Failed to detect mime type for ", path, " #>"])
} }
fn header<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> { fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write( smart_write(
f, f,
writablesln!["#!/usr/bin/env pwsh", Newline, "<# ", (generated_by().as_str()), " #>"], writablesln!["#!/usr/bin/env pwsh", Newline, "<# ", (generated_by().as_str()), " #>"],
@ -294,14 +274,16 @@ impl FormatSteps for PowerShell {
smart_write(f, writables![Newline]) smart_write(f, writables![Newline])
} }
fn footer<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> { fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(f, writablesln![Newline, "Write-Output 'Done!'"]) smart_write(f, writablesln![Newline, "Write-Output 'Done!'"])
} }
} }
pub struct Text; pub struct Text;
impl Format for Text { impl Format for Text {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) } fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
self.write_steps(f, findings, errors)
}
} }
impl FormatSteps for Text { impl FormatSteps for Text {
@ -321,14 +303,15 @@ impl FormatSteps for Text {
smart_write(f, writablesln!["Couldn't determine type for ", path]) smart_write(f, writablesln!["Couldn't determine type for ", path])
} }
fn header<W: Write>(&self, f: &mut W, _entries: &Entries) -> io::Result<()> { fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(f, writablesln![(generated_by().as_str()), Newline]) smart_write(f, writablesln![(generated_by().as_str()), Newline])
} }
fn footer<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write( smart_write(
f, f,
writablesln![Newline, "Processed ", (entries.len().to_string().as_str()), " files"], // writablesln![Newline, "Processed ", (entries.len().to_string().as_str()), " files"],
writablesln![Newline, "Done."],
) )
} }
} }
@ -338,15 +321,13 @@ pub struct Json;
#[cfg(feature = "json")] #[cfg(feature = "json")]
impl Format for Json { impl Format for Json {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
#[derive(serde::Serialize)] #[derive(serde::Serialize)]
struct SerdeEntries<'a> { struct SerdeEntries<'a> {
errors: &'a Vec<&'a ScanError<'a>>, errors: &'a [ScanError<'a>],
findings: &'a Vec<&'a Findings>, findings: &'a [Findings],
} }
let (findings, errors) = &sort_entries(entries);
let result = serde_json::to_writer_pretty(f, &SerdeEntries { errors, findings }); let result = serde_json::to_writer_pretty(f, &SerdeEntries { errors, findings });
if let Err(err) = result { if let Err(err) = result {

View file

@ -27,6 +27,7 @@ use fif::formats::Format;
use fif::parameters::OutputFormat; use fif::parameters::OutputFormat;
use fif::utils::{os_name, CLAP_LONG_VERSION}; use fif::utils::{os_name, CLAP_LONG_VERSION};
use fif::{formats, parameters}; use fif::{formats, parameters};
use itertools::Itertools;
use log::{debug, error, info, trace, warn, Level}; use log::{debug, error, info, trace, warn, Level};
#[cfg(test)] #[cfg(test)]
@ -109,58 +110,53 @@ fn main() {
} }
} }
let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths, use_threads) let (findings, errors) = scan_from_walkdir(&entries, args.canonical_paths, use_threads);
.into_iter()
.filter(
|result| result.is_err() || !result.as_ref().unwrap().valid,
// TODO: find a way to trace! the valid files without doing ↓
// || if result.as_ref().unwrap().valid { trace!("{:?} ok", result.as_ref().unwrap().file); false } else { true }
)
.collect();
trace!("Scanning complete"); trace!("Scanning complete");
for result in &results { if findings.is_empty() && errors.is_empty() {
match result {
Ok(r) => {
// check to see if debug logging is enabled before invoking debug! macro
// https://github.com/rust-lang/log/pull/394#issuecomment-630490343
if log::max_level() >= log::Level::Debug {
debug!(
"{:?} is of type {}, should have extension \"{}\"",
r.file,
r.mime,
r.recommended_extension().unwrap_or_else(|| "???".into())
);
}
}
Err(f) => warn!("{}", f),
}
}
if results.is_empty() {
info!("All files have valid extensions!"); info!("All files have valid extensions!");
exit(exitcode::OK); exit(exitcode::OK);
} }
let mut buffered_stdout = BufWriter::new(stdout()); // remove files that already have the correct extension, then sort - first by whether or not they have a
// recommended_extension() (with None before Some(ext)), then by filename
let findings = findings
.into_iter()
.filter(|f| !f.valid)
.sorted_unstable()
.collect_vec();
// sort errors (File errors before Mime errors), then log a warning for each error
let errors = errors
.into_iter()
.sorted_unstable()
.map(|e| {
warn!("{}", &e);
e
})
.collect_vec();
let result = match args.output_format { if args.fix {
OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &results), } else {
OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &results), let mut buffered_stdout = BufWriter::new(stdout());
#[cfg(feature = "json")]
OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &results),
OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &results),
};
if result.is_err() { let result = match args.output_format {
error!("Failed to write to stdout."); // i want to simplify this to something like formats::write_all(args.output_format, ...)
exit(exitcode::IOERR); OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &findings, &errors),
} OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &findings, &errors),
#[cfg(feature = "json")]
OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &findings, &errors),
OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &findings, &errors),
};
if buffered_stdout.flush().is_err() { if result.is_err() {
error!("Failed to flush stdout."); error!("Failed to write to stdout.");
exit(exitcode::IOERR); exit(exitcode::IOERR);
}
if buffered_stdout.flush().is_err() {
error!("Failed to flush stdout.");
exit(exitcode::IOERR);
}
} }
debug!("Done"); debug!("Done");

View file

@ -48,6 +48,13 @@ pub enum OutputFormat {
max_term_width = 120 max_term_width = 120
)] )]
pub struct Parameters { pub struct Parameters {
/// Automatically rename files to use the correct extension.
#[clap(long)]
pub fix: bool,
#[clap(long)]
pub noconfirm: bool,
// NOTE: clap's comma-separated argument parser makes it impossible to specify extensions with commas in their name - // NOTE: clap's comma-separated argument parser makes it impossible to specify extensions with commas in their name -
// `-e sil\,ly` is treated as ["sil", "ly"] rather than as ["silly"], no matter how i escape the comma (in bash, // `-e sil\,ly` is treated as ["sil", "ly"] rather than as ["silly"], no matter how i escape the comma (in bash,
// anyway). is this really an issue? it does technically exclude some perfectly valid extensions, but i've never seen // anyway). is this really an issue? it does technically exclude some perfectly valid extensions, but i've never seen

View file

@ -116,14 +116,14 @@ fn simple_directory() {
let use_threads = cfg!(feature = "multi-threaded"); let use_threads = cfg!(feature = "multi-threaded");
let results = scan_from_walkdir(&entries, false, use_threads); let results = scan_from_walkdir(&entries, false, use_threads).0;
let canonical_results = scan_from_walkdir(&entries, true, use_threads); let canonical_results = scan_from_walkdir(&entries, true, use_threads).0;
assert_eq!(results.len(), canonical_results.len()); assert_eq!(results.len(), canonical_results.len());
for (result, canonical_result) in results.iter().zip(canonical_results.iter()) { for (result, canonical_result) in results.iter().zip(canonical_results.iter()) {
// there should be no IO errors during this test. any IO errors encountered are outside the scope of this test. // there should be no IO errors during this test. any IO errors encountered are outside the scope of this test.
let result = result.as_ref().expect("Error while scanning file"); // let result = result.as_ref().expect("Error while scanning file");
let canonical_result = canonical_result.as_ref().expect("Error while scanning file"); // let canonical_result = canonical_result.as_ref().expect("Error while scanning file");
// paths should be canonical // paths should be canonical
assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file); assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file);
@ -331,19 +331,19 @@ fn outputs_move_commands() {
use std::io::Read; use std::io::Read;
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
let entries = vec![Ok(Findings { let findings = vec![Findings {
file: Path::new("misnamed_file.png").to_path_buf(), file: Path::new("misnamed_file.png").to_path_buf(),
valid: false, valid: false,
mime: IMAGE_JPEG, mime: IMAGE_JPEG,
})]; }];
for format in &["Shell", "PowerShell"] { for format in &["Shell", "PowerShell"] {
let mut cursor = std::io::Cursor::new(Vec::new()); let mut cursor = std::io::Cursor::new(Vec::new());
let mut contents = std::string::String::new(); let mut contents = std::string::String::new();
match *format { match *format {
"Shell" => Shell.write_all(&mut cursor, &entries), "Shell" => Shell.write_all(&mut cursor, &findings, &[]),
"PowerShell" => PowerShell.write_all(&mut cursor, &entries), "PowerShell" => PowerShell.write_all(&mut cursor, &findings, &[]),
_ => unreachable!(), _ => unreachable!(),
} }
.expect("Failed to write to cursor"); .expect("Failed to write to cursor");
@ -371,17 +371,17 @@ fn test_json() {
use crate::formats::Json; use crate::formats::Json;
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
let entries = vec![Ok(Findings { let findings = vec![Findings {
file: Path::new("misnamed_file.png").to_path_buf(), file: Path::new("misnamed_file.png").to_path_buf(),
valid: false, valid: false,
mime: IMAGE_JPEG, mime: IMAGE_JPEG,
})]; }];
let mut cursor = std::io::Cursor::new(Vec::new()); let mut cursor = std::io::Cursor::new(Vec::new());
let mut contents = std::string::String::new(); let mut contents = std::string::String::new();
Json Json
.write_all(&mut cursor, &entries) .write_all(&mut cursor, &findings, &[])
.expect("Failed to write to cursor"); .expect("Failed to write to cursor");
cursor.set_position(0); cursor.set_position(0);