refactoring, initial work on --fix feature

This commit is contained in:
Lynne Megido 2021-10-04 20:22:15 +10:00
parent 451ea3d5d9
commit c4fabbc0f4
Signed by: lynnesbian
GPG Key ID: F0A184B5213D9F90
6 changed files with 98 additions and 109 deletions

2
Cargo.lock generated
View File

@ -1,7 +1,5 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "arrayvec"
version = "0.5.2"

View File

@ -8,6 +8,7 @@ use std::str::FromStr;
use std::sync::RwLock;
use cfg_if::cfg_if;
use itertools::{Either, Itertools};
use log::{debug, error};
use mime::Mime;
use mime_guess::from_ext;
@ -140,7 +141,7 @@ pub fn scan_from_walkdir(
entries: &[DirEntry],
canonical_paths: bool,
use_threads: bool,
) -> Vec<Result<Findings, ScanError>> {
) -> (Vec<Findings>, Vec<ScanError>) {
cfg_if! {
if #[cfg(feature = "multi-threaded")] {
use rayon::prelude::*;
@ -150,12 +151,16 @@ pub fn scan_from_walkdir(
// split the entries into chunks of 32, and iterate over each chunk of entries in a separate thread
return entries
.par_chunks(CHUNKS)
.flat_map_iter(|chunk| {
.map(|chunk| {
chunk
.iter() // iter over the chunk, which is a slice of DirEntry structs
.map(|entry| scan_file(entry, canonical_paths))
.collect::<Vec<_>>() // TODO: is there a way to avoid having to collect here?
.partition_map::<Vec<_>, Vec<_>, _, _, _>(|entry| match scan_file(entry, canonical_paths) {
Ok(f) => Either::Left(f),
Err(e) => Either::Right(e)
}
)
})
.flatten()
.collect()
}
} else {
@ -170,8 +175,10 @@ pub fn scan_from_walkdir(
// - fif was compiled without the `multi-threaded` feature
entries
.iter()
.map(|entry: &DirEntry| scan_file(entry, canonical_paths))
.collect()
.partition_map(|entry: &DirEntry| match scan_file(entry, canonical_paths) {
Ok(f) => Either::Left(f),
Err(e) => Either::Right(e),
})
}
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of

View File

@ -7,7 +7,6 @@ use std::os::unix::ffi::OsStrExt;
use std::path::Path;
use cfg_if::cfg_if;
use itertools::{Either, Itertools};
use snailquote::escape;
use crate::findings::ScanError;
@ -51,24 +50,6 @@ macro_rules! writablesln {
};
}
#[doc(hidden)]
type Entries<'a> = [Result<Findings, ScanError<'a>>];
/// Splits the given [`Entries`] into [`Vec`]s of [`Findings`] and [`ScanError`]s. [`Findings`] are sorted by whether
/// or not they have a known extension (unknown extensions coming first), and then by their filenames. [`ScanError`]s
/// are sorted such that [`ScanError::File`]s come before [`ScanError::Mime`]s.
#[inline]
fn sort_entries<'a>(entries: &'a Entries) -> (Vec<&'a Findings>, Vec<&'a ScanError<'a>>) {
let (mut findings, mut errors): (Vec<_>, Vec<_>) = entries.iter().partition_map(|entry| match entry {
Ok(f) => Either::Left(f),
Err(e) => Either::Right(e),
});
findings.sort_unstable();
errors.sort_unstable();
(findings, errors)
}
#[derive(Debug, PartialEq)]
pub enum Writable<'a> {
String(&'a str),
@ -141,12 +122,10 @@ pub trait FormatSteps {
fn no_known_extension<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
fn unreadable<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
fn unknown_type<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
fn header<W: Write>(&self, _f: &mut W, _entries: &Entries) -> io::Result<()>;
fn footer<W: Write>(&self, _f: &mut W, _entries: &Entries) -> io::Result<()>;
fn write_steps<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
self.header(f, entries)?;
let (findings, errors) = sort_entries(entries);
fn header<W: Write>(&self, _f: &mut W) -> io::Result<()>;
fn footer<W: Write>(&self, _f: &mut W) -> io::Result<()>;
fn write_steps<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
self.header(f)?;
for error in errors {
match error {
@ -157,8 +136,7 @@ pub trait FormatSteps {
}
}
if findings.len() != entries.len() {
// if these lengths aren't the same, there was at least one error
if !errors.is_empty() {
// add a blank line between the errors and commands
smart_write(f, writables![Newline])?;
}
@ -171,19 +149,21 @@ pub trait FormatSteps {
}
}
self.footer(f, entries)
self.footer(f)
}
}
pub trait Format {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()>;
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()>;
}
/// Bourne-Shell compatible script.
pub struct Shell;
impl Format for Shell {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) }
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
self.write_steps(f, findings, errors)
}
}
impl FormatSteps for Shell {
@ -213,7 +193,7 @@ impl FormatSteps for Shell {
smart_write(f, writablesln!["# Failed to detect mime type for ", path])
}
fn header<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(
f,
writablesln!["#!/usr/bin/env sh", Newline, "# ", (generated_by().as_str())],
@ -226,9 +206,7 @@ impl FormatSteps for Shell {
smart_write(f, writablesln![Newline, "set -e", Newline])
}
fn footer<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
smart_write(f, writablesln![Newline, "echo 'Done.'"])
}
fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> { smart_write(f, writablesln![Newline, "echo 'Done.'"]) }
}
// PowerShell is a noun, not a type
@ -237,7 +215,9 @@ impl FormatSteps for Shell {
pub struct PowerShell;
impl Format for PowerShell {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) }
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
self.write_steps(f, findings, errors)
}
}
impl FormatSteps for PowerShell {
@ -281,7 +261,7 @@ impl FormatSteps for PowerShell {
smart_write(f, writablesln!["<# Failed to detect mime type for ", path, " #>"])
}
fn header<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(
f,
writablesln!["#!/usr/bin/env pwsh", Newline, "<# ", (generated_by().as_str()), " #>"],
@ -294,14 +274,16 @@ impl FormatSteps for PowerShell {
smart_write(f, writables![Newline])
}
fn footer<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(f, writablesln![Newline, "Write-Output 'Done!'"])
}
}
pub struct Text;
impl Format for Text {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) }
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
self.write_steps(f, findings, errors)
}
}
impl FormatSteps for Text {
@ -321,14 +303,15 @@ impl FormatSteps for Text {
smart_write(f, writablesln!["Couldn't determine type for ", path])
}
fn header<W: Write>(&self, f: &mut W, _entries: &Entries) -> io::Result<()> {
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(f, writablesln![(generated_by().as_str()), Newline])
}
fn footer<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(
f,
writablesln![Newline, "Processed ", (entries.len().to_string().as_str()), " files"],
// writablesln![Newline, "Processed ", (entries.len().to_string().as_str()), " files"],
writablesln![Newline, "Done."],
)
}
}
@ -338,15 +321,13 @@ pub struct Json;
#[cfg(feature = "json")]
impl Format for Json {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
#[derive(serde::Serialize)]
struct SerdeEntries<'a> {
errors: &'a Vec<&'a ScanError<'a>>,
findings: &'a Vec<&'a Findings>,
errors: &'a [ScanError<'a>],
findings: &'a [Findings],
}
let (findings, errors) = &sort_entries(entries);
let result = serde_json::to_writer_pretty(f, &SerdeEntries { errors, findings });
if let Err(err) = result {

View File

@ -27,6 +27,7 @@ use fif::formats::Format;
use fif::parameters::OutputFormat;
use fif::utils::{os_name, CLAP_LONG_VERSION};
use fif::{formats, parameters};
use itertools::Itertools;
use log::{debug, error, info, trace, warn, Level};
#[cfg(test)]
@ -109,48 +110,42 @@ fn main() {
}
}
let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths, use_threads)
.into_iter()
.filter(
|result| result.is_err() || !result.as_ref().unwrap().valid,
// TODO: find a way to trace! the valid files without doing ↓
// || if result.as_ref().unwrap().valid { trace!("{:?} ok", result.as_ref().unwrap().file); false } else { true }
)
.collect();
let (findings, errors) = scan_from_walkdir(&entries, args.canonical_paths, use_threads);
trace!("Scanning complete");
for result in &results {
match result {
Ok(r) => {
// check to see if debug logging is enabled before invoking debug! macro
// https://github.com/rust-lang/log/pull/394#issuecomment-630490343
if log::max_level() >= log::Level::Debug {
debug!(
"{:?} is of type {}, should have extension \"{}\"",
r.file,
r.mime,
r.recommended_extension().unwrap_or_else(|| "???".into())
);
}
}
Err(f) => warn!("{}", f),
}
}
if results.is_empty() {
if findings.is_empty() && errors.is_empty() {
info!("All files have valid extensions!");
exit(exitcode::OK);
}
// remove files that already have the correct extension, then sort - first by whether or not they have a
// recommended_extension() (with None before Some(ext)), then by filename
let findings = findings
.into_iter()
.filter(|f| !f.valid)
.sorted_unstable()
.collect_vec();
// sort errors (File errors before Mime errors), then log a warning for each error
let errors = errors
.into_iter()
.sorted_unstable()
.map(|e| {
warn!("{}", &e);
e
})
.collect_vec();
if args.fix {
} else {
let mut buffered_stdout = BufWriter::new(stdout());
let result = match args.output_format {
OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &results),
OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &results),
// i want to simplify this to something like formats::write_all(args.output_format, ...)
OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &findings, &errors),
OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &findings, &errors),
#[cfg(feature = "json")]
OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &results),
OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &results),
OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &findings, &errors),
OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &findings, &errors),
};
if result.is_err() {
@ -162,6 +157,7 @@ fn main() {
error!("Failed to flush stdout.");
exit(exitcode::IOERR);
}
}
debug!("Done");
}

View File

@ -48,6 +48,13 @@ pub enum OutputFormat {
max_term_width = 120
)]
pub struct Parameters {
/// Automatically rename files to use the correct extension.
#[clap(long)]
pub fix: bool,
#[clap(long)]
pub noconfirm: bool,
// NOTE: clap's comma-separated argument parser makes it impossible to specify extensions with commas in their name -
// `-e sil\,ly` is treated as ["sil", "ly"] rather than as ["silly"], no matter how i escape the comma (in bash,
// anyway). is this really an issue? it does technically exclude some perfectly valid extensions, but i've never seen

View File

@ -116,14 +116,14 @@ fn simple_directory() {
let use_threads = cfg!(feature = "multi-threaded");
let results = scan_from_walkdir(&entries, false, use_threads);
let canonical_results = scan_from_walkdir(&entries, true, use_threads);
let results = scan_from_walkdir(&entries, false, use_threads).0;
let canonical_results = scan_from_walkdir(&entries, true, use_threads).0;
assert_eq!(results.len(), canonical_results.len());
for (result, canonical_result) in results.iter().zip(canonical_results.iter()) {
// there should be no IO errors during this test. any IO errors encountered are outside the scope of this test.
let result = result.as_ref().expect("Error while scanning file");
let canonical_result = canonical_result.as_ref().expect("Error while scanning file");
// let result = result.as_ref().expect("Error while scanning file");
// let canonical_result = canonical_result.as_ref().expect("Error while scanning file");
// paths should be canonical
assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file);
@ -331,19 +331,19 @@ fn outputs_move_commands() {
use std::io::Read;
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
let entries = vec![Ok(Findings {
let findings = vec![Findings {
file: Path::new("misnamed_file.png").to_path_buf(),
valid: false,
mime: IMAGE_JPEG,
})];
}];
for format in &["Shell", "PowerShell"] {
let mut cursor = std::io::Cursor::new(Vec::new());
let mut contents = std::string::String::new();
match *format {
"Shell" => Shell.write_all(&mut cursor, &entries),
"PowerShell" => PowerShell.write_all(&mut cursor, &entries),
"Shell" => Shell.write_all(&mut cursor, &findings, &[]),
"PowerShell" => PowerShell.write_all(&mut cursor, &findings, &[]),
_ => unreachable!(),
}
.expect("Failed to write to cursor");
@ -371,17 +371,17 @@ fn test_json() {
use crate::formats::Json;
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
let entries = vec![Ok(Findings {
let findings = vec![Findings {
file: Path::new("misnamed_file.png").to_path_buf(),
valid: false,
mime: IMAGE_JPEG,
})];
}];
let mut cursor = std::io::Cursor::new(Vec::new());
let mut contents = std::string::String::new();
Json
.write_all(&mut cursor, &entries)
.write_all(&mut cursor, &findings, &[])
.expect("Failed to write to cursor");
cursor.set_position(0);