2021-10-04 20:22:15 +10:00 · 2021-10-04 20:22:15 +10:00 · c4fabbc0f4
commit c4fabbc0f4
parent 451ea3d5d9
6 changed files with 98 additions and 109 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1,7 +1,5 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 3
-
 [[package]]
 name = "arrayvec"
 version = "0.5.2"
--- a/src/files.rs
+++ b/src/files.rs
@ -8,6 +8,7 @@ use std::str::FromStr;
 use std::sync::RwLock;

 use cfg_if::cfg_if;
+use itertools::{Either, Itertools};
 use log::{debug, error};
 use mime::Mime;
 use mime_guess::from_ext;
@ -140,7 +141,7 @@ pub fn scan_from_walkdir(
 	entries: &[DirEntry],
 	canonical_paths: bool,
 	use_threads: bool,
-) -> Vec<Result<Findings, ScanError>> {
+) -> (Vec<Findings>, Vec<ScanError>) {
 	cfg_if! {
 		if #[cfg(feature = "multi-threaded")] {
 			use rayon::prelude::*;
@ -150,13 +151,17 @@ pub fn scan_from_walkdir(
 				// split the entries into chunks of 32, and iterate over each chunk of entries in a separate thread
 				return entries
 					.par_chunks(CHUNKS)
-					.flat_map_iter(|chunk| {
+					.map(|chunk| {
 						chunk
 							.iter() // iter over the chunk, which is a slice of DirEntry structs
-							.map(|entry| scan_file(entry, canonical_paths))
-							.collect::<Vec<_>>() // TODO: is there a way to avoid having to collect here?
+							.partition_map::<Vec<_>, Vec<_>, _, _, _>(|entry| match scan_file(entry, canonical_paths) {
+								Ok(f) => Either::Left(f),
+								Err(e) => Either::Right(e)
+							}
+						)
 					})
-					.collect()
+				.flatten()
+				.collect()
 			}
 		} else {
 			// should always be false when multi-threading is disabled at compile time
@ -170,8 +175,10 @@ pub fn scan_from_walkdir(
 	// - fif was compiled without the `multi-threading` feature
 	entries
 		.iter()
-		.map(|entry: &DirEntry| scan_file(entry, canonical_paths))
-		.collect()
+		.partition_map(|entry: &DirEntry| match scan_file(entry, canonical_paths) {
+			Ok(f) => Either::Left(f),
+			Err(e) => Either::Right(e),
+		})
 }

 /// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
--- a/src/formats.rs
+++ b/src/formats.rs
@ -7,7 +7,6 @@ use std::os::unix::ffi::OsStrExt;
 use std::path::Path;

 use cfg_if::cfg_if;
-use itertools::{Either, Itertools};
 use snailquote::escape;

 use crate::findings::ScanError;
@ -51,24 +50,6 @@ macro_rules! writablesln {
 	};
 }

-#[doc(hidden)]
-type Entries<'a> = [Result<Findings, ScanError<'a>>];
-
-/// Splits the given [`Entries`] into [`Vec`]s of [`Findings`] and [`ScanError`]s. [`Findings`] are sorted by whether
-/// or not they have a known extension (unknown extensions coming first), and then by their filenames. [`ScanError`]s
-/// are sorted such that [`ScanError::File`]s come before [`ScanError::Mime`]s.
-#[inline]
-fn sort_entries<'a>(entries: &'a Entries) -> (Vec<&'a Findings>, Vec<&'a ScanError<'a>>) {
-	let (mut findings, mut errors): (Vec<_>, Vec<_>) = entries.iter().partition_map(|entry| match entry {
-		Ok(f) => Either::Left(f),
-		Err(e) => Either::Right(e),
-	});
-
-	findings.sort_unstable();
-	errors.sort_unstable();
-	(findings, errors)
-}
-
 #[derive(Debug, PartialEq)]
 pub enum Writable<'a> {
 	String(&'a str),
@ -141,12 +122,10 @@ pub trait FormatSteps {
 	fn no_known_extension<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
 	fn unreadable<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
 	fn unknown_type<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
-	fn header<W: Write>(&self, _f: &mut W, _entries: &Entries) -> io::Result<()>;
-	fn footer<W: Write>(&self, _f: &mut W, _entries: &Entries) -> io::Result<()>;
-	fn write_steps<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
-		self.header(f, entries)?;
-
-		let (findings, errors) = sort_entries(entries);
+	fn header<W: Write>(&self, _f: &mut W) -> io::Result<()>;
+	fn footer<W: Write>(&self, _f: &mut W) -> io::Result<()>;
+	fn write_steps<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
+		self.header(f)?;

 		for error in errors {
 			match error {
@ -157,8 +136,7 @@ pub trait FormatSteps {
 			}
 		}

-		if findings.len() != entries.len() {
-			// if these lengths aren't the same, there was at least one error
+		if !errors.is_empty() {
 			// add a blank line between the errors and commands
 			smart_write(f, writables![Newline])?;
 		}
@ -171,19 +149,21 @@ pub trait FormatSteps {
 			}
 		}

-		self.footer(f, entries)
+		self.footer(f)
 	}
 }

 pub trait Format {
-	fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()>;
+	fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()>;
 }

 /// Bourne-Shell compatible script.
 pub struct Shell;

 impl Format for Shell {
-	fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) }
+	fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
+		self.write_steps(f, findings, errors)
+	}
 }

 impl FormatSteps for Shell {
@ -213,7 +193,7 @@ impl FormatSteps for Shell {
 		smart_write(f, writablesln!["# Failed to detect mime type for ", path])
 	}

-	fn header<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
+	fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
 		smart_write(
 			f,
 			writablesln!["#!/usr/bin/env sh", Newline, "# ", (generated_by().as_str())],
@ -226,9 +206,7 @@ impl FormatSteps for Shell {
 		smart_write(f, writablesln![Newline, "set -e", Newline])
 	}

-	fn footer<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
-		smart_write(f, writablesln![Newline, "echo 'Done.'"])
-	}
+	fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> { smart_write(f, writablesln![Newline, "echo 'Done.'"]) }
 }

 // PowerShell is a noun, not a type
@ -237,7 +215,9 @@ impl FormatSteps for Shell {
 pub struct PowerShell;

 impl Format for PowerShell {
-	fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) }
+	fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
+		self.write_steps(f, findings, errors)
+	}
 }

 impl FormatSteps for PowerShell {
@ -281,7 +261,7 @@ impl FormatSteps for PowerShell {
 		smart_write(f, writablesln!["<# Failed to detect mime type for ", path, " #>"])
 	}

-	fn header<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
+	fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
 		smart_write(
 			f,
 			writablesln!["#!/usr/bin/env pwsh", Newline, "<# ", (generated_by().as_str()), " #>"],
@ -294,14 +274,16 @@ impl FormatSteps for PowerShell {
 		smart_write(f, writables![Newline])
 	}

-	fn footer<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
+	fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> {
 		smart_write(f, writablesln![Newline, "Write-Output 'Done!'"])
 	}
 }

 pub struct Text;
 impl Format for Text {
-	fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) }
+	fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
+		self.write_steps(f, findings, errors)
+	}
 }

 impl FormatSteps for Text {
@ -321,14 +303,15 @@ impl FormatSteps for Text {
 		smart_write(f, writablesln!["Couldn't determine type for ", path])
 	}

-	fn header<W: Write>(&self, f: &mut W, _entries: &Entries) -> io::Result<()> {
+	fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
 		smart_write(f, writablesln![(generated_by().as_str()), Newline])
 	}

-	fn footer<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
+	fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> {
 		smart_write(
 			f,
-			writablesln![Newline, "Processed ", (entries.len().to_string().as_str()), " files"],
+			// writablesln![Newline, "Processed ", (entries.len().to_string().as_str()), " files"],
+			writablesln![Newline, "Done."],
 		)
 	}
 }
@ -338,15 +321,13 @@ pub struct Json;

 #[cfg(feature = "json")]
 impl Format for Json {
-	fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
+	fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
 		#[derive(serde::Serialize)]
 		struct SerdeEntries<'a> {
-			errors: &'a Vec<&'a ScanError<'a>>,
-			findings: &'a Vec<&'a Findings>,
+			errors: &'a [ScanError<'a>],
+			findings: &'a [Findings],
 		}

-		let (findings, errors) = &sort_entries(entries);
-
 		let result = serde_json::to_writer_pretty(f, &SerdeEntries { errors, findings });

 		if let Err(err) = result {
--- a/src/main.rs
+++ b/src/main.rs
@ -27,6 +27,7 @@ use fif::formats::Format;
 use fif::parameters::OutputFormat;
 use fif::utils::{os_name, CLAP_LONG_VERSION};
 use fif::{formats, parameters};
+use itertools::Itertools;
 use log::{debug, error, info, trace, warn, Level};

 #[cfg(test)]
@ -109,58 +110,53 @@ fn main() {
 		}
 	}

-	let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths, use_threads)
-		.into_iter()
-		.filter(
-			|result| result.is_err() || !result.as_ref().unwrap().valid,
-			// TODO: find a way to trace! the valid files without doing ↓
-			// || if result.as_ref().unwrap().valid { trace!("{:?} ok", result.as_ref().unwrap().file); false } else { true }
-		)
-		.collect();
-
+	let (findings, errors) = scan_from_walkdir(&entries, args.canonical_paths, use_threads);
 	trace!("Scanning complete");

-	for result in &results {
-		match result {
-			Ok(r) => {
-				// check to see if debug logging is enabled before invoking debug! macro
-				// https://github.com/rust-lang/log/pull/394#issuecomment-630490343
-				if log::max_level() >= log::Level::Debug {
-					debug!(
-						"{:?} is of type {}, should have extension \"{}\"",
-						r.file,
-						r.mime,
-						r.recommended_extension().unwrap_or_else(|| "???".into())
-					);
-				}
-			}
-			Err(f) => warn!("{}", f),
-		}
-	}
-
-	if results.is_empty() {
+	if findings.is_empty() && errors.is_empty() {
 		info!("All files have valid extensions!");
 		exit(exitcode::OK);
 	}

-	let mut buffered_stdout = BufWriter::new(stdout());
+	// remove files that already have the correct extension, then sort - first by whether or not they have a
+	// recommended_extension() (with None before Some(ext)), then by filename
+	let findings = findings
+		.into_iter()
+		.filter(|f| !f.valid)
+		.sorted_unstable()
+		.collect_vec();
+	// sort errors (File errors before Mime errors), then log a warning for each error
+	let errors = errors
+		.into_iter()
+		.sorted_unstable()
+		.map(|e| {
+			warn!("{}", &e);
+			e
+		})
+		.collect_vec();

-	let result = match args.output_format {
-		OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &results),
-		OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &results),
-		#[cfg(feature = "json")]
-		OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &results),
-		OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &results),
-	};
+	if args.fix {
+	} else {
+		let mut buffered_stdout = BufWriter::new(stdout());

-	if result.is_err() {
-		error!("Failed to write to stdout.");
-		exit(exitcode::IOERR);
-	}
+		let result = match args.output_format {
+			// i want to simplify this to something like formats::write_all(args.output_format, ...)
+			OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &findings, &errors),
+			OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &findings, &errors),
+			#[cfg(feature = "json")]
+			OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &findings, &errors),
+			OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &findings, &errors),
+		};

-	if buffered_stdout.flush().is_err() {
-		error!("Failed to flush stdout.");
-		exit(exitcode::IOERR);
+		if result.is_err() {
+			error!("Failed to write to stdout.");
+			exit(exitcode::IOERR);
+		}
+
+		if buffered_stdout.flush().is_err() {
+			error!("Failed to flush stdout.");
+			exit(exitcode::IOERR);
+		}
 	}

 	debug!("Done");
--- a/src/parameters.rs
+++ b/src/parameters.rs
@ -48,6 +48,13 @@ pub enum OutputFormat {
 	max_term_width = 120
 )]
 pub struct Parameters {
+	/// Automatically rename files to use the correct extension.
+	#[clap(long)]
+	pub fix: bool,
+
+	#[clap(long)]
+	pub noconfirm: bool,
+
 	// NOTE: clap's comma-separated argument parser makes it impossible to specify extensions with commas in their name -
 	// `-e sil\,ly` is treated as ["sil", "ly"] rather than as ["silly"], no matter how i escape the comma (in bash,
 	// anyway). is this really an issue? it does technically exclude some perfectly valid extensions, but i've never seen
--- a/src/tests/mod.rs
+++ b/src/tests/mod.rs
@ -116,14 +116,14 @@ fn simple_directory() {

 	let use_threads = cfg!(feature = "multi-threaded");

-	let results = scan_from_walkdir(&entries, false, use_threads);
-	let canonical_results = scan_from_walkdir(&entries, true, use_threads);
+	let results = scan_from_walkdir(&entries, false, use_threads).0;
+	let canonical_results = scan_from_walkdir(&entries, true, use_threads).0;
 	assert_eq!(results.len(), canonical_results.len());

 	for (result, canonical_result) in results.iter().zip(canonical_results.iter()) {
 		// there should be no IO errors during this test. any IO errors encountered are outside the scope of this test.
-		let result = result.as_ref().expect("Error while scanning file");
-		let canonical_result = canonical_result.as_ref().expect("Error while scanning file");
+		// let result = result.as_ref().expect("Error while scanning file");
+		// let canonical_result = canonical_result.as_ref().expect("Error while scanning file");

 		// paths should be canonical
 		assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file);
@ -331,19 +331,19 @@ fn outputs_move_commands() {
 	use std::io::Read;

 	// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
-	let entries = vec![Ok(Findings {
+	let findings = vec![Findings {
 		file: Path::new("misnamed_file.png").to_path_buf(),
 		valid: false,
 		mime: IMAGE_JPEG,
-	})];
+	}];

 	for format in &["Shell", "PowerShell"] {
 		let mut cursor = std::io::Cursor::new(Vec::new());
 		let mut contents = std::string::String::new();

 		match *format {
-			"Shell" => Shell.write_all(&mut cursor, &entries),
-			"PowerShell" => PowerShell.write_all(&mut cursor, &entries),
+			"Shell" => Shell.write_all(&mut cursor, &findings, &[]),
+			"PowerShell" => PowerShell.write_all(&mut cursor, &findings, &[]),
 			_ => unreachable!(),
 		}
 		.expect("Failed to write to cursor");
@ -371,17 +371,17 @@ fn test_json() {

 	use crate::formats::Json;
 	// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
-	let entries = vec![Ok(Findings {
+	let findings = vec![Findings {
 		file: Path::new("misnamed_file.png").to_path_buf(),
 		valid: false,
 		mime: IMAGE_JPEG,
-	})];
+	}];

 	let mut cursor = std::io::Cursor::new(Vec::new());
 	let mut contents = std::string::String::new();

 	Json
-		.write_all(&mut cursor, &entries)
+		.write_all(&mut cursor, &findings, &[])
 		.expect("Failed to write to cursor");

 	cursor.set_position(0);
editor.table_modal.label.rows
editor.table_modal.label.columns