refactoring, initial work on --fix feature
This commit is contained in:
parent
451ea3d5d9
commit
c4fabbc0f4
6 changed files with 98 additions and 109 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -1,7 +1,5 @@
|
|||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.5.2"
|
||||
|
|
21
src/files.rs
21
src/files.rs
|
@ -8,6 +8,7 @@ use std::str::FromStr;
|
|||
use std::sync::RwLock;
|
||||
|
||||
use cfg_if::cfg_if;
|
||||
use itertools::{Either, Itertools};
|
||||
use log::{debug, error};
|
||||
use mime::Mime;
|
||||
use mime_guess::from_ext;
|
||||
|
@ -140,7 +141,7 @@ pub fn scan_from_walkdir(
|
|||
entries: &[DirEntry],
|
||||
canonical_paths: bool,
|
||||
use_threads: bool,
|
||||
) -> Vec<Result<Findings, ScanError>> {
|
||||
) -> (Vec<Findings>, Vec<ScanError>) {
|
||||
cfg_if! {
|
||||
if #[cfg(feature = "multi-threaded")] {
|
||||
use rayon::prelude::*;
|
||||
|
@ -150,13 +151,17 @@ pub fn scan_from_walkdir(
|
|||
// split the entries into chunks of 32, and iterate over each chunk of entries in a separate thread
|
||||
return entries
|
||||
.par_chunks(CHUNKS)
|
||||
.flat_map_iter(|chunk| {
|
||||
.map(|chunk| {
|
||||
chunk
|
||||
.iter() // iter over the chunk, which is a slice of DirEntry structs
|
||||
.map(|entry| scan_file(entry, canonical_paths))
|
||||
.collect::<Vec<_>>() // TODO: is there a way to avoid having to collect here?
|
||||
.partition_map::<Vec<_>, Vec<_>, _, _, _>(|entry| match scan_file(entry, canonical_paths) {
|
||||
Ok(f) => Either::Left(f),
|
||||
Err(e) => Either::Right(e)
|
||||
}
|
||||
)
|
||||
})
|
||||
.collect()
|
||||
.flatten()
|
||||
.collect()
|
||||
}
|
||||
} else {
|
||||
// should always be false when multi-threading is disabled at compile time
|
||||
|
@ -170,8 +175,10 @@ pub fn scan_from_walkdir(
|
|||
// - fif was compiled without the `multi-threaded` feature
|
||||
entries
|
||||
.iter()
|
||||
.map(|entry: &DirEntry| scan_file(entry, canonical_paths))
|
||||
.collect()
|
||||
.partition_map(|entry: &DirEntry| match scan_file(entry, canonical_paths) {
|
||||
Ok(f) => Either::Left(f),
|
||||
Err(e) => Either::Right(e),
|
||||
})
|
||||
}
|
||||
|
||||
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
|
||||
|
|
|
@ -7,7 +7,6 @@ use std::os::unix::ffi::OsStrExt;
|
|||
use std::path::Path;
|
||||
|
||||
use cfg_if::cfg_if;
|
||||
use itertools::{Either, Itertools};
|
||||
use snailquote::escape;
|
||||
|
||||
use crate::findings::ScanError;
|
||||
|
@ -51,24 +50,6 @@ macro_rules! writablesln {
|
|||
};
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
type Entries<'a> = [Result<Findings, ScanError<'a>>];
|
||||
|
||||
/// Splits the given [`Entries`] into [`Vec`]s of [`Findings`] and [`ScanError`]s. [`Findings`] are sorted by whether
|
||||
/// or not they have a known extension (unknown extensions coming first), and then by their filenames. [`ScanError`]s
|
||||
/// are sorted such that [`ScanError::File`]s come before [`ScanError::Mime`]s.
|
||||
#[inline]
|
||||
fn sort_entries<'a>(entries: &'a Entries) -> (Vec<&'a Findings>, Vec<&'a ScanError<'a>>) {
|
||||
let (mut findings, mut errors): (Vec<_>, Vec<_>) = entries.iter().partition_map(|entry| match entry {
|
||||
Ok(f) => Either::Left(f),
|
||||
Err(e) => Either::Right(e),
|
||||
});
|
||||
|
||||
findings.sort_unstable();
|
||||
errors.sort_unstable();
|
||||
(findings, errors)
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Writable<'a> {
|
||||
String(&'a str),
|
||||
|
@ -141,12 +122,10 @@ pub trait FormatSteps {
|
|||
fn no_known_extension<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
|
||||
fn unreadable<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
|
||||
fn unknown_type<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
|
||||
fn header<W: Write>(&self, _f: &mut W, _entries: &Entries) -> io::Result<()>;
|
||||
fn footer<W: Write>(&self, _f: &mut W, _entries: &Entries) -> io::Result<()>;
|
||||
fn write_steps<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
|
||||
self.header(f, entries)?;
|
||||
|
||||
let (findings, errors) = sort_entries(entries);
|
||||
fn header<W: Write>(&self, _f: &mut W) -> io::Result<()>;
|
||||
fn footer<W: Write>(&self, _f: &mut W) -> io::Result<()>;
|
||||
fn write_steps<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
|
||||
self.header(f)?;
|
||||
|
||||
for error in errors {
|
||||
match error {
|
||||
|
@ -157,8 +136,7 @@ pub trait FormatSteps {
|
|||
}
|
||||
}
|
||||
|
||||
if findings.len() != entries.len() {
|
||||
// if these lengths aren't the same, there was at least one error
|
||||
if !errors.is_empty() {
|
||||
// add a blank line between the errors and commands
|
||||
smart_write(f, writables![Newline])?;
|
||||
}
|
||||
|
@ -171,19 +149,21 @@ pub trait FormatSteps {
|
|||
}
|
||||
}
|
||||
|
||||
self.footer(f, entries)
|
||||
self.footer(f)
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Format {
|
||||
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()>;
|
||||
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()>;
|
||||
}
|
||||
|
||||
/// Bourne-Shell compatible script.
|
||||
pub struct Shell;
|
||||
|
||||
impl Format for Shell {
|
||||
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) }
|
||||
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
|
||||
self.write_steps(f, findings, errors)
|
||||
}
|
||||
}
|
||||
|
||||
impl FormatSteps for Shell {
|
||||
|
@ -213,7 +193,7 @@ impl FormatSteps for Shell {
|
|||
smart_write(f, writablesln!["# Failed to detect mime type for ", path])
|
||||
}
|
||||
|
||||
fn header<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
|
||||
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
|
||||
smart_write(
|
||||
f,
|
||||
writablesln!["#!/usr/bin/env sh", Newline, "# ", (generated_by().as_str())],
|
||||
|
@ -226,9 +206,7 @@ impl FormatSteps for Shell {
|
|||
smart_write(f, writablesln![Newline, "set -e", Newline])
|
||||
}
|
||||
|
||||
fn footer<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
|
||||
smart_write(f, writablesln![Newline, "echo 'Done.'"])
|
||||
}
|
||||
fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> { smart_write(f, writablesln![Newline, "echo 'Done.'"]) }
|
||||
}
|
||||
|
||||
// PowerShell is a noun, not a type
|
||||
|
@ -237,7 +215,9 @@ impl FormatSteps for Shell {
|
|||
pub struct PowerShell;
|
||||
|
||||
impl Format for PowerShell {
|
||||
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) }
|
||||
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
|
||||
self.write_steps(f, findings, errors)
|
||||
}
|
||||
}
|
||||
|
||||
impl FormatSteps for PowerShell {
|
||||
|
@ -281,7 +261,7 @@ impl FormatSteps for PowerShell {
|
|||
smart_write(f, writablesln!["<# Failed to detect mime type for ", path, " #>"])
|
||||
}
|
||||
|
||||
fn header<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
|
||||
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
|
||||
smart_write(
|
||||
f,
|
||||
writablesln!["#!/usr/bin/env pwsh", Newline, "<# ", (generated_by().as_str()), " #>"],
|
||||
|
@ -294,14 +274,16 @@ impl FormatSteps for PowerShell {
|
|||
smart_write(f, writables![Newline])
|
||||
}
|
||||
|
||||
fn footer<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
|
||||
fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> {
|
||||
smart_write(f, writablesln![Newline, "Write-Output 'Done!'"])
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Text;
|
||||
impl Format for Text {
|
||||
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) }
|
||||
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
|
||||
self.write_steps(f, findings, errors)
|
||||
}
|
||||
}
|
||||
|
||||
impl FormatSteps for Text {
|
||||
|
@ -321,14 +303,15 @@ impl FormatSteps for Text {
|
|||
smart_write(f, writablesln!["Couldn't determine type for ", path])
|
||||
}
|
||||
|
||||
fn header<W: Write>(&self, f: &mut W, _entries: &Entries) -> io::Result<()> {
|
||||
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
|
||||
smart_write(f, writablesln![(generated_by().as_str()), Newline])
|
||||
}
|
||||
|
||||
fn footer<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
|
||||
fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> {
|
||||
smart_write(
|
||||
f,
|
||||
writablesln![Newline, "Processed ", (entries.len().to_string().as_str()), " files"],
|
||||
// writablesln![Newline, "Processed ", (entries.len().to_string().as_str()), " files"],
|
||||
writablesln![Newline, "Done."],
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -338,15 +321,13 @@ pub struct Json;
|
|||
|
||||
#[cfg(feature = "json")]
|
||||
impl Format for Json {
|
||||
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
|
||||
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
|
||||
#[derive(serde::Serialize)]
|
||||
struct SerdeEntries<'a> {
|
||||
errors: &'a Vec<&'a ScanError<'a>>,
|
||||
findings: &'a Vec<&'a Findings>,
|
||||
errors: &'a [ScanError<'a>],
|
||||
findings: &'a [Findings],
|
||||
}
|
||||
|
||||
let (findings, errors) = &sort_entries(entries);
|
||||
|
||||
let result = serde_json::to_writer_pretty(f, &SerdeEntries { errors, findings });
|
||||
|
||||
if let Err(err) = result {
|
||||
|
|
82
src/main.rs
82
src/main.rs
|
@ -27,6 +27,7 @@ use fif::formats::Format;
|
|||
use fif::parameters::OutputFormat;
|
||||
use fif::utils::{os_name, CLAP_LONG_VERSION};
|
||||
use fif::{formats, parameters};
|
||||
use itertools::Itertools;
|
||||
use log::{debug, error, info, trace, warn, Level};
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -109,58 +110,53 @@ fn main() {
|
|||
}
|
||||
}
|
||||
|
||||
let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths, use_threads)
|
||||
.into_iter()
|
||||
.filter(
|
||||
|result| result.is_err() || !result.as_ref().unwrap().valid,
|
||||
// TODO: find a way to trace! the valid files without doing ↓
|
||||
// || if result.as_ref().unwrap().valid { trace!("{:?} ok", result.as_ref().unwrap().file); false } else { true }
|
||||
)
|
||||
.collect();
|
||||
|
||||
let (findings, errors) = scan_from_walkdir(&entries, args.canonical_paths, use_threads);
|
||||
trace!("Scanning complete");
|
||||
|
||||
for result in &results {
|
||||
match result {
|
||||
Ok(r) => {
|
||||
// check to see if debug logging is enabled before invoking debug! macro
|
||||
// https://github.com/rust-lang/log/pull/394#issuecomment-630490343
|
||||
if log::max_level() >= log::Level::Debug {
|
||||
debug!(
|
||||
"{:?} is of type {}, should have extension \"{}\"",
|
||||
r.file,
|
||||
r.mime,
|
||||
r.recommended_extension().unwrap_or_else(|| "???".into())
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(f) => warn!("{}", f),
|
||||
}
|
||||
}
|
||||
|
||||
if results.is_empty() {
|
||||
if findings.is_empty() && errors.is_empty() {
|
||||
info!("All files have valid extensions!");
|
||||
exit(exitcode::OK);
|
||||
}
|
||||
|
||||
let mut buffered_stdout = BufWriter::new(stdout());
|
||||
// remove files that already have the correct extension, then sort - first by whether or not they have a
|
||||
// recommended_extension() (with None before Some(ext)), then by filename
|
||||
let findings = findings
|
||||
.into_iter()
|
||||
.filter(|f| !f.valid)
|
||||
.sorted_unstable()
|
||||
.collect_vec();
|
||||
// sort errors (File errors before Mime errors), then log a warning for each error
|
||||
let errors = errors
|
||||
.into_iter()
|
||||
.sorted_unstable()
|
||||
.map(|e| {
|
||||
warn!("{}", &e);
|
||||
e
|
||||
})
|
||||
.collect_vec();
|
||||
|
||||
let result = match args.output_format {
|
||||
OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &results),
|
||||
OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &results),
|
||||
#[cfg(feature = "json")]
|
||||
OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &results),
|
||||
OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &results),
|
||||
};
|
||||
if args.fix {
|
||||
} else {
|
||||
let mut buffered_stdout = BufWriter::new(stdout());
|
||||
|
||||
if result.is_err() {
|
||||
error!("Failed to write to stdout.");
|
||||
exit(exitcode::IOERR);
|
||||
}
|
||||
let result = match args.output_format {
|
||||
// i want to simplify this to something like formats::write_all(args.output_format, ...)
|
||||
OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &findings, &errors),
|
||||
OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &findings, &errors),
|
||||
#[cfg(feature = "json")]
|
||||
OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &findings, &errors),
|
||||
OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &findings, &errors),
|
||||
};
|
||||
|
||||
if buffered_stdout.flush().is_err() {
|
||||
error!("Failed to flush stdout.");
|
||||
exit(exitcode::IOERR);
|
||||
if result.is_err() {
|
||||
error!("Failed to write to stdout.");
|
||||
exit(exitcode::IOERR);
|
||||
}
|
||||
|
||||
if buffered_stdout.flush().is_err() {
|
||||
error!("Failed to flush stdout.");
|
||||
exit(exitcode::IOERR);
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Done");
|
||||
|
|
|
@ -48,6 +48,13 @@ pub enum OutputFormat {
|
|||
max_term_width = 120
|
||||
)]
|
||||
pub struct Parameters {
|
||||
/// Automatically rename files to use the correct extension.
|
||||
#[clap(long)]
|
||||
pub fix: bool,
|
||||
|
||||
#[clap(long)]
|
||||
pub noconfirm: bool,
|
||||
|
||||
// NOTE: clap's comma-separated argument parser makes it impossible to specify extensions with commas in their name -
|
||||
// `-e sil\,ly` is treated as ["sil", "ly"] rather than as ["silly"], no matter how i escape the comma (in bash,
|
||||
// anyway). is this really an issue? it does technically exclude some perfectly valid extensions, but i've never seen
|
||||
|
|
|
@ -116,14 +116,14 @@ fn simple_directory() {
|
|||
|
||||
let use_threads = cfg!(feature = "multi-threaded");
|
||||
|
||||
let results = scan_from_walkdir(&entries, false, use_threads);
|
||||
let canonical_results = scan_from_walkdir(&entries, true, use_threads);
|
||||
let results = scan_from_walkdir(&entries, false, use_threads).0;
|
||||
let canonical_results = scan_from_walkdir(&entries, true, use_threads).0;
|
||||
assert_eq!(results.len(), canonical_results.len());
|
||||
|
||||
for (result, canonical_result) in results.iter().zip(canonical_results.iter()) {
|
||||
// there should be no IO errors during this test. any IO errors encountered are outside the scope of this test.
|
||||
let result = result.as_ref().expect("Error while scanning file");
|
||||
let canonical_result = canonical_result.as_ref().expect("Error while scanning file");
|
||||
// let result = result.as_ref().expect("Error while scanning file");
|
||||
// let canonical_result = canonical_result.as_ref().expect("Error while scanning file");
|
||||
|
||||
// paths should be canonical
|
||||
assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file);
|
||||
|
@ -331,19 +331,19 @@ fn outputs_move_commands() {
|
|||
use std::io::Read;
|
||||
|
||||
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
|
||||
let entries = vec![Ok(Findings {
|
||||
let findings = vec![Findings {
|
||||
file: Path::new("misnamed_file.png").to_path_buf(),
|
||||
valid: false,
|
||||
mime: IMAGE_JPEG,
|
||||
})];
|
||||
}];
|
||||
|
||||
for format in &["Shell", "PowerShell"] {
|
||||
let mut cursor = std::io::Cursor::new(Vec::new());
|
||||
let mut contents = std::string::String::new();
|
||||
|
||||
match *format {
|
||||
"Shell" => Shell.write_all(&mut cursor, &entries),
|
||||
"PowerShell" => PowerShell.write_all(&mut cursor, &entries),
|
||||
"Shell" => Shell.write_all(&mut cursor, &findings, &[]),
|
||||
"PowerShell" => PowerShell.write_all(&mut cursor, &findings, &[]),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
.expect("Failed to write to cursor");
|
||||
|
@ -371,17 +371,17 @@ fn test_json() {
|
|||
|
||||
use crate::formats::Json;
|
||||
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
|
||||
let entries = vec![Ok(Findings {
|
||||
let findings = vec![Findings {
|
||||
file: Path::new("misnamed_file.png").to_path_buf(),
|
||||
valid: false,
|
||||
mime: IMAGE_JPEG,
|
||||
})];
|
||||
}];
|
||||
|
||||
let mut cursor = std::io::Cursor::new(Vec::new());
|
||||
let mut contents = std::string::String::new();
|
||||
|
||||
Json
|
||||
.write_all(&mut cursor, &entries)
|
||||
.write_all(&mut cursor, &findings, &[])
|
||||
.expect("Failed to write to cursor");
|
||||
|
||||
cursor.set_position(0);
|
||||
|
|
Loading…
Reference in a new issue