From 82bdbebec517bd54bf3031b64126291a5ba549d6 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 10 Feb 2021 19:20:22 +1000 Subject: [PATCH] rudimentary script output, lots of small stuff, performance improvements --- Cargo.lock | 175 ++++++++++------------------------------------ Cargo.toml | 12 +++- src/formats.rs | 82 ++++++++++++++++++---- src/main.rs | 38 ++++++---- src/parameters.rs | 16 +++-- src/scanerror.rs | 4 ++ 6 files changed, 155 insertions(+), 172 deletions(-) create mode 100644 src/scanerror.rs diff --git a/Cargo.lock b/Cargo.lock index fc0be96..57944a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,17 +15,6 @@ dependencies = [ "event-listener", ] -[[package]] -name = "async-trait" -version = "0.1.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d3a45e77e34375a7923b1e8febb049bb011f064714a8e17a1a616fef01da13d" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "atty" version = "0.2.14" @@ -56,10 +45,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e2afe73808fbaac302e39c9754bfc3c4b4d0f99c9c240b9f4e4efc841ad1b74" dependencies = [ "async-mutex", - "async-trait", "cached_proc_macro", "cached_proc_macro_types", - "futures", "hashbrown", "once_cell", ] @@ -270,6 +257,7 @@ dependencies = [ "mime_guess", "rayon", "smartstring", + "snailquote", "walkdir", "xdg-mime", ] @@ -280,101 +268,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "futures" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9052a1a50244d8d5aa9bf55cbc2fb6f357c86cc52e46c62ed390a7180cf150" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2d31b7ec7efab6eefc7c57233bb10b847986139d88cc2f5a02a1ae6871a1846" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79e5145dde8da7d1b3892dad07a9c98fc04bc39892b1ecc9692cf53e2b780a65" - -[[package]] -name = "futures-executor" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9e59fdc009a4b3096bf94f740a0f2424c082521f20a9b08c5c07c48d90fd9b9" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28be053525281ad8259d47e4de5de657b25e7bac113458555bb4b70bc6870500" - -[[package]] -name = "futures-macro" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c287d25add322d9f9abdcdc5927ca398917996600182178774032e9f8258fedd" -dependencies = [ - "proc-macro-hack", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-sink" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf5c69029bda2e743fddd0582d1083951d65cc9539aebf8812f36c3491342d6" - -[[package]] -name = "futures-task" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13de07eb8ea81ae445aca7b69f5f7bf15d7bf4912d8ca37d6645c77ae8a58d86" -dependencies = [ - "once_cell", -] - -[[package]] -name = "futures-util" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "632a8cd0f2a4b3fdea1657f08bde063848c3bd00f9bbf6e256b8be78802e624b" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "proc-macro-hack", - "proc-macro-nested", - "slab", -] - [[package]] name = "getrandom" version = "0.2.2" @@ -530,18 +423,6 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85" -[[package]] -name = "pin-project-lite" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439697af366c49a6d0a010c56a0d97685bc140ce0d377b13a2ea2aa42d64a827" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - [[package]] name = "proc-macro-error" version = "1.0.4" @@ -566,18 +447,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "proc-macro-hack" -version = "0.5.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" - -[[package]] -name = "proc-macro-nested" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc881b2c22681370c6a780e47af9840ef841837bc98118431d4e1868bd0c1086" - [[package]] name = "proc-macro2" version = "1.0.24" @@ -661,12 +530,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "slab" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" - [[package]] name = "smartstring" version = "0.2.6" @@ -676,6 +539,16 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "snailquote" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f34b729d802f52194598858ac852c3fb3b33f6e026cd03195072ccb7bf3fc810" +dependencies = [ + "thiserror", + "unicode_categories", +] + [[package]] name = "static_assertions" version = "1.1.0" @@ -734,6 +607,26 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "thiserror" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicase" version = "2.6.0" @@ -761,6 +654,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "vec_map" version = "0.8.2" diff --git a/Cargo.toml b/Cargo.toml index 0c7b905..ed42934 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ smartstring = "0.2.6" xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3"} mime_guess = "2.0.3" rayon = "1.5.0" -cached = "0.23.0" +snailquote = "0.3.0" [dependencies.clap] version = "3.0.0-beta.2" @@ -30,5 +30,15 @@ version = "0.8.2" default-features = false features = ["termcolor", "atty"] +[dependencies.cached] +version = "0.23.0" +default-features = false +features = ["proc_macro"] + + [profile.release] lto = "thin" + +# optimise dependencies, even when producing debug builds +[profile.dev.package."*"] +opt-level = 3 diff --git a/src/formats.rs b/src/formats.rs index 2e0e7a9..7ec1af9 100644 --- a/src/formats.rs +++ b/src/formats.rs @@ -1,23 +1,79 @@ -use std::fmt; -use std::fmt::Formatter; +use std::io; +use std::io::Write; +use crate::Findings; +use crate::scanerror::ScanError; +use std::path::PathBuf; +use snailquote::escape; -trait Format { - fn rename(f: &mut fmt::Formatter<'_>, from: &str, to: &str) -> fmt::Result; - fn unreadable(f: &mut fmt::Formatter<'_>, path: &str) -> fmt::Result; - fn unknown_type(f: &mut fmt::Formatter<'_>, path: &str) -> fmt::Result; +type Entries = [Result]; + +pub trait Format { + fn new() -> Self; + fn rename(&self, f: &mut T, from: &str, to: &str) -> io::Result<()>; + fn no_known_extension(&self, f: &mut T, path: &str) -> io::Result<()>; + fn unreadable(&self, f: &mut T, path: &str) -> io::Result<()>; + fn unknown_type(&self, f: &mut T, path: &str) -> io::Result<()>; + + fn write_all(&self, entries: &Entries, f: &mut T) -> io::Result<()> { + // TODO: clean this up - it's horrifying + for entry in entries { + match entry { + Ok(finding) => { + // the file was successfully scanned, and a mimetype was detected + if !finding.valid { + // the file's extension is wrong! + match finding.recommended_extension() { + Some(ext) => { + // there's a known extension for this mimetype!! + self.rename( + f, + &finding.file.to_string_lossy(), + &finding.file.with_extension(ext.as_str()).to_string_lossy() + )? + } + None => { + // unfortunately, there's no known extension for this mimetype :( + self.no_known_extension(f, &finding.file.to_string_lossy())? + } + } + + } + } + Err(error) => { + // something went wrong 0uo + match error.0 { + // failed to read the file + ScanError::File => self.unreadable(f, &error.1.to_string_lossy())?, + // file was read successfully, but we couldn't determine a mimetype + ScanError::Mime => self.unknown_type(f, &error.1.to_string_lossy())? + } + } + } + } + Ok(()) + } } -struct Script {} +pub struct Script {} impl Format for Script { - fn rename(f: &mut Formatter<'_>, from: &str, to: &str) -> fmt::Result { - write!(f, "mv {} {}", from, to) + fn new() -> Self { + return Script {} } - fn unreadable(f: &mut Formatter<'_>, path: &str) -> fmt::Result { - write!(f, "# Failed to read {}", path) + fn rename(&self, f: &mut T, from: &str, to: &str) -> io::Result<()> { + // TODO: string escaping aaaaaaAAAAAAAAAA + writeln!(f, "mv -v -i -- {} {}", escape(from), escape(to)) } - fn unknown_type(f: &mut Formatter<'_>, path: &str) -> fmt::Result { - write!(f, "# Failed to detect mime type for {}", path) + fn no_known_extension(&self, f: &mut T, path: &str) -> io::Result<()> { + writeln!(f, "echo No known extension for {}!", escape(path)) + } + + fn unreadable(&self, f: &mut T, path: &str) -> io::Result<()> { + writeln!(f, "# Failed to read {}", escape(path)) + } + + fn unknown_type(&self, f: &mut T, path: &str) -> io::Result<()> { + writeln!(f, "# Failed to detect mime type for {}", escape(path)) } } \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 2a62330..5a271bb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,6 +17,7 @@ mod parameters; mod inspectors; mod formats; +mod scanerror; use std::path::{Path, PathBuf}; use walkdir::{WalkDir, DirEntry}; @@ -27,8 +28,12 @@ use log::{debug, trace, info, warn}; use rayon::prelude::*; use std::fmt::{self, Display}; use xdg_mime::SharedMimeInfo; +use crate::parameters::OutputFormat; +use crate::scanerror::ScanError; +use crate::formats::{Script, Format}; +use std::io::stdout; -struct Findings { +pub struct Findings { file: PathBuf, valid: bool, mime: Mime, @@ -41,11 +46,6 @@ impl Findings { } } -enum ScanError { - File, - Mime -} - impl Display for ScanError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", @@ -57,14 +57,15 @@ impl Display for ScanError { } } -// TODO: test if this actually works on a windows machine +// TODO: test if this actually works on a windows machine - not there's much of a point right now, considering +// xdg-mime-rs doesn't support windows #[cfg(windows)] fn is_hidden(entry: &DirEntry) -> bool { use std::os::windows::prelude::*; std::fs::metadata(entry) // try to get metadata for file .map_or( false, // if getting metadata/attributes fails, assume it's not hidden - |f| f.file_attributes() & 0x2 > 0 // flag for hidden - https://docs.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants + |f| f.file_attributes() & 0x2 > 0 // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants ) } @@ -102,9 +103,9 @@ fn extension_from_path(path: &Path) -> Option { fn scan_file(db: &SharedMimeInfo, entry: &DirEntry) -> Result { // try to determine mimetype for this entry - let result = inspectors::mime_type(&db, entry.path()); + let result = inspectors::mime_type(db, entry.path()); - if let Err(_) = result { + if result.is_err() { // an error occurred while trying to read the file // error!("{}: {}", entry.path().to_string_lossy(), error); return Err((ScanError::File, entry.path().to_path_buf())); @@ -138,16 +139,15 @@ fn scan_file(db: &SharedMimeInfo, entry: &DirEntry) -> Result) -> Vec> { #[cfg(feature = "multi-threaded")] { - // rather than using a standard par_iter, split the entries into chunks of 16 first. + // rather than using a standard par_iter, split the entries into chunks of 32 first. // this allows each spawned thread to handle 16 files before before closing, rather than creating a new thread for // each file. this leads to a pretty substantial speedup that i'm pretty substantially happy about 0u0 entries - .par_chunks(16) // split into chunks of 16 + .par_chunks(32) // split into chunks of 32 .flat_map(|chunk| chunk // return Vec<...> instead of Chunk> .iter() // iter over the chunk, which is a slice of DirEntry structs .map(|entry| scan_file(db, entry)) @@ -171,7 +171,7 @@ fn main() { // .format(|buf, r| writeln!(buf, "{} - {}", r.level(), r.args())) .format_module_path(false) // don't include module in logs, as it's not necessary .format_timestamp(None) // don't include timestamps (unnecessary, and the feature flag is disabled anyway) - .target(env_logger::Target::Stdout) // log to stdout rather than stderr + // .target(env_logger::Target::Stdout) // log to stdout rather than stderr .init(); let db = xdg_mime::SharedMimeInfo::new(); @@ -188,7 +188,7 @@ fn main() { let results = scan_from_walkdir(&db, entries); - for result in results { + for result in &results { match result { Ok(r) => { if !r.valid { @@ -201,5 +201,13 @@ fn main() { } } + match args.output_format { + OutputFormat::Script => { + let s = Script::new(); + s.write_all(&results, &mut stdout().lock()).expect("failed to ouptput"); + }, + OutputFormat::Text => debug!("eewr") + } + debug!("Done"); } diff --git a/src/parameters.rs b/src/parameters.rs index b6b73e7..8957db6 100644 --- a/src/parameters.rs +++ b/src/parameters.rs @@ -1,20 +1,26 @@ -use clap::Clap; +use clap::{Clap}; use std::path::PathBuf; -use smartstring::alias::String; +use smartstring::{LazyCompact, SmartString}; + +#[derive(Clap, PartialEq, Debug)] +pub enum OutputFormat { + Script, + Text +} #[derive(Clap, Debug)] pub struct Parameters { /// Only examine files with these extensions (Comma-separated list) #[clap(short, long, use_delimiter = true)] - pub extensions: Option>, + pub extensions: Option>>, /// Don't skip hidden files and directories #[clap(short, long)] pub scan_hidden: bool, /// Output format to use. See "--help formats" for more information. - #[clap(short, long, default_value="script", possible_values = &["script", "text"])] - pub output_format: String, + #[clap(short, long, default_value="script", arg_enum)] + pub output_format: OutputFormat, /// Directory to process // TODO: right now this can only take a single directory - should this be improved? diff --git a/src/scanerror.rs b/src/scanerror.rs new file mode 100644 index 0000000..34af30b --- /dev/null +++ b/src/scanerror.rs @@ -0,0 +1,4 @@ +pub enum ScanError { + File, + Mime +} \ No newline at end of file