diff --git a/Cargo.lock b/Cargo.lock index 2374655..5aef7a9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,6 +15,26 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" +[[package]] +name = "async-mutex" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479db852db25d9dbf6204e6cb6253698f175c15726470f78af0d918e99d6156e" +dependencies = [ + "event-listener", +] + +[[package]] +name = "async-trait" +version = "0.1.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d3a45e77e34375a7923b1e8febb049bb011f064714a8e17a1a616fef01da13d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atty" version = "0.2.14" @@ -38,6 +58,40 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +[[package]] +name = "cached" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e2afe73808fbaac302e39c9754bfc3c4b4d0f99c9c240b9f4e4efc841ad1b74" +dependencies = [ + "async-mutex", + "async-trait", + "cached_proc_macro", + "cached_proc_macro_types", + "futures", + "hashbrown", + "once_cell", +] + +[[package]] +name = "cached_proc_macro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf857ae42d910aede5c5186e62684b0d7a597ce2fe3bd14448ab8f7ef439848c" +dependencies = [ + "async-mutex", + "cached_proc_macro_types", + "darling", + "quote", + "syn", +] + +[[package]] +name = "cached_proc_macro_types" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a4f925191b4367301851c6d99b09890311d74b0d43f274c0b34c86d308a3663" + [[package]] name = "cfg-if" version = "0.1.10" @@ -62,8 +116,9 @@ dependencies = [ "indexmap", "lazy_static", "os_str_bytes", - "strsim", + "strsim 0.10.0", "termcolor", + "terminal_size", "textwrap", "unicode-width", "vec_map", @@ -82,6 +137,93 @@ dependencies = [ "syn", ] +[[package]] +name = "const_fn" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28b9d6de7f49e22cf97ad17fc4036ece69300032f45f78f30b4a4482cdc3f4a6" + +[[package]] +name = "crossbeam-channel" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d" +dependencies = [ + "cfg-if 1.0.0", + "const_fn", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d" +dependencies = [ + "autocfg", + "cfg-if 1.0.0", + "lazy_static", +] + +[[package]] +name = "darling" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim 0.9.3", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -103,6 +245,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + [[package]] name = "env_logger" version = "0.8.2" @@ -116,19 +264,128 @@ dependencies = [ "termcolor", ] +[[package]] +name = "event-listener" +version = "2.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7531096570974c3a9dcf9e4b8e1cede1ec26cf5046219fb3b9d897503b9be59" + [[package]] name = "fif" version = "0.1.0" dependencies = [ + "cached", "clap", "env_logger", "log", "mime_guess", + "rayon", "smartstring", "walkdir", "xdg-mime", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "futures" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da9052a1a50244d8d5aa9bf55cbc2fb6f357c86cc52e46c62ed390a7180cf150" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2d31b7ec7efab6eefc7c57233bb10b847986139d88cc2f5a02a1ae6871a1846" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79e5145dde8da7d1b3892dad07a9c98fc04bc39892b1ecc9692cf53e2b780a65" + +[[package]] +name = "futures-executor" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9e59fdc009a4b3096bf94f740a0f2424c082521f20a9b08c5c07c48d90fd9b9" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28be053525281ad8259d47e4de5de657b25e7bac113458555bb4b70bc6870500" + +[[package]] +name = "futures-macro" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c287d25add322d9f9abdcdc5927ca398917996600182178774032e9f8258fedd" +dependencies = [ + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caf5c69029bda2e743fddd0582d1083951d65cc9539aebf8812f36c3491342d6" + +[[package]] +name = "futures-task" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13de07eb8ea81ae445aca7b69f5f7bf15d7bf4912d8ca37d6645c77ae8a58d86" +dependencies = [ + "once_cell", +] + +[[package]] +name = "futures-util" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "632a8cd0f2a4b3fdea1657f08bde063848c3bd00f9bbf6e256b8be78802e624b" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "proc-macro-hack", + "proc-macro-nested", + "slab", +] + [[package]] name = "getrandom" version = "0.2.2" @@ -176,6 +433,12 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "indexmap" version = "1.6.1" @@ -226,6 +489,15 @@ version = "2.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" +[[package]] +name = "memoffset" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "157b4208e3059a8f9e78d559edc658e13df41410cb3ae03979c83130067fdd87" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.16" @@ -253,6 +525,16 @@ dependencies = [ "version_check", ] +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + [[package]] name = "once_cell" version = "1.5.2" @@ -265,6 +547,18 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85" +[[package]] +name = "pin-project-lite" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439697af366c49a6d0a010c56a0d97685bc140ce0d377b13a2ea2aa42d64a827" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -289,6 +583,18 @@ dependencies = [ "version_check", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "proc-macro-nested" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc881b2c22681370c6a780e47af9840ef841837bc98118431d4e1868bd0c1086" + [[package]] name = "proc-macro2" version = "1.0.24" @@ -307,6 +613,31 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rayon" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + [[package]] name = "redox_syscall" version = "0.2.4" @@ -359,6 +690,18 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "slab" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8" + [[package]] name = "smartstring" version = "0.2.6" @@ -374,6 +717,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "strsim" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c" + [[package]] name = "strsim" version = "0.10.0" @@ -400,12 +749,23 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ca8ced750734db02076f44132d802af0b33b09942331f4459dde8636fd2406" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "textwrap" version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "203008d98caf094106cfaba70acfed15e18ed3ddb7d94e49baec153a2b462789" dependencies = [ + "terminal_size", "unicode-width", ] diff --git a/Cargo.toml b/Cargo.toml index b461190..4b38b6e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,11 +8,15 @@ license = "GPL-3.0-or-later" [dependencies] walkdir = "2.3.1" -#structopt = "0.3.21" -clap = "3.0.0-beta.2" log = "0.4.14" env_logger = "0.8.2" smartstring = "0.2.6" # use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3"} mime_guess = "2.0.3" +rayon = "1.5.0" +cached = "0.23.0" + +[dependencies.clap] +version = "3.0.0-beta.2" +features = ["wrap_help"] diff --git a/src/inspectors.rs b/src/inspectors.rs index a9b489e..bd9b5ed 100644 --- a/src/inspectors.rs +++ b/src/inspectors.rs @@ -1,28 +1,36 @@ -// use xdg_mime::SharedMimeInfo; -// use std::path::Path; -// use std::io; -// use mime_guess::Mime; -// use std::fs::File; -// use std::io::Read; +use xdg_mime::SharedMimeInfo; +use std::path::Path; +use std::io; +use mime_guess::Mime; +use std::fs::File; +use std::io::Read; +use smartstring::alias::String; +use cached::proc_macro::cached; -// pub fn mime_type(db: &SharedMimeInfo, filepath: &Path) -> io::Result, > { -// // attempt to read up to the 256 bytes of the file -// let mut buffer = [0; 256]; -// let mut file = File::open(filepath)?; -// -// file.read(&mut buffer)?; -// -// Ok(db.get_mime_type_for_data(&buffer).map(|m| m.0)) -// } +pub fn mime_type(db: &SharedMimeInfo, path: &Path) -> io::Result, > { + // attempt to read up to the 256 bytes of the file + let mut buffer = [0; 256]; + let mut file = File::open(path)?; -// pub fn get_ext_from_mime(mime: &Mime) -> Option { -// match mime_guess::get_mime_extensions(mime) // get a list of possible extensions for this mime type -// .map(|g| g[0]) { // take the first option in the list and return it as a string -// // jpeg files are given the primary extension "jpe", due to the extension list being stored in alphabetical order. -// // to handle this particular case, swap "jpe" out for "jpg", and leave everything else the same, making sure we -// // convert the &strs to Strings. -// Some("jpe") => Some(String::from("jpg")), -// Some(ext) => Some(String::from(ext)), -// None => None -// } -// } \ No newline at end of file + // this can be ignored because it's perfectly okay if the file is less than 256 bytes long - we only care about the + // first few bytes for the purpose of mime sniffing + #[allow(clippy::unused_io_amount)] + file.read(&mut buffer)?; + + Ok(db.get_mime_type_for_data(&buffer).map(|m| m.0)) +} + +#[cached] +// TODO: avoid cloning mime if possible, although i don't really see how it would be - maybe instead of passing the mime +// object, pass a hash of it? +pub fn mime_extension_lookup(mime: Mime) -> Option> { + if mime == mime_guess::mime::IMAGE_JPEG { + // jpeg files are given the primary extension "jpe", due to the extension list being stored in alphabetical order. + // to handle this particular case, return a custom vector consisting of just "jpg" and "jpeg". + return Some(vec![String::from("jpg"), String::from("jpeg")]); + } + match mime_guess::get_mime_extensions(&mime) { // get a list of possible extensions for this mime type + Some(exts) => Some(exts.iter().map(|e| String::from(*e)).collect()), + None => None + } +} diff --git a/src/main.rs b/src/main.rs index 355f63c..67e5ff1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,12 +17,26 @@ mod parameters; mod inspectors; -use std::path::{Path}; +use std::path::{Path, PathBuf}; use walkdir::{WalkDir, DirEntry}; use smartstring::alias::String; -// use structopt::StructOpt; use clap::Clap; -use log::{info}; +use log::{debug, info, warn, error}; +use rayon::prelude::*; +use mime_guess::Mime; + +struct Findings { + file: PathBuf, + valid: bool, + mime: Mime, +} + +impl Findings { + fn recommended_extension(&self) -> Option { + inspectors::mime_extension_lookup(self.mime.clone()) + .map(|extensions| extensions[0].to_owned()) + } +} // TODO: test if this actually works on a windows machine #[cfg(windows)] @@ -52,35 +66,86 @@ fn wanted_file(args: ¶meters::Parameters, entry: &DirEntry) -> bool { return true; } - let ext = Path::new(entry.file_name()) // create a Path from the entry... - .extension() // get its extension... - .map(|e| String::from(e.to_string_lossy())); // and convert it from an OsStr to a String. + let ext = extension_from_path(entry.path()); if ext.is_none() { return false } // don't scan files without extensions. TODO - this should be configurable if let Some(extensions) = &args.extensions { // if the user has specified a list of extensions to check against, make sure this file ends in one of them. - // TODO - maybe use ascii_lowercase instead? - return extensions.contains(&ext.unwrap().to_ascii_lowercase().into()) + return extensions.contains(&ext.unwrap().to_lowercase().into()) } true } +fn extension_from_path(path: &Path) -> Option { + path.extension(). // Get the path's extension + map(|e| String::from(e.to_string_lossy())) // Convert from OsStr to String +} + fn main() { let args = parameters::Parameters::parse(); - // env_logger::init(); + env_logger::init(); let db = xdg_mime::SharedMimeInfo::new(); - println!("{:#?}", args); - // println!("{:#?}", args.dirs); - println!("=====\nIterating directory: {:?}\n=====", args.dirs); + debug!("=====\nIterating directory: {:?}\n=====", args.dirs); let stepper = WalkDir::new(&args.dirs).into_iter(); let entries: Vec = stepper .filter_entry(|e| wanted_file(&args, e)) // filter out unwanted files .filter_map(|e| e.ok()) // ignore anything that fails, e.g. files we don't have read access on + .filter(|e| !e.file_type().is_dir()) // remove directories from the final list .collect(); info!("Found {} items to check", entries.len()); - // println!("{:#?}", entries); + let results: Vec> = entries + .par_iter() + .map(|entry: &DirEntry | { + // try to determine mimetype for this entry + let result = inspectors::mime_type(&db, entry.path()); + + if let Err(error) = result { + // an error occurred while trying to read the file + error!("{}: {}", entry.path().to_string_lossy(), error); + return Err(entry.path().to_path_buf()); + } + + let result = result.unwrap(); + if result.is_none() { + // the file was read successfully, but we were unable to determine its mimetype + warn!("Couldn't determine mimetype for {}", entry.path().to_string_lossy()); + return Err(entry.path().to_path_buf()); + } + + let result = result.unwrap(); + + // set of known extensions for the given mimetype + let known_exts = inspectors::mime_extension_lookup(result.clone()); + // file extension for this particular file + let entry_ext = extension_from_path(entry.path()); + + let valid = match known_exts { + // there is a known set of extensions for this mimetype, and the file has an extension + Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_lowercase().into()), + // there is a known set of extensions for this mimetype, but the file has no extension + Some(_) => false, + // there is no known set of extensions for this mimetype -- assume it's correct + None => true + }; + + Ok(Findings { + file: entry.path().to_path_buf(), + valid, // make this a function + mime: result, + }) + }) + .collect(); + + for result in results { + match result { + Ok(r) => info!("{:#?}: {:#?} - {:?} - {:?}", r.file, r.mime, r.valid, r.recommended_extension()), + Err(f) => warn!("{:#?}: Error 0uo", f) + } + } + + debug!("Done"); }