diff --git a/Cargo.lock b/Cargo.lock index f26e6cb..a4bbaa6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -166,15 +166,25 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193" +[[package]] +name = "fastrand" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca5faf057445ce5c9d4329e382b2ce7ca38550ef3b73a5348362d5f24e0c7fe3" +dependencies = [ + "instant", +] + [[package]] name = "fif" -version = "0.2.9" +version = "0.2.10" dependencies = [ "cached", "cfg-if", "clap", "env_logger", "exitcode", + "fastrand", "infer", "log", "mime_guess", @@ -244,6 +254,15 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0803735b9511d0956c68902a6513ca867819d6e43397adb6a5e903e2f09db734" +[[package]] +name = "instant" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" +dependencies = [ + "cfg-if", +] + [[package]] name = "lazy_static" version = "1.4.0" diff --git a/Cargo.toml b/Cargo.toml index 297b257..c9b4d7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "fif" description = "A command-line tool for detecting and optionally correcting files with incorrect extensions." 
-version = "0.2.9" +version = "0.2.10" authors = ["Lynnesbian "] edition = "2018" license = "GPL-3.0-or-later" @@ -58,6 +58,7 @@ default-features = false [dev-dependencies] tempfile = "3.2.0" +fastrand = "1.4.0" [profile.release] lto = "thin" diff --git a/clippy.sh b/clippy.sh index 78a3a58..41c7e6e 100755 --- a/clippy.sh +++ b/clippy.sh @@ -1,6 +1,6 @@ #!/bin/bash fd -e rs -x touch {} -cargo clippy -- \ +cargo clippy --tests -- \ -W clippy::nursery \ -W clippy::perf \ -W clippy::pedantic \ diff --git a/src/findings.rs b/src/findings.rs index 2fbbee1..dfd30de 100644 --- a/src/findings.rs +++ b/src/findings.rs @@ -1,4 +1,4 @@ -use std::path::PathBuf; +use std::path::Path; use mime_guess::Mime; use smartstring::alias::String; @@ -6,16 +6,16 @@ use smartstring::alias::String; use crate::inspectors::mime_extension_lookup; /// Information about a scanned file. -pub struct Findings { +pub struct Findings<'a> { /// The location of the scanned file. - pub file: PathBuf, // TODO: replace with Path???? <'a> and all that + pub file: &'a Path, /// Whether or not the file's extension is valid for its mimetype. pub valid: bool, /// The file's mimetype. pub mime: Mime, } -impl Findings { +impl<'a> Findings<'a> { pub fn recommended_extension(&self) -> Option { mime_extension_lookup(self.mime.clone()).map(|extensions| extensions[0].to_owned()) } diff --git a/src/formats.rs b/src/formats.rs index db09466..dac6cc3 100644 --- a/src/formats.rs +++ b/src/formats.rs @@ -1,8 +1,8 @@ //! The various formats that [fif](crate) can output to. 
+use std::io::{self, Write}; #[cfg(unix)] use std::os::unix::ffi::OsStrExt; -use std::io::{self, Write}; use std::path::Path; use snailquote::escape; @@ -15,7 +15,7 @@ use std::ffi::OsStr; const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION"); #[doc(hidden)] -type Entries<'a> = [Result>]; +type Entries<'a> = [Result, ScanError<'a>>]; enum Writable<'a> { String(&'a str), @@ -87,9 +87,9 @@ pub trait Format { match entry { Ok(finding) => { if let Some(ext) = finding.recommended_extension() { - self.rename(f, &finding.file, &finding.file.with_extension(ext.as_str()))? + self.rename(f, finding.file, &finding.file.with_extension(ext.as_str()))? } else { - self.no_known_extension(f, &finding.file)? + self.no_known_extension(f, finding.file)? } } @@ -109,7 +109,6 @@ pub trait Format { } } -// TODO: maybe make a batch script version for windows /// Bourne-Shell compatible script. pub struct Script {} @@ -195,7 +194,11 @@ impl Format for PowerShell { fn no_known_extension(&self, f: &mut W, path: &Path) -> io::Result<()> { smart_write( f, - &["Write-Output @'\nNo known extension for ".into(), path.into(), "\n'@".into()], + &[ + "Write-Output @'\nNo known extension for ".into(), + path.into(), + "\n'@".into(), + ], ) } @@ -225,4 +228,4 @@ impl Format for PowerShell { fn footer(&self, _: &Entries, f: &mut W) -> io::Result<()> { writeln!(f, "\nWrite-Output 'Done!'") } -} \ No newline at end of file +} diff --git a/src/inspectors.rs b/src/inspectors.rs index 613c7bf..476af0b 100644 --- a/src/inspectors.rs +++ b/src/inspectors.rs @@ -17,10 +17,10 @@ use crate::mime_db::MimeDb; /// Rather than reading the entire file all at once into a [`BUF_SIZE`] buffer, it tends to be faster to read a small /// chunk of the file and trying to identify that, proceeding with the larger buffer if that fails. Many file formats /// can be identified with the first few dozen bytes, so the "happy path" will likely be taken in the majority of cases. 
-const INITIAL_BUF_SIZE: usize = 128; +pub const INITIAL_BUF_SIZE: usize = 128; /// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes. -const BUF_SIZE: usize = 4096; +pub const BUF_SIZE: usize = 4096; /// Tries to identify the mimetype of a file from a given path. pub fn mime_type(db: &T, path: &Path) -> io::Result> { diff --git a/src/main.rs b/src/main.rs index 98d9b7a..c6c201e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,7 +15,7 @@ // along with this program. If not, see . use std::io::{stdout, BufWriter}; -use std::path::{Path, PathBuf}; +use std::path::Path; use cfg_if::cfg_if; use clap::Clap; @@ -23,15 +23,15 @@ use log::{debug, error, info, trace, warn}; use once_cell::sync::OnceCell; #[cfg(feature = "multi-threaded")] use rayon::prelude::*; -use smartstring::alias::String; use walkdir::{DirEntry, WalkDir}; use crate::findings::Findings; -use crate::formats::{Format, Script, PowerShell}; +use crate::formats::{Format, PowerShell, Script}; use crate::mime_db::MimeDb; use crate::parameters::{OutputFormat, ScanOpts}; use crate::scan_error::ScanError; use env_logger::Env; +use std::ffi::OsStr; use std::process::exit; mod extension_set; @@ -130,7 +130,7 @@ fn main() { let result = match args.output_format { OutputFormat::Script => Script::new().write_all(&results, &mut buffered_stdout), OutputFormat::PowerShell | OutputFormat::Powershell => PowerShell::new().write_all(&results, &mut buffered_stdout), - OutputFormat::Text => todo!() + OutputFormat::Text => todo!(), }; if result.is_err() { @@ -188,17 +188,12 @@ fn wanted_file(entry: &DirEntry, exts: &[&str], scan_opts: &ScanOpts) -> bool { return false; } - exts.contains(&ext.unwrap().to_lowercase().as_str()) + exts.contains(&ext.unwrap().to_string_lossy().to_lowercase().as_str()) } /// Given a file path, returns its extension, using [`std::path::Path::extension`]. 
-/// -/// The extension is currently [converted to a lossy string](std::ffi::OsStr::to_string_lossy), although it will -/// (eventually) in future return an `OsStr` instead. -// TODO: ↑ -fn extension_from_path(path: &Path) -> Option { - path.extension(). // Get the path's extension - map(|e| String::from(e.to_string_lossy())) // Convert from OsStr to String +fn extension_from_path(path: &Path) -> Option<&OsStr> { + path.extension() } /// Inspects the given entry, returning a [`Findings`] on success and a [`ScanError`] on failure. @@ -232,13 +227,13 @@ fn scan_file(entry: &DirEntry) -> Result { let valid = match known_exts { // there is a known set of extensions for this mimetype, and the file has an extension - Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_lowercase().into()), + Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_string_lossy().to_lowercase().into()), // either this file has no extension, or there is no known set of extensions for this mimetype :( Some(_) | None => false, }; Ok(Findings { - file: entry.path().to_path_buf(), + file: entry.path(), valid, mime: result, }) @@ -268,7 +263,7 @@ fn scan_from_walkdir(entries: &[DirEntry]) -> Vec> { /// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of /// [DirEntry]s. -fn scan_directory(dirs: &PathBuf, exts: &[&str], scan_opts: &ScanOpts) -> Option> { +fn scan_directory(dirs: &Path, exts: &[&str], scan_opts: &ScanOpts) -> Option> { let stepper = WalkDir::new(dirs).into_iter(); let mut probably_fatal_error = false; let entries: Vec = stepper diff --git a/src/parameters.rs b/src/parameters.rs index b6dd6ae..3366f72 100644 --- a/src/parameters.rs +++ b/src/parameters.rs @@ -3,9 +3,9 @@ use std::path::PathBuf; use crate::extension_set::ExtensionSet; +use cfg_if::cfg_if; use clap::{AppSettings, Clap}; use smartstring::{LazyCompact, SmartString}; -use cfg_if::cfg_if; cfg_if! 
{ if #[cfg(windows)] { diff --git a/src/tests/mod.rs b/src/tests/mod.rs index d1dfd4a..9abdc9f 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -1,5 +1,5 @@ -use crate::inspectors::mime_extension_lookup; -use crate::mime_db::*; +use crate::inspectors::{mime_extension_lookup, BUF_SIZE}; +use crate::mime_db::{MimeDb, XdgDb}; use crate::{extension_from_path, init_db, scan_directory, scan_from_walkdir}; use crate::parameters::{Parameters, ScanOpts}; @@ -7,7 +7,9 @@ use cfg_if::cfg_if; use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG}; use mime_guess::Mime; use smartstring::alias::String; +use std::borrow::Borrow; use std::collections::HashMap; +use std::ffi::OsStr; use std::path::Path; const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF"; @@ -34,11 +36,11 @@ fn application_zip() -> Mime { #[test] fn get_ext() { - let mut ext_checks = HashMap::new(); - ext_checks.insert(Path::new("test.txt"), Some(String::from("txt"))); - ext_checks.insert(Path::new("test.zip"), Some(String::from("zip"))); - ext_checks.insert(Path::new("test.tar.gz"), Some(String::from("gz"))); - ext_checks.insert(Path::new("test."), Some(String::from(""))); + let mut ext_checks: HashMap<_, Option<&OsStr>> = HashMap::new(); + ext_checks.insert(Path::new("test.txt"), Some(OsStr::new("txt"))); + ext_checks.insert(Path::new("test.zip"), Some(OsStr::new("zip"))); + ext_checks.insert(Path::new("test.tar.gz"), Some(OsStr::new("gz"))); + ext_checks.insert(Path::new("test."), Some(OsStr::new(""))); ext_checks.insert(Path::new("test"), None); ext_checks.insert(Path::new(".hidden"), None); @@ -102,7 +104,7 @@ fn simple_directory() { let entries = scan_directory( &dir.path().to_path_buf(), - &vec!["jpg", "jpeg", "png", "pdf", "zip"], + &["jpg", "jpeg", "png", "pdf", "zip"], &scan_opts, ) .expect("Directory scan failed."); @@ -119,30 +121,32 @@ fn simple_directory() { if !result.valid { // this should be "wrong.jpg", which is a misnamed png file // 1. 
ensure extension is "png" - assert_eq!(extension_from_path(&*result.file).unwrap(), String::from("jpg")); + assert_eq!(extension_from_path(&*result.file).unwrap(), OsStr::new("jpg")); // 2. ensure mime type detected is IMAGE_PNG assert_eq!(result.mime, IMAGE_PNG); // 3. ensure recommended extension is in the list of known extensions for PNG files assert!(mime_extension_lookup(IMAGE_PNG) .unwrap() - .contains(&result.recommended_extension().unwrap().into())); + .contains(&result.recommended_extension().unwrap())); continue; } // check if the recommended extension for this file is in the list of known extensions for its mimetype assert!(mime_extension_lookup(result.mime.clone()) .unwrap() - .contains(&result.recommended_extension().unwrap().into())); + .contains(&result.recommended_extension().unwrap())); // make sure the guessed mimetype is correct based on the extension of the scanned file + let ext = extension_from_path(result.file); + assert!(ext.is_some()); assert_eq!( result.mime, - match extension_from_path(&*result.file).as_deref() { - Some("jpg") | Some("jpeg") => IMAGE_JPEG, - Some("png") => IMAGE_PNG, - Some("pdf") => APPLICATION_PDF, - Some("zip") => application_zip(), - Some(_) | None => APPLICATION_OCTET_STREAM, // general "fallback" type + match ext.unwrap().to_string_lossy().borrow() { + "jpg" | "jpeg" => IMAGE_JPEG, + "png" => IMAGE_PNG, + "pdf" => APPLICATION_PDF, + "zip" => application_zip(), + _ => APPLICATION_OCTET_STREAM, // general "fallback" type } ); } @@ -162,3 +166,29 @@ fn argument_parsing() { // exts should be none assert!(args.exts.is_none()); } + +#[test] +fn rejects_bad_args() { + use clap::Clap; + assert!(Parameters::try_parse_from(vec!["fif", "-abcdefg", "-E", "-e"]).is_err()); +} + +#[test] +fn identify_random_bytes() { + let db = get_mime_db(); + let rng = fastrand::Rng::new(); + let mut bytes: Vec<u8>; + let mut results: HashMap<Mime, i32> = HashMap::new(); + + for _ in 0..500 { // exactly 500 samples, matching the total subtracted in the summary below + bytes = std::iter::repeat_with(|| rng.u8(..)).take(BUF_SIZE * 
2).collect(); + if let Some(detected_type) = db.get_type(&*bytes) { + *results.entry(detected_type).or_insert(0) += 1; + } + } + + for (mime, count) in &results { + println!("{}:\t{} counts", mime, count); + } + println!("No type found:\t{} counts", 500 - results.values().sum::<i32>()) +}