lots of stuff! and a version bump!! 0u0
All checks were successful
continuous-integration/drone/tag Build is passing

- extension_from_path now returns Option<&OsStr>
- two new tests
- Findings uses a Path instead of a PathBuf, reducing allocations
- some unnecessary stuff removed, thanks clippy
- that is all
This commit is contained in:
Lynne Megido 2021-03-26 04:46:07 +10:00
parent 40a90308a5
commit 129aa83ade
Signed by: lynnesbian
GPG Key ID: F0A184B5213D9F90
9 changed files with 97 additions and 49 deletions

21
Cargo.lock generated
View File

@ -166,15 +166,25 @@ version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193" checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193"
[[package]]
name = "fastrand"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca5faf057445ce5c9d4329e382b2ce7ca38550ef3b73a5348362d5f24e0c7fe3"
dependencies = [
"instant",
]
[[package]] [[package]]
name = "fif" name = "fif"
version = "0.2.9" version = "0.2.10"
dependencies = [ dependencies = [
"cached", "cached",
"cfg-if", "cfg-if",
"clap", "clap",
"env_logger", "env_logger",
"exitcode", "exitcode",
"fastrand",
"infer", "infer",
"log", "log",
"mime_guess", "mime_guess",
@ -244,6 +254,15 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0803735b9511d0956c68902a6513ca867819d6e43397adb6a5e903e2f09db734" checksum = "0803735b9511d0956c68902a6513ca867819d6e43397adb6a5e903e2f09db734"
[[package]]
name = "instant"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec"
dependencies = [
"cfg-if",
]
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.4.0" version = "1.4.0"

View File

@ -1,7 +1,7 @@
[package] [package]
name = "fif" name = "fif"
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions." description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
version = "0.2.9" version = "0.2.10"
authors = ["Lynnesbian <lynne@bune.city>"] authors = ["Lynnesbian <lynne@bune.city>"]
edition = "2018" edition = "2018"
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"
@ -58,6 +58,7 @@ default-features = false
[dev-dependencies] [dev-dependencies]
tempfile = "3.2.0" tempfile = "3.2.0"
fastrand = "1.4.0"
[profile.release] [profile.release]
lto = "thin" lto = "thin"

View File

@ -1,6 +1,6 @@
#!/bin/bash #!/bin/bash
fd -e rs -x touch {} fd -e rs -x touch {}
cargo clippy -- \ cargo clippy --tests -- \
-W clippy::nursery \ -W clippy::nursery \
-W clippy::perf \ -W clippy::perf \
-W clippy::pedantic \ -W clippy::pedantic \

View File

@ -1,4 +1,4 @@
use std::path::PathBuf; use std::path::Path;
use mime_guess::Mime; use mime_guess::Mime;
use smartstring::alias::String; use smartstring::alias::String;
@ -6,16 +6,16 @@ use smartstring::alias::String;
use crate::inspectors::mime_extension_lookup; use crate::inspectors::mime_extension_lookup;
/// Information about a scanned file. /// Information about a scanned file.
pub struct Findings { pub struct Findings<'a> {
/// The location of the scanned file. /// The location of the scanned file.
pub file: PathBuf, // TODO: replace with Path???? <'a> and all that pub file: &'a Path,
/// Whether or not the file's extension is valid for its mimetype. /// Whether or not the file's extension is valid for its mimetype.
pub valid: bool, pub valid: bool,
/// The file's mimetype. /// The file's mimetype.
pub mime: Mime, pub mime: Mime,
} }
impl Findings { impl<'a> Findings<'a> {
pub fn recommended_extension(&self) -> Option<String> { pub fn recommended_extension(&self) -> Option<String> {
mime_extension_lookup(self.mime.clone()).map(|extensions| extensions[0].to_owned()) mime_extension_lookup(self.mime.clone()).map(|extensions| extensions[0].to_owned())
} }

View File

@ -1,8 +1,8 @@
//! The various formats that [fif](crate) can output to. //! The various formats that [fif](crate) can output to.
use std::io::{self, Write};
#[cfg(unix)] #[cfg(unix)]
use std::os::unix::ffi::OsStrExt; use std::os::unix::ffi::OsStrExt;
use std::io::{self, Write};
use std::path::Path; use std::path::Path;
use snailquote::escape; use snailquote::escape;
@ -15,7 +15,7 @@ use std::ffi::OsStr;
const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION"); const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
#[doc(hidden)] #[doc(hidden)]
type Entries<'a> = [Result<Findings, ScanError<'a>>]; type Entries<'a> = [Result<Findings<'a>, ScanError<'a>>];
enum Writable<'a> { enum Writable<'a> {
String(&'a str), String(&'a str),
@ -87,9 +87,9 @@ pub trait Format {
match entry { match entry {
Ok(finding) => { Ok(finding) => {
if let Some(ext) = finding.recommended_extension() { if let Some(ext) = finding.recommended_extension() {
self.rename(f, &finding.file, &finding.file.with_extension(ext.as_str()))? self.rename(f, finding.file, &finding.file.with_extension(ext.as_str()))?
} else { } else {
self.no_known_extension(f, &finding.file)? self.no_known_extension(f, finding.file)?
} }
} }
@ -109,7 +109,6 @@ pub trait Format {
} }
} }
// TODO: maybe make a batch script version for windows
/// Bourne-Shell compatible script. /// Bourne-Shell compatible script.
pub struct Script {} pub struct Script {}
@ -195,7 +194,11 @@ impl Format for PowerShell {
fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> { fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
smart_write( smart_write(
f, f,
&["Write-Output @'\nNo known extension for ".into(), path.into(), "\n'@".into()], &[
"Write-Output @'\nNo known extension for ".into(),
path.into(),
"\n'@".into(),
],
) )
} }
@ -225,4 +228,4 @@ impl Format for PowerShell {
fn footer<W: Write>(&self, _: &Entries, f: &mut W) -> io::Result<()> { fn footer<W: Write>(&self, _: &Entries, f: &mut W) -> io::Result<()> {
writeln!(f, "\nWrite-Output 'Done!'") writeln!(f, "\nWrite-Output 'Done!'")
} }
} }

View File

@ -17,10 +17,10 @@ use crate::mime_db::MimeDb;
/// Rather than reading the entire file all at once into a [`BUF_SIZE`] buffer, it tends to be faster to read a small /// Rather than reading the entire file all at once into a [`BUF_SIZE`] buffer, it tends to be faster to read a small
/// chunk of the file and try to identify that, proceeding with the larger buffer if that fails. Many file formats /// chunk of the file and try to identify that, proceeding with the larger buffer if that fails. Many file formats
/// can be identified with the first few dozen bytes, so the "happy path" will likely be taken in the majority of cases. /// can be identified with the first few dozen bytes, so the "happy path" will likely be taken in the majority of cases.
const INITIAL_BUF_SIZE: usize = 128; pub const INITIAL_BUF_SIZE: usize = 128;
/// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes. /// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes.
const BUF_SIZE: usize = 4096; pub const BUF_SIZE: usize = 4096;
/// Tries to identify the mimetype of a file from a given path. /// Tries to identify the mimetype of a file from a given path.
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> { pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {

View File

@ -15,7 +15,7 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>. // along with this program. If not, see <https://www.gnu.org/licenses/>.
use std::io::{stdout, BufWriter}; use std::io::{stdout, BufWriter};
use std::path::{Path, PathBuf}; use std::path::Path;
use cfg_if::cfg_if; use cfg_if::cfg_if;
use clap::Clap; use clap::Clap;
@ -23,15 +23,15 @@ use log::{debug, error, info, trace, warn};
use once_cell::sync::OnceCell; use once_cell::sync::OnceCell;
#[cfg(feature = "multi-threaded")] #[cfg(feature = "multi-threaded")]
use rayon::prelude::*; use rayon::prelude::*;
use smartstring::alias::String;
use walkdir::{DirEntry, WalkDir}; use walkdir::{DirEntry, WalkDir};
use crate::findings::Findings; use crate::findings::Findings;
use crate::formats::{Format, Script, PowerShell}; use crate::formats::{Format, PowerShell, Script};
use crate::mime_db::MimeDb; use crate::mime_db::MimeDb;
use crate::parameters::{OutputFormat, ScanOpts}; use crate::parameters::{OutputFormat, ScanOpts};
use crate::scan_error::ScanError; use crate::scan_error::ScanError;
use env_logger::Env; use env_logger::Env;
use std::ffi::OsStr;
use std::process::exit; use std::process::exit;
mod extension_set; mod extension_set;
@ -130,7 +130,7 @@ fn main() {
let result = match args.output_format { let result = match args.output_format {
OutputFormat::Script => Script::new().write_all(&results, &mut buffered_stdout), OutputFormat::Script => Script::new().write_all(&results, &mut buffered_stdout),
OutputFormat::PowerShell | OutputFormat::Powershell => PowerShell::new().write_all(&results, &mut buffered_stdout), OutputFormat::PowerShell | OutputFormat::Powershell => PowerShell::new().write_all(&results, &mut buffered_stdout),
OutputFormat::Text => todo!() OutputFormat::Text => todo!(),
}; };
if result.is_err() { if result.is_err() {
@ -188,17 +188,12 @@ fn wanted_file(entry: &DirEntry, exts: &[&str], scan_opts: &ScanOpts) -> bool {
return false; return false;
} }
exts.contains(&ext.unwrap().to_lowercase().as_str()) exts.contains(&ext.unwrap().to_string_lossy().to_lowercase().as_str())
} }
/// Given a file path, returns its extension, using [`std::path::Path::extension`]. /// Given a file path, returns its extension, using [`std::path::Path::extension`].
/// fn extension_from_path(path: &Path) -> Option<&OsStr> {
/// The extension is currently [converted to a lossy string](std::ffi::OsStr::to_string_lossy), although it will path.extension()
/// (eventually) in future return an `OsStr` instead.
// TODO: ↑
fn extension_from_path(path: &Path) -> Option<String> {
path.extension(). // Get the path's extension
map(|e| String::from(e.to_string_lossy())) // Convert from OsStr to String
} }
/// Inspects the given entry, returning a [`Findings`] on success and a [`ScanError`] on failure. /// Inspects the given entry, returning a [`Findings`] on success and a [`ScanError`] on failure.
@ -232,13 +227,13 @@ fn scan_file(entry: &DirEntry) -> Result<Findings, ScanError> {
let valid = match known_exts { let valid = match known_exts {
// there is a known set of extensions for this mimetype, and the file has an extension // there is a known set of extensions for this mimetype, and the file has an extension
Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_lowercase().into()), Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_string_lossy().to_lowercase().into()),
// either this file has no extension, or there is no known set of extensions for this mimetype :( // either this file has no extension, or there is no known set of extensions for this mimetype :(
Some(_) | None => false, Some(_) | None => false,
}; };
Ok(Findings { Ok(Findings {
file: entry.path().to_path_buf(), file: entry.path(),
valid, valid,
mime: result, mime: result,
}) })
@ -268,7 +263,7 @@ fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, ScanError>> {
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of /// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
/// [DirEntry]s. /// [DirEntry]s.
fn scan_directory(dirs: &PathBuf, exts: &[&str], scan_opts: &ScanOpts) -> Option<Vec<DirEntry>> { fn scan_directory(dirs: &Path, exts: &[&str], scan_opts: &ScanOpts) -> Option<Vec<DirEntry>> {
let stepper = WalkDir::new(dirs).into_iter(); let stepper = WalkDir::new(dirs).into_iter();
let mut probably_fatal_error = false; let mut probably_fatal_error = false;
let entries: Vec<DirEntry> = stepper let entries: Vec<DirEntry> = stepper

View File

@ -3,9 +3,9 @@
use std::path::PathBuf; use std::path::PathBuf;
use crate::extension_set::ExtensionSet; use crate::extension_set::ExtensionSet;
use cfg_if::cfg_if;
use clap::{AppSettings, Clap}; use clap::{AppSettings, Clap};
use smartstring::{LazyCompact, SmartString}; use smartstring::{LazyCompact, SmartString};
use cfg_if::cfg_if;
cfg_if! { cfg_if! {
if #[cfg(windows)] { if #[cfg(windows)] {

View File

@ -1,5 +1,5 @@
use crate::inspectors::mime_extension_lookup; use crate::inspectors::{mime_extension_lookup, BUF_SIZE};
use crate::mime_db::*; use crate::mime_db::{MimeDb, XdgDb};
use crate::{extension_from_path, init_db, scan_directory, scan_from_walkdir}; use crate::{extension_from_path, init_db, scan_directory, scan_from_walkdir};
use crate::parameters::{Parameters, ScanOpts}; use crate::parameters::{Parameters, ScanOpts};
@ -7,7 +7,9 @@ use cfg_if::cfg_if;
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG}; use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use mime_guess::Mime; use mime_guess::Mime;
use smartstring::alias::String; use smartstring::alias::String;
use std::borrow::Borrow;
use std::collections::HashMap; use std::collections::HashMap;
use std::ffi::OsStr;
use std::path::Path; use std::path::Path;
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF"; const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
@ -34,11 +36,11 @@ fn application_zip() -> Mime {
#[test] #[test]
fn get_ext() { fn get_ext() {
let mut ext_checks = HashMap::new(); let mut ext_checks: HashMap<_, Option<&OsStr>> = HashMap::new();
ext_checks.insert(Path::new("test.txt"), Some(String::from("txt"))); ext_checks.insert(Path::new("test.txt"), Some(OsStr::new("txt")));
ext_checks.insert(Path::new("test.zip"), Some(String::from("zip"))); ext_checks.insert(Path::new("test.zip"), Some(OsStr::new("zip")));
ext_checks.insert(Path::new("test.tar.gz"), Some(String::from("gz"))); ext_checks.insert(Path::new("test.tar.gz"), Some(OsStr::new("gz")));
ext_checks.insert(Path::new("test."), Some(String::from(""))); ext_checks.insert(Path::new("test."), Some(OsStr::new("")));
ext_checks.insert(Path::new("test"), None); ext_checks.insert(Path::new("test"), None);
ext_checks.insert(Path::new(".hidden"), None); ext_checks.insert(Path::new(".hidden"), None);
@ -102,7 +104,7 @@ fn simple_directory() {
let entries = scan_directory( let entries = scan_directory(
&dir.path().to_path_buf(), &dir.path().to_path_buf(),
&vec!["jpg", "jpeg", "png", "pdf", "zip"], &["jpg", "jpeg", "png", "pdf", "zip"],
&scan_opts, &scan_opts,
) )
.expect("Directory scan failed."); .expect("Directory scan failed.");
@ -119,30 +121,32 @@ fn simple_directory() {
if !result.valid { if !result.valid {
// this should be "wrong.jpg", which is a misnamed png file // this should be "wrong.jpg", which is a misnamed png file
// 1. ensure extension is "jpg" // 1. ensure extension is "jpg"
assert_eq!(extension_from_path(&*result.file).unwrap(), String::from("jpg")); assert_eq!(extension_from_path(&*result.file).unwrap(), OsStr::new("jpg"));
// 2. ensure mime type detected is IMAGE_PNG // 2. ensure mime type detected is IMAGE_PNG
assert_eq!(result.mime, IMAGE_PNG); assert_eq!(result.mime, IMAGE_PNG);
// 3. ensure recommended extension is in the list of known extensions for PNG files // 3. ensure recommended extension is in the list of known extensions for PNG files
assert!(mime_extension_lookup(IMAGE_PNG) assert!(mime_extension_lookup(IMAGE_PNG)
.unwrap() .unwrap()
.contains(&result.recommended_extension().unwrap().into())); .contains(&result.recommended_extension().unwrap()));
continue; continue;
} }
// check if the recommended extension for this file is in the list of known extensions for its mimetype // check if the recommended extension for this file is in the list of known extensions for its mimetype
assert!(mime_extension_lookup(result.mime.clone()) assert!(mime_extension_lookup(result.mime.clone())
.unwrap() .unwrap()
.contains(&result.recommended_extension().unwrap().into())); .contains(&result.recommended_extension().unwrap()));
// make sure the guessed mimetype is correct based on the extension of the scanned file // make sure the guessed mimetype is correct based on the extension of the scanned file
let ext = extension_from_path(result.file);
assert!(ext.is_some());
assert_eq!( assert_eq!(
result.mime, result.mime,
match extension_from_path(&*result.file).as_deref() { match ext.unwrap().to_string_lossy().borrow() {
Some("jpg") | Some("jpeg") => IMAGE_JPEG, "jpg" | "jpeg" => IMAGE_JPEG,
Some("png") => IMAGE_PNG, "png" => IMAGE_PNG,
Some("pdf") => APPLICATION_PDF, "pdf" => APPLICATION_PDF,
Some("zip") => application_zip(), "zip" => application_zip(),
Some(_) | None => APPLICATION_OCTET_STREAM, // general "fallback" type _ => APPLICATION_OCTET_STREAM, // general "fallback" type
} }
); );
} }
@ -162,3 +166,29 @@ fn argument_parsing() {
// exts should be none // exts should be none
assert!(args.exts.is_none()); assert!(args.exts.is_none());
} }
#[test]
/// Checks that argument parsing reports an error, rather than succeeding, when handed a nonsensical command line.
fn rejects_bad_args() {
    use clap::Clap;

    // a jumble of short flags, plus `-E` and `-e` with nothing following them
    let bogus_args = vec!["fif", "-abcdefg", "-E", "-e"];
    let parsed = Parameters::try_parse_from(bogus_args);
    assert!(parsed.is_err());
}
#[test]
/// Feeds buffers of random bytes to the mime database and prints a tally of the types it reports.
///
/// This test contains no assertions: it passes as long as nothing panics while the random buffers are being
/// inspected. The printed tally is only visible when the test harness is run with output capturing disabled.
fn identify_random_bytes() {
    // Shared by the loop bound and the final tally, so the "No type found" count can't drift out of sync with the
    // number of buffers that were actually generated.
    const ITERATIONS: i32 = 500;

    let db = get_mime_db();
    let rng = fastrand::Rng::new();
    let mut results: HashMap<Mime, i32> = HashMap::new();

    for _ in 0..ITERATIONS {
        // each buffer is twice BUF_SIZE bytes long
        let bytes: Vec<u8> = std::iter::repeat_with(|| rng.u8(..)).take(BUF_SIZE * 2).collect();
        if let Some(detected_type) = db.get_type(&*bytes) {
            *results.entry(detected_type).or_insert(0) += 1;
        }
    }

    for (mime, count) in &results {
        println!("{}:\t{} counts", mime, count);
    }

    // every buffer that didn't land in `results` had no detectable type
    println!("No type found:\t{} counts", ITERATIONS - results.values().sum::<i32>());
}