Compare commits
3 commits
62562a0b69
...
7115692f9d
Author | SHA1 | Date | |
---|---|---|---|
7115692f9d | |||
129aa83ade | |||
40a90308a5 |
9 changed files with 196 additions and 65 deletions
29
Cargo.lock
generated
29
Cargo.lock
generated
|
@ -166,15 +166,25 @@ version = "1.1.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193"
|
checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fastrand"
|
||||||
|
version = "1.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ca5faf057445ce5c9d4329e382b2ce7ca38550ef3b73a5348362d5f24e0c7fe3"
|
||||||
|
dependencies = [
|
||||||
|
"instant",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fif"
|
name = "fif"
|
||||||
version = "0.2.9"
|
version = "0.2.10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cached",
|
"cached",
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"clap",
|
"clap",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"exitcode",
|
"exitcode",
|
||||||
|
"fastrand",
|
||||||
"infer",
|
"infer",
|
||||||
"log",
|
"log",
|
||||||
"mime_guess",
|
"mime_guess",
|
||||||
|
@ -244,6 +254,15 @@ version = "0.3.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "0803735b9511d0956c68902a6513ca867819d6e43397adb6a5e903e2f09db734"
|
checksum = "0803735b9511d0956c68902a6513ca867819d6e43397adb6a5e903e2f09db734"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "instant"
|
||||||
|
version = "0.1.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lazy_static"
|
name = "lazy_static"
|
||||||
version = "1.4.0"
|
version = "1.4.0"
|
||||||
|
@ -265,9 +284,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "libc"
|
||||||
version = "0.2.89"
|
version = "0.2.91"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "538c092e5586f4cdd7dd8078c4a79220e3e168880218124dcbce860f0ea938c6"
|
checksum = "8916b1f6ca17130ec6568feccee27c156ad12037880833a3b842a823236502e7"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "log"
|
name = "log"
|
||||||
|
@ -650,9 +669,9 @@ checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "walkdir"
|
name = "walkdir"
|
||||||
version = "2.3.1"
|
version = "2.3.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d"
|
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"same-file",
|
"same-file",
|
||||||
"winapi",
|
"winapi",
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
[package]
|
[package]
|
||||||
name = "fif"
|
name = "fif"
|
||||||
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
|
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
|
||||||
version = "0.2.9"
|
version = "0.2.10"
|
||||||
authors = ["Lynnesbian <lynne@bune.city>"]
|
authors = ["Lynnesbian <lynne@bune.city>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "GPL-3.0-or-later"
|
license = "GPL-3.0-or-later"
|
||||||
|
@ -58,6 +58,7 @@ default-features = false
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3.2.0"
|
tempfile = "3.2.0"
|
||||||
|
fastrand = "1.4.0"
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
lto = "thin"
|
lto = "thin"
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
fd -e rs -x touch {}
|
fd -e rs -x touch {}
|
||||||
cargo clippy -- \
|
cargo clippy --tests -- \
|
||||||
-W clippy::nursery \
|
-W clippy::nursery \
|
||||||
-W clippy::perf \
|
-W clippy::perf \
|
||||||
-W clippy::pedantic \
|
-W clippy::pedantic \
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
use std::path::PathBuf;
|
use std::path::Path;
|
||||||
|
|
||||||
use mime_guess::Mime;
|
use mime_guess::Mime;
|
||||||
use smartstring::alias::String;
|
use smartstring::alias::String;
|
||||||
|
@ -6,16 +6,16 @@ use smartstring::alias::String;
|
||||||
use crate::inspectors::mime_extension_lookup;
|
use crate::inspectors::mime_extension_lookup;
|
||||||
|
|
||||||
/// Information about a scanned file.
|
/// Information about a scanned file.
|
||||||
pub struct Findings {
|
pub struct Findings<'a> {
|
||||||
/// The location of the scanned file.
|
/// The location of the scanned file.
|
||||||
pub file: PathBuf, // TODO: replace with Path???? <'a> and all that
|
pub file: &'a Path,
|
||||||
/// Whether or not the file's extension is valid for its mimetype.
|
/// Whether or not the file's extension is valid for its mimetype.
|
||||||
pub valid: bool,
|
pub valid: bool,
|
||||||
/// The file's mimetype.
|
/// The file's mimetype.
|
||||||
pub mime: Mime,
|
pub mime: Mime,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Findings {
|
impl<'a> Findings<'a> {
|
||||||
pub fn recommended_extension(&self) -> Option<String> {
|
pub fn recommended_extension(&self) -> Option<String> {
|
||||||
mime_extension_lookup(self.mime.clone()).map(|extensions| extensions[0].to_owned())
|
mime_extension_lookup(self.mime.clone()).map(|extensions| extensions[0].to_owned())
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,12 +9,13 @@ use snailquote::escape;
|
||||||
|
|
||||||
use crate::scan_error::ScanError;
|
use crate::scan_error::ScanError;
|
||||||
use crate::{Findings, BACKEND};
|
use crate::{Findings, BACKEND};
|
||||||
|
use std::ffi::OsStr;
|
||||||
|
|
||||||
/// The current version of fif, as defined in Cargo.toml.
|
/// The current version of fif, as defined in Cargo.toml.
|
||||||
const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
|
const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
|
||||||
|
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
type Entries<'a> = [Result<Findings, ScanError<'a>>];
|
type Entries<'a> = [Result<Findings<'a>, ScanError<'a>>];
|
||||||
|
|
||||||
enum Writable<'a> {
|
enum Writable<'a> {
|
||||||
String(&'a str),
|
String(&'a str),
|
||||||
|
@ -36,6 +37,12 @@ impl<'a> From<&'a Path> for Writable<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'a> From<&'a OsStr> for Writable<'a> {
|
||||||
|
fn from(p: &'a OsStr) -> Writable<'a> {
|
||||||
|
Writable::Path(p.as_ref())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
|
fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
|
||||||
// ehhhh
|
// ehhhh
|
||||||
for writeable in writeables {
|
for writeable in writeables {
|
||||||
|
@ -47,13 +54,15 @@ fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
|
||||||
if let Some(string) = path.to_str() {
|
if let Some(string) = path.to_str() {
|
||||||
write!(f, "{}", escape(string))?
|
write!(f, "{}", escape(string))?
|
||||||
} else {
|
} else {
|
||||||
write!(f, "'''")?;
|
write!(f, "'")?;
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
f.write_all(&*path.as_os_str().as_bytes())?;
|
f.write_all(&*path.as_os_str().as_bytes())?;
|
||||||
#[cfg(windows)]
|
// TODO: implement bonked strings for windows
|
||||||
write!(f, "{}", path.as_os_str().to_string_lossy())?; // TODO: implement bonked strings for windows
|
// something like:
|
||||||
// f.write_all(&*path.as_os_str().encode_wide().collect::<Vec<u16>>())?;
|
// f.write_all(&*path.as_os_str().encode_wide().collect::<Vec<u16>>())?;
|
||||||
write!(f, "'''")?
|
#[cfg(windows)]
|
||||||
|
write!(f, "{}", path.as_os_str().to_string_lossy())?;
|
||||||
|
write!(f, "'")?
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -78,9 +87,9 @@ pub trait Format {
|
||||||
match entry {
|
match entry {
|
||||||
Ok(finding) => {
|
Ok(finding) => {
|
||||||
if let Some(ext) = finding.recommended_extension() {
|
if let Some(ext) = finding.recommended_extension() {
|
||||||
self.rename(f, &finding.file, &finding.file.with_extension(ext.as_str()))?
|
self.rename(f, finding.file, &finding.file.with_extension(ext.as_str()))?
|
||||||
} else {
|
} else {
|
||||||
self.no_known_extension(f, &finding.file)?
|
self.no_known_extension(f, finding.file)?
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -100,7 +109,6 @@ pub trait Format {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: maybe make a batch script version for windows
|
|
||||||
/// Bourne-Shell compatible script.
|
/// Bourne-Shell compatible script.
|
||||||
pub struct Script {}
|
pub struct Script {}
|
||||||
|
|
||||||
|
@ -158,3 +166,66 @@ impl Format for Script {
|
||||||
writeln!(f, "\necho 'Done.'")
|
writeln!(f, "\necho 'Done.'")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// PowerShell script.
|
||||||
|
pub struct PowerShell {}
|
||||||
|
|
||||||
|
impl Format for PowerShell {
|
||||||
|
fn new() -> Self {
|
||||||
|
Self {}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rename<W: Write>(&self, f: &mut W, from: &Path, to: &Path) -> io::Result<()> {
|
||||||
|
// unfortunately there doesn't seem to be an equivalent of sh's `mv -i` -- passing the '-Confirm' flag will prompt
|
||||||
|
// the user to confirm every single rename, and using Move-Item -Force will always overwrite without prompting.
|
||||||
|
// there doesn't seem to be a way to rename the file, prompting only if the target already exists.
|
||||||
|
smart_write(
|
||||||
|
f,
|
||||||
|
&[
|
||||||
|
"Rename-Item -Path ".into(),
|
||||||
|
from.into(),
|
||||||
|
" -NewName ".into(),
|
||||||
|
to.file_name().unwrap().into(),
|
||||||
|
Writable::Newline,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
|
||||||
|
smart_write(
|
||||||
|
f,
|
||||||
|
&[
|
||||||
|
"Write-Output @'\nNo known extension for ".into(),
|
||||||
|
path.into(),
|
||||||
|
"\n'@".into(),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unreadable<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
|
||||||
|
smart_write(
|
||||||
|
f,
|
||||||
|
&["Write-Output @'\nFailed to read ".into(), path.into(), "\n'@".into()],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unknown_type<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
|
||||||
|
smart_write(
|
||||||
|
f,
|
||||||
|
&["<# Failed to detect mime type for ".into(), path.into(), "#>".into()],
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn header<W: Write>(&self, _: &Entries, f: &mut W) -> io::Result<()> {
|
||||||
|
writeln!(
|
||||||
|
f,
|
||||||
|
"#!/usr/bin/env pwsh\n# Generated by fif {} ({} backend)",
|
||||||
|
VERSION.unwrap_or("???"),
|
||||||
|
BACKEND
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn footer<W: Write>(&self, _: &Entries, f: &mut W) -> io::Result<()> {
|
||||||
|
writeln!(f, "\nWrite-Output 'Done!'")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -17,10 +17,10 @@ use crate::mime_db::MimeDb;
|
||||||
/// Rather than reading the entire file all at once into a [`BUF_SIZE`] buffer, it tends to be faster to read a small
|
/// Rather than reading the entire file all at once into a [`BUF_SIZE`] buffer, it tends to be faster to read a small
|
||||||
/// chunk of the file and trying to identify that, proceeding with the larger buffer if that fails. Many file formats
|
/// chunk of the file and trying to identify that, proceeding with the larger buffer if that fails. Many file formats
|
||||||
/// can be identified with the first few dozen bytes, so the "happy path" will likely be taken in the majority of cases.
|
/// can be identified with the first few dozen bytes, so the "happy path" will likely be taken in the majority of cases.
|
||||||
const INITIAL_BUF_SIZE: usize = 128;
|
pub const INITIAL_BUF_SIZE: usize = 128;
|
||||||
|
|
||||||
/// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes.
|
/// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes.
|
||||||
const BUF_SIZE: usize = 4096;
|
pub const BUF_SIZE: usize = 4096;
|
||||||
|
|
||||||
/// Tries to identify the mimetype of a file from a given path.
|
/// Tries to identify the mimetype of a file from a given path.
|
||||||
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
|
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
|
||||||
|
|
39
src/main.rs
39
src/main.rs
|
@ -15,7 +15,7 @@
|
||||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
use std::io::{stdout, BufWriter};
|
use std::io::{stdout, BufWriter};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::Path;
|
||||||
|
|
||||||
use cfg_if::cfg_if;
|
use cfg_if::cfg_if;
|
||||||
use clap::Clap;
|
use clap::Clap;
|
||||||
|
@ -23,15 +23,15 @@ use log::{debug, error, info, trace, warn};
|
||||||
use once_cell::sync::OnceCell;
|
use once_cell::sync::OnceCell;
|
||||||
#[cfg(feature = "multi-threaded")]
|
#[cfg(feature = "multi-threaded")]
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use smartstring::alias::String;
|
|
||||||
use walkdir::{DirEntry, WalkDir};
|
use walkdir::{DirEntry, WalkDir};
|
||||||
|
|
||||||
use crate::findings::Findings;
|
use crate::findings::Findings;
|
||||||
use crate::formats::{Format, Script};
|
use crate::formats::{Format, PowerShell, Script};
|
||||||
use crate::mime_db::MimeDb;
|
use crate::mime_db::MimeDb;
|
||||||
use crate::parameters::{OutputFormat, ScanOpts};
|
use crate::parameters::{OutputFormat, ScanOpts};
|
||||||
use crate::scan_error::ScanError;
|
use crate::scan_error::ScanError;
|
||||||
use env_logger::Env;
|
use env_logger::Env;
|
||||||
|
use std::ffi::OsStr;
|
||||||
use std::process::exit;
|
use std::process::exit;
|
||||||
|
|
||||||
mod extension_set;
|
mod extension_set;
|
||||||
|
@ -125,16 +125,18 @@ fn main() {
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
match args.output_format {
|
let mut buffered_stdout = BufWriter::new(stdout());
|
||||||
OutputFormat::Script => {
|
|
||||||
let s = Script::new();
|
let result = match args.output_format {
|
||||||
if s.write_all(&results, &mut BufWriter::new(stdout().lock())).is_err() {
|
OutputFormat::Script => Script::new().write_all(&results, &mut buffered_stdout),
|
||||||
|
OutputFormat::PowerShell | OutputFormat::Powershell => PowerShell::new().write_all(&results, &mut buffered_stdout),
|
||||||
|
OutputFormat::Text => todo!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if result.is_err() {
|
||||||
error!("Failed to write to stdout.");
|
error!("Failed to write to stdout.");
|
||||||
exit(exitcode::IOERR);
|
exit(exitcode::IOERR);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
OutputFormat::Text => todo!(),
|
|
||||||
}
|
|
||||||
|
|
||||||
debug!("Done");
|
debug!("Done");
|
||||||
}
|
}
|
||||||
|
@ -186,17 +188,12 @@ fn wanted_file(entry: &DirEntry, exts: &[&str], scan_opts: &ScanOpts) -> bool {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
exts.contains(&ext.unwrap().to_lowercase().as_str())
|
exts.contains(&ext.unwrap().to_string_lossy().to_lowercase().as_str())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Given a file path, returns its extension, using [`std::path::Path::extension`].
|
/// Given a file path, returns its extension, using [`std::path::Path::extension`].
|
||||||
///
|
fn extension_from_path(path: &Path) -> Option<&OsStr> {
|
||||||
/// The extension is currently [converted to a lossy string](std::ffi::OsStr::to_string_lossy), although it will
|
path.extension()
|
||||||
/// (eventually) in future return an `OsStr` instead.
|
|
||||||
// TODO: ↑
|
|
||||||
fn extension_from_path(path: &Path) -> Option<String> {
|
|
||||||
path.extension(). // Get the path's extension
|
|
||||||
map(|e| String::from(e.to_string_lossy())) // Convert from OsStr to String
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Inspects the given entry, returning a [`Findings`] on success and a [`ScanError`] on failure.
|
/// Inspects the given entry, returning a [`Findings`] on success and a [`ScanError`] on failure.
|
||||||
|
@ -230,13 +227,13 @@ fn scan_file(entry: &DirEntry) -> Result<Findings, ScanError> {
|
||||||
|
|
||||||
let valid = match known_exts {
|
let valid = match known_exts {
|
||||||
// there is a known set of extensions for this mimetype, and the file has an extension
|
// there is a known set of extensions for this mimetype, and the file has an extension
|
||||||
Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_lowercase().into()),
|
Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_string_lossy().to_lowercase().into()),
|
||||||
// either this file has no extension, or there is no known set of extensions for this mimetype :(
|
// either this file has no extension, or there is no known set of extensions for this mimetype :(
|
||||||
Some(_) | None => false,
|
Some(_) | None => false,
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(Findings {
|
Ok(Findings {
|
||||||
file: entry.path().to_path_buf(),
|
file: entry.path(),
|
||||||
valid,
|
valid,
|
||||||
mime: result,
|
mime: result,
|
||||||
})
|
})
|
||||||
|
@ -266,7 +263,7 @@ fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, ScanError>> {
|
||||||
|
|
||||||
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
|
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
|
||||||
/// [DirEntry]s.
|
/// [DirEntry]s.
|
||||||
fn scan_directory(dirs: &PathBuf, exts: &[&str], scan_opts: &ScanOpts) -> Option<Vec<DirEntry>> {
|
fn scan_directory(dirs: &Path, exts: &[&str], scan_opts: &ScanOpts) -> Option<Vec<DirEntry>> {
|
||||||
let stepper = WalkDir::new(dirs).into_iter();
|
let stepper = WalkDir::new(dirs).into_iter();
|
||||||
let mut probably_fatal_error = false;
|
let mut probably_fatal_error = false;
|
||||||
let entries: Vec<DirEntry> = stepper
|
let entries: Vec<DirEntry> = stepper
|
||||||
|
|
|
@ -3,13 +3,26 @@
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use crate::extension_set::ExtensionSet;
|
use crate::extension_set::ExtensionSet;
|
||||||
|
use cfg_if::cfg_if;
|
||||||
use clap::{AppSettings, Clap};
|
use clap::{AppSettings, Clap};
|
||||||
use smartstring::{LazyCompact, SmartString};
|
use smartstring::{LazyCompact, SmartString};
|
||||||
|
|
||||||
|
cfg_if! {
|
||||||
|
if #[cfg(windows)] {
|
||||||
|
const DEFAULT_FORMAT: &str = "powershell";
|
||||||
|
} else {
|
||||||
|
const DEFAULT_FORMAT: &str = "script";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Clap, PartialEq, Debug)]
|
#[derive(Clap, PartialEq, Debug)]
|
||||||
pub enum OutputFormat {
|
pub enum OutputFormat {
|
||||||
/// A Bourne shell compatible script.
|
/// A Bourne shell compatible script.
|
||||||
Script,
|
Script,
|
||||||
|
/// A PowerShell script.
|
||||||
|
PowerShell,
|
||||||
|
/// Also a PowerShell script, with different casing to allow for `fif -o powershell`.
|
||||||
|
Powershell,
|
||||||
/// Plain text.
|
/// Plain text.
|
||||||
Text,
|
Text,
|
||||||
}
|
}
|
||||||
|
@ -52,7 +65,7 @@ pub struct Parameters {
|
||||||
pub scan_extensionless: bool,
|
pub scan_extensionless: bool,
|
||||||
|
|
||||||
/// Output format to use
|
/// Output format to use
|
||||||
#[clap(short, long, default_value = "script", arg_enum)]
|
#[clap(short, long, default_value = DEFAULT_FORMAT, arg_enum)]
|
||||||
pub output_format: OutputFormat,
|
pub output_format: OutputFormat,
|
||||||
|
|
||||||
/// Directory to process
|
/// Directory to process
|
||||||
|
|
|
@ -1,13 +1,15 @@
|
||||||
use crate::inspectors::mime_extension_lookup;
|
use crate::inspectors::{mime_extension_lookup, BUF_SIZE};
|
||||||
use crate::mime_db::*;
|
|
||||||
use crate::{extension_from_path, init_db, scan_directory, scan_from_walkdir};
|
use crate::{extension_from_path, init_db, scan_directory, scan_from_walkdir};
|
||||||
|
|
||||||
use crate::parameters::{Parameters, ScanOpts};
|
use crate::parameters::{Parameters, ScanOpts};
|
||||||
|
use crate::mime_db::MimeDb;
|
||||||
use cfg_if::cfg_if;
|
use cfg_if::cfg_if;
|
||||||
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
|
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
|
||||||
use mime_guess::Mime;
|
use mime_guess::Mime;
|
||||||
use smartstring::alias::String;
|
use smartstring::alias::String;
|
||||||
|
use std::borrow::Borrow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::ffi::OsStr;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
|
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
|
||||||
|
@ -17,12 +19,12 @@ const ZIP_BYTES: &[u8] = b"PK\x03\x04";
|
||||||
|
|
||||||
cfg_if! {
|
cfg_if! {
|
||||||
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
|
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
|
||||||
fn get_mime_db() -> InferDb {
|
fn get_mime_db() -> crate::mime_db::InferDb {
|
||||||
InferDb::init()
|
crate::mime_db::InferDb::init()
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
fn get_mime_db() -> XdgDb {
|
fn get_mime_db() -> crate::mime_db::XdgDb {
|
||||||
XdgDb::init()
|
crate::mime_db::XdgDb::init()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -34,11 +36,11 @@ fn application_zip() -> Mime {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn get_ext() {
|
fn get_ext() {
|
||||||
let mut ext_checks = HashMap::new();
|
let mut ext_checks: HashMap<_, Option<&OsStr>> = HashMap::new();
|
||||||
ext_checks.insert(Path::new("test.txt"), Some(String::from("txt")));
|
ext_checks.insert(Path::new("test.txt"), Some(OsStr::new("txt")));
|
||||||
ext_checks.insert(Path::new("test.zip"), Some(String::from("zip")));
|
ext_checks.insert(Path::new("test.zip"), Some(OsStr::new("zip")));
|
||||||
ext_checks.insert(Path::new("test.tar.gz"), Some(String::from("gz")));
|
ext_checks.insert(Path::new("test.tar.gz"), Some(OsStr::new("gz")));
|
||||||
ext_checks.insert(Path::new("test."), Some(String::from("")));
|
ext_checks.insert(Path::new("test."), Some(OsStr::new("")));
|
||||||
ext_checks.insert(Path::new("test"), None);
|
ext_checks.insert(Path::new("test"), None);
|
||||||
ext_checks.insert(Path::new(".hidden"), None);
|
ext_checks.insert(Path::new(".hidden"), None);
|
||||||
|
|
||||||
|
@ -102,7 +104,7 @@ fn simple_directory() {
|
||||||
|
|
||||||
let entries = scan_directory(
|
let entries = scan_directory(
|
||||||
&dir.path().to_path_buf(),
|
&dir.path().to_path_buf(),
|
||||||
&vec!["jpg", "jpeg", "png", "pdf", "zip"],
|
&["jpg", "jpeg", "png", "pdf", "zip"],
|
||||||
&scan_opts,
|
&scan_opts,
|
||||||
)
|
)
|
||||||
.expect("Directory scan failed.");
|
.expect("Directory scan failed.");
|
||||||
|
@ -119,30 +121,32 @@ fn simple_directory() {
|
||||||
if !result.valid {
|
if !result.valid {
|
||||||
// this should be "wrong.jpg", which is a misnamed png file
|
// this should be "wrong.jpg", which is a misnamed png file
|
||||||
// 1. ensure extension is "png"
|
// 1. ensure extension is "png"
|
||||||
assert_eq!(extension_from_path(&*result.file).unwrap(), String::from("jpg"));
|
assert_eq!(extension_from_path(&*result.file).unwrap(), OsStr::new("jpg"));
|
||||||
// 2. ensure mime type detected is IMAGE_PNG
|
// 2. ensure mime type detected is IMAGE_PNG
|
||||||
assert_eq!(result.mime, IMAGE_PNG);
|
assert_eq!(result.mime, IMAGE_PNG);
|
||||||
// 3. ensure recommended extension is in the list of known extensions for PNG files
|
// 3. ensure recommended extension is in the list of known extensions for PNG files
|
||||||
assert!(mime_extension_lookup(IMAGE_PNG)
|
assert!(mime_extension_lookup(IMAGE_PNG)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.contains(&result.recommended_extension().unwrap().into()));
|
.contains(&result.recommended_extension().unwrap()));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if the recommended extension for this file is in the list of known extensions for its mimetype
|
// check if the recommended extension for this file is in the list of known extensions for its mimetype
|
||||||
assert!(mime_extension_lookup(result.mime.clone())
|
assert!(mime_extension_lookup(result.mime.clone())
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.contains(&result.recommended_extension().unwrap().into()));
|
.contains(&result.recommended_extension().unwrap()));
|
||||||
|
|
||||||
// make sure the guessed mimetype is correct based on the extension of the scanned file
|
// make sure the guessed mimetype is correct based on the extension of the scanned file
|
||||||
|
let ext = extension_from_path(result.file);
|
||||||
|
assert!(ext.is_some());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
result.mime,
|
result.mime,
|
||||||
match extension_from_path(&*result.file).as_deref() {
|
match ext.unwrap().to_string_lossy().borrow() {
|
||||||
Some("jpg") | Some("jpeg") => IMAGE_JPEG,
|
"jpg" | "jpeg" => IMAGE_JPEG,
|
||||||
Some("png") => IMAGE_PNG,
|
"png" => IMAGE_PNG,
|
||||||
Some("pdf") => APPLICATION_PDF,
|
"pdf" => APPLICATION_PDF,
|
||||||
Some("zip") => application_zip(),
|
"zip" => application_zip(),
|
||||||
Some(_) | None => APPLICATION_OCTET_STREAM, // general "fallback" type
|
_ => APPLICATION_OCTET_STREAM, // general "fallback" type
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -162,3 +166,29 @@ fn argument_parsing() {
|
||||||
// exts should be none
|
// exts should be none
|
||||||
assert!(args.exts.is_none());
|
assert!(args.exts.is_none());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn rejects_bad_args() {
|
||||||
|
use clap::Clap;
|
||||||
|
assert!(Parameters::try_parse_from(vec!["fif", "-abcdefg", "-E", "-e"]).is_err());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn identify_random_bytes() {
|
||||||
|
let db = get_mime_db();
|
||||||
|
let rng = fastrand::Rng::new();
|
||||||
|
let mut bytes: Vec<u8>;
|
||||||
|
let mut results: HashMap<Mime, i32> = HashMap::new();
|
||||||
|
|
||||||
|
for _ in 1..500 {
|
||||||
|
bytes = std::iter::repeat_with(|| rng.u8(..)).take(BUF_SIZE * 2).collect();
|
||||||
|
if let Some(detected_type) = db.get_type(&*bytes) {
|
||||||
|
*results.entry(detected_type).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (mime, count) in &results {
|
||||||
|
println!("{}:\t{} counts", mime, count);
|
||||||
|
}
|
||||||
|
println!("No type found:\t{} counts", 500 - results.values().sum::<i32>())
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue