Merge branch 'master' of https://git.bune.city/lynnesbian/fif
Conflicts: Cargo.lock
This commit is contained in:
commit
48632a471a
7 changed files with 65 additions and 42 deletions
17
CHANGELOG.md
17
CHANGELOG.md
|
@ -3,14 +3,23 @@ Dates are given in YYYY-MM-DD format.
|
||||||
|
|
||||||
## v0.2
|
## v0.2
|
||||||
### v0.2.12 (2021-???)
|
### v0.2.12 (2021-???)
|
||||||
- Much better README.md
|
- Added Apple iWork document formats to documents extension set
|
||||||
|
- Cleaned up and properly documented tests
|
||||||
|
- Renamed `Script` (in `formats.rs`) to `Shell`, in line with renaming in `parameters.rs`
|
||||||
|
|
||||||
|
### v0.2.12 (2021-04-14)
|
||||||
|
#### Features
|
||||||
|
- Added Text extension set
|
||||||
- Better documentation for command line arguments
|
- Better documentation for command line arguments
|
||||||
|
#### Bugfixes
|
||||||
|
- Fixed a very minor output bug relating to scanning symlinked directories
|
||||||
|
- Better detection for pre-OOXML Office files
|
||||||
|
#### Other
|
||||||
|
- Much better README.md
|
||||||
- Added more stuff to test.py
|
- Added more stuff to test.py
|
||||||
- PKGBUILD for Arch-based distros
|
- PKGBUILD for Arch-based distros
|
||||||
- Added Text extension set
|
|
||||||
- More test coverage
|
- More test coverage
|
||||||
- Fixed a very minor output bug relating to scanning symlinked directories
|
- Doubled BUF_SIZE
|
||||||
- Better detection for specific formats (pre-OOXML Office, EXE, DLL)
|
|
||||||
|
|
||||||
### v0.2.11 (2021-04-04)
|
### v0.2.11 (2021-04-04)
|
||||||
#### Features
|
#### Features
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
[package]
|
[package]
|
||||||
name = "fif"
|
name = "fif"
|
||||||
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
|
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
|
||||||
version = "0.2.11"
|
version = "0.2.12"
|
||||||
authors = ["Lynnesbian <lynne@bune.city>"]
|
authors = ["Lynnesbian <lynne@bune.city>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
license = "GPL-3.0-or-later"
|
license = "GPL-3.0-or-later"
|
||||||
|
@ -29,7 +29,7 @@ log = "0.4.14"
|
||||||
mime_guess = "2.0.3"
|
mime_guess = "2.0.3"
|
||||||
snailquote = "0.3.0"
|
snailquote = "0.3.0"
|
||||||
once_cell = "1.7.2"
|
once_cell = "1.7.2"
|
||||||
infer = "0.3.6"
|
infer = "0.4.0"
|
||||||
rayon = { version = "1.5.0", optional = true }
|
rayon = { version = "1.5.0", optional = true }
|
||||||
exitcode = "1.1.2"
|
exitcode = "1.1.2"
|
||||||
cfg-if = "1.0.0"
|
cfg-if = "1.0.0"
|
||||||
|
|
|
@ -37,6 +37,7 @@ impl ExtensionSet {
|
||||||
.concat(),
|
.concat(),
|
||||||
Self::Documents => vec![
|
Self::Documents => vec![
|
||||||
"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
|
"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
|
||||||
|
"pages", "key", "numbers",
|
||||||
],
|
],
|
||||||
Self::Text => mime_guess::get_mime_extensions_str("text/*").unwrap().to_vec(),
|
Self::Text => mime_guess::get_mime_extensions_str("text/*").unwrap().to_vec(),
|
||||||
// many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used
|
// many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used
|
||||||
|
|
|
@ -110,9 +110,9 @@ pub trait Format {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Bourne-Shell compatible script.
|
/// Bourne-Shell compatible script.
|
||||||
pub struct Script {}
|
pub struct Shell {}
|
||||||
|
|
||||||
impl Format for Script {
|
impl Format for Shell {
|
||||||
fn new() -> Self {
|
fn new() -> Self {
|
||||||
Self {}
|
Self {}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,16 +20,14 @@ use crate::string_type::String;
|
||||||
pub const INITIAL_BUF_SIZE: usize = 128;
|
pub const INITIAL_BUF_SIZE: usize = 128;
|
||||||
|
|
||||||
/// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes.
|
/// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes.
|
||||||
pub const BUF_SIZE: usize = 4096;
|
pub const BUF_SIZE: usize = 8192;
|
||||||
|
|
||||||
/// Tries to identify the mimetype of a file from a given path.
|
/// Tries to identify the mimetype of a file from a given path.
|
||||||
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
|
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
|
||||||
let mut buffer = [0; INITIAL_BUF_SIZE];
|
let mut buffer = [0; INITIAL_BUF_SIZE];
|
||||||
let mut file = File::open(path)?;
|
let mut file = File::open(path)?;
|
||||||
|
|
||||||
// this lint can be ignored: it's okay if the file isn't long enough to fill the buffer, as we only care about the
|
// read a small amount to start with
|
||||||
// first few bytes for the purpose of mime sniffing
|
|
||||||
#[allow(clippy::unused_io_amount)]
|
|
||||||
file.read(&mut buffer)?;
|
file.read(&mut buffer)?;
|
||||||
|
|
||||||
let r = db.get_type(&buffer).filter(|mime|
|
let r = db.get_type(&buffer).filter(|mime|
|
||||||
|
@ -43,8 +41,7 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
|
||||||
// doc/ppt/xls files are a subset of what's known as an "OLE2 compound document storage", at least according to
|
// doc/ppt/xls files are a subset of what's known as an "OLE2 compound document storage", at least according to
|
||||||
// shared-mime-info. if a pre-OOXML era MS office file is scanned and identified as x-ole-storage, reading further
|
// shared-mime-info. if a pre-OOXML era MS office file is scanned and identified as x-ole-storage, reading further
|
||||||
// will allow it to be detected correctly as the appropriate filetype.
|
// will allow it to be detected correctly as the appropriate filetype.
|
||||||
&& mime != &Mime::from_str("application/x-ole-storage").unwrap()
|
&& mime != &Mime::from_str("application/x-ole-storage").unwrap());
|
||||||
);
|
|
||||||
|
|
||||||
if r.is_some() {
|
if r.is_some() {
|
||||||
return Ok(r);
|
return Ok(r);
|
||||||
|
|
|
@ -29,7 +29,7 @@ use rayon::prelude::*;
|
||||||
use walkdir::{DirEntry, WalkDir};
|
use walkdir::{DirEntry, WalkDir};
|
||||||
|
|
||||||
use crate::findings::Findings;
|
use crate::findings::Findings;
|
||||||
use crate::formats::{Format, PowerShell, Script};
|
use crate::formats::{Format, PowerShell, Shell};
|
||||||
use crate::mime_db::MimeDb;
|
use crate::mime_db::MimeDb;
|
||||||
use crate::parameters::{OutputFormat, ScanOpts};
|
use crate::parameters::{OutputFormat, ScanOpts};
|
||||||
use crate::scan_error::ScanError;
|
use crate::scan_error::ScanError;
|
||||||
|
@ -133,7 +133,7 @@ fn main() {
|
||||||
let mut buffered_stdout = BufWriter::new(stdout());
|
let mut buffered_stdout = BufWriter::new(stdout());
|
||||||
|
|
||||||
let result = match args.output_format {
|
let result = match args.output_format {
|
||||||
OutputFormat::Sh => Script::new().write_all(&results, &mut buffered_stdout),
|
OutputFormat::Sh => Shell::new().write_all(&results, &mut buffered_stdout),
|
||||||
OutputFormat::PowerShell | OutputFormat::Powershell => PowerShell::new().write_all(&results, &mut buffered_stdout),
|
OutputFormat::PowerShell | OutputFormat::Powershell => PowerShell::new().write_all(&results, &mut buffered_stdout),
|
||||||
OutputFormat::Text => todo!(),
|
OutputFormat::Text => todo!(),
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,19 +1,15 @@
|
||||||
|
use crate::findings::Findings;
|
||||||
|
use crate::formats::{Format, Shell};
|
||||||
use crate::inspectors::{mime_extension_lookup, BUF_SIZE};
|
use crate::inspectors::{mime_extension_lookup, BUF_SIZE};
|
||||||
use crate::{extension_from_path, init_db, scan_directory, scan_from_walkdir};
|
|
||||||
|
|
||||||
use crate::mime_db::MimeDb;
|
use crate::mime_db::MimeDb;
|
||||||
use crate::parameters::{Parameters, ScanOpts};
|
|
||||||
use crate::string_type::String;
|
use crate::string_type::String;
|
||||||
use cfg_if::cfg_if;
|
use crate::{extension_from_path, scan_directory, scan_from_walkdir};
|
||||||
|
|
||||||
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
|
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
|
||||||
use mime_guess::Mime;
|
use mime_guess::Mime;
|
||||||
|
|
||||||
use crate::findings::Findings;
|
|
||||||
use crate::formats::{Format, Script};
|
|
||||||
use std::borrow::Borrow;
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::ffi::OsStr;
|
use std::ffi::OsStr;
|
||||||
use std::io::Read;
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
|
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
|
||||||
|
@ -21,7 +17,7 @@ const PNG_BYTES: &[u8] = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
|
||||||
const PDF_BYTES: &[u8] = b"%PDF-";
|
const PDF_BYTES: &[u8] = b"%PDF-";
|
||||||
const ZIP_BYTES: &[u8] = b"PK\x03\x04";
|
const ZIP_BYTES: &[u8] = b"PK\x03\x04";
|
||||||
|
|
||||||
cfg_if! {
|
cfg_if::cfg_if! {
|
||||||
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
|
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
|
||||||
fn get_mime_db() -> crate::mime_db::InferDb {
|
fn get_mime_db() -> crate::mime_db::InferDb {
|
||||||
crate::mime_db::InferDb::init()
|
crate::mime_db::InferDb::init()
|
||||||
|
@ -39,6 +35,7 @@ fn application_zip() -> Mime {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
/// Ensure that `extension_from_path` successfully returns the extension from a set of paths.
|
||||||
fn get_ext() {
|
fn get_ext() {
|
||||||
let mut ext_checks: HashMap<_, Option<&OsStr>> = HashMap::new();
|
let mut ext_checks: HashMap<_, Option<&OsStr>> = HashMap::new();
|
||||||
ext_checks.insert(Path::new("test.txt"), Some(OsStr::new("txt")));
|
ext_checks.insert(Path::new("test.txt"), Some(OsStr::new("txt")));
|
||||||
|
@ -54,6 +51,7 @@ fn get_ext() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
/// Ensure that the mime types for JPEG, PNG, PDF, and ZIP are detected from their magic numbers.
|
||||||
fn detect_type() {
|
fn detect_type() {
|
||||||
let db = get_mime_db();
|
let db = get_mime_db();
|
||||||
assert_eq!(db.get_type(JPEG_BYTES), Some(IMAGE_JPEG));
|
assert_eq!(db.get_type(JPEG_BYTES), Some(IMAGE_JPEG));
|
||||||
|
@ -63,6 +61,8 @@ fn detect_type() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
/// Ensure that `mime_extension_lookup` works as expected, and that the set of extensions for JPEG, PNG, PDF, and ZIP
|
||||||
|
/// contain "jpg", "png", "pdf", and "zip", respectively.
|
||||||
fn recommend_ext() {
|
fn recommend_ext() {
|
||||||
assert!(mime_extension_lookup(IMAGE_JPEG)
|
assert!(mime_extension_lookup(IMAGE_JPEG)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
@ -77,11 +77,17 @@ fn recommend_ext() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
/// Create a simple directory with some files, run `scan_directory` on it, and ensure that the files have their
|
||||||
|
/// associated mime types correctly deduced.
|
||||||
fn simple_directory() {
|
fn simple_directory() {
|
||||||
|
use crate::parameters::ScanOpts;
|
||||||
|
use std::borrow::Borrow;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
|
|
||||||
|
// set of files to scan. every file has a magic number corresponding to its extension, except for
|
||||||
|
// "wrong.jpg", which is actually a png.
|
||||||
let mut files = HashMap::new();
|
let mut files = HashMap::new();
|
||||||
files.insert("test.jpg", JPEG_BYTES);
|
files.insert("test.jpg", JPEG_BYTES);
|
||||||
files.insert("test.jpeg", JPEG_BYTES);
|
files.insert("test.jpeg", JPEG_BYTES);
|
||||||
|
@ -107,41 +113,39 @@ fn simple_directory() {
|
||||||
follow_symlinks: false,
|
follow_symlinks: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
let entries = scan_directory(
|
let entries = scan_directory(&dir.path().to_path_buf(), None, &scan_opts).expect("Directory scan failed.");
|
||||||
&dir.path().to_path_buf(),
|
|
||||||
Some(&vec!["jpg", "jpeg", "png", "pdf", "zip"]),
|
|
||||||
&scan_opts,
|
|
||||||
)
|
|
||||||
.expect("Directory scan failed.");
|
|
||||||
|
|
||||||
assert_eq!(entries.len(), files.len());
|
assert_eq!(entries.len(), files.len());
|
||||||
|
|
||||||
// initialise global mime DB
|
// initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present.
|
||||||
init_db();
|
crate::init_db();
|
||||||
|
|
||||||
let results = scan_from_walkdir(&entries);
|
let results = scan_from_walkdir(&entries);
|
||||||
for result in results {
|
for result in results {
|
||||||
|
// there should be no IO errors during this test. any IO errors encountered are outside the scope of this test.
|
||||||
let result = result.expect("Error while scanning file");
|
let result = result.expect("Error while scanning file");
|
||||||
|
|
||||||
if !result.valid {
|
if !result.valid {
|
||||||
// this should be "wrong.jpg", which is a misnamed png file
|
// the only invalid file detected should be "wrong.jpg", which is a misnamed png file
|
||||||
// 1. ensure extension is "png"
|
// 1. ensure detected extension is "jpg"
|
||||||
assert_eq!(extension_from_path(&*result.file).unwrap(), OsStr::new("jpg"));
|
assert_eq!(extension_from_path(&*result.file).unwrap(), OsStr::new("jpg"));
|
||||||
// 2. ensure mime type detected is IMAGE_PNG
|
// 2. ensure detected mime type is IMAGE_PNG
|
||||||
assert_eq!(result.mime, IMAGE_PNG);
|
assert_eq!(result.mime, IMAGE_PNG);
|
||||||
// 3. ensure recommended extension is in the list of known extensions for PNG files
|
// 3. ensure the recommended extension for "wrong.jpg" is "png"
|
||||||
assert!(mime_extension_lookup(IMAGE_PNG)
|
assert_eq!(&result.recommended_extension().unwrap(), &String::from("png"));
|
||||||
.unwrap()
|
|
||||||
.contains(&result.recommended_extension().unwrap()));
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if the recommended extension for this file is in the list of known extensions for its mimetype
|
// check if the recommended extension for this file is in the list of known extensions for its mimetype - for
|
||||||
|
// example, if the file is determined to be an IMAGE_PNG, its recommended extension should be one of the extensions
|
||||||
|
// returned by `mime_extension_lookup(IMAGE_PNG)`.
|
||||||
assert!(mime_extension_lookup(result.mime.clone())
|
assert!(mime_extension_lookup(result.mime.clone())
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.contains(&result.recommended_extension().unwrap()));
|
.contains(&result.recommended_extension().unwrap()));
|
||||||
|
|
||||||
// make sure the guessed mimetype is correct based on the extension of the scanned file
|
// make sure the guessed mimetype is correct based on the extension of the scanned file
|
||||||
|
// because we already know that the extensions match the mimetype (as we created these files ourselves earlier in
|
||||||
|
// the test), all files with the "jpg" extension should be IMAGE_JPEGs, etc.
|
||||||
let ext = extension_from_path(result.file);
|
let ext = extension_from_path(result.file);
|
||||||
assert!(ext.is_some());
|
assert!(ext.is_some());
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
@ -161,7 +165,10 @@ fn simple_directory() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
/// Ensure that command line argument parsing works correctly - flags are interpreted, booleans are set, and so on.
|
||||||
fn argument_parsing() {
|
fn argument_parsing() {
|
||||||
|
use crate::parameters::{Parameters, ScanOpts};
|
||||||
|
|
||||||
use clap::Clap;
|
use clap::Clap;
|
||||||
|
|
||||||
// pass `-f`, which enables following symlinks, and `-E images`, which scans files with image extensions
|
// pass `-f`, which enables following symlinks, and `-E images`, which scans files with image extensions
|
||||||
|
@ -192,7 +199,10 @@ fn argument_parsing() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
/// Ensure that badly formed command line arguments are rejected.
|
||||||
fn rejects_bad_args() {
|
fn rejects_bad_args() {
|
||||||
|
use crate::parameters::Parameters;
|
||||||
|
|
||||||
use clap::Clap;
|
use clap::Clap;
|
||||||
let tests = [
|
let tests = [
|
||||||
// Non-existent flags:
|
// Non-existent flags:
|
||||||
|
@ -213,6 +223,8 @@ fn rejects_bad_args() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
/// Generate random series of bytes and try to identify them. This test makes no assertions and can only fail if the
|
||||||
|
/// mime database somehow panics or hangs.
|
||||||
fn identify_random_bytes() {
|
fn identify_random_bytes() {
|
||||||
let db = get_mime_db();
|
let db = get_mime_db();
|
||||||
let rng = fastrand::Rng::new();
|
let rng = fastrand::Rng::new();
|
||||||
|
@ -233,7 +245,11 @@ fn identify_random_bytes() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
/// Ensure that, for a given file "wrong.bad", which should have extension "good", the shell output contains something
|
||||||
|
/// like "mv wrong.bad wrong.good".
|
||||||
fn outputs_move_commands() {
|
fn outputs_move_commands() {
|
||||||
|
use std::io::Read;
|
||||||
|
|
||||||
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
|
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
|
||||||
let entries = vec![Ok(Findings {
|
let entries = vec![Ok(Findings {
|
||||||
file: Path::new("misnamed_file.png"),
|
file: Path::new("misnamed_file.png"),
|
||||||
|
@ -244,7 +260,7 @@ fn outputs_move_commands() {
|
||||||
let mut cursor = std::io::Cursor::new(Vec::new());
|
let mut cursor = std::io::Cursor::new(Vec::new());
|
||||||
let mut contents = std::string::String::new();
|
let mut contents = std::string::String::new();
|
||||||
|
|
||||||
Script::new()
|
Shell::new()
|
||||||
.write_all(&entries, &mut cursor)
|
.write_all(&entries, &mut cursor)
|
||||||
.expect("Failed to write to cursor");
|
.expect("Failed to write to cursor");
|
||||||
cursor.set_position(0);
|
cursor.set_position(0);
|
||||||
|
|
Loading…
Reference in a new issue