466 lines
15 KiB
Rust
466 lines
15 KiB
Rust
use crate::findings::Findings;
|
|
use crate::formats::{Format, PowerShell, Shell};
|
|
use crate::inspectors::{mime_extension_lookup, BUF_SIZE};
|
|
use crate::mime_db::MimeDb;
|
|
use crate::string_type::String;
|
|
use crate::{extension_from_path, scan_directory, scan_from_walkdir};
|
|
|
|
use crate::parameters::Parameters;
|
|
use clap::Clap;
|
|
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
|
|
use mime_guess::Mime;
|
|
|
|
use crate::parameters::ExtensionSet;
|
|
use std::collections::HashMap;
|
|
use std::ffi::OsStr;
|
|
use std::path::{Path, PathBuf};
|
|
|
|
/// Magic number used by the tests as the contents of a JPEG file.
const JPEG_BYTES: &[u8] = &[0xFF, 0xD8, 0xFF];
/// Magic number used by the tests as the contents of a PNG file (`\x89PNG\r\n\x1A\n`).
const PNG_BYTES: &[u8] = b"\x89PNG\r\n\x1A\n";
/// Magic number used by the tests as the contents of a PDF file.
const PDF_BYTES: &[u8] = &[b'%', b'P', b'D', b'F', b'-'];
/// Magic number used by the tests as the contents of a ZIP file.
const ZIP_BYTES: &[u8] = &[b'P', b'K', 0x03, 0x04];
|
|
|
|
cfg_if::cfg_if! {
    // `InferDb` is selected on unix when the `infer-backend` feature is enabled, and on non-unix targets unless the
    // `xdg-mime-backend` feature is enabled. Every other configuration gets `XdgDb`.
    if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
        /// Initialise and return the mime database used by the tests in this file (an `InferDb` in this configuration).
        fn get_mime_db() -> crate::mime_db::InferDb {
            use crate::mime_db::InferDb;
            InferDb::init()
        }
    } else {
        /// Initialise and return the mime database used by the tests in this file (an `XdgDb` in this configuration).
        fn get_mime_db() -> crate::mime_db::XdgDb {
            use crate::mime_db::XdgDb;
            XdgDb::init()
        }
    }
}
|
|
|
|
fn application_zip() -> Mime {
|
|
use std::str::FromStr;
|
|
Mime::from_str("application/zip").unwrap()
|
|
}
|
|
|
|
#[test]
/// Check that `extension_from_path` yields the expected extension (or no extension at all) for a range of paths.
fn get_ext() {
    // (path, expected extension) pairs - `None` means "no extension should be reported"
    let cases: [(&str, Option<&str>); 6] = [
        ("test.txt", Some("txt")),
        ("test.zip", Some("zip")),
        ("test.tar.gz", Some("gz")),
        ("test.", Some("")),
        ("test", None),
        (".hidden", None),
    ];

    for &(name, expected) in cases.iter() {
        assert_eq!(extension_from_path(Path::new(name)), expected.map(OsStr::new));
    }
}
|
|
|
|
#[test]
/// Check that the mime database recognises JPEG, PNG, PDF, and ZIP data purely from their magic numbers.
fn detect_type() {
    let db = get_mime_db();

    // (magic number, mime type it should be identified as)
    let expectations = [
        (JPEG_BYTES, IMAGE_JPEG),
        (PNG_BYTES, IMAGE_PNG),
        (PDF_BYTES, APPLICATION_PDF),
        (ZIP_BYTES, application_zip()),
    ];

    for (magic, mime) in expectations.iter() {
        assert_eq!(db.get_type(*magic), Some(mime.clone()));
    }
}
|
|
|
|
#[test]
/// Check that `mime_extension_lookup` returns a set of extensions for JPEG, PNG, PDF, and ZIP, and that those sets
/// contain "jpg", "png", "pdf", and "zip", respectively.
fn recommend_ext() {
    // (mime type, extension that must appear among its known extensions)
    let pairs = [
        (IMAGE_JPEG, "jpg"),
        (IMAGE_PNG, "png"),
        (APPLICATION_PDF, "pdf"),
        (application_zip(), "zip"),
    ];

    for (mime, ext) in pairs.iter() {
        let known = mime_extension_lookup(mime.essence_str().into()).unwrap();
        assert!(known.contains(&String::from(*ext)));
    }
}
|
|
|
|
#[test]
/// Create a simple directory with some files, run `scan_directory` on it, and ensure that the files have their
/// associated mime types correctly deduced.
///
/// Every file written has contents matching its extension, except "wrong.jpg", which holds PNG data and must be the
/// only file reported as invalid.
fn simple_directory() {
    use crate::parameters::ScanOpts;
    use std::env::set_current_dir;
    use std::fs::{canonicalize, File};
    use std::io::Write;
    use tempfile::tempdir;

    // set of files to scan. all files have magic numbers corresponding to their extension, except for "wrong.jpg",
    // which is actually a png.
    let mut files = HashMap::new();
    files.insert("test.jpg", JPEG_BYTES);
    files.insert("test.jpeg", JPEG_BYTES);
    files.insert("test.png", PNG_BYTES);
    files.insert("test.pdf", PDF_BYTES);
    files.insert("test.zip", ZIP_BYTES);
    files.insert("wrong.jpg", PNG_BYTES);

    let dir = tempdir().expect("Failed to create temporary directory.");
    // NOTE(review): this changes the working directory of the whole test process. Every path used below is derived
    // from `dir.path()`, so it may be unnecessary - confirm before removing.
    set_current_dir(dir.path()).expect("Failed to change directory.");

    for (name, bytes) in &files {
        // `unwrap_or_else` + `panic!` means the message is only formatted if the operation actually fails
        let mut file = File::create(dir.path().join(name))
            .unwrap_or_else(|e| panic!("Failed to create file: {}: {:?}", name, e));

        file.write_all(bytes)
            .unwrap_or_else(|e| panic!("Failed to write to file: {}: {:?}", name, e));
        drop(file);
    }

    let scan_opts = ScanOpts {
        hidden: true,
        extensionless: false,
        follow_symlinks: false,
    };

    let entries = scan_directory(dir.path(), None, None, &scan_opts).expect("Directory scan failed.");

    // every file we created should have been picked up by the scan
    assert_eq!(entries.len(), files.len());

    // initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present.
    crate::init_db();

    let results = scan_from_walkdir(&entries, false);
    let canonical_results = scan_from_walkdir(&entries, true);
    assert_eq!(results.len(), canonical_results.len());

    for (result, canonical_result) in results.iter().zip(canonical_results.iter()) {
        // there should be no IO errors during this test. any IO errors encountered are outside the scope of this test.
        let result = result.as_ref().expect("Error while scanning file");
        let canonical_result = canonical_result.as_ref().expect("Error while scanning file");

        // paths should be canonical
        assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file);

        if !result.valid {
            // the only invalid file detected should be "wrong.jpg", which is a misnamed png file
            // 1. ensure detected extension is "jpg"
            assert_eq!(extension_from_path(result.file.as_path()).unwrap(), OsStr::new("jpg"));
            // 2. ensure detected mime type is IMAGE_PNG
            assert_eq!(result.mime, IMAGE_PNG);
            // 3. ensure the recommended extension for "wrong.jpg" is "png"
            assert_eq!(&result.recommended_extension().unwrap(), &String::from("png"));
            continue;
        }

        // check if the recommended extension for this file is in the list of known extensions for its mimetype - for
        // example, if the file is determined to be an IMAGE_PNG, its recommended extension should be one of the
        // extensions returned by `mime_extension_lookup(IMAGE_PNG)`.
        assert!(mime_extension_lookup(result.mime.essence_str().into())
            .unwrap()
            .contains(&result.recommended_extension().unwrap()));

        // make sure the guessed mimetype is correct based on the extension of the scanned file. because we created
        // these files ourselves earlier in the test, all valid files with the "jpg" extension should be IMAGE_JPEGs,
        // and so on.
        let ext = extension_from_path(result.file.as_path()).expect("Scanned file should have an extension");
        let expected_mime = match &*ext.to_string_lossy() {
            "jpg" | "jpeg" => IMAGE_JPEG,
            "png" => IMAGE_PNG,
            "pdf" => APPLICATION_PDF,
            "zip" => application_zip(),
            _ => APPLICATION_OCTET_STREAM, // general "fallback" type
        };
        assert_eq!(
            result.mime, expected_mime,
            "Incorrect MIME type detected - got {:?} for a {:?} file",
            result.mime, ext
        );
    }
}
|
|
|
|
#[test]
/// Check that command line arguments are parsed as expected: flags are recognised, booleans are set, and the
/// resulting `ScanOpts` reflect them.
fn argument_parsing() {
    use crate::parameters::ScanOpts;

    // `-f` turns on symlink following, and `-E images` restricts the scan to files with image extensions
    let args = Parameters::parse_from(vec!["fif", "-f", "-E", "images"]);

    // "jpg" must be among the extensions to be scanned
    let extensions = args.extensions().expect("args.extensions() should be Some(_)!");
    assert!(
        extensions.contains(&"jpg"),
        "args.extensions() should contain the `images` set!"
    );

    // hidden files weren't requested
    assert!(!args.scan_hidden);

    // no individual extensions were passed
    assert!(args.exts.is_none());

    // nothing was excluded
    assert!(args.excluded_extensions().is_none());

    // the derived ScanOpts should only have symlink following switched on
    let expected_opts = ScanOpts {
        hidden: false,
        extensionless: false,
        follow_symlinks: true,
    };
    assert_eq!(args.get_scan_opts(), expected_opts, "ScanOpts are incorrect");
}
|
|
|
|
#[test]
/// Check that `fif -e jpg dir` means "scan for jpg files in dir" rather than "scan for jpg and dir files" - and
/// likewise for `-x`, `-X`, and `-E`.
fn positional_args() {
    let expected_dir = PathBuf::from("directory");

    for flag in ["-x", "-e", "-X", "-E"].iter() {
        let params = Parameters::parse_from(vec!["fif", *flag, "images", "directory"]);
        assert_eq!(params.dir, expected_dir);
    }
}
|
|
|
|
#[test]
/// Check that the `exclude` flag (`-x`) takes priority over both `-e` and `-E`.
fn exclude_overrides() {
    // `-E images` pulls in many image extensions; `-x jpg,png` should then strip "jpg" and "png" back out
    let args = Parameters::parse_from(vec!["fif", "-x", "jpg,png", "-E", "images"]);
    let extensions = args
        .extensions()
        .expect("Extensions should contain the `images` set!");

    assert!(!extensions.contains(&"jpg"), "\"jpg\" should be excluded!");
    assert!(!extensions.contains(&"png"), "\"png\" should be excluded!");
    assert!(extensions.contains(&"jpeg"), "\"jpeg\" should be included!");

    // `-e abc,def,ghi,jkl` combined with `-x abc,def` should leave only "ghi" and "jkl"
    let args = Parameters::parse_from(vec!["fif", "-e", "abc,def,ghi,jkl", "-x", "abc,def"]);
    let extensions = args.extensions().expect("Extensions should be set!");

    for excluded in ["abc", "def"].iter() {
        assert!(!extensions.contains(excluded));
    }
    for included in ["ghi", "jkl"].iter() {
        assert!(extensions.contains(included));
    }
}
|
|
|
|
#[test]
/// Check that the `exclude_set` flag (`-X`) takes priority over `-e`.
fn exclude_set_overrides_includes() {
    // `-e jpg,flac` together with `-X images` should behave like `-e flac`
    let args = Parameters::parse_from(vec!["fif", "-e", "jpg,flac", "-X", "images"]);
    let mut remaining = args
        .extensions()
        .expect("Extensions should be set!")
        .into_iter();

    // exactly one extension should be left, and it should be "flac"
    assert_eq!(remaining.next(), Some("flac"), "Extensions should contain flac!");
    assert_eq!(remaining.next(), None, "Too many extensions!");
}
|
|
|
|
#[test]
/// Check that the `exclude_set` flag (`-X`) takes priority over `-E`.
fn exclude_set_overrides_include_set() {
    // `-E media` together with `-X images` should behave like `-E audio,video`
    let args = Parameters::parse_from(vec!["fif", "-E", "media", "-X", "images"]);
    let extensions = args.extensions().expect("Extensions should be set!");

    // everything from the audio and video sets must still be present
    let audio_exts = ExtensionSet::Audio.extensions();
    let video_exts = ExtensionSet::Video.extensions();
    for &ext in audio_exts.iter().chain(video_exts.iter()) {
        assert!(extensions.contains(&ext), "Extensions should contain {}!", ext);
    }

    // nothing from the images set may remain
    for ext in ExtensionSet::Images.extensions() {
        assert!(!extensions.contains(&ext), "Extensions should not contain {}!", ext);
    }
}
|
|
|
|
#[test]
/// Check that malformed command lines are rejected by the argument parser.
fn rejects_bad_args() {
    let bad_invocations: [&[&str]; 5] = [
        // flags that don't exist
        &["fif", "-abcdefghijklmnopqrstuvwxyz"],
        // `-E` with no set given
        &["fif", "-E"],
        // `-E` with a set that doesn't exist
        &["fif", "-E", "pebis"],
        // `-X` with a set that doesn't exist
        &["fif", "-X", "pebis"],
        // `-e` given nothing but commas
        &["fif", "-e", ",,,,,"],
    ];

    for args in bad_invocations.iter() {
        assert!(Parameters::try_parse_from(*args).is_err(), "Failed to reject {:?}", args);
    }
}
|
|
|
|
#[test]
/// Generate random series of bytes and try to identify them. This test makes no assertions and can only fail if the
/// mime database somehow panics or hangs.
///
/// A tally of detected types is printed at the end, along with how many buffers had no type detected.
fn identify_random_bytes() {
    use rand::RngCore;

    /// Number of random buffers to generate. Also used for the "No type found" tally, so the two cannot drift apart.
    const ITERATIONS: i32 = 1000;

    let db = get_mime_db();
    let mut rng = rand::thread_rng();
    let mut bytes: [u8; BUF_SIZE * 2] = [0; BUF_SIZE * 2];
    let mut results: HashMap<Mime, i32> = HashMap::new();

    for _ in 0..ITERATIONS {
        rng.fill_bytes(&mut bytes);
        if let Some(detected_type) = db.get_type(&bytes) {
            *results.entry(detected_type).or_insert(0) += 1;
        }
    }

    for (mime, count) in &results {
        println!("{}:\t{} counts", mime, count);
    }
    // whatever wasn't identified is the total number of attempts minus the number of successful identifications
    println!("No type found:\t{} counts", ITERATIONS - results.values().sum::<i32>());
}
|
|
|
|
#[test]
/// Check that the `Shell` and `PowerShell` output for a misnamed file mentions the correctly named file, i.e. for a
/// file "wrong.bad" that should have the extension "good", the output contains something like
/// "mv wrong.bad wrong.good".
fn outputs_move_commands() {
    use std::io::Read;

    // a single finding: "misnamed_file.png" has been identified as a jpeg file
    let finding = Findings {
        file: PathBuf::from("misnamed_file.png"),
        valid: false,
        mime: IMAGE_JPEG,
    };
    let entries = vec![Ok(finding)];

    for format in ["Shell", "PowerShell"].iter() {
        let mut cursor = std::io::Cursor::new(Vec::new());

        let write_result = match *format {
            "Shell" => Shell::new().write_all(&mut cursor, &entries),
            "PowerShell" => PowerShell::new().write_all(&mut cursor, &entries),
            _ => unreachable!(),
        };
        write_result.expect("Failed to write to cursor");

        // rewind, then read back everything that was written
        cursor.set_position(0);
        let mut contents = std::string::String::new();
        cursor
            .read_to_string(&mut contents)
            .expect("Failed to read from cursor to string");

        // the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg"
        assert!(
            contents.contains("misnamed_file.jpg"),
            "{} output doesn't contain move command!\n===\n{}",
            format,
            contents
        );
    }
}
|
|
|
|
#[test]
#[cfg(feature = "json")]
/// Ensure that the JSON output for a finding includes the mime type that was detected for the file.
fn test_json() {
    use crate::formats::Json;
    use std::io::Read;

    // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
    let entries = vec![Ok(Findings {
        file: Path::new("misnamed_file.png").to_path_buf(),
        valid: false,
        mime: IMAGE_JPEG,
    })];

    let mut cursor = std::io::Cursor::new(Vec::new());
    let mut contents = std::string::String::new();

    Json::new()
        .write_all(&mut cursor, &entries)
        .expect("Failed to write to cursor");

    // rewind the cursor so that the data just written can be read back
    cursor.set_position(0);
    cursor
        .read_to_string(&mut contents)
        .expect("Failed to read from cursor to string");

    // the output should contain the file's mime type
    assert!(
        contents.contains(IMAGE_JPEG.essence_str()),
        "JSON output doesn't contain the file's mime type!\n===\n{}",
        contents
    );
}
|
|
|
|
#[test]
/// Check that the Media extension set is a superset of the Audio, Video, and Images sets, and that `-E media` parses
/// to the same extensions as `-E audio,video,images`.
fn media_contains_audio_video_images() {
    use crate::parameters::ExtensionSet::{Audio, Images, Media, Video};
    let media_exts = Media.extensions();

    // every extension in the audio/video/image sets must also appear in the media set
    for ext in [Audio.extensions(), Video.extensions(), Images.extensions()].concat() {
        assert!(media_exts.contains(&ext));
    }

    let via_media = Parameters::parse_from(&["fif", "-E", "media"]);
    let via_parts = Parameters::parse_from(&["fif", "-E", "audio,video,images"]);
    assert_eq!(via_media.extensions(), via_parts.extensions());
}
|
|
|
|
#[test]
/// Check that the `writables!` macro expands to the same values as a hand-written array of `Writable`s.
fn writables_is_correct() {
    use crate::formats::Writable;
    use crate::writables;

    // what the macro invocation below is expected to expand to, element by element
    let expected = [
        "henlo".into(),
        Path::new("henlo").into(),
        Writable::Newline,
        Writable::Space,
    ];

    assert_eq!(&expected, writables!["henlo", (Path::new("henlo")), Newline, Space]);
}
|
|
|
|
#[test]
/// Exercise various combinations of the verbosity flags and check the verbosity level each one selects.
fn verbosity() {
    // quiet and verbose flags can't be used together
    assert!(
        Parameters::try_parse_from(&["fif", "-q", "-v"]).is_err(),
        "Failed to reject usage of both -q and -v!"
    );

    // (flags, verbosity level they should produce)
    let expectations = [
        ("-qqqqqqqq", "off"),
        ("-qqq", "off"),
        ("-qq", "error"),
        ("-q", "warn"),
        ("-s", "info"),
        ("-v", "debug"),
        ("-vv", "trace"),
        ("-vvv", "trace"),
        ("-vvvvvvvv", "trace"),
    ];

    for &(flags, level) in expectations.iter() {
        assert_eq!(Parameters::parse_from(&["fif", flags]).default_verbosity(), level);
    }
}
|