fif/src/tests/mod.rs

583 lines
18 KiB
Rust

// SPDX-FileCopyrightText: 2021-2022 Lynnesbian
// SPDX-License-Identifier: GPL-3.0-or-later
use std::collections::{BTreeMap, HashMap};
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use clap::Parser;
use fif::files::{mime_extension_lookup, scan_directory, scan_from_walkdir, BUF_SIZE};
use fif::findings::Findings;
use fif::formats::{Format, PowerShell, Shell};
use fif::mime_db::MimeDb;
use fif::utils::APPLICATION_ZIP;
use fif::{String, MIMEDB};
use itertools::Itertools;
use maplit::{btreeset, hashmap};
use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use crate::parameters::ExtensionSet;
use crate::parameters::Parameters;
// Leading "magic number" bytes used by the tests below to fabricate minimal files of each type.
/// Leading bytes of a JPEG file.
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
/// The eight-byte signature found at the start of a PNG file.
const PNG_BYTES: &[u8] = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
/// The header with which a PDF file begins.
const PDF_BYTES: &[u8] = b"%PDF-";
/// Leading bytes ("PK\x03\x04") of a ZIP archive.
const ZIP_BYTES: &[u8] = b"PK\x03\x04";
/// Ensure that [`Path::extension`] yields the expected extension (or lack thereof) for a representative set of paths.
#[test]
fn get_ext() {
    let expected_exts: HashMap<_, Option<&OsStr>> = hashmap![
        Path::new("test.txt") => Some(OsStr::new("txt")),
        Path::new("test.zip") => Some(OsStr::new("zip")),
        // only the final component after the last dot counts as the extension
        Path::new("test.tar.gz") => Some(OsStr::new("gz")),
        // a trailing dot yields an empty (but present) extension
        Path::new("test.") => Some(OsStr::new("")),
        Path::new("test") => None,
        // a leading dot on its own does not introduce an extension
        Path::new(".hidden") => None,
    ];

    for (path, expected) in expected_exts {
        let actual = path.extension();
        assert_eq!(actual, expected);
    }
}
/// Ensure that the magic numbers for JPEG, PNG, PDF, and ZIP are each resolved to the matching MIME type.
#[test]
fn detect_type() {
    // (leading bytes fed to the database, MIME type it is expected to report)
    let cases = vec![
        (JPEG_BYTES, IMAGE_JPEG),
        (PNG_BYTES, IMAGE_PNG),
        (PDF_BYTES, APPLICATION_PDF),
        (ZIP_BYTES, APPLICATION_ZIP.clone()),
    ];

    for (magic, expected) in cases {
        assert_eq!(MIMEDB.get_type(magic), Some(expected));
    }
}
/// Ensure that `mime_extension_lookup` works as expected: the extension lists returned for JPEG, PNG, PDF, and ZIP
/// must include "jpg", "png", "pdf", and "zip", respectively.
#[test]
fn recommend_ext() {
    let expectations = hashmap![
        &IMAGE_JPEG => "jpg",
        &IMAGE_PNG => "png",
        &APPLICATION_PDF => "pdf",
        &*APPLICATION_ZIP => "zip",
    ];

    for (mime, ext) in expectations {
        let essence = mime.essence_str();
        // every MIME type listed above must be known to the lookup, hence the unwrap
        let known_exts = mime_extension_lookup(essence.into()).unwrap();
        assert!(
            known_exts.contains(&String::from(ext)),
            "mime_extension_lookup for {} didn't contain {}!",
            essence,
            ext
        );
    }
}
/// Create a simple directory with some files, run `scan_directory` on it, and ensure that the files have their
/// associated MIME types correctly deduced.
#[test]
fn simple_directory() {
    use std::borrow::Borrow;
    use std::env::set_current_dir;
    use std::fs::{canonicalize, File};
    use std::io::Write;

    use tempfile::tempdir;

    use crate::parameters::ScanOpts;

    // set of files to scan. every file's magic number corresponds to its extension, with two exceptions:
    // "wrong.jpg", which is actually a png, and "ignore.fake_ext", a zip whose extension is expected to be skipped.
    let files = hashmap![
        "test.jpg" => JPEG_BYTES,
        "test.jpeg" => JPEG_BYTES,
        "test.png" => PNG_BYTES,
        "test.pdf" => PDF_BYTES,
        "test.zip" => ZIP_BYTES,
        "wrong.jpg" => PNG_BYTES,
        "ignore.fake_ext" => ZIP_BYTES,
    ];

    let dir = tempdir().expect("Failed to create temporary directory.");
    // NOTE(review): this changes the working directory of the whole test process - confirm no other test relies on it.
    set_current_dir(dir.path()).expect("Failed to change directory.");

    for (name, bytes) in &files {
        // `unwrap_or_else` + `panic!` only formats the message on failure, unlike `expect(&format!(..))`
        let mut file = File::create(dir.path().join(name))
            .unwrap_or_else(|err| panic!("Failed to create file: {}: {}", name, err));
        file.write_all(bytes)
            .unwrap_or_else(|err| panic!("Failed to write to file: {}: {}", name, err));
        // close the handle before the directory is scanned
        drop(file);
    }

    let scan_opts = ScanOpts {
        hidden: true,
        extensionless: false,
        follow_symlinks: false,
        ignore_unknown_exts: true,
    };

    let entries = scan_directory(dir.path(), None, None, &scan_opts).expect("Directory scan failed.");

    // there should be one file missing: "ignore.fake_ext"
    assert_eq!(entries.len(), files.len() - 1);

    let use_threads = cfg!(feature = "multi-threaded");

    // scan the same entries twice: once with plain paths, once requesting canonical paths
    let results = scan_from_walkdir(&entries, false, use_threads).0;
    let canonical_results = scan_from_walkdir(&entries, true, use_threads).0;
    assert_eq!(results.len(), canonical_results.len());

    for (result, canonical_result) in results.iter().zip(canonical_results.iter()) {
        // there should be no IO errors during this test. any IO errors encountered are outside the scope of this test.

        // paths should be canonical
        assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file);

        if !result.valid {
            // the only invalid file detected should be "wrong.jpg", which is a misnamed png file
            // 1. ensure detected extension is "jpg"
            assert_eq!(result.file.as_path().extension().unwrap(), OsStr::new("jpg"));
            // 2. ensure detected MIME type is IMAGE_PNG
            assert_eq!(result.mime, IMAGE_PNG);
            // 3. ensure the recommended extension for "wrong.jpg" is "png"
            assert_eq!(&result.recommended_extension().unwrap(), &String::from("png"));
            // 4. ensure the recommended filename for "wrong.jpg" is "wrong.png"
            assert_eq!(result.recommended_path().unwrap().file_name(), Some(OsStr::new("wrong.png")));
            continue;
        }

        // check if the recommended extension for this file is in the list of known extensions for its MIME type - for
        // example, if the file is determined to be an IMAGE_PNG, its recommended extension should be one of the extensions
        // returned by `mime_extension_lookup(IMAGE_PNG)`.
        assert!(mime_extension_lookup(result.mime.essence_str().into())
            .unwrap()
            .contains(&result.recommended_extension().unwrap()));

        // ensure that the recommended_path function outputs a file name beginning with "test"
        assert!(result
            .recommended_path()
            .unwrap()
            .file_name()
            .unwrap()
            .to_string_lossy()
            .starts_with("test"));

        // make sure the guessed MIME type is correct based on the extension of the scanned file
        // because we already know that the extensions match the MIME type (as we created these files ourselves earlier in
        // the test), all files with the "jpg" extension should be IMAGE_JPEGs, etc.
        let ext = result.file.as_path().extension().unwrap();
        assert_eq!(
            result.mime,
            match ext.to_string_lossy().borrow() {
                "jpg" | "jpeg" => IMAGE_JPEG,
                "png" => IMAGE_PNG,
                "pdf" => APPLICATION_PDF,
                "zip" => APPLICATION_ZIP.clone(),
                _ => APPLICATION_OCTET_STREAM, // general "fallback" type
            },
            "Incorrect MIME type detected - got {:?} for a {:?} file",
            result.mime,
            ext
        );
    }
}
/// Ensure that command line argument parsing works correctly - flags are interpreted, booleans are set, and so on.
#[test]
fn argument_parsing() {
    use crate::parameters::ScanOpts;

    // `-f` enables following symlinks; `-E images` restricts the scan to files with image extensions
    let args: Parameters = Parameters::parse_from(vec!["fif", "-f", "-E", "images"]);

    // "jpg" belongs to the `images` set, so it must be among the extensions to be scanned
    let scanned_exts = args.extensions().expect("args.extensions() should be Some(_)!");
    assert!(
        scanned_exts.contains(&"jpg"),
        "args.extensions() should contain the `images` set!"
    );

    // hidden files were not requested
    assert!(!args.scan_hidden);

    // no individual extensions were passed, only a set
    assert!(args.exts.is_none());

    // nothing was excluded
    assert!(args.excluded_extensions().is_none());

    // finally, the derived ScanOpts must reflect exactly the flags given above
    let expected_opts = ScanOpts {
        hidden: false,
        extensionless: false,
        follow_symlinks: true,
        ignore_unknown_exts: false,
    };
    assert_eq!(args.get_scan_opts(), expected_opts, "ScanOpts are incorrect");
}
/// Ensure that `fif -e jpg dir` is interpreted as "scan for jpg files in dir" and not "scan for jpg and dir files"
#[test]
fn positional_args() {
    let expected_dir = PathBuf::from("directory");

    // each of these flags takes a value; the trailing argument must still be parsed as the directory
    for flag in ["-x", "-e", "-X", "-E"].iter().copied() {
        let parsed = Parameters::parse_from(vec!["fif", flag, "images", "directory"]);
        assert_eq!(parsed.dir, expected_dir);
    }
}
/// Ensure the `exclude` flag (`-x`) overrides `-e` and `-E`.
#[test]
fn exclude_overrides() {
    // `-E images` pulls in many image extensions; `-x jpg,png` should then strip "jpg" and "png" back out
    let set_args: Parameters = Parameters::parse_from(vec!["fif", "-x", "jpg,png", "-E", "images"]);
    let image_exts = set_args
        .extensions()
        .expect("Extensions should contain the `images` set!");
    for excluded in &["jpg", "png"] {
        assert!(!image_exts.contains(excluded), "\"{}\" should be excluded!", excluded);
    }
    assert!(image_exts.contains(&"jpeg"), "\"jpeg\" should be included!");

    // `-e abc,def,ghi,jkl` combined with `-x abc,def` should leave only "ghi" and "jkl"
    let list_args: Parameters = Parameters::parse_from(vec!["fif", "-e", "abc,def,ghi,jkl", "-x", "abc,def"]);
    let remaining = list_args.extensions();
    assert!(remaining.is_some(), "Extensions should be set!");
    assert_eq!(remaining, Some(btreeset!["ghi", "jkl"]));
}
/// Ensure the `exclude_set` flag (`-X`) overrides `-e`.
#[test]
fn exclude_set_overrides_includes() {
    // `-e jpg,flac` with `-X images` should be equivalent to `-e flac`, since "jpg" is an image extension
    let args: Parameters = Parameters::parse_from(vec!["fif", "-e", "jpg,flac", "-X", "images"]);

    let remaining = args.extensions();
    assert!(remaining.is_some(), "Extensions should be set!");

    let expected = Some(btreeset!["flac"]);
    assert_eq!(remaining, expected);
}
/// Ensure the `exclude_set` flag (`-X`) overrides `-E`.
#[test]
fn exclude_set_overrides_include_set() {
    // `-E media` with `-X images` should be equivalent to `-E audio,video`
    let args: Parameters = Parameters::parse_from(vec!["fif", "-E", "media", "-X", "images"]);
    let remaining = args.extensions().expect("Extensions should be set!");

    // every audio and video extension must have survived the exclusion
    let audio_exts = ExtensionSet::Audio.extensions();
    let video_exts = ExtensionSet::Video.extensions();
    for ext in audio_exts.iter().chain(video_exts.iter()).copied() {
        assert!(remaining.contains(&ext), "Extensions should contain {}!", ext);
    }

    // ...whereas every image extension must be gone
    for ext in ExtensionSet::Images.extensions() {
        assert!(!remaining.contains(&ext), "Extensions should not contain {}!", ext);
    }
}
/// Ensure that badly formed command line arguments are rejected.
#[test]
fn rejects_bad_args() {
    use assert_cmd::Command;

    let bad_invocations = [
        // Non-existent flags:
        vec!["fif", "-abcdefghijklmnopqrstuvwxyz"],
        // `-E` without specifying a set:
        vec!["fif", "-E"],
        // `-E` with an invalid set:
        vec!["fif", "-E", "pebis"],
        // `-X` with an invalid set:
        vec!["fif", "-X", "pebis"],
        // `-e` with nothing but commas:
        vec!["fif", "-e", ",,,,,"],
        // `-x` with nothing but commas:
        vec!["fif", "-x", ",,,,,"],
        // `-j` with a negative value:
        vec!["fif", "-j", "-1"],
        // `--prompt` without `--fix`:
        vec!["fif", "--prompt", "always"],
        // `--overwrite` without `--fix`:
        vec!["fif", "--overwrite"],
    ];

    for argv in bad_invocations.iter() {
        // the argument parser itself must refuse the invocation...
        let parsed = Parameters::try_parse_from(argv);
        assert!(parsed.is_err(), "Failed to reject {:?}", argv);

        // ...and so must the compiled binary when handed the same arguments
        Command::cargo_bin("fif").unwrap().args(argv).assert().failure();
    }
}
/// Ensure that a few simple, well-formed command line argument cases pass
#[test]
fn accepts_good_args() {
    use assert_cmd::Command;

    // every invocation includes either the version or the help flag, so none of them can fail for reasons relating
    // to filesystem access
    let good_invocations = [
        vec!["-V"],
        vec!["--version"],
        vec!["-E", "images", "--version"],
        vec!["-h"],
        vec!["--help"],
        vec!["dir_name", "--version"],
    ];

    for argv in good_invocations.iter() {
        Command::cargo_bin("fif").unwrap().args(argv).assert().success();
    }
}
/// Ensures that output from the `-V` and `--version` flags is formatted properly.
///
/// Version components are matched with `[0-9]+` so that multi-digit components (e.g. `0.10.0`) are accepted.
#[test]
fn check_version_output() {
    use std::string::String;

    use assert_cmd::Command;
    use regex::Regex;

    // test `-V` matches the format of "fif vX.Y.Z"
    let mut cmd = Command::cargo_bin("fif").unwrap();
    let output = cmd.arg("-V").ok().unwrap().stdout;
    let output = String::from_utf8(output).unwrap();
    assert!(
        Regex::new(r#"fif v([0-9]+\.){2}[0-9]+"#)
            .unwrap()
            .is_match(output.trim()),
        "\"{}\" does not match the expected `-V` format!",
        output.trim()
    );

    // test `--version` matches the format of "fif vX.Y.Z (OS, example backend, commit #1234abc)"
    let mut cmd = Command::cargo_bin("fif").unwrap();
    let output = cmd.arg("--version").ok().unwrap().stdout;
    let output = String::from_utf8(output).unwrap();
    assert!(
        Regex::new(r#"fif v([0-9]+\.){2}[0-9]+ \(.+, .+ backend, (unknown commit|commit #[[:xdigit:]]{7})\)"#)
            .unwrap()
            .is_match(output.trim()),
        "\"{}\" does not match the expected `--version` format!",
        output.trim()
    );
}
/// Generate random series of bytes and try to identify them. This test makes no assertions and can only fail if the
/// mime database somehow panics or hangs.
#[test]
fn identify_random_bytes() {
    use rand::RngCore;

    /// Number of random buffers to generate; also the baseline for the "No type found" tally below.
    const ITERATIONS: i32 = 1000;

    let mut rng = rand::thread_rng();
    let mut bytes: [u8; BUF_SIZE * 2] = [0; BUF_SIZE * 2];
    // how many times each MIME type was detected
    let mut results: BTreeMap<Mime, i32> = BTreeMap::new();

    // run exactly ITERATIONS times, so the subtraction in the summary line is accurate
    for _ in 0..ITERATIONS {
        rng.fill_bytes(&mut bytes);
        if let Some(detected_type) = MIMEDB.get_type(&bytes) {
            *results.entry(detected_type).or_insert(0) += 1;
        }
    }

    for (mime, count) in &results {
        println!("{}:\t{} counts", mime, count);
    }
    println!(
        "No type found:\t{} counts",
        ITERATIONS - results.values().sum::<i32>()
    );
}
/// Ensure that, for a file whose extension disagrees with its detected type, the shell and PowerShell outputs both
/// mention the current file name and the corrected one (something like "mv wrong.bad wrong.good").
#[test]
fn outputs_move_commands() {
    use std::io::Read;

    // a single example finding: "misnamed_file.png" has been identified as a jpeg file
    let findings = vec![Findings {
        file: PathBuf::from("misnamed_file.png"),
        valid: false,
        mime: IMAGE_JPEG,
    }];

    for format in &["Shell", "PowerShell"] {
        let mut cursor = std::io::Cursor::new(Vec::new());

        // render the findings with the formatter named by `format`
        let write_result = if *format == "Shell" {
            Shell.write_all(&mut cursor, &findings, &[])
        } else if *format == "PowerShell" {
            PowerShell.write_all(&mut cursor, &findings, &[])
        } else {
            unreachable!()
        };
        write_result.expect("Failed to write to cursor");

        // rewind and read everything that was written back as text
        cursor.set_position(0);
        let mut contents = std::string::String::new();
        cursor
            .read_to_string(&mut contents)
            .expect("Failed to read from cursor to string");

        // the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg"
        let mentions_new_name = contents.contains("misnamed_file.jpg");
        let mentions_old_name = contents.contains("misnamed_file.png");
        assert!(
            mentions_new_name && mentions_old_name,
            "{} output doesn't contain move command!\n===\n{}",
            format,
            contents
        );
    }
}
/// Ensure the JSON output mentions the detected MIME type of a finding.
///
/// Note that this only checks for the presence of the MIME type's essence string; it does not parse the output.
#[test]
#[cfg(feature = "json")]
fn test_json() {
    use std::io::Read;

    use crate::formats::Json;

    // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
    let findings = vec![Findings {
        file: Path::new("misnamed_file.png").to_path_buf(),
        valid: false,
        mime: IMAGE_JPEG,
    }];

    let mut cursor = std::io::Cursor::new(Vec::new());
    let mut contents = std::string::String::new();

    Json
        .write_all(&mut cursor, &findings, &[])
        .expect("Failed to write to cursor");

    // rewind so that everything written above can be read back
    cursor.set_position(0);
    cursor
        .read_to_string(&mut contents)
        .expect("Failed to read from cursor to string");

    // the output should contain the file's MIME type
    assert!(
        contents.contains(IMAGE_JPEG.essence_str()),
        "JSON output doesn't contain the MIME type!\n===\n{}",
        contents
    );
}
/// Ensure that the Media extension set contains all (is a superset) of Audio, Video, and Images.
#[test]
fn media_contains_audio_video_images() {
    use crate::parameters::ExtensionSet::{Audio, Images, Media, Video};

    let media_exts = Media.extensions();

    // walk every extension of the audio, video, and image sets and check it is present in the media set
    let component_exts = Audio
        .extensions()
        .into_iter()
        .chain(Video.extensions())
        .chain(Images.extensions());
    for ext in component_exts {
        assert!(media_exts.contains(&ext));
    }

    // asking for `media` on the command line must give the same result as asking for its three components
    let via_media = Parameters::parse_from(&["fif", "-E", "media"]);
    let via_components = Parameters::parse_from(&["fif", "-E", "audio,video,images"]);
    assert_eq!(via_media.extensions(), via_components.extensions());
}
#[test]
/// Ensure that the `writables!` and `writablesln!` macros produce the output they should.
///
/// Each macro invocation is compared against a hand-written array of `Writable` values.
fn writables_is_correct() {
use fif::formats::Writable;
use fif::{writables, writablesln};
// `writables!` should yield exactly the items listed: a string, a path, and an explicit newline.
// NOTE(review): the path expression is parenthesised in the macro call, presumably because the macro's syntax
// requires it - confirm against the macro definition.
assert_eq!(
&["henlo".into(), Path::new("henlo").into(), Writable::Newline,],
writables!["henlo", (Path::new("henlo")), Newline]
);
// `writablesln!` is expected to yield the same items followed by one additional trailing newline.
assert_eq!(
&["henlo".into(), Path::new("henlo").into(), Writable::Newline, Writable::Newline],
writablesln!["henlo", (Path::new("henlo")), Newline]
);
}
/// Test various combinations of verbosity flags.
#[test]
fn verbosity() {
    use log::LevelFilter;

    // quiet and verbose flags contradict each other and must not be accepted together
    let conflicting = Parameters::try_parse_from(&["fif", "-q", "-v"]);
    assert!(conflicting.is_err(), "Failed to reject usage of both -q and -v!");

    // flag => log level it should result in; repeating a flag past its extreme keeps it at that extreme
    let expected_results = hashmap![
        "-qqqqqqqq" => LevelFilter::Off,
        "-qqq" => LevelFilter::Off,
        "-qq" => LevelFilter::Error,
        "-q" => LevelFilter::Warn,
        "-s" => LevelFilter::Info,
        "-v" => LevelFilter::Debug,
        "-vv" => LevelFilter::Trace,
        "-vvv" => LevelFilter::Trace,
        "-vvvvvvvv" => LevelFilter::Trace,
    ];

    for (flags, level) in expected_results {
        let parsed = Parameters::parse_from(&["fif", flags]);
        assert_eq!(parsed.get_verbosity(), level);
    }
}
/// Ensures `os_name()`'s output is the same as [`std::env::consts::OS`], capitalisation notwithstanding
#[test]
fn validate_os_name() {
    // lowercase both sides so that only the spelling, not the capitalisation, is compared
    let reported = fif::utils::os_name().to_lowercase();
    let expected = std::env::consts::OS.to_lowercase();
    assert_eq!(reported, expected);
}
/// Ensures that [`Findings`] are sorted properly.
#[test]
fn sort_findings() {
    // deliberately listed in reverse file-name order, with differing validity and MIME types
    let unsorted = vec![
        Findings {
            file: PathBuf::from("ccc"),
            valid: false,
            mime: IMAGE_JPEG,
        },
        Findings {
            file: PathBuf::from("bbb.xyz"),
            valid: true,
            mime: IMAGE_PNG,
        },
        Findings {
            file: PathBuf::from("aaa"),
            valid: true,
            mime: APPLICATION_PDF,
        },
    ];

    let mut sorted = unsorted.iter().sorted_unstable();

    // the findings must come out in ascending file-name order...
    for expected_name in &["aaa", "bbb.xyz", "ccc"] {
        assert_eq!(sorted.next().unwrap().file, Path::new(expected_name));
    }
    // ...and there must be nothing left afterwards
    assert_eq!(sorted.next(), None);
}
/// Ensures that [`SmartString`]s don't deviate from std's Strings
#[test]
#[cfg(not(all(target_endian = "big", target_pointer_width = "32")))]
fn validate_string_type() {
    use std::string::String as StdString;

    use fif::String as SmartString;

    // freshly constructed strings must compare equal
    assert_eq!(SmartString::new(), StdString::new());

    // so must strings built from the same literal, whether short or long
    for text in &["smol", "A long and therefore heap-allocated string"] {
        assert_eq!(SmartString::from(*text), StdString::from(*text));
    }

    // finally, run smartstring's own validation routine
    smartstring::validate();
}