// SPDX-FileCopyrightText: 2021-2022 Lynnesbian // SPDX-License-Identifier: GPL-3.0-or-later use std::collections::{BTreeMap, HashMap}; use std::ffi::OsStr; use std::path::{Path, PathBuf}; use clap::Parser; use fif::files::{mime_extension_lookup, scan_directory, scan_from_walkdir, BUF_SIZE}; use fif::findings::Findings; use fif::formats::{Format, PowerShell, Shell}; use fif::mime_db::MimeDb; use fif::utils::APPLICATION_ZIP; use fif::{String, MIMEDB}; use itertools::Itertools; use maplit::{btreeset, hashmap}; use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG}; use crate::parameters::ExtensionSet; use crate::parameters::Parameters; const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF"; const PNG_BYTES: &[u8] = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"; const PDF_BYTES: &[u8] = b"%PDF-"; const ZIP_BYTES: &[u8] = b"PK\x03\x04"; #[test] /// Ensure that `extension_from_path` successfully returns the extension from a set of paths. fn get_ext() { let ext_checks: HashMap<_, Option<&OsStr>> = hashmap![ Path::new("test.txt") => Some(OsStr::new("txt")), Path::new("test.zip") => Some(OsStr::new("zip")), Path::new("test.tar.gz") => Some(OsStr::new("gz")), Path::new("test.") => Some(OsStr::new("")), Path::new("test") => None, Path::new(".hidden") => None, ]; for (path, ext) in ext_checks { assert_eq!(path.extension(), ext); } } #[test] /// Ensure that the MIME types for JPEG, PNG, PDF, and ZIP are detected from their magic numbers. fn detect_type() { assert_eq!(MIMEDB.get_type(JPEG_BYTES), Some(IMAGE_JPEG)); assert_eq!(MIMEDB.get_type(PNG_BYTES), Some(IMAGE_PNG)); assert_eq!(MIMEDB.get_type(PDF_BYTES), Some(APPLICATION_PDF)); assert_eq!(MIMEDB.get_type(ZIP_BYTES), Some(APPLICATION_ZIP.clone())); } #[test] /// Ensure that `mime_extension_lookup` works as expected, and that the set of extensions for JPEG, PNG, PDF, and ZIP /// contain "jpg", "png", "pdf", and "zip", respectively. 
fn recommend_ext() { let tests = hashmap![ &IMAGE_JPEG => "jpg", &IMAGE_PNG => "png", &APPLICATION_PDF => "pdf", &*APPLICATION_ZIP => "zip", ]; for (mime, ext) in tests { assert!( mime_extension_lookup(mime.essence_str().into()) .unwrap() .contains(&String::from(ext)), "mime_extension_lookup for {} didn't contain {}!", mime.essence_str(), ext ); } } #[test] /// Create a simple directory with some files, run `scan_directory` on it, and ensure that the files have their /// associated MIME types correctly deduced. fn simple_directory() { use std::borrow::Borrow; use std::env::set_current_dir; use std::fs::{canonicalize, File}; use std::io::Write; use tempfile::tempdir; use crate::parameters::ScanOpts; // set of files to scan. all but the last files have magic numbers corresponding to their extension, except for // "wrong.jpg", which is actually a png. let files = hashmap![ "test.jpg" => JPEG_BYTES, "test.jpeg" => JPEG_BYTES, "test.png" => PNG_BYTES, "test.pdf" => PDF_BYTES, "test.zip" => ZIP_BYTES, "wrong.jpg" => PNG_BYTES, "ignore.fake_ext" => ZIP_BYTES, ]; let dir = tempdir().expect("Failed to create temporary directory."); set_current_dir(dir.path()).expect("Failed to change directory."); for (name, bytes) in &files { let mut file = File::create(dir.path().join(name)).expect(&*format!("Failed to create file: {}", name)); file .write_all(bytes) .expect(&*format!("Failed to write to file: {}", name)); drop(file); } let scan_opts = ScanOpts { hidden: true, extensionless: false, follow_symlinks: false, ignore_unknown_exts: true, }; let entries = scan_directory(dir.path(), None, None, &scan_opts).expect("Directory scan failed."); // there should be one file missing: "ignore.fake_ext" assert_eq!(entries.len(), files.len() - 1); let use_threads = cfg!(feature = "multi-threaded"); let results = scan_from_walkdir(&entries, false, use_threads).0; let canonical_results = scan_from_walkdir(&entries, true, use_threads).0; assert_eq!(results.len(), canonical_results.len()); for 
(result, canonical_result) in results.iter().zip(canonical_results.iter()) { // there should be no IO errors during this test. any IO errors encountered are outside the scope of this test. // paths should be canonical assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file); if !result.valid { // the only invalid file detected should be "wrong.jpg", which is a misnamed png file // 1. ensure detected extension is "jpg" assert_eq!(result.file.as_path().extension().unwrap(), OsStr::new("jpg")); // 2. ensure detected MIME type is IMAGE_PNG assert_eq!(result.mime, IMAGE_PNG); // 3. ensure the recommended extension for "wrong.jpg" is "png" assert_eq!(&result.recommended_extension().unwrap(), &String::from("png")); // 4. ensure the recommended filename for "wrong.jpg" is "wrong.png" assert_eq!(result.recommended_path().unwrap().file_name(), Some(OsStr::new("wrong.png"))); continue; } // check if the recommended extension for this file is in the list of known extensions for its MIME type - for // example, if the file is determined to be an IMAGE_PNG, its recommended extension should be one of the extensions // returned by `mime_extension_lookup(IMAGE_PNG)`. assert!(mime_extension_lookup(result.mime.essence_str().into()) .unwrap() .contains(&result.recommended_extension().unwrap())); // ensure that the recommended_name function outputs something beginning with "test" assert!(result .recommended_path() .unwrap() .file_name() .unwrap() .to_string_lossy() .starts_with("test")); // make sure the guessed MIME type is correct based on the extension of the scanned file // because we already know that the extensions match the MIME type (as we created these files ourselves earlier in // the test), all files with the "jpg" extension should be IMAGE_JPEGs, etc. 
let ext = result.file.as_path().extension().unwrap(); assert_eq!( result.mime, match ext.to_string_lossy().borrow() { "jpg" | "jpeg" => IMAGE_JPEG, "png" => IMAGE_PNG, "pdf" => APPLICATION_PDF, "zip" => APPLICATION_ZIP.clone(), _ => APPLICATION_OCTET_STREAM, // general "fallback" type }, "Incorrect MIME type detected - got {:?} for a {:?} file", result.mime, ext ); } } #[test] /// Ensure that command line argument parsing works correctly - flags are interpreted, booleans are set, and so on. fn argument_parsing() { use crate::parameters::ScanOpts; // pass `-f`, which enables following symlinks, and `-E images`, which scans files with image extensions let args: Parameters = Parameters::parse_from(vec!["fif", "-f", "-E", "images"]); // check if "jpg" is in the list of extensions to be scanned assert!( args .extensions() .expect("args.extensions() should be Some(_)!") .contains(&"jpg"), "args.extensions() should contain the `images` set!" ); // make sure "scan_hidden" is false assert!(!args.scan_hidden); // exts should be none assert!(args.exts.is_none()); // there shouldn't be any excluded extensions assert!(args.excluded_extensions().is_none()); // get the ScanOpts, and make sure they match expectations assert_eq!( args.get_scan_opts(), ScanOpts { hidden: false, extensionless: false, follow_symlinks: true, ignore_unknown_exts: false, }, "ScanOpts are incorrect" ); } #[test] /// Ensure that `fif -e jpg dir` is interpreted as "scan for jpg files in dir" and not "scan for jpg and dir files" fn positional_args() { for flag in &["-x", "-e", "-X", "-E"] { assert_eq!( Parameters::parse_from(vec!["fif", flag, "images", "directory"]).dir, PathBuf::from("directory") ); } } #[test] /// Ensure the `exclude` flag (`-x`) overrides `-e` and `-E`. 
fn exclude_overrides() { // pass `-E images`, which includes many image extensions, and `-x jpg,png`, which should remove "jpg" and "png" from // the extensions list let args: Parameters = Parameters::parse_from(vec!["fif", "-x", "jpg,png", "-E", "images"]); let extensions = args.extensions(); assert!(extensions.is_some(), "Extensions should contain the `images` set!"); let extensions = extensions.unwrap(); assert!(!extensions.contains(&"jpg"), "\"jpg\" should be excluded!"); assert!(!extensions.contains(&"png"), "\"png\" should be excluded!"); assert!(extensions.contains(&"jpeg"), "\"jpeg\" should be included!"); // pass `-e abc,def,ghi,jkl` and `-x abc,def` -- extensions() should only contain "ghi" and "jkl" let args: Parameters = Parameters::parse_from(vec!["fif", "-e", "abc,def,ghi,jkl", "-x", "abc,def"]); let extensions = args.extensions(); assert!(extensions.is_some(), "Extensions should be set!"); assert_eq!(extensions, Some(btreeset!["ghi", "jkl"])); } #[test] /// Ensure the `exclude_set` flag (`-X`) overrides `-e`. fn exclude_set_overrides_includes() { // pass `-e jpg,flac` and `-X images` -- which should produce the equivalent of `-e flag` let args: Parameters = Parameters::parse_from(vec!["fif", "-e", "jpg,flac", "-X", "images"]); let extensions = args.extensions(); assert!(extensions.is_some(), "Extensions should be set!"); assert_eq!(extensions, Some(btreeset!["flac"])); } #[test] /// Ensure the `exclude_set` flag (`-X`) overrides `-E`. 
fn exclude_set_overrides_include_set() { // pass `-E media` and `-X images` -- which should produce the equivalent of `-E audio,video` let args: Parameters = Parameters::parse_from(vec!["fif", "-E", "media", "-X", "images"]); let extensions = args.extensions(); assert!(extensions.is_some(), "Extensions should be set!"); let extensions = extensions.unwrap(); // ensure all of audio and video's extensions are here for &ext in ExtensionSet::Audio .extensions() .iter() .chain(ExtensionSet::Video.extensions().iter()) { assert!(extensions.contains(&ext), "Extensions should contain {}!", ext); } // ensure all of images' extensions are excluded for ext in ExtensionSet::Images.extensions() { assert!(!extensions.contains(&ext), "Extensions should not contain {}!", ext); } } #[test] /// Ensure that badly formed command line arguments are rejected. fn rejects_bad_args() { use assert_cmd::Command; let tests = [ // Non-existent flags: vec!["fif", "-abcdefghijklmnopqrstuvwxyz"], // `-E` without specifying a set: vec!["fif", "-E"], // `-E` with an invalid set: vec!["fif", "-E", "pebis"], // `-X` with an invalid set: vec!["fif", "-X", "pebis"], // `-e` with nothing but commas: vec!["fif", "-e", ",,,,,"], // `-x` with nothing but commas: vec!["fif", "-x", ",,,,,"], // `-j` with a negative value: vec!["fif", "-j", "-1"], // `--prompt` without `--fix`: vec!["fif", "--prompt", "always"], // `--overwrite` without `--fix`: vec!["fif", "--overwrite"], ]; for test in &tests { // first, try testing the flags against the Parameters struct... 
assert!(Parameters::try_parse_from(test).is_err(), "Failed to reject {:?}", test); // ...then, make sure it actually works against the binary let mut cmd = Command::cargo_bin("fif").unwrap(); cmd.args(test).assert().failure(); } } #[test] /// Ensure that a few simple, well-formed command line argument cases pass fn accepts_good_args() { use assert_cmd::Command; // all of these commands pass either the version or help flag, ensuring that they won't fail for reasons relating // to filesystem access let tests = [ vec!["-V"], vec!["--version"], vec!["-E", "images", "--version"], vec!["-h"], vec!["--help"], vec!["dir_name", "--version"], ]; for test in &tests { let mut cmd = Command::cargo_bin("fif").unwrap(); cmd.args(test).assert().success(); } } #[test] /// Ensures that output from the `-V` and `--version` flags is formatted properly. fn check_version_output() { use std::string::String; use assert_cmd::Command; use regex::Regex; // test `-V` matches the format of "fif x.y.z" let mut cmd = Command::cargo_bin("fif").unwrap(); let output = cmd.arg("-V").ok().unwrap().stdout; let output = String::from_utf8(output).unwrap(); assert!( Regex::new(r#"fif v([0-9]\.){2}[0-9]"#).unwrap().is_match(output.trim()), "\"{}\" does not match the expected `-v` format!", output ); // test `--version` matches the format of "fif x.y.z (OS, example backend, commit #1234abc)" let mut cmd = Command::cargo_bin("fif").unwrap(); let output = cmd.arg("--version").ok().unwrap().stdout; let output = String::from_utf8(output).unwrap(); assert!( Regex::new(r#"fif v([0-9]\.){2}[0-9] \(.+, .+ backend, (unknown commit|commit #[[:xdigit:]]{7})\)"#) .unwrap() .is_match(output.trim()), "\"{}\" does not match the expected `--version` format!", output.trim() ); } #[test] /// Generate random series of bytes and try to identify them. This test makes no assertions and can only fail if the /// mime database somehow panics or hangs. 
fn identify_random_bytes() {
    use rand::RngCore;

    // number of random buffers to generate. also used for the "No type found" tally below, so the loop count and the
    // summary can never disagree
    const ITERATIONS: u32 = 1000;

    let mut rng = rand::thread_rng();
    let mut bytes: [u8; BUF_SIZE * 2] = [0; BUF_SIZE * 2];
    // detected MIME type => number of random buffers identified as that type
    let mut results: BTreeMap<Mime, u32> = BTreeMap::new();

    for _ in 0..ITERATIONS {
        rng.fill_bytes(&mut bytes);
        if let Some(detected_type) = MIMEDB.get_type(&bytes) {
            *results.entry(detected_type).or_insert(0) += 1;
        }
    }

    for (mime, count) in &results {
        println!("{}:\t{} counts", mime, count);
    }

    println!(
        "No type found:\t{} counts",
        ITERATIONS - results.values().sum::<u32>()
    );
}

#[test]
/// Ensure that, for a given file "wrong.bad", which should have extension "good", the shell output contains something
/// like "mv wrong.bad wrong.good".
fn outputs_move_commands() {
    use std::io::Read;

    // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
    let findings = vec![Findings {
        file: Path::new("misnamed_file.png").to_path_buf(),
        valid: false,
        mime: IMAGE_JPEG,
    }];

    for format in &["Shell", "PowerShell"] {
        let mut cursor = std::io::Cursor::new(Vec::new());
        let mut contents = std::string::String::new();

        // write the findings using the formatter that corresponds to this name
        match *format {
            "Shell" => Shell.write_all(&mut cursor, &findings, &[]),
            "PowerShell" => PowerShell.write_all(&mut cursor, &findings, &[]),
            _ => unreachable!(),
        }
        .expect("Failed to write to cursor");

        // rewind, then read back everything that was just written
        cursor.set_position(0);
        cursor
            .read_to_string(&mut contents)
            .expect("Failed to read from cursor to string");

        // the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg"
        assert!(
            contents.contains("misnamed_file.jpg") && contents.contains("misnamed_file.png"),
            "{} output doesn't contain move command!\n===\n{}",
            format,
            contents
        );
    }
}

#[test]
#[cfg(feature = "json")]
/// Ensure JSON output is valid.
fn test_json() { use std::io::Read; use crate::formats::Json; // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file let findings = vec![Findings { file: Path::new("misnamed_file.png").to_path_buf(), valid: false, mime: IMAGE_JPEG, }]; let mut cursor = std::io::Cursor::new(Vec::new()); let mut contents = std::string::String::new(); Json .write_all(&mut cursor, &findings, &[]) .expect("Failed to write to cursor"); cursor.set_position(0); cursor .read_to_string(&mut contents) .expect("Failed to read from cursor to string"); // the output should contain the file's MIME type assert!( contents.contains(IMAGE_JPEG.essence_str()), "JSON output doesn't contain move command!\n===\n{}", contents ); } #[test] /// Ensure that the Media extension set contains all (is a superset) of Audio, Video, and Images. fn media_contains_audio_video_images() { use crate::parameters::ExtensionSet::{Audio, Images, Media, Video}; let media_exts = Media.extensions(); // assert every extension in the audio/video/image sets is contained in the media set [Audio.extensions(), Video.extensions(), Images.extensions()] .concat() .into_iter() .for_each(|ext| assert!(media_exts.contains(&ext))); assert_eq!( Parameters::parse_from(&["fif", "-E", "media"]).extensions(), Parameters::parse_from(&["fif", "-E", "audio,video,images"]).extensions() ); } #[test] /// Ensure that the `writables!` and `writablesln!` macros produce the output they should. fn writables_is_correct() { use fif::formats::Writable; use fif::{writables, writablesln}; assert_eq!( &["henlo".into(), Path::new("henlo").into(), Writable::Newline,], writables!["henlo", (Path::new("henlo")), Newline] ); assert_eq!( &["henlo".into(), Path::new("henlo").into(), Writable::Newline, Writable::Newline], writablesln!["henlo", (Path::new("henlo")), Newline] ); } #[test] /// Test various combinations of verbosity flags. 
fn verbosity() { use log::LevelFilter; assert!( Parameters::try_parse_from(&["fif", "-q", "-v"]).is_err(), "Failed to reject usage of both -q and -v!" ); let expected_results = hashmap![ "-qqqqqqqq" => LevelFilter::Off, "-qqq" => LevelFilter::Off, "-qq" => LevelFilter::Error, "-q" => LevelFilter::Warn, "-s" => LevelFilter::Info, "-v" => LevelFilter::Debug, "-vv" => LevelFilter::Trace, "-vvv" => LevelFilter::Trace, "-vvvvvvvv" => LevelFilter::Trace, ]; for (flags, level) in expected_results { assert_eq!(Parameters::parse_from(&["fif", flags]).get_verbosity(), level); } } #[test] /// Ensures `os_name()`'s output is the same as [`std::env::consts::OS`], capitalisation notwithstanding fn validate_os_name() { assert_eq!(fif::utils::os_name().to_lowercase(), std::env::consts::OS.to_lowercase()); } #[test] /// Ensures that [`Findings`] are sorted properly. fn sort_findings() { let findings = vec![ Findings { file: Path::new("ccc").to_path_buf(), valid: false, mime: IMAGE_JPEG, }, Findings { file: Path::new("bbb.xyz").to_path_buf(), valid: true, mime: IMAGE_PNG, }, Findings { file: Path::new("aaa").to_path_buf(), valid: true, mime: APPLICATION_PDF, }, ]; let mut findings = findings.iter().sorted_unstable(); assert_eq!(findings.next().unwrap().file, Path::new("aaa")); assert_eq!(findings.next().unwrap().file, Path::new("bbb.xyz")); assert_eq!(findings.next().unwrap().file, Path::new("ccc")); assert_eq!(findings.next(), None); } #[test] #[cfg(not(all(target_endian = "big", target_pointer_width = "32")))] /// Ensures that [`SmartString`]s don't deviate from std's Strings fn validate_string_type() { use std::string::String as StdString; use fif::String as SmartString; assert_eq!(SmartString::new(), StdString::new()); assert_eq!(SmartString::from("smol"), StdString::from("smol")); assert_eq!( SmartString::from("A long and therefore heap-allocated string"), StdString::from("A long and therefore heap-allocated string") ); smartstring::validate(); }