use std::collections::HashMap; use std::ffi::OsStr; use std::path::{Path, PathBuf}; use clap::Clap; use fif::files::{mime_extension_lookup, BUF_SIZE}; use fif::files::{scan_directory, scan_from_walkdir}; use fif::findings::Findings; use fif::formats::{Format, PowerShell, Shell}; use fif::mime_db::MimeDb; use fif::String; use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG}; use crate::parameters::ExtensionSet; use crate::parameters::Parameters; const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF"; const PNG_BYTES: &[u8] = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"; const PDF_BYTES: &[u8] = b"%PDF-"; const ZIP_BYTES: &[u8] = b"PK\x03\x04"; cfg_if::cfg_if! { if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] { fn get_mime_db() -> fif::mime_db::InferDb { fif::mime_db::InferDb::init() } } else { fn get_mime_db() -> fif::mime_db::XdgDb { fif::mime_db::XdgDb::init() } } } fn application_zip() -> Mime { use std::str::FromStr; Mime::from_str("application/zip").unwrap() } #[test] /// Ensure that `extension_from_path` successfully returns the extension from a set of paths. fn get_ext() { let mut ext_checks: HashMap<_, Option<&OsStr>> = HashMap::new(); ext_checks.insert(Path::new("test.txt"), Some(OsStr::new("txt"))); ext_checks.insert(Path::new("test.zip"), Some(OsStr::new("zip"))); ext_checks.insert(Path::new("test.tar.gz"), Some(OsStr::new("gz"))); ext_checks.insert(Path::new("test."), Some(OsStr::new(""))); ext_checks.insert(Path::new("test"), None); ext_checks.insert(Path::new(".hidden"), None); for (path, ext) in ext_checks { assert_eq!(path.extension(), ext); } } #[test] /// Ensure that the mime types for JPEG, PNG, PDF, and ZIP are detected from their magic numbers. fn detect_type() { let db = get_mime_db(); assert_eq!(db.get_type(JPEG_BYTES), Some(IMAGE_JPEG)); assert_eq!(db.get_type(PNG_BYTES), Some(IMAGE_PNG)); assert_eq!(db.get_type(PDF_BYTES), Some(APPLICATION_PDF)); assert_eq!(db.get_type(ZIP_BYTES), Some(application_zip())); } #[test] /// Ensure that `mime_extension_lookup` works as expected, and that the set of extensions for JPEG, PNG, PDF, and ZIP /// contain "jpg", "png", "pdf", and "zip", respectively. fn recommend_ext() { assert!(mime_extension_lookup(IMAGE_JPEG.essence_str().into()) .unwrap() .contains(&String::from("jpg"))); assert!(mime_extension_lookup(IMAGE_PNG.essence_str().into()) .unwrap() .contains(&String::from("png"))); assert!(mime_extension_lookup(APPLICATION_PDF.essence_str().into()) .unwrap() .contains(&String::from("pdf"))); assert!(mime_extension_lookup(application_zip().essence_str().into()) .unwrap() .contains(&String::from("zip"))); } #[test] /// Create a simple directory with some files, run `scan_directory` on it, and ensure that the files have their /// associated mime types correctly deduced. fn simple_directory() { use std::borrow::Borrow; use std::env::set_current_dir; use std::fs::{canonicalize, File}; use std::io::Write; use tempfile::tempdir; use crate::parameters::ScanOpts; // set of files to scan. all but the last files have magic numbers corresponding to their extension, except for // "wrong.jpg", which is actually a png. let mut files = HashMap::new(); files.insert("test.jpg", JPEG_BYTES); files.insert("test.jpeg", JPEG_BYTES); files.insert("test.png", PNG_BYTES); files.insert("test.pdf", PDF_BYTES); files.insert("test.zip", ZIP_BYTES); files.insert("wrong.jpg", PNG_BYTES); files.insert("ignore.fake_ext", ZIP_BYTES); let dir = tempdir().expect("Failed to create temporary directory."); set_current_dir(dir.path()).expect("Failed to change directory."); for (name, bytes) in &files { let mut file = File::create(dir.path().join(name)).expect(&*format!("Failed to create file: {}", name)); file .write_all(bytes) .expect(&*format!("Failed to write to file: {}", name)); drop(file); } let scan_opts = ScanOpts { hidden: true, extensionless: false, follow_symlinks: false, ignore_unknown_exts: true, }; let entries = scan_directory(dir.path(), None, None, &scan_opts).expect("Directory scan failed."); // there should be one file missing: "ignore.fake_ext" assert_eq!(entries.len(), files.len() - 1); // initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present. crate::init_db(); let results = scan_from_walkdir(&entries, false); let canonical_results = scan_from_walkdir(&entries, true); assert_eq!(results.len(), canonical_results.len()); for (result, canonical_result) in results.iter().zip(canonical_results.iter()) { // there should be no IO errors during this test. any IO errors encountered are outside the scope of this test. let result = result.as_ref().expect("Error while scanning file"); let canonical_result = canonical_result.as_ref().expect("Error while scanning file"); // paths should be canonical assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file); if !result.valid { // the only invalid file detected should be "wrong.jpg", which is a misnamed png file // 1. ensure detected extension is "jpg" assert_eq!(result.file.as_path().extension().unwrap(), OsStr::new("jpg")); // 2. ensure detected mime type is IMAGE_PNG assert_eq!(result.mime, IMAGE_PNG); // 3. ensure the recommended extension for "wrong.jpg" is "png" assert_eq!(&result.recommended_extension().unwrap(), &String::from("png")); continue; } // check if the recommended extension for this file is in the list of known extensions for its mimetype - for // example, if the file is determined to be an IMAGE_PNG, its recommended extension should be one of the extensions // returned by `mime_extension_lookup(IMAGE_PNG)`. assert!(mime_extension_lookup(result.mime.essence_str().into()) .unwrap() .contains(&result.recommended_extension().unwrap())); // make sure the guessed mimetype is correct based on the extension of the scanned file // because we already know that the extensions match the mimetype (as we created these files ourselves earlier in // the test), all files with the "jpg" extension should be IMAGE_JPEGs, etc. let ext = result.file.as_path().extension().unwrap(); assert_eq!( result.mime, match ext.to_string_lossy().borrow() { "jpg" | "jpeg" => IMAGE_JPEG, "png" => IMAGE_PNG, "pdf" => APPLICATION_PDF, "zip" => application_zip(), _ => APPLICATION_OCTET_STREAM, // general "fallback" type }, "Incorrect MIME type detected - got {:?} for a {:?} file", result.mime, ext ); } } #[test] /// Ensure that command line argument parsing works correctly - flags are interpreted, booleans are set, and so on. fn argument_parsing() { use crate::parameters::ScanOpts; // pass `-f`, which enables following symlinks, and `-E images`, which scans files with image extensions let args: Parameters = Parameters::parse_from(vec!["fif", "-f", "-E", "images"]); // check if "jpg" is in the list of extensions to be scanned assert!( args .extensions() .expect("args.extensions() should be Some(_)!") .contains(&"jpg"), "args.extensions() should contain the `images` set!" ); // make sure "scan_hidden" is false assert!(!args.scan_hidden); // exts should be none assert!(args.exts.is_none()); // there shouldn't be any excluded extensions assert!(args.excluded_extensions().is_none()); // get the ScanOpts, and make sure they match expectations assert_eq!( args.get_scan_opts(), ScanOpts { hidden: false, extensionless: false, follow_symlinks: true, ignore_unknown_exts: false, }, "ScanOpts are incorrect" ); } #[test] /// Ensure that `fif -e jpg dir` is interpreted as "scan for jpg files in dir" and not "scan for jpg and dir files" fn positional_args() { for flag in &["-x", "-e", "-X", "-E"] { assert_eq!( Parameters::parse_from(vec!["fif", flag, "images", "directory"]).dir, PathBuf::from("directory") ); } } #[test] /// Ensure the `exclude` flag (`-x`) overrides `-e` and `-E`. fn exclude_overrides() { // pass `-E images`, which includes many image extensions, and `-x jpg,png`, which should remove "jpg" and "png" from // the extensions list let args: Parameters = Parameters::parse_from(vec!["fif", "-x", "jpg,png", "-E", "images"]); let extensions = args.extensions(); assert!(extensions.is_some(), "Extensions should contain the `images` set!"); let extensions = extensions.unwrap(); assert!(!extensions.contains(&"jpg"), "\"jpg\" should be excluded!"); assert!(!extensions.contains(&"png"), "\"png\" should be excluded!"); assert!(extensions.contains(&"jpeg"), "\"jpeg\" should be included!"); // pass `-e abc,def,ghi,jkl` and `-x abc,def` -- extensions() should only contain "ghi" and "jkl" let args: Parameters = Parameters::parse_from(vec!["fif", "-e", "abc,def,ghi,jkl", "-x", "abc,def"]); let extensions = args.extensions(); assert!(extensions.is_some(), "Extensions should be set!"); let extensions = extensions.unwrap(); assert!(!extensions.contains(&"abc")); assert!(!extensions.contains(&"def")); assert!(extensions.contains(&"ghi")); assert!(extensions.contains(&"jkl")); } #[test] /// Ensure the `exclude_set` flag (`-X`) overrides `-e`. fn exclude_set_overrides_includes() { // pass `-e jpg,flac` and `-X images` -- which should produce the equivalent of `-e flag` let args: Parameters = Parameters::parse_from(vec!["fif", "-e", "jpg,flac", "-X", "images"]); let extensions = args.extensions(); assert!(extensions.is_some(), "Extensions should be set!"); let mut extensions = extensions.unwrap().into_iter(); assert_eq!(extensions.next(), Some("flac"), "Extensions should contain flac!"); assert_eq!(extensions.next(), None, "Too many extensions!"); } #[test] /// Ensure the `exclude_set` flag (`-X`) overrides `-E`. fn exclude_set_overrides_include_set() { // pass `-E media` and `-X images` -- which should produce the equivalent of `-E audio,video` let args: Parameters = Parameters::parse_from(vec!["fif", "-E", "media", "-X", "images"]); let extensions = args.extensions(); assert!(extensions.is_some(), "Extensions should be set!"); let extensions = extensions.unwrap(); // ensure all of audio and video's extensions are here for &ext in ExtensionSet::Audio .extensions() .iter() .chain(ExtensionSet::Video.extensions().iter()) { assert!(extensions.contains(&ext), "Extensions should contain {}!", ext); } // ensure all of images' extensions are excluded for ext in ExtensionSet::Images.extensions() { assert!(!extensions.contains(&ext), "Extensions should not contain {}!", ext); } } #[test] /// Ensure that badly formed command line arguments are rejected. fn rejects_bad_args() { let tests = [ // Non-existent flags: vec!["fif", "-abcdefghijklmnopqrstuvwxyz"], // `-E` without specifying a set: vec!["fif", "-E"], // `-E` with an invalid set: vec!["fif", "-E", "pebis"], // `-X` with an invalid set: vec!["fif", "-X", "pebis"], // `-e` with nothing but commas: vec!["fif", "-e", ",,,,,"], ]; for test in &tests { assert!(Parameters::try_parse_from(test).is_err(), "Failed to reject {:?}", test); } } #[test] /// Generate random series of bytes and try to identify them. This test makes no assertions and can only fail if the /// mime database somehow panics or hangs. fn identify_random_bytes() { use rand::RngCore; let db = get_mime_db(); let mut rng = rand::thread_rng(); let mut bytes: [u8; BUF_SIZE * 2] = [0; BUF_SIZE * 2]; let mut results: HashMap = HashMap::new(); for _ in 1..1000 { rng.fill_bytes(&mut bytes); if let Some(detected_type) = db.get_type(&bytes) { *results.entry(detected_type).or_insert(0) += 1; } } for (mime, count) in &results { println!("{}:\t{} counts", mime, count); } println!("No type found:\t{} counts", 1000 - results.values().sum::()); } #[test] /// Ensure that, for a given file "wrong.bad", which should have extension "good", the shell output contains something /// like "mv wrong.bad wrong.good". fn outputs_move_commands() { use std::io::Read; // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file let entries = vec![Ok(Findings { file: Path::new("misnamed_file.png").to_path_buf(), valid: false, mime: IMAGE_JPEG, })]; for format in &["Shell", "PowerShell"] { let mut cursor = std::io::Cursor::new(Vec::new()); let mut contents = std::string::String::new(); match *format { "Shell" => Shell.write_all(&mut cursor, &entries), "PowerShell" => PowerShell.write_all(&mut cursor, &entries), _ => unreachable!(), } .expect("Failed to write to cursor"); cursor.set_position(0); cursor .read_to_string(&mut contents) .expect("Failed to read from cursor to string"); // the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg" assert!( contents.contains("misnamed_file.jpg") && contents.contains("misnamed_file.png"), "{} output doesn't contain move command!\n===\n{}", format, contents ); } } #[test] #[cfg(feature = "json")] /// Ensure JSON output is valid. fn test_json() { use std::io::Read; use crate::formats::Json; // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file let entries = vec![Ok(Findings { file: Path::new("misnamed_file.png").to_path_buf(), valid: false, mime: IMAGE_JPEG, })]; let mut cursor = std::io::Cursor::new(Vec::new()); let mut contents = std::string::String::new(); Json .write_all(&mut cursor, &entries) .expect("Failed to write to cursor"); cursor.set_position(0); cursor .read_to_string(&mut contents) .expect("Failed to read from cursor to string"); // the output should contain the file's mime type assert!( contents.contains(IMAGE_JPEG.essence_str()), "JSON output doesn't contain move command!\n===\n{}", contents ); } #[test] /// Ensure that the Media extension set contains all (is a superset) of Audio, Video, and Images. fn media_contains_audio_video_images() { use crate::parameters::ExtensionSet::{Audio, Images, Media, Video}; let media_exts = Media.extensions(); // assert every extension in the audio/video/image sets is contained in the media set [Audio.extensions(), Video.extensions(), Images.extensions()] .concat() .into_iter() .for_each(|ext| assert!(media_exts.contains(&ext))); assert_eq!( Parameters::parse_from(&["fif", "-E", "media"]).extensions(), Parameters::parse_from(&["fif", "-E", "audio,video,images"]).extensions() ); } #[test] /// Ensure that the `writables!` macro produces the output it should. fn writables_is_correct() { use fif::formats::Writable; use fif::writables; assert_eq!( &["henlo".into(), Path::new("henlo").into(), Writable::Newline,], writables!["henlo", (Path::new("henlo")), Newline] ); } #[test] /// Test various combinations of verbosity flags. fn verbosity() { use log::LevelFilter; assert!( Parameters::try_parse_from(&["fif", "-q", "-v"]).is_err(), "Failed to reject usage of both -q and -v!" ); let mut expected_results = HashMap::new(); expected_results.insert("-qqqqqqqq", LevelFilter::Off); expected_results.insert("-qqq", LevelFilter::Off); expected_results.insert("-qq", LevelFilter::Error); expected_results.insert("-q", LevelFilter::Warn); expected_results.insert("-s", LevelFilter::Info); expected_results.insert("-v", LevelFilter::Debug); expected_results.insert("-vv", LevelFilter::Trace); expected_results.insert("-vvv", LevelFilter::Trace); expected_results.insert("-vvvvvvvv", LevelFilter::Trace); for (flags, level) in expected_results { assert_eq!(Parameters::parse_from(&["fif", flags]).default_verbosity(), level); } } #[test] /// Ensures that smart strings don't deviate from std's Strings fn validate_string_type() { use std::string::String as StdString; use fif::String as SmartString; assert_eq!(SmartString::new(), StdString::new()); assert_eq!(SmartString::from("smol"), StdString::from("smol")); assert_eq!( SmartString::from("A long and therefore heap-allocated string"), StdString::from("A long and therefore heap-allocated string") ); // uncomment if i ever update to smartstring >= 0.2.9 // smartstring::validate(); }