Add a Text extension set, add "tgz" to the Archives set, attach failure
messages to test assertions, and add tests for Script output and for the
Media extension set. Also bumps libc and syn in Cargo.lock and pins
opt-level = 0 for the test profile.

NOTE(review): the context line in the last src/tests/mod.rs hunk assumes
`sum::<i32>()` - confirm the turbofish type against the target file before
applying. Indentation assumes the project uses hard tabs - TODO confirm.
NOTE(review): the post-image hash on the src/tests/mod.rs index line may not
match the hunk content below; regenerate the patch if exact hashes matter.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8ecf3b4..13ab219 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ Dates are given in YYYY-MM-DD format.
 - Better documentation for command line arguments
 - Added more stuff to test.py
 - PKGBUILD for Arch-based distros
+- Added Text extension set
+- More test coverage
 
 ### v0.2.11 (2021-04-04)
 #### Features
diff --git a/Cargo.lock b/Cargo.lock
index a769684..2693f39 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -284,9 +284,9 @@ dependencies = [
 
 [[package]]
 name = "libc"
-version = "0.2.92"
+version = "0.2.93"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "56d855069fafbb9b344c0f962150cd2c1187975cb1c22c1522c240d8c4986714"
+checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
 
 [[package]]
 name = "log"
@@ -549,9 +549,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
 
 [[package]]
 name = "syn"
-version = "1.0.68"
+version = "1.0.69"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ce15dd3ed8aa2f8eeac4716d6ef5ab58b6b9256db41d7e1a0224c2788e8fd87"
+checksum = "48fe99c6bd8b1cc636890bcc071842de909d902c81ac7dab53ba33c421ab8ffb"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/Cargo.toml b/Cargo.toml
index 1d54139..f9c2329 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -67,6 +67,9 @@ fastrand = "1.4.0"
 [profile.release]
 lto = "thin"
 
+[profile.test]
+opt-level = 0
+
 # optimise dependencies, even when producing debug builds
 [profile.dev.package."*"]
 opt-level = 3
diff --git a/src/extension_set.rs b/src/extension_set.rs
index 27ffa77..377b914 100644
--- a/src/extension_set.rs
+++ b/src/extension_set.rs
@@ -14,6 +14,8 @@ pub enum ExtensionSet {
 	Media,
 	/// Extensions used for document file formats, such as `pdf`, `odt`, `docx`, etc.
 	Documents,
+	/// Extensions used for text file formats, such as `txt`, `toml`, `html`, etc.
+	Text,
 	/// Extensions used for archive file formats, such as `zip`, `zst`, `gz`, etc.
 	Archives,
 }
@@ -34,9 +36,10 @@ impl ExtensionSet {
 			Self::Documents => vec![
 				"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
 			],
+			Self::Text => mime_guess::get_mime_extensions_str("text/*").unwrap().to_vec(),
 			// many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used
 			// somehow to extract extensions for compressed files from mime_guess?
-			Self::Archives => vec!["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2"],
+			Self::Archives => vec!["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2", "tgz"],
 		}
 	}
 }
diff --git a/src/tests/mod.rs b/src/tests/mod.rs
index cd90891..a67a7fd 100644
--- a/src/tests/mod.rs
+++ b/src/tests/mod.rs
@@ -8,9 +8,12 @@ use cfg_if::cfg_if;
 use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
 use mime_guess::Mime;
 
+use crate::findings::Findings;
+use crate::formats::{Format, Script};
 use std::borrow::Borrow;
 use std::collections::HashMap;
 use std::ffi::OsStr;
+use std::io::Read;
 use std::path::Path;
 
 const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
@@ -149,7 +152,10 @@ fn simple_directory() {
 					"pdf" => APPLICATION_PDF,
 					"zip" => application_zip(),
 					_ => APPLICATION_OCTET_STREAM, // general "fallback" type
-				}
+				},
+				"Incorrect MIME type detected - got {:?} for a {:?} file",
+				result.mime,
+				ext.unwrap()
 			);
 		}
 	}
@@ -180,7 +186,8 @@ fn argument_parsing() {
 			hidden: false,
 			extensionless: false,
 			follow_symlinks: true
-		}
+		},
+		"ScanOpts are incorrect"
 	)
 }
 
@@ -224,3 +231,44 @@ fn identify_random_bytes() {
 	}
 	println!("No type found:\t{} counts", 500 - results.values().sum::<i32>())
 }
+
+/// Checks that the `Script` output for a misnamed file mentions the corrected file name.
+#[test]
+fn outputs_move_commands() {
+	// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
+	let entries = vec![Ok(Findings {
+		file: Path::new("misnamed_file.png"),
+		valid: false,
+		mime: IMAGE_JPEG,
+	})];
+
+	let mut cursor = std::io::Cursor::new(Vec::new());
+	let mut contents = String::new();
+
+	Script::new()
+		.write_all(&entries, &mut cursor)
+		.expect("Failed to write to cursor");
+	cursor.set_position(0);
+	cursor
+		.read_to_string(&mut contents)
+		.expect("Failed to read from cursor to string");
+
+	// the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg"
+	assert!(
+		contents.contains("misnamed_file.jpg"),
+		"Output doesn't contain move command!"
+	)
+}
+
+/// Checks that every extension in the `Audio`, `Videos` and `Images` sets is also in the `Media` set.
+#[test]
+fn media_contains_audio_video_images() {
+	use crate::extension_set::ExtensionSet::{Audio, Images, Media, Videos};
+	let media_exts = Media.extensions();
+
+	// assert every extension in the audio/video/image sets is contained in the media set
+	[Audio.extensions(), Videos.extensions(), Images.extensions()]
+		.concat()
+		.into_iter()
+		.for_each(|ext| assert!(media_exts.contains(&ext), "Media set is missing the {:?} extension", ext));
+}