added text extension set, more test coverage, cargo update
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Lynne Megido 2021-04-08 23:33:33 +10:00
parent 0f2f408c09
commit d54cc8d6da
Signed by: lynnesbian
GPG Key ID: F0A184B5213D9F90
5 changed files with 61 additions and 7 deletions

View File

@ -7,6 +7,8 @@ Dates are given in YYYY-MM-DD format.
- Better documentation for command line arguments
- Added more stuff to test.py
- PKGBUILD for Arch-based distros
- Added Text extension set
- More test coverage
### v0.2.11 (2021-04-04)
#### Features

8
Cargo.lock generated
View File

@ -284,9 +284,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.92"
version = "0.2.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56d855069fafbb9b344c0f962150cd2c1187975cb1c22c1522c240d8c4986714"
checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
[[package]]
name = "log"
@ -549,9 +549,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "syn"
version = "1.0.68"
version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ce15dd3ed8aa2f8eeac4716d6ef5ab58b6b9256db41d7e1a0224c2788e8fd87"
checksum = "48fe99c6bd8b1cc636890bcc071842de909d902c81ac7dab53ba33c421ab8ffb"
dependencies = [
"proc-macro2",
"quote",

View File

@ -67,6 +67,9 @@ fastrand = "1.4.0"
[profile.release]
lto = "thin"
[profile.test]
opt-level = 0
# optimise dependencies, even when producing debug builds
[profile.dev.package."*"]
opt-level = 3

View File

@ -14,6 +14,8 @@ pub enum ExtensionSet {
Media,
/// Extensions used for document file formats, such as `pdf`, `odt`, `docx`, etc.
Documents,
/// Extensions used for text file formats, such as `txt`, `toml`, `html`, etc.
Text,
/// Extensions used for archive file formats, such as `zip`, `zst`, `gz`, etc.
Archives,
}
@ -34,9 +36,10 @@ impl ExtensionSet {
Self::Documents => vec![
"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
],
Self::Text => mime_guess::get_mime_extensions_str("text/*").unwrap().to_vec(),
// many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used
// somehow to extract extensions for compressed files from mime_guess?
Self::Archives => vec!["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2"],
Self::Archives => vec!["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2", "tgz"],
}
}
}

View File

@ -8,9 +8,12 @@ use cfg_if::cfg_if;
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use mime_guess::Mime;
use crate::findings::Findings;
use crate::formats::{Format, Script};
use std::borrow::Borrow;
use std::collections::HashMap;
use std::ffi::OsStr;
use std::io::Read;
use std::path::Path;
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
@ -149,7 +152,10 @@ fn simple_directory() {
"pdf" => APPLICATION_PDF,
"zip" => application_zip(),
_ => APPLICATION_OCTET_STREAM, // general "fallback" type
}
},
"Incorrect MIME type detected - got {:?} for a {:?} file",
result.mime,
ext.unwrap()
);
}
}
@ -180,7 +186,8 @@ fn argument_parsing() {
hidden: false,
extensionless: false,
follow_symlinks: true
}
},
"ScanOpts are incorrect"
)
}
@ -224,3 +231,42 @@ fn identify_random_bytes() {
}
println!("No type found:\t{} counts", 500 - results.values().sum::<i32>())
}
/// Verifies that the script written for a misnamed file refers to the corrected file name.
#[test]
fn outputs_move_commands() {
    // a single example finding stating that "misnamed_file.png" has been identified as a jpeg file
    let finding = Findings {
        file: Path::new("misnamed_file.png"),
        valid: false,
        mime: IMAGE_JPEG,
    };
    let findings = vec![Ok(finding)];

    // write the script into an in-memory cursor rather than a real file
    let mut buffer = std::io::Cursor::new(Vec::new());
    Script::new()
        .write_all(&findings, &mut buffer)
        .expect("Failed to write to cursor");

    // rewind so that the read below starts from the first byte that was written
    buffer.set_position(0);
    let mut script = String::new();
    buffer
        .read_to_string(&mut script)
        .expect("Failed to read from cursor to string");

    // the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg"
    let mentions_new_name = script.contains("misnamed_file.jpg");
    assert!(mentions_new_name, "Output doesn't contain move command!");
}
/// Verifies that the `Media` extension set contains every extension found in the `Audio`, `Videos`, and `Images`
/// extension sets.
#[test]
fn media_contains_audio_video_images() {
    use crate::extension_set::ExtensionSet::{Audio, Images, Media, Videos};
    let media_exts = Media.extensions();

    // assert every extension in the audio/video/image sets is contained in the media set, naming the offending
    // extension on failure so that a broken set can be diagnosed straight from the test output
    for ext in [Audio.extensions(), Videos.extensions(), Images.extensions()].concat() {
        assert!(
            media_exts.contains(&ext),
            "Media extension set is missing {:?}",
            ext
        );
    }
}