Browse Source

new-mime-guess update necessitated minor changes

edition-2021
Lynne Megido 4 months ago
parent
commit
b48b8d5a65
Signed by: lynnesbian
GPG Key ID: F0A184B5213D9F90
  1. 2
      CHANGELOG.md
  2. 54
      Cargo.lock
  3. 3
      Cargo.toml
  4. 2
      src/findings.rs
  5. 8
      src/inspectors.rs
  6. 2
      src/mime_db.rs
  7. 3
      src/tests/mod.rs
  8. 1
      test.py

2
CHANGELOG.md

@ -6,6 +6,8 @@ Dates are given in YYYY-MM-DD format.
#### Other
- Refactored `formats.rs`
- More accurate dependency versions in `Cargo.toml` to ensure that the MSRV stays supported
- Sister project (?) [`new-mime-guess`] is now v3.0.0: it has been updated to the 2018 edition of Rust, and some new
file and MIME types have been added
### v0.3.3 (2021-07-07)
#### Features

54
Cargo.lock generated

@ -1,5 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "arrayvec"
version = "0.5.2"
@ -193,6 +195,7 @@ dependencies = [
"infer",
"itertools",
"log",
"mime",
"new_mime_guess",
"once_cell",
"rand",
@ -344,11 +347,13 @@ checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
[[package]]
name = "new_mime_guess"
version = "2.1.1"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "991337b97f81dff759c3edabb0bc01ceac92bff6f54852853824bbe1acd969f7"
checksum = "c8c8e990db7ba41ce2b4b3a38380a0f3385d61c55b3d4999d6e00389562a79a2"
dependencies = [
"mime",
"phf",
"phf_codegen",
"unicase",
]
@ -385,6 +390,45 @@ version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85"
[[package]]
name = "phf"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b21b531851d1d86a90bd62dd79be87ce2d90b7bcb6afbae07813921d6156696a"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f8c56073a14f772740bd86231763732f7559635215bf75df9f26f1d713d99e0"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d43f3220d96e0080cc9ea234978ccd80d904eafb17be31bb0f76daaea6493082"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_shared"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a68318426de33640f02be62b4ae8eb1261be2efbc337b60c54d845bf4484e0d9"
dependencies = [
"siphasher",
"unicase",
]
[[package]]
name = "ppv-lite86"
version = "0.2.10"
@ -578,6 +622,12 @@ dependencies = [
"serde",
]
[[package]]
name = "siphasher"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "729a25c17d72b06c68cb47955d44fda88ad2d3e7d77e025663fdd69b93dd71a1"
[[package]]
name = "smartstring"
version = "0.2.7"

3
Cargo.toml

@ -27,7 +27,8 @@ json = ["serde", "serde_json"]
[dependencies]
walkdir = "2.3.2"
log = "0.4.14"
mime_guess = { package = "new_mime_guess", version = "2.1.1" }
mime = "0.3.16"
mime_guess = { package = "new_mime_guess", features = ["phf-map"], version = "3.0.0" }
snailquote = "0.3.0"
once_cell = "1.8.0"
rayon = { version = "1.5.0", optional = true }

2
src/findings.rs

@ -1,6 +1,6 @@
use std::path::{Path, PathBuf};
use mime_guess::Mime;
use mime::Mime;
use crate::inspectors::mime_extension_lookup;
use crate::string_type::String;

8
src/inspectors.rs

@ -7,7 +7,7 @@ use std::path::Path;
use std::str::FromStr;
use cached::cached;
use mime_guess::Mime;
use mime::Mime;
use crate::mime_db::MimeDb;
use crate::string_type::String;
@ -33,7 +33,7 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
let r = db.get_type(&buffer).filter(|mime|
// some mime types should be investigated further, reading up to BUF_SIZE even if they've been determined already
// one such type is XML - there are many more specific types that can be determined by reading further (such as SVG)
mime != &mime_guess::mime::TEXT_XML
mime != &mime::TEXT_XML
// another is ZIP - many file formats (DOCX, ODT, JAR...) are just ZIP files with particular data structures.
// determining that a file is in one of the MS office formats in particular requires looking quite far into the
// file.
@ -92,13 +92,13 @@ cached! {
Some(exts) => {
let possible_exts: Vec<String> = exts.iter().map(|e| String::from(*e)).collect();
Some(if essence == mime_guess::mime::IMAGE_JPEG.essence_str() {
Some(if essence == mime::IMAGE_JPEG.essence_str() {
// possible_exts starts with "jpe", because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg are
// far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can
// add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif.
[vec![String::from("jpg")], possible_exts].concat()
} else if essence == mime_guess::mime::TEXT_XML.essence_str() || essence == "application/xml" {
} else if essence == mime::TEXT_XML.essence_str() || essence == "application/xml" {
// a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should
// (in my opinion) be "xml".
// there's also another problem: SVG files can easily be misidentified as XML files, because they usually

2
src/mime_db.rs

@ -1,7 +1,7 @@
//! Backend-neutral Mime database implementation.
use cfg_if::cfg_if;
use mime_guess::Mime;
use mime::Mime;
pub trait MimeDb {
fn init() -> Self;

3
src/tests/mod.rs

@ -7,8 +7,7 @@ use crate::{scan_directory, scan_from_walkdir};
use crate::parameters::Parameters;
use clap::Clap;
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use mime_guess::Mime;
use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use crate::parameters::ExtensionSet;
use std::collections::HashMap;

1
test.py

@ -54,6 +54,7 @@ def main():
sys.exit(2)
print("Done! You might want to run cargo clean...")
subprocess.run(["du", "-sh", "target"])
if __name__ == "__main__":
main()

Loading…
Cancel
Save