From b48b8d5a656bc8f708e647177edac08c86fc8cdb Mon Sep 17 00:00:00 2001 From: Lynnesbian Date: Fri, 6 Aug 2021 23:33:42 +1000 Subject: [PATCH] new-mime-guess update necessitated minor changes --- CHANGELOG.md | 2 ++ Cargo.lock | 54 +++++++++++++++++++++++++++++++++++++++++++++-- Cargo.toml | 3 ++- src/findings.rs | 2 +- src/inspectors.rs | 8 +++---- src/mime_db.rs | 2 +- src/tests/mod.rs | 3 +-- test.py | 1 + 8 files changed, 64 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f98f72c..c12d72f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ Dates are given in YYYY-MM-DD format. #### Other - Refactored `formats.rs` - More accurate dependency versions in `Cargo.toml` to ensure that the MSRV stays supported +- Sister project (?) [`new-mime-guess`] is now v3.0.0, updated to 2018 edition Rust, and with some new file and MIME + types added ### v0.3.3 (2021-07-07) #### Features diff --git a/Cargo.lock b/Cargo.lock index f4b8bc5..89e864d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "arrayvec" version = "0.5.2" @@ -193,6 +195,7 @@ dependencies = [ "infer", "itertools", "log", + "mime", "new_mime_guess", "once_cell", "rand", @@ -344,11 +347,13 @@ checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" [[package]] name = "new_mime_guess" -version = "2.1.1" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "991337b97f81dff759c3edabb0bc01ceac92bff6f54852853824bbe1acd969f7" +checksum = "c8c8e990db7ba41ce2b4b3a38380a0f3385d61c55b3d4999d6e00389562a79a2" dependencies = [ "mime", + "phf", + "phf_codegen", "unicase", ] @@ -385,6 +390,45 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85" +[[package]] +name = "phf" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b21b531851d1d86a90bd62dd79be87ce2d90b7bcb6afbae07813921d6156696a" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f8c56073a14f772740bd86231763732f7559635215bf75df9f26f1d713d99e0" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d43f3220d96e0080cc9ea234978ccd80d904eafb17be31bb0f76daaea6493082" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a68318426de33640f02be62b4ae8eb1261be2efbc337b60c54d845bf4484e0d9" +dependencies = [ + "siphasher", + "unicase", +] + [[package]] name = "ppv-lite86" version = "0.2.10" @@ -578,6 +622,12 @@ dependencies = [ "serde", ] +[[package]] +name = "siphasher" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "729a25c17d72b06c68cb47955d44fda88ad2d3e7d77e025663fdd69b93dd71a1" + [[package]] name = "smartstring" version = "0.2.7" diff --git a/Cargo.toml b/Cargo.toml index 2a729cc..b299661 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,7 +27,8 @@ json = ["serde", "serde_json"] [dependencies] walkdir = "2.3.2" log = "0.4.14" -mime_guess = { package = "new_mime_guess", version = "2.1.1" } +mime = "0.3.16" +mime_guess = { package = "new_mime_guess", features = ["phf-map"], version = "3.0.0" } snailquote = "0.3.0" once_cell = "1.8.0" rayon = { version = "1.5.0", optional = true } diff --git a/src/findings.rs b/src/findings.rs index de10c9e..64ad176 100644 --- a/src/findings.rs +++ b/src/findings.rs @@ -1,6 +1,6 @@ use std::path::{Path, PathBuf}; -use mime_guess::Mime; +use mime::Mime; use crate::inspectors::mime_extension_lookup; use crate::string_type::String; diff --git a/src/inspectors.rs b/src/inspectors.rs index d6c3632..c572653 100644 --- a/src/inspectors.rs +++ b/src/inspectors.rs @@ -7,7 +7,7 @@ use std::path::Path; use std::str::FromStr; use cached::cached; -use mime_guess::Mime; +use mime::Mime; use crate::mime_db::MimeDb; use crate::string_type::String; @@ -33,7 +33,7 @@ pub fn mime_type(db: &T, path: &Path) -> io::Result> { let r = db.get_type(&buffer).filter(|mime| // some mime types should be investigated further, reading up to BUF_SIZE even if they've been determined already // one such type is XML - there's many more specific types that can be determined by reading further (such as SVG) - mime != &mime_guess::mime::TEXT_XML + mime != &mime::TEXT_XML // another is ZIP - many file formats (DOCX, ODT, JAR...) are just ZIP files with particular data structures. // determining that a file is in one of the MS office formats in particular requires looking quite far into the // file. @@ -92,13 +92,13 @@ cached! { Some(exts) => { let possible_exts: Vec = exts.iter().map(|e| String::from(*e)).collect(); - Some(if essence == mime_guess::mime::IMAGE_JPEG.essence_str() { + Some(if essence == mime::IMAGE_JPEG.essence_str() { // possible_exts starts with "jpe", because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg are // far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can // add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif. [vec![String::from("jpg")], possible_exts].concat() - } else if essence == mime_guess::mime::TEXT_XML.essence_str() || essence == "application/xml" { + } else if essence == mime::TEXT_XML.essence_str() || essence == "application/xml" { // a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should // (in my opinion) be "xml". // there's also another problem: SVG files can easily be misidentified as XML files, because they usually diff --git a/src/mime_db.rs b/src/mime_db.rs index 0631534..ce9e8d9 100644 --- a/src/mime_db.rs +++ b/src/mime_db.rs @@ -1,7 +1,7 @@ //! Backend-neutral Mime database implementation. use cfg_if::cfg_if; -use mime_guess::Mime; +use mime::Mime; pub trait MimeDb { fn init() -> Self; diff --git a/src/tests/mod.rs b/src/tests/mod.rs index d8038fc..72dfcda 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -7,8 +7,7 @@ use crate::{scan_directory, scan_from_walkdir}; use crate::parameters::Parameters; use clap::Clap; -use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG}; -use mime_guess::Mime; +use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG}; use crate::parameters::ExtensionSet; use std::collections::HashMap; diff --git a/test.py b/test.py index e83a9cf..e4bf3e3 100755 --- a/test.py +++ b/test.py @@ -54,6 +54,7 @@ def main(): sys.exit(2) print("Done! You might want to run cargo clean...") + subprocess.run(["du", "-sh", "target"]) if __name__ == "__main__": main()