From 12d9001bb82d6914f64fe333c8a009f8891dfbcf Mon Sep 17 00:00:00 2001 From: Lynnesbian Date: Sun, 4 Apr 2021 22:42:34 +1000 Subject: [PATCH] better mime type detection - consider some/x-thing and some/thing to be identical - use a patched version of mime_guess with many more extension/type mappings --- CHANGELOG.md | 3 +++ Cargo.lock | 21 ++++++++++----------- Cargo.toml | 2 ++ src/inspectors.rs | 19 ++++++++++++++++++- 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e40a80c..590cf0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ Dates are given in YYYY-MM-DD format. [here]https://github.com/bodil/smartstring/blob/v0.2.6/src/config.rs#L101-L103 for why that was a problem in the first place) - Fixed broken tests +- Better mime type detection: + - Consider "some/x-thing" and "some/thing" to be identical + - Use a patched version of mime_guess (which took a while to make 0u0;) with many more extension<->type mappings ### v0.2.10 (2021-03-26) - PowerShell support! diff --git a/Cargo.lock b/Cargo.lock index a4bbaa6..d18d079 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -284,9 +284,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.91" +version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8916b1f6ca17130ec6568feccee27c156ad12037880833a3b842a823236502e7" +checksum = "56d855069fafbb9b344c0f962150cd2c1187975cb1c22c1522c240d8c4986714" [[package]] name = "log" @@ -305,9 +305,9 @@ checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" [[package]] name = "memoffset" -version = "0.6.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "157b4208e3059a8f9e78d559edc658e13df41410cb3ae03979c83130067fdd87" +checksum = "f83fb6581e8ed1f85fd45c116db8405483899489e38406156c25eb743554361d" dependencies = [ "autocfg", ] @@ -320,9 +320,8 @@ checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" [[package]] name = "mime_guess" -version = "2.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2684d4c2e97d99848d30b324b00c8fcc7e5c897b7cbb5819b09e7c90e8baf212" +version = "2.0.4" +source = "git+https://github.com/Lynnesbian/mime_guess#679d3b8887d30bd43a83f162d61b7226675c7012" dependencies = [ "mime", "unicase", @@ -393,9 +392,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.24" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +checksum = "a152013215dca273577e18d2bf00fa862b89b24169fb78c4c95aeb07992c9cec" dependencies = [ "unicode-xid", ] @@ -550,9 +549,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" -version = "1.0.64" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fd9d1e9976102a03c542daa2eff1b43f9d72306342f3f8b3ed5fb8908195d6f" +checksum = "3ce15dd3ed8aa2f8eeac4716d6ef5ab58b6b9256db41d7e1a0224c2788e8fd87" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 1970fcc..8e17f49 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,8 @@ smartstring = "0.2.6" [patch.crates-io] # use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd" } +# forked version with many more mime types +mime_guess = {git = "https://github.com/Lynnesbian/mime_guess", version = "2.0.4"} [dependencies.clap] version = "3.0.0-beta.2" diff --git a/src/inspectors.rs b/src/inspectors.rs index a3067f5..980589c 100644 --- a/src/inspectors.rs +++ b/src/inspectors.rs @@ -72,7 +72,24 @@ cached! { // match on the mime's `essence_str` rather than the mime itself - mime_guess::get_mime_extensions ignores the type // suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. passing the essence_str // (which includes the suffix) fixes this. - match mime_guess::get_mime_extensions_str(mime.essence_str()) { + let essence = mime.essence_str(); + let mut exts = mime_guess::get_mime_extensions_str(essence); + if exts.is_none() { + // no matches :c + // mime_guess' database isn't exactly perfect... there are a lot of times where the db will return "some/x-thing" + // but mime_guess only understands "some/thing", or vice-versa. + // so, if there appear to be no extensions, try replacing "some/x-thing" with "some/thing", or "some/thing" with + // "some/x-thing". + if essence.contains("/x-") { + // replace e.g. "application/x-gzip" with "application/gzip" + exts = mime_guess::get_mime_extensions_str(&essence.replace("/x-", "/")); + } else { + // replace e.g. "video/mp2t" with "video/x-mp2t" + exts = mime_guess::get_mime_extensions_str(&essence.replace("/", "/x-")); + } + } + + match exts { Some(exts) => { let possible_exts: Vec = exts.iter().map(|e| String::from(*e)).collect();