Replace the `cached` dependency with a custom HashMap-backed cache

I benchmarked it with hyperfine: in terms of performance it's pretty much identical, with a slight (fraction of a percent) advantage to my implementation.
This commit is contained in:
Lynne Megido 2021-09-24 22:02:04 +10:00
parent 3d41183f1c
commit 97b0a6edaa
Signed by: lynnesbian
GPG key ID: F0A184B5213D9F90
3 changed files with 86 additions and 70 deletions

View file

@ -16,6 +16,7 @@ files module, removed string module, etc.
"Features" heading into "Added" and "Changed" sections, renaming "Bugfixes" to "Fixed", and removing the "Features" heading into "Added" and "Changed" sections, renaming "Bugfixes" to "Fixed", and removing the
headings that (pointlessly?) previously divided the changelog into v0.3, v0.2, and v0.1 headings that (pointlessly?) previously divided the changelog into v0.3, v0.2, and v0.1
- A few minor grammar tweaks and reorganisations - A few minor grammar tweaks and reorganisations
- Replaced [`cached`] dependency with a simple HashMap-backed store
## v0.3.6 - 2021-08-16 ## v0.3.6 - 2021-08-16
### Other ### Other

View file

@ -1,15 +1,16 @@
use std::collections::BTreeSet; use std::collections::{BTreeSet, HashMap};
use std::fs::File; use std::fs::File;
use std::io; use std::io;
use std::io::{Read, Seek, SeekFrom}; use std::io::{Read, Seek, SeekFrom};
use std::path::Path; use std::path::Path;
use std::str::FromStr; use std::str::FromStr;
use std::sync::RwLock;
use cached::cached;
use cfg_if::cfg_if; use cfg_if::cfg_if;
use log::{debug, error}; use log::{debug, error};
use mime::Mime; use mime::Mime;
use mime_guess::from_ext; use mime_guess::from_ext;
use once_cell::sync::Lazy;
use walkdir::{DirEntry, WalkDir}; use walkdir::{DirEntry, WalkDir};
use crate::findings::{Findings, ScanError}; use crate::findings::{Findings, ScanError};
@ -17,6 +18,8 @@ use crate::mime_db::MimeDb;
use crate::parameters::ScanOpts; use crate::parameters::ScanOpts;
use crate::{String, MIMEDB}; use crate::{String, MIMEDB};
static MIMEXT: Lazy<RwLock<HashMap<String, Option<Vec<String>>>>> = Lazy::new(|| RwLock::new(HashMap::new()));
cfg_if! { cfg_if! {
if #[cfg(windows)] { if #[cfg(windows)] {
/// Determines whether or not a file is hidden by checking its win32 file attributes. /// Determines whether or not a file is hidden by checking its win32 file attributes.
@ -255,18 +258,19 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
Ok(db.get_type(&buffer)) Ok(db.get_type(&buffer))
} }
cached! {
MIMEXT;
fn mime_extension_lookup(essence: String) -> Option<Vec<String>> = {
// Returns a list of known extensions for this mime type, if any. // Returns a list of known extensions for this mime type, if any.
// This function uses the [Mime]'s "essence" rather than the [Mime] itself - mime_guess::get_mime_extensions ignores // This function uses the [Mime]'s "essence" rather than the [Mime] itself - mime_guess::get_mime_extensions ignores
// the type suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. Passing the // the type suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. Passing the
// essence_str (which includes the suffix) fixes this. // essence_str (which includes the suffix) fixes this.
// ↑ this is supposed to be a doc comment, but the cached! macro doesn't support that... i would switch to the pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
// proc_macro version of cached, but it has a huge number of deps :c if let Ok(cache) = MIMEXT.read() {
if let Some(exts) = cache.get(&essence) {
return exts.clone();
}
}
let essence = essence.as_str(); let essence = essence;
let mut exts = mime_guess::get_mime_extensions_str(essence); let mut exts = mime_guess::get_mime_extensions_str(essence.as_str());
if exts.is_none() { if exts.is_none() {
// no matches :c // no matches :c
// mime_guess' database isn't exactly perfect... there are a lot of times where the db will return "some/x-thing" // mime_guess' database isn't exactly perfect... there are a lot of times where the db will return "some/x-thing"
@ -282,7 +286,7 @@ cached! {
} }
} }
match exts { let exts = match exts {
Some(exts) => { Some(exts) => {
let possible_exts: Vec<String> = exts.iter().map(|e| String::from(*e)).collect(); let possible_exts: Vec<String> = exts.iter().map(|e| String::from(*e)).collect();
@ -291,7 +295,6 @@ cached! {
// far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can // far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can
// add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif. // add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif.
[vec![String::from("jpg")], possible_exts].concat() [vec![String::from("jpg")], possible_exts].concat()
} else if essence == mime::TEXT_XML.essence_str() || essence == "application/xml" { } else if essence == mime::TEXT_XML.essence_str() || essence == "application/xml" {
// a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should // a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should
// (in my opinion) be "xml". // (in my opinion) be "xml".
@ -304,15 +307,21 @@ cached! {
// "application/xhtml+xml", let it through - in other words, if it's identified as application/xml, but its // "application/xhtml+xml", let it through - in other words, if it's identified as application/xml, but its
// extension is classed as application/*+xml, consider it OK // extension is classed as application/*+xml, consider it OK
[vec![String::from("xml"), String::from("svg")], possible_exts].concat() [vec![String::from("xml"), String::from("svg")], possible_exts].concat()
} else if essence == "application/msword" { } else if essence == "application/msword" {
// classic office files considered harmful // classic office files considered harmful
vec![String::from("doc"), String::from("xls"), String::from("ppt")] vec![String::from("doc"), String::from("xls"), String::from("ppt")]
} else if essence == "application/zip" { } else if essence == "application/zip" {
// neither xdg-mime nor infer seem to be able to detect office XML files properly... // neither xdg-mime nor infer seem to be able to detect office XML files properly...
[vec![String::from("zip"), String::from("docx"), String::from("xlsx"), String::from("pptx")], possible_exts].concat() [
vec![
String::from("zip"),
String::from("docx"),
String::from("xlsx"),
String::from("pptx"),
],
possible_exts,
]
.concat()
} else if essence == "application/x-ms-dos-executable" { } else if essence == "application/x-ms-dos-executable" {
// both .dll and .exe files are given the same mime type... but you definitely don't want to rename one to the // both .dll and .exe files are given the same mime type... but you definitely don't want to rename one to the
// other! // other!
@ -320,8 +329,14 @@ cached! {
} else { } else {
possible_exts possible_exts
}) })
}, }
None => None None => None,
} };
if let Ok(mut cache) = MIMEXT.write() {
cache.insert(essence, exts.clone());
exts
} else {
unreachable!()
} }
} }