support using either infer or xdg_mime for mime detection

i guess we can build for windows now
This commit is contained in:
Lynne Megido 2021-02-15 04:58:57 +10:00
parent bc4edecd21
commit 2431e3493a
Signed by: lynnesbian
GPG Key ID: F0A184B5213D9F90
5 changed files with 105 additions and 11 deletions

7
Cargo.lock generated
View File

@ -174,6 +174,7 @@ dependencies = [
"cached",
"clap",
"env_logger",
"infer",
"log",
"mime_guess",
"once_cell",
@ -235,6 +236,12 @@ dependencies = [
"hashbrown",
]
[[package]]
name = "infer"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8906512588cd815b8f759fd0ac11de2a84c985c0f792f70df611e9325c270c1f"
[[package]]
name = "lazy_static"
version = "1.4.0"

View File

@ -10,20 +10,23 @@ rust-version = "1.43.0" # cached requires 1.42.0
#license-file = "LICENSE"
[features]
default = ["mini-buffer", "multi-threaded"]
default = ["mini-buffer", "multi-threaded", "infer-backend"]
mini-buffer = []
multi-threaded = []
infer-backend = ["infer"]
xdg-mime-backend = ["xdg-mime"]
[dependencies]
walkdir = "2.3.1"
log = "0.4.14"
smartstring = "0.2.6"
# use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd
xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd"}
mime_guess = "2.0.3"
rayon = "1.5.0"
snailquote = "0.3.0"
once_cell = "1.5.2"
infer = { version = "0.3.4", optional = true }
# use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd
xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd", optional = true }
[dependencies.clap]
version = "3.0.0-beta.2"

View File

@ -6,7 +6,8 @@ use std::path::Path;
use cached::cached;
use mime_guess::Mime;
use smartstring::alias::String;
use xdg_mime::SharedMimeInfo;
use crate::mimedb::MimeDb;
// use log::{debug, warn};
@ -20,7 +21,7 @@ use xdg_mime::SharedMimeInfo;
// at least 265 bytes to identify a tar file.
const BUF_SIZE: usize = 512;
pub fn mime_type(db: &SharedMimeInfo, path: &Path) -> io::Result<Option<Mime>, > {
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>, > {
// attempt to read up to the BUF_SIZE bytes of the file
let mut buffer = [0; 64];
let mut file = File::open(path)?;
@ -31,7 +32,7 @@ pub fn mime_type(db: &SharedMimeInfo, path: &Path) -> io::Result<Option<Mime>, >
file.read(&mut buffer)?;
let r = db.get_mime_type_for_data(&buffer).map(|m| m.0);
let r = db.get_type(&buffer);
if r.is_some() {
return Ok(r);
}
@ -40,7 +41,7 @@ pub fn mime_type(db: &SharedMimeInfo, path: &Path) -> io::Result<Option<Mime>, >
file.seek(SeekFrom::Start(0))?;
file.read(&mut buffer)?;
// warn!("dang");
Ok(db.get_mime_type_for_data(&buffer).map(|m| m.0))
Ok(db.get_type(&buffer))
}
// TODO: avoid cloning mime if possible, although i don't really see how it would be - maybe instead of passing the mime

View File

@ -26,6 +26,7 @@ use walkdir::{DirEntry, WalkDir};
use crate::findings::Findings;
use crate::formats::{Format, Script};
use crate::mimedb::MimeDb;
use crate::parameters::OutputFormat;
use crate::scanerror::ScanError;
@ -34,11 +35,15 @@ mod inspectors;
mod formats;
mod scanerror;
mod findings;
mod mimedb;
static MIMEDB: OnceCell<xdg_mime::SharedMimeInfo> = OnceCell::new();
#[cfg(feature = "infer-backend")]
static MIMEDB: OnceCell<mimedb::InferDb> = OnceCell::new();
// TODO: test if this actually works on a windows machine - not there's much of a point right now, considering
// xdg-mime-rs doesn't support windows
#[cfg(feature = "xdg-mime-backend")]
static MIMEDB: OnceCell<mimedb::XdgDb> = OnceCell::new();
// TODO: test if this actually works on a windows machine
#[cfg(windows)]
fn is_hidden(entry: &DirEntry) -> bool {
use std::os::windows::prelude::*;
@ -154,7 +159,12 @@ fn main() {
// .target(env_logger::Target::Stdout) // log to stdout rather than stderr
.init();
MIMEDB.set(xdg_mime::SharedMimeInfo::new()).or(Err("Failed to initialise MIMEDB")).unwrap();
#[cfg(feature = "infer-backend")]
MIMEDB.set(mimedb::InferDb::init()).or(Err("Failed to initialise MIMEDB")).unwrap();
#[cfg(feature = "xdg-mime-backend")]
MIMEDB.set(mimedb::XdgDb::init()).or(Err("Failed to initialise MIMEDB")).unwrap();
debug!("Iterating directory: {:?}", args.dirs);
let stepper = WalkDir::new(&args.dirs).into_iter();

73
src/mimedb.rs Normal file
View File

@ -0,0 +1,73 @@
use std::str::FromStr;
use mime_guess::Mime;
/// Common interface over the MIME-detection backends defined in this module.
///
/// Each backend wraps a different detection library behind the same two operations, so
/// callers can stay generic over `T: MimeDb`.
pub trait MimeDb {
    /// Builds a ready-to-use instance of the backend.
    fn init() -> Self;

    /// Tries to determine the MIME type of the bytes in `data`.
    ///
    /// Returns `None` when the backend cannot identify the data.
    fn get_type(&self, data: &[u8]) -> Option<Mime>;
}
/// MIME database backed by the `infer` crate.
///
/// Only compiled when the `infer-backend` feature is enabled.
#[cfg(feature = "infer-backend")]
pub struct InferDb {
    /// The `infer` instance that `get_type` queries.
    db: infer::Infer,
}
/// Reports whether `buf` carries the byte pattern registered for `image/jpeg2000`.
///
/// The pattern is twelve fixed bytes at the very start of the buffer plus the four bytes
/// `jp2 ` at offsets 20..24, so a buffer shorter than 24 bytes never matches.
///
/// Also compiled under `cfg(test)` so the check can be unit-tested without the backend.
#[cfg(any(feature = "infer-backend", test))]
fn is_jp2_signature(buf: &[u8]) -> bool {
    const HEADER: [u8; 12] = [
        0x00, 0x00, 0x00, 0x0C, 0x6A, 0x50, 0x20, 0x20, 0x0D, 0x0A, 0x87, 0x0A,
    ];
    const BRAND: [u8; 4] = [0x6A, 0x70, 0x32, 0x20];

    // `starts_with` and `get` both handle short buffers, so no explicit length check is needed.
    buf.starts_with(&HEADER) && buf.get(20..24) == Some(&BRAND[..])
}

#[cfg(feature = "infer-backend")]
impl MimeDb for InferDb {
    /// Creates an `infer` database and registers one extra matcher on top of it.
    fn init() -> Self {
        let mut db = infer::Infer::new();
        // Register an additional file type, mainly to make sure custom matchers work.
        db.add("image/jpeg2000", ".jp2", is_jp2_signature);
        Self { db }
    }

    /// Looks `data` up in the `infer` database and converts the match to a `Mime`.
    ///
    /// Returns `None` when nothing matches, or when the matched type's MIME string cannot
    /// be parsed as a `Mime` (rather than panicking on a malformed string).
    fn get_type(&self, data: &[u8]) -> Option<Mime> {
        self.db
            .get(data)
            .and_then(|kind| Mime::from_str(kind.mime_type()).ok())
    }
}
/// MIME database backed by the `xdg_mime` crate.
///
/// Only compiled when the `xdg-mime-backend` feature is enabled.
#[cfg(feature = "xdg-mime-backend")]
pub struct XdgDb {
    /// The `SharedMimeInfo` instance that `get_type` queries.
    db: xdg_mime::SharedMimeInfo,
}
#[cfg(feature = "xdg-mime-backend")]
impl MimeDb for XdgDb {
    /// Wraps a freshly constructed `SharedMimeInfo`.
    fn init() -> Self {
        let db = xdg_mime::SharedMimeInfo::new();
        Self { db }
    }

    /// Asks `SharedMimeInfo` for the MIME type of `data`.
    ///
    /// The lookup yields a tuple; only its first element (the `Mime`) is returned and the
    /// second element is discarded. Returns `None` when the lookup finds nothing.
    fn get_type(&self, data: &[u8]) -> Option<Mime> {
        let (mime, _) = self.db.get_mime_type_for_data(data)?;
        Some(mime)
    }
}