support using either infer or xdg_mime for mime detection

i guess we can build for windows now
This commit is contained in:
Lynne Megido 2021-02-15 04:58:57 +10:00
parent bc4edecd21
commit 2431e3493a
Signed by: lynnesbian
GPG key ID: F0A184B5213D9F90
5 changed files with 105 additions and 11 deletions

7
Cargo.lock generated
View file

@ -174,6 +174,7 @@ dependencies = [
"cached", "cached",
"clap", "clap",
"env_logger", "env_logger",
"infer",
"log", "log",
"mime_guess", "mime_guess",
"once_cell", "once_cell",
@ -235,6 +236,12 @@ dependencies = [
"hashbrown", "hashbrown",
] ]
[[package]]
name = "infer"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8906512588cd815b8f759fd0ac11de2a84c985c0f792f70df611e9325c270c1f"
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.4.0" version = "1.4.0"

View file

@ -10,20 +10,23 @@ rust-version = "1.43.0" # cached requires 1.42.0
#license-file = "LICENSE" #license-file = "LICENSE"
[features] [features]
default = ["mini-buffer", "multi-threaded"] default = ["mini-buffer", "multi-threaded", "infer-backend"]
mini-buffer = [] mini-buffer = []
multi-threaded = [] multi-threaded = []
infer-backend = ["infer"]
xdg-mime-backend = ["xdg-mime"]
[dependencies] [dependencies]
walkdir = "2.3.1" walkdir = "2.3.1"
log = "0.4.14" log = "0.4.14"
smartstring = "0.2.6" smartstring = "0.2.6"
# use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd
xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd"}
mime_guess = "2.0.3" mime_guess = "2.0.3"
rayon = "1.5.0" rayon = "1.5.0"
snailquote = "0.3.0" snailquote = "0.3.0"
once_cell = "1.5.2" once_cell = "1.5.2"
infer = { version = "0.3.4", optional = true }
# use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd
xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd", optional = true }
[dependencies.clap] [dependencies.clap]
version = "3.0.0-beta.2" version = "3.0.0-beta.2"

View file

@ -6,7 +6,8 @@ use std::path::Path;
use cached::cached; use cached::cached;
use mime_guess::Mime; use mime_guess::Mime;
use smartstring::alias::String; use smartstring::alias::String;
use xdg_mime::SharedMimeInfo;
use crate::mimedb::MimeDb;
// use log::{debug, warn}; // use log::{debug, warn};
@ -20,7 +21,7 @@ use xdg_mime::SharedMimeInfo;
// at least 265 bytes to identify a tar file. // at least 265 bytes to identify a tar file.
const BUF_SIZE: usize = 512; const BUF_SIZE: usize = 512;
pub fn mime_type(db: &SharedMimeInfo, path: &Path) -> io::Result<Option<Mime>, > { pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>, > {
// attempt to read up to the BUF_SIZE bytes of the file // attempt to read up to the BUF_SIZE bytes of the file
let mut buffer = [0; 64]; let mut buffer = [0; 64];
let mut file = File::open(path)?; let mut file = File::open(path)?;
@ -31,7 +32,7 @@ pub fn mime_type(db: &SharedMimeInfo, path: &Path) -> io::Result<Option<Mime>, >
file.read(&mut buffer)?; file.read(&mut buffer)?;
let r = db.get_mime_type_for_data(&buffer).map(|m| m.0); let r = db.get_type(&buffer);
if r.is_some() { if r.is_some() {
return Ok(r); return Ok(r);
} }
@ -40,7 +41,7 @@ pub fn mime_type(db: &SharedMimeInfo, path: &Path) -> io::Result<Option<Mime>, >
file.seek(SeekFrom::Start(0))?; file.seek(SeekFrom::Start(0))?;
file.read(&mut buffer)?; file.read(&mut buffer)?;
// warn!("dang"); // warn!("dang");
Ok(db.get_mime_type_for_data(&buffer).map(|m| m.0)) Ok(db.get_type(&buffer))
} }
// TODO: avoid cloning mime if possible, although i don't really see how it would be - maybe instead of passing the mime // TODO: avoid cloning mime if possible, although i don't really see how it would be - maybe instead of passing the mime

View file

@ -26,6 +26,7 @@ use walkdir::{DirEntry, WalkDir};
use crate::findings::Findings; use crate::findings::Findings;
use crate::formats::{Format, Script}; use crate::formats::{Format, Script};
use crate::mimedb::MimeDb;
use crate::parameters::OutputFormat; use crate::parameters::OutputFormat;
use crate::scanerror::ScanError; use crate::scanerror::ScanError;
@ -34,11 +35,15 @@ mod inspectors;
mod formats; mod formats;
mod scanerror; mod scanerror;
mod findings; mod findings;
mod mimedb;
static MIMEDB: OnceCell<xdg_mime::SharedMimeInfo> = OnceCell::new(); #[cfg(feature = "infer-backend")]
static MIMEDB: OnceCell<mimedb::InferDb> = OnceCell::new();
// TODO: test if this actually works on a windows machine - not there's much of a point right now, considering #[cfg(feature = "xdg-mime-backend")]
// xdg-mime-rs doesn't support windows static MIMEDB: OnceCell<mimedb::XdgDb> = OnceCell::new();
// TODO: test if this actually works on a windows machine
#[cfg(windows)] #[cfg(windows)]
fn is_hidden(entry: &DirEntry) -> bool { fn is_hidden(entry: &DirEntry) -> bool {
use std::os::windows::prelude::*; use std::os::windows::prelude::*;
@ -154,7 +159,12 @@ fn main() {
// .target(env_logger::Target::Stdout) // log to stdout rather than stderr // .target(env_logger::Target::Stdout) // log to stdout rather than stderr
.init(); .init();
MIMEDB.set(xdg_mime::SharedMimeInfo::new()).or(Err("Failed to initialise MIMEDB")).unwrap(); #[cfg(feature = "infer-backend")]
MIMEDB.set(mimedb::InferDb::init()).or(Err("Failed to initialise MIMEDB")).unwrap();
#[cfg(feature = "xdg-mime-backend")]
MIMEDB.set(mimedb::XdgDb::init()).or(Err("Failed to initialise MIMEDB")).unwrap();
debug!("Iterating directory: {:?}", args.dirs); debug!("Iterating directory: {:?}", args.dirs);
let stepper = WalkDir::new(&args.dirs).into_iter(); let stepper = WalkDir::new(&args.dirs).into_iter();

73
src/mimedb.rs Normal file
View file

@ -0,0 +1,73 @@
use std::str::FromStr;
use mime_guess::Mime;
/// Common interface over the MIME detection backends defined in this module.
///
/// Each backend wraps a third-party detection library and exposes the same two operations, so callers can be generic
/// over `T: MimeDb` instead of naming a concrete backend.
pub trait MimeDb {
/// Constructs a ready-to-use instance of the backend.
fn init() -> Self;
/// Attempts to identify the MIME type of `data` from its contents.
///
/// Returns `None` when the backend does not recognise the data.
fn get_type(&self, data: &[u8]) -> Option<Mime>;
}
/// MIME database backed by the `infer` crate.
///
/// Only compiled when the `infer-backend` feature is enabled.
#[cfg(feature = "infer-backend")]
pub struct InferDb {
// the underlying `infer` matcher set that lookups are delegated to
db: infer::Infer
}
#[cfg(feature = "infer-backend")]
impl MimeDb for InferDb {
    /// Creates an `infer::Infer` instance and registers one extra matcher (`image/jpeg2000`) on top of it.
    fn init() -> Self {
        // Matcher for the custom entry registered below: the buffer must be at least 24 bytes long, start with the
        // 12 header bytes, and carry the 4 brand bytes at offsets 20..24.
        fn is_jpeg2000(buf: &[u8]) -> bool {
            // bytes 0-11; bytes 4-7 are ASCII "jP  "
            const HEADER: [u8; 12] = [
                0x00, 0x00, 0x00, 0x0C, 0x6A, 0x50, 0x20, 0x20, 0x0D, 0x0A, 0x87, 0x0A,
            ];
            // bytes 20-23, ASCII "jp2 "
            const BRAND: [u8; 4] = [0x6A, 0x70, 0x32, 0x20];

            // `get(20..24)` is `None` for buffers shorter than 24 bytes, which covers the length check
            buf.starts_with(&HEADER) && buf.get(20..24) == Some(&BRAND[..])
        }

        let mut db = infer::Infer::new();
        // add a random file type just to make sure adding works and such
        // NOTE(review): infer's own examples register extensions without a leading dot ("foo", not ".foo") - confirm
        // whether ".jp2" should be "jp2". Left untouched here because only the MIME string is read by `get_type`.
        db.add("image/jpeg2000", ".jp2", is_jpeg2000);

        Self { db }
    }

    /// Looks `data` up in the `infer` database and converts the matched type's MIME string into a [`Mime`].
    ///
    /// Returns `None` when nothing matches, or when the matched MIME string cannot be parsed as a `Mime` - an
    /// unparseable string is treated as "unknown" rather than aborting the whole program with a panic.
    fn get_type(&self, data: &[u8]) -> Option<Mime> {
        self.db
            .get(data)
            .and_then(|found| Mime::from_str(found.mime_type()).ok())
    }
}
/// MIME database backed by the `xdg_mime` crate.
///
/// Only compiled when the `xdg-mime-backend` feature is enabled.
#[cfg(feature = "xdg-mime-backend")]
pub struct XdgDb {
// the underlying `SharedMimeInfo` that lookups are delegated to
db: xdg_mime::SharedMimeInfo
}
#[cfg(feature = "xdg-mime-backend")]
impl MimeDb for XdgDb {
    /// Creates the backend around a freshly constructed `xdg_mime::SharedMimeInfo`.
    fn init() -> Self {
        let db = xdg_mime::SharedMimeInfo::new();
        Self { db }
    }

    /// Asks the shared MIME info database to identify `data` from its contents.
    ///
    /// The lookup yields a tuple; only its first element (the `Mime`) is returned and the accompanying value is
    /// dropped. Returns `None` when the database has no match.
    fn get_type(&self, data: &[u8]) -> Option<Mime> {
        let (mime, _) = self.db.get_mime_type_for_data(data)?;
        Some(mime)
    }
}