From 383f6a30f2b2b4df7b2a5fa7169819a932418250 Mon Sep 17 00:00:00 2001 From: Lynne Date: Wed, 14 Apr 2021 16:49:14 +1000 Subject: [PATCH] improved pre-OOXML office mime detection --- CHANGELOG.md | 1 + src/inspectors.rs | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65b965c..0fc25ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Dates are given in YYYY-MM-DD format. - Added Text extension set - More test coverage - Fixed a very minor output bug relating to scanning symlinked directories +- Better detection for specific formats (pre-OOXML Office, EXE, DLL) ### v0.2.11 (2021-04-04) #### Features diff --git a/src/inspectors.rs b/src/inspectors.rs index e12e7c8..7925569 100644 --- a/src/inspectors.rs +++ b/src/inspectors.rs @@ -39,7 +39,12 @@ pub fn mime_type(db: &T, path: &Path) -> io::Result> { // another is ZIP - many file formats (DOCX, ODT, JAR...) are just ZIP files with particular data structures. // determining that a file is in one of the MS office formats in particular requires looking quite far into the // file. - && mime != &Mime::from_str("application/zip").unwrap()); + && mime != &Mime::from_str("application/zip").unwrap() + // doc/ppt/xls files are a subset of what's known as an "OLE2 compound document storage", at least according to + // shared-mime-info. if a pre-OOXML era MS office file is scanned and identified as x-ole-storage, reading further + // will allow it to be detected correctly as the appropriate filetype. + && mime != &Mime::from_str("application/x-ole-storage").unwrap() + ); if r.is_some() { return Ok(r); @@ -89,6 +94,8 @@ cached! { } } + + match exts { Some(exts) => { let possible_exts: Vec = exts.iter().map(|e| String::from(*e)).collect();