diff --git a/src/files.rs b/src/files.rs index fa7df63..2ad54ca 100644 --- a/src/files.rs +++ b/src/files.rs @@ -6,6 +6,7 @@ use std::collections::{BTreeSet, HashMap}; use std::fs::File; use std::io::{self, Read, Seek, SeekFrom}; +use std::ops::Deref; use std::path::Path; use std::str::FromStr; @@ -22,10 +23,25 @@ use crate::findings::{Findings, ScanError}; use crate::mime_db::MimeDb; use crate::parameters::ScanOpts; use crate::{String, MIMEDB}; +use crate::utils::APPLICATION_ZIP; /// Cache of mimetypes and their associated extensions, used by [`mime_extension_lookup()`] static MIMEXT: Lazy>>>> = Lazy::new(|| RwLock::new(HashMap::new())); +/// The number of bytes to read initially when identifying a file's MIME type. Used in the [`mime_type`] function. +/// +/// Rather than reading the entire file all at once into a [`BUF_SIZE`] buffer, it tends to be faster to read a small +/// chunk of the file and trying to identify that, proceeding with the larger buffer if that fails. Many file formats +/// can be identified with the first few dozen bytes, so the "happy path" will likely be taken in the majority of cases. +pub const INITIAL_BUF_SIZE: usize = 128; + +/// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes. Used in +/// the [`mime_type`] function. +pub const BUF_SIZE: usize = 8192; + +/// A [`Mime`] representing the "application/x-ole-storage" mimetype. +static APPLICATION_X_OLE_STORAGE: Lazy = Lazy::new(|| Mime::from_str("application/x-ole-storage").unwrap()); + cfg_if! { if #[cfg(windows)] { /// Determines whether or not a file is hidden by checking its win32 file attributes. @@ -233,16 +249,6 @@ pub fn scan_directory( } } -/// The number of bytes to read initially. -/// -/// Rather than reading the entire file all at once into a [`BUF_SIZE`] buffer, it tends to be faster to read a small -/// chunk of the file and trying to identify that, proceeding with the larger buffer if that fails. Many file formats -/// can be identified with the first few dozen bytes, so the "happy path" will likely be taken in the majority of cases. -pub const INITIAL_BUF_SIZE: usize = 128; - -/// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes. -pub const BUF_SIZE: usize = 8192; - /// Tries to identify the mimetype of a file from a given path. pub fn mime_type(db: &T, path: &Path) -> io::Result> { let mut buffer = [0; INITIAL_BUF_SIZE]; @@ -278,11 +284,11 @@ pub fn mime_type(db: &T, path: &Path) -> io::Result> { // another is ZIP - many file formats (DOCX, ODT, JAR...) are just ZIP files with particular data structures. // determining that a file is in one of the MS office formats in particular requires looking quite far into the // file. - && mime != &Mime::from_str("application/zip").unwrap() + && mime != APPLICATION_ZIP.deref() // doc/ppt/xls files are a subset of what's known as an "OLE2 compound document storage", at least according to // shared-mime-info. if a pre-OOXML era MS office file is scanned and identified as x-ole-storage, reading further // will allow it to be detected correctly as the appropriate filetype. - && mime != &Mime::from_str("application/x-ole-storage").unwrap()); + && mime != APPLICATION_X_OLE_STORAGE.deref()); if r.is_some() { return Ok(r);