use std::fs::File; use std::io; use std::io::{Read, Seek, SeekFrom}; use std::path::Path; use cached::cached; use mime_guess::Mime; use smartstring::alias::String; use crate::mimedb::MimeDb; // use log::{debug, warn}; // from looking at the files in https://github.com/bojand/infer/tree/master/src/matchers, the format with the largest // buffer size requirement for identification requires 262 bytes, and the next largest buffer necessary is only 131 // bytes. as only two formats need more than 128 bytes, it would be fairly reasonable to only read 128 bytes. // unfortunately, the format that requires 262 bytes for identification is tar, an extremely popular format (in the *nix // world, at least). however, tar files almost always appear wrapped in other formats (.tar.gz, .tar.zst, etc) anyway, // so maybe it's fine...? maybe this should be configurable by the user? i don't know. // empirical testing (or rather, starting from 256 and incrementing until it worked) reveals that mime_type requires // at least 265 bytes to identify a tar file. const BUF_SIZE: usize = 512; pub fn mime_type(db: &T, path: &Path) -> io::Result, > { // attempt to read up to the BUF_SIZE bytes of the file let mut buffer = [0; 64]; let mut file = File::open(path)?; // this lint can be ignored: it's okay if the file isn't long enough to fill the buffer, as we only care about the // first few bytes for the purpose of mime sniffing #[allow(clippy::unused_io_amount)] file.read(&mut buffer)?; let r = db.get_type(&buffer); if r.is_some() { return Ok(r); } let mut buffer = [0; BUF_SIZE]; file.seek(SeekFrom::Start(0))?; file.read(&mut buffer)?; // warn!("dang"); Ok(db.get_type(&buffer)) } // TODO: avoid cloning mime if possible, although i don't really see how it would be - maybe instead of passing the mime // object, pass a hash of it? cached! { MIMEXT; fn mime_extension_lookup(mime: Mime) -> Option> = { if mime == mime_guess::mime::IMAGE_JPEG { // jpeg files are given the primary extension "jpe", due to the extension list being stored in alphabetical order. // to handle this particular case, return a custom vector consisting of just "jpg" and "jpeg". return Some(vec![String::from("jpg"), String::from("jpeg")]); } match mime_guess::get_mime_extensions(&mime) { // get a list of possible extensions for this mime type Some(exts) => Some(exts.iter().map(|e| String::from(*e)).collect()), None => None } } }