fif/src/inspectors.rs

64 lines
2.4 KiB
Rust

use std::fs::File;
use std::io;
use std::io::{Read, Seek, SeekFrom};
use std::path::Path;
use cached::cached;
use mime_guess::Mime;
use smartstring::alias::String;
use crate::mimedb::MimeDb;
// use log::{debug, warn};
// from looking at the files in https://github.com/bojand/infer/tree/master/src/matchers, the format with the largest
// buffer size requirement for identification requires 262 bytes, and the next largest buffer necessary is only 131
// bytes. as only two formats need more than 128 bytes, it would be fairly reasonable to only read 128 bytes.
// unfortunately, the format that requires 262 bytes for identification is tar, an extremely popular format (in the *nix
// world, at least). however, tar files almost always appear wrapped in other formats (.tar.gz, .tar.zst, etc) anyway,
// so maybe it's fine...? maybe this should be configurable by the user? i don't know.
// empirical testing (or rather, starting from 256 and incrementing until it worked) reveals that mime_type requires
// at least 265 bytes to identify a tar file.
const BUF_SIZE: usize = 512;

/// Fills `buf` from `reader`, stopping early only at end of input.
///
/// A single `Read::read` call is allowed to return fewer bytes than are available and may fail with
/// `ErrorKind::Interrupted`; this helper keeps reading until `buf` is full or the reader reports EOF, and retries
/// interrupted reads.
///
/// Returns the number of bytes written to the front of `buf`. Bytes past that point are left untouched.
///
/// # Errors
///
/// Returns the first I/O error other than `ErrorKind::Interrupted` reported by `reader`.
fn read_prefix<R: Read>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;
    while filled < buf.len() {
        match reader.read(&mut buf[filled..]) {
            // end of input: the source is shorter than the buffer, which is fine for sniffing
            Ok(0) => break,
            Ok(n) => filled += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(filled)
}

/// Tries to determine the mime type of the file at `path` by handing its leading bytes to `db`.
///
/// The file is probed in two passes: first with a small 64-byte buffer, and, only if `db` cannot identify that,
/// again from the start of the file with a `BUF_SIZE`-byte buffer. Both buffers are zero-initialised and passed to
/// `db` in full, so a file shorter than the buffer is seen by `db` with trailing zero bytes.
///
/// Returns `Ok(None)` when `db` recognises neither buffer.
///
/// # Errors
///
/// Returns any I/O error raised while opening, reading, or seeking the file.
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
    // size of the first, cheap probe
    const INITIAL_BUF_SIZE: usize = 64;

    let mut file = File::open(path)?;

    // it's okay if the file isn't long enough to fill the buffer, as we only care about the first few bytes for the
    // purpose of mime sniffing - so the byte count returned by read_prefix is deliberately ignored
    let mut buffer = [0u8; INITIAL_BUF_SIZE];
    read_prefix(&mut file, &mut buffer)?;
    if let Some(mime) = db.get_type(&buffer) {
        return Ok(Some(mime));
    }

    // the small probe wasn't enough: rewind and retry with the larger buffer
    let mut buffer = [0u8; BUF_SIZE];
    file.seek(SeekFrom::Start(0))?;
    read_prefix(&mut file, &mut buffer)?;
    Ok(db.get_type(&buffer))
}
// TODO: avoid cloning mime if possible, although i don't really see how it would be - maybe instead of passing the mime
// object, pass a hash of it?
// memoised lookup of the file extensions associated with a mime type; results are stored in the MIMEXT cache.
// yields None when mime_guess knows no extensions for the given type.
cached! {
    MIMEXT;
    fn mime_extension_lookup(mime: Mime) -> Option<Vec<String>> = {
        if mime == mime_guess::mime::IMAGE_JPEG {
            // special case: the extension list for jpeg is kept in alphabetical order, which would make "jpe" the
            // primary extension - so hand back a fixed list of just "jpg" and "jpeg" instead.
            Some(vec![String::from("jpg"), String::from("jpeg")])
        } else {
            // every other type: copy whatever extensions mime_guess lists (if any) into owned strings
            mime_guess::get_mime_extensions(&mime)
                .map(|known| known.iter().map(|&ext| String::from(ext)).collect())
        }
    }
}