diff --git a/.gitignore b/.gitignore index e811dce..aea0234 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ /imgs fif_* /old +/awful *.sh diff --git a/.idea/fif.iml b/.idea/fif.iml index 9ed97f6..3dddcd7 100644 --- a/.idea/fif.iml +++ b/.idea/fif.iml @@ -6,6 +6,7 @@ + diff --git a/Cargo.toml b/Cargo.toml index 788613f..58dd3e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,4 +26,4 @@ default-features = false features = ["termcolor", "atty"] [profile.release] -lto = "thin" \ No newline at end of file +lto = "thin" diff --git a/chunked b/chunked new file mode 100755 index 0000000..2840f3d Binary files /dev/null and b/chunked differ diff --git a/src/formats.rs b/src/formats.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/inspectors.rs b/src/inspectors.rs index bd9b5ed..580fd09 100644 --- a/src/inspectors.rs +++ b/src/inspectors.rs @@ -3,20 +3,41 @@ use std::path::Path; use std::io; use mime_guess::Mime; use std::fs::File; -use std::io::Read; +use std::io::{Read, Seek, SeekFrom}; use smartstring::alias::String; use cached::proc_macro::cached; +use log::{debug, warn}; + +// from looking at the files in https://github.com/bojand/infer/tree/master/src/matchers, the format with the largest +// buffer size requirement for identification requires 262 bytes, and the next largest buffer necessary is only 131 +// bytes. as only two formats need more than 128 bytes, it would be fairly reasonable to only read 128 bytes. +// unfortunately, the format that requires 262 bytes for identification is tar, an extremely popular format (in the *nix +// world, at least). however, tar files almost always appear wrapped in other formats (.tar.gz, .tar.zst, etc) anyway, +// so maybe it's fine...? maybe this should be configurable by the user? i don't know. +// empirical testing (or rather, starting from 256 and incrementing until it worked) reveals that mime_type requires +// at least 265 bytes to identify a tar file. +const BUF_SIZE: usize = 512; pub fn mime_type(db: &SharedMimeInfo, path: &Path) -> io::Result, > { - // attempt to read up to the 256 bytes of the file - let mut buffer = [0; 256]; + // attempt to read up to the BUF_SIZE bytes of the file + let mut buffer = [0; 64]; let mut file = File::open(path)?; - // this can be ignored because it's perfectly okay if the file is less than 256 bytes long - we only care about the + // this lint can be ignored: it's okay if the file isn't long enough to fill the buffer, as we only care about the // first few bytes for the purpose of mime sniffing #[allow(clippy::unused_io_amount)] file.read(&mut buffer)?; + + let r = db.get_mime_type_for_data(&buffer).map(|m| m.0); + if r.is_some() { + return Ok(r); + } + + let mut buffer = [0; BUF_SIZE]; + file.seek(SeekFrom::Start(0))?; + file.read(&mut buffer)?; + // warn!("dang"); Ok(db.get_mime_type_for_data(&buffer).map(|m| m.0)) }