first read a smol chunk, if we can't ID the file, read BUF_SIZE
This commit is contained in:
parent
6d49336e6b
commit
8fc3f18466
6 changed files with 28 additions and 5 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -2,4 +2,5 @@
|
|||
/imgs
|
||||
fif_*
|
||||
/old
|
||||
/awful
|
||||
*.sh
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/imgs" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/old" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/awful" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
|
|
|
@ -26,4 +26,4 @@ default-features = false
|
|||
features = ["termcolor", "atty"]
|
||||
|
||||
[profile.release]
|
||||
lto = "thin"
|
||||
lto = "thin"
|
||||
|
|
BIN
chunked
Executable file
BIN
chunked
Executable file
Binary file not shown.
0
src/formats.rs
Normal file
0
src/formats.rs
Normal file
|
@ -3,20 +3,41 @@ use std::path::Path;
|
|||
use std::io;
|
||||
use mime_guess::Mime;
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
use std::io::{Read, Seek, SeekFrom};
|
||||
use smartstring::alias::String;
|
||||
use cached::proc_macro::cached;
|
||||
use log::{debug, warn};
|
||||
|
||||
// from looking at the files in https://github.com/bojand/infer/tree/master/src/matchers, the format with the largest
|
||||
// buffer size requirement for identification requires 262 bytes, and the next largest buffer necessary is only 131
|
||||
// bytes. as only two formats need more than 128 bytes, it would be fairly reasonable to only read 128 bytes.
|
||||
// unfortunately, the format that requires 262 bytes for identification is tar, an extremely popular format (in the *nix
|
||||
// world, at least). however, tar files almost always appear wrapped in other formats (.tar.gz, .tar.zst, etc) anyway,
|
||||
// so maybe it's fine...? maybe this should be configurable by the user? i don't know.
|
||||
// empirical testing (or rather, starting from 256 and incrementing until it worked) reveals that mime_type requires
|
||||
// at least 265 bytes to identify a tar file.
|
||||
const BUF_SIZE: usize = 512;
|
||||
|
||||
pub fn mime_type(db: &SharedMimeInfo, path: &Path) -> io::Result<Option<Mime>, > {
|
||||
// attempt to read up to the 256 bytes of the file
|
||||
let mut buffer = [0; 256];
|
||||
// attempt to read up to the first 64 bytes of the file; if that isn't enough to identify it, retry below with BUF_SIZE bytes
|
||||
let mut buffer = [0; 64];
|
||||
let mut file = File::open(path)?;
|
||||
|
||||
// this can be ignored because it's perfectly okay if the file is less than 256 bytes long - we only care about the
|
||||
// this lint can be ignored: it's okay if the file isn't long enough to fill the buffer, as we only care about the
|
||||
// first few bytes for the purpose of mime sniffing
|
||||
#[allow(clippy::unused_io_amount)]
|
||||
file.read(&mut buffer)?;
|
||||
|
||||
|
||||
let r = db.get_mime_type_for_data(&buffer).map(|m| m.0);
|
||||
if r.is_some() {
|
||||
return Ok(r);
|
||||
}
|
||||
|
||||
let mut buffer = [0; BUF_SIZE];
|
||||
file.seek(SeekFrom::Start(0))?;
|
||||
file.read(&mut buffer)?;
|
||||
// warn!("dang");
|
||||
Ok(db.get_mime_type_for_data(&buffer).map(|m| m.0))
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue