2021-02-06 21:48:31 +10:00 · 2021-02-06 21:48:31 +10:00 · 8fc3f18466
commit 8fc3f18466
parent 6d49336e6b
6 changed files with 28 additions and 5 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,4 +2,5 @@
 /imgs
 fif_*
 /old
+/awful
 *.sh
--- a/.idea/fif.iml
+++ b/.idea/fif.iml
@ -6,6 +6,7 @@
      <excludeFolder url="file://$MODULE_DIR$/target" />
      <excludeFolder url="file://$MODULE_DIR$/imgs" />
      <excludeFolder url="file://$MODULE_DIR$/old" />
+      <excludeFolder url="file://$MODULE_DIR$/awful" />
    </content>
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
--- a/BIN
+++ b/BIN
--- a/src/formats.rs
+++ b/src/formats.rs
--- a/src/inspectors.rs
+++ b/src/inspectors.rs
@ -3,20 +3,41 @@ use std::path::Path;
 use std::io;
 use mime_guess::Mime;
 use std::fs::File;
-use std::io::Read;
+use std::io::{Read, Seek, SeekFrom};
 use smartstring::alias::String;
 use cached::proc_macro::cached;
+use log::{debug, warn};
+
+// from looking at the files in https://github.com/bojand/infer/tree/master/src/matchers, the format with the largest
+// buffer size requirement for identification requires 262 bytes, and the next largest buffer necessary is only 131
+// bytes. as only two formats need more than 128 bytes, it would be fairly reasonable to only read 128 bytes.
+// unfortunately, the format that requires 262 bytes for identification is tar, an extremely popular format (in the *nix
+// world, at least). however, tar files almost always appear wrapped in other formats (.tar.gz, .tar.zst, etc) anyway,
+// so maybe it's fine...? maybe this should be configurable by the user? i don't know.
+// empirical testing (or rather, starting from 256 and incrementing until it worked) reveals that mime_type requires
+// at least 265 bytes to identify a tar file.
+const BUF_SIZE: usize = 512;

 pub fn mime_type(db: &SharedMimeInfo, path: &Path) -> io::Result<Option<Mime>, > {
-	// attempt to read up to the 256 bytes of the file
-	let mut buffer = [0; 256];
+	// attempt to read only the first 64 bytes of the file - BUF_SIZE bytes are read later, if these can't be identified
+	let mut buffer = [0; 64];
 	let mut file = File::open(path)?;

-	// this can be ignored because it's perfectly okay if the file is less than 256 bytes long - we only care about the
+	// this lint can be ignored: it's okay if the file isn't long enough to fill the buffer, as we only care about the
 	// first few bytes for the purpose of mime sniffing
 	#[allow(clippy::unused_io_amount)]
 	file.read(&mut buffer)?;

+
+	let r = db.get_mime_type_for_data(&buffer).map(|m| m.0);
+	if r.is_some() {
+		return Ok(r);
+	}
+
+	let mut buffer = [0; BUF_SIZE];
+	file.seek(SeekFrom::Start(0))?;
+	file.read(&mut buffer)?;
+	// warn!("dang");
 	Ok(db.get_mime_type_for_data(&buffer).map(|m| m.0))
 }
editor.table_modal.label.rows
editor.table_modal.label.columns