diff --git a/.gitignore b/.gitignore
index e811dce..aea0234 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@
/imgs
fif_*
/old
+/awful
*.sh
diff --git a/.idea/fif.iml b/.idea/fif.iml
index 9ed97f6..3dddcd7 100644
--- a/.idea/fif.iml
+++ b/.idea/fif.iml
@@ -6,6 +6,7 @@
+
diff --git a/Cargo.toml b/Cargo.toml
index 788613f..58dd3e2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -26,4 +26,4 @@ default-features = false
features = ["termcolor", "atty"]
[profile.release]
-lto = "thin"
\ No newline at end of file
+lto = "thin"
diff --git a/chunked b/chunked
new file mode 100755
index 0000000..2840f3d
Binary files /dev/null and b/chunked differ
diff --git a/src/formats.rs b/src/formats.rs
new file mode 100644
index 0000000..e69de29
diff --git a/src/inspectors.rs b/src/inspectors.rs
index bd9b5ed..580fd09 100644
--- a/src/inspectors.rs
+++ b/src/inspectors.rs
@@ -3,20 +3,41 @@ use std::path::Path;
use std::io;
use mime_guess::Mime;
use std::fs::File;
-use std::io::Read;
+use std::io::{Read, Seek, SeekFrom};
use smartstring::alias::String;
use cached::proc_macro::cached;
+use log::{debug, warn};
+
+// Judging by the matchers in https://github.com/bojand/infer/tree/master/src/matchers, the format that needs the
+// largest buffer for identification needs 262 bytes, and the next largest needs only 131 bytes. As only two formats
+// need more than 128 bytes, reading just 128 bytes would be fairly reasonable. Unfortunately, the format that needs
+// 262 bytes is tar, an extremely popular format (in the *nix world, at least). However, tar files almost always
+// appear wrapped in other formats (.tar.gz, .tar.zst, etc.) anyway, so a smaller buffer might be fine...? Maybe
+// this should be configurable by the user? That is still an open question.
+// Empirical testing (or rather, starting from 256 and incrementing until it worked) reveals that `mime_type`
+// requires at least 265 bytes to identify a tar file.
+const BUF_SIZE: usize = 512; // NOTE(review): presumably the read-buffer size in bytes used for identification; 512 exceeds the 265 bytes noted above for tar — confirm usage
pub fn mime_type(db: &SharedMimeInfo, path: &Path) -> io::Result