2021-02-22 02:08:08 +10:00 · 2021-02-22 02:02:27 +10:00 · 2021-02-22 01:55:27 +10:00 · 2021-02-22 00:46:51 +10:00 · 2021-02-22 00:20:17 +10:00 · 2021-02-22 00:15:09 +10:00
13 changed files with 377 additions and 126 deletions
--- a/.gitignore
+++ b/.gitignore
@ -2,6 +2,6 @@
 /imgs
 fif_*
 /old
 /awful
 *.sh
 !clippy.sh
 cargo-timing*.html
--- a/Cargo.lock
+++ b/Cargo.lock
@ -39,6 +39,12 @@ dependencies = [
 "once_cell",
 ]
 [[package]]
 name = "cc"
 version = "1.0.67"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd"
 [[package]]
 name = "cfg-if"
 version = "1.0.0"
@ -77,12 +83,6 @@ dependencies = [
 "syn",
 ]
 [[package]]
 name = "const_fn"
 version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "28b9d6de7f49e22cf97ad17fc4036ece69300032f45f78f30b4a4482cdc3f4a6"
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.0"
@ -106,27 +106,28 @@ dependencies = [
 [[package]]
 name = "crossbeam-epoch"
-version = "0.9.1"
+version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d"
+checksum = "d60ab4a8dba064f2fbb5aa270c28da5cf4bbd0e72dae1140a6b0353a779dbe00"
 dependencies = [
 "cfg-if",
 "const_fn",
 "crossbeam-utils",
 "lazy_static",
 "loom",
 "memoffset",
 "scopeguard",
 ]
 [[package]]
 name = "crossbeam-utils"
-version = "0.8.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d"
+checksum = "bae8f328835f8f5a6ceb6a7842a7f2d0c03692adb5c889347235d59194731fe3"
 dependencies = [
 "autocfg",
 "cfg-if",
 "lazy_static",
 "loom",
 ]
 [[package]]
@ -167,13 +168,20 @@ dependencies = [
 "termcolor",
 ]
 [[package]]
 name = "exitcode"
 version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193"
 [[package]]
 name = "fif"
-version = "0.2.1"
+version = "0.2.3"
 dependencies = [
 "cached",
 "clap",
 "env_logger",
 "exitcode",
 "infer",
 "log",
 "mime_guess",
@ -185,6 +193,19 @@ dependencies = [
 "xdg-mime",
 ]
 [[package]]
 name = "generator"
 version = "0.6.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8cdc09201b2e8ca1b19290cf7e65de2246b8e91fb6874279722189c4de7b94dc"
 dependencies = [
 "cc",
 "libc",
 "log",
 "rustc_version",
 "winapi",
 ]
 [[package]]
 name = "getrandom"
 version = "0.2.2"
@ -276,6 +297,17 @@ dependencies = [
 "cfg-if",
 ]
 [[package]]
 name = "loom"
 version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d44c73b4636e497b4917eb21c33539efa3816741a2d3ff26c6316f1b529481a4"
 dependencies = [
 "cfg-if",
 "generator",
 "scoped-tls",
 ]
 [[package]]
 name = "memchr"
 version = "2.3.4"
@ -426,6 +458,15 @@ dependencies = [
 "redox_syscall",
 ]
 [[package]]
 name = "rustc_version"
 version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
 dependencies = [
 "semver",
 ]
 [[package]]
 name = "ryu"
 version = "1.0.5"
@ -441,12 +482,33 @@ dependencies = [
 "winapi-util",
 ]
 [[package]]
 name = "scoped-tls"
 version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2"
 [[package]]
 name = "scopeguard"
 version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
 [[package]]
 name = "semver"
 version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
 dependencies = [
 "semver-parser",
 ]
 [[package]]
 name = "semver-parser"
 version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
 [[package]]
 name = "smartstring"
 version = "0.2.6"
@ -514,18 +576,18 @@ dependencies = [
 [[package]]
 name = "thiserror"
-version = "1.0.23"
+version = "1.0.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146"
+checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e"
 dependencies = [
 "thiserror-impl",
 ]
 [[package]]
 name = "thiserror-impl"
-version = "1.0.23"
+version = "1.0.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1"
+checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0"
 dependencies = [
 "proc-macro2",
 "quote",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -1,11 +1,16 @@
 [package]
 name = "fif"
 description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
-version = "0.2.1"
+version = "0.2.3"
 authors = ["Lynnesbian <lynne@bune.city>"]
 edition = "2018"
 license = "GPL-3.0-or-later"
 rust-version = "1.43.0" # cached requires 1.42.0
 repository = "https://git.bune.city/lynnesbian/fif"
 readme = "README.md"
 keywords = ["mime", "mimetype", "utilities", "tools"]
 categories = ["command-line-utilities"]
 exclude = [".idea/", "Cross.toml", "*.sh"]
 #resolver = "2"
 #license-file = "LICENSE"
@ -24,7 +29,10 @@ snailquote = "0.3.0"
 once_cell = "1.5.2"
 rayon = { version = "1.5.0", optional = true }
 infer = { version = "0.3.4", optional = true }
 exitcode = "1.1.2"
 # use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd
 [target.'cfg(not(target_os = "windows"))'.dependencies]
 xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd", optional = true }
 [dependencies.clap]
--- a/Cross.toml
+++ b/Cross.toml
@ -0,0 +1,2 @@
 [build.env]
 passthrough = ["RUST_BACKTRACE", "RUST_LOG"]
--- a/README.md
+++ b/README.md
@ -0,0 +1,33 @@
 fif
 ===
 A command-line tool for detecting and optionally correcting files with incorrect extensions.
 ## Installation
 ```bash
 cargo install --locked fif
 ```
 ## Usage
 See `fif --help` for more.
 ### The basics
 The simplest way to use fif looks like this:
 ```bash
 fif -E images ~/Pictures
 ```
 This command will scan all of the files with extensions used by image files (.jpg, .png, etc.) in your `~/Pictures`
 directory.
 You can also manually specify a set of extensions to use:
 ```bash
 fif -e jpeg,jpg,zip,docx ~/Documents
 ```
 By default, fif will output a bash script that can be used to fix all the files it found with incorrect file extensions.
 You might find it useful to output this script to a file (rather than to stdout):
 ```bash
 fif -E images ~/Pictures > output.sh
 ```
--- a/clippy.sh
+++ b/clippy.sh
@ -0,0 +1,17 @@
 #!/bin/bash
 fd -e rs -x touch {}
 cargo clippy -- \
 -W clippy::nursery \
 -W clippy::perf \
 -W clippy::pedantic \
 -W clippy::complexity \
 -W clippy::cargo \
 -A clippy::unused_io_amount \
 -A clippy::redundant_closure_for_method_calls \
 -A clippy::shadow_unrelated \
 # ALLOWS:
 # unused_io_amount: there are two places where i want to read up to X bytes and i'm fine with getting less than that
 # redundant_closure...: the alternative is often much more verbose
 # shadow_unrelated: sometimes things that seem unrelated are actually related ;)
--- a/src/extensionset.rs
+++ b/src/extensionset.rs
@ -7,18 +7,22 @@ pub enum ExtensionSet {
 	Videos,
 	Media,
 	Documents,
-	Archives
+	Archives,
 }
 impl ExtensionSet {
 	pub fn extensions(&self) -> Vec<&str> {
 		match self {
-			Self::Images => vec!["png", "jpg", "jpeg", "webp", "raw", "gif", "apng", "tga", "bmp", "tif", "tiff", "heif",
+			Self::Images => mime_guess::get_mime_extensions_str("image/*"),
-				"avif", "jp2", "mng", "svg"],
+			Self::Videos => mime_guess::get_mime_extensions_str("video/*"),
-			Self::Videos => vec!["webm", "mp4", "mkv", "mov", "avi", "m4v", "wmv", "bik", "ogv", "qt", "3gp", "3g2", "divx"],
+			Self::Audio => mime_guess::get_mime_extensions_str("audio/*"),
-			Self::Audio => vec!["ogg", "oga", "opus", "mp3", "m4a", "aac", "flac", "ape", "midi", "mid", "alac", "wav",
+			Self::Documents => Some(&[
-				"aiff", "aa3", "at3"],
+				"doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "pdf", "odt", "ods", "odp",
-			_ => todo!()
+			][..]),
-		}
+			// many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used
 			// somehow to extract extensions for compressed files from mime_guess?
 			Self::Archives => Some(&["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2"][..]),
 			_ => todo!(),
 		}.unwrap().to_vec()
 	}
 }
--- a/src/formats.rs
+++ b/src/formats.rs
@ -1,8 +1,6 @@
 use std::io::{self, Write};
 #[cfg(unix)]
 use std::os::unix::ffi::OsStrExt;
 #[cfg(windows)]
 use std::os::windows::ffi::OsStrExt;
 use std::path::PathBuf;
 use snailquote::escape;
@ -14,20 +12,49 @@ const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
 type Entries = [Result<Findings, (ScanError, PathBuf)>];
-fn write_pathbuf<W: Write>(f: &mut W, path: &PathBuf) -> io::Result<()> {
+enum Writable<'a> {
-	match path.to_str() {
+	String(&'a str),
-		Some(string) => {
+	Path(&'a PathBuf),
-			write!(f, "{}", escape(string))
+	Space,
 	Newline,
 }
 // the lifetime of a lifetime
 impl<'a> From<&'a str> for Writable<'a> {
 	fn from(s: &'a str) -> Writable<'a> {
 		Writable::String(s)
 	}
-		None => {
+}
-			write!(f, "'")?;
+
 impl<'a> From<&'a PathBuf> for Writable<'a> {
 	fn from(p: &'a PathBuf) -> Writable<'a> {
 		Writable::Path(p)
 	}
 }
 fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
 	// ehhhh
 	for writeable in writeables {
 		match writeable {
 			Writable::Space => write!(f, " ")?,
 			Writable::Newline => writeln!(f, )?,
 			Writable::String(s) => write!(f, "{}", s)?,
 			Writable::Path(path) => {
 				if let Some(string) = path.to_str() {
 					write!(f, "{}", escape(string))?
 				} else {
 					write!(f, "'''")?;
 					#[cfg(unix)]
 						f.write_all(&*path.as_os_str().as_bytes())?;
 					#[cfg(windows)]
-			f.write_all(&*path.as_os_str().encode_wide().collect())?; // TODO: TEST THIS
+					write!(f, "{}", path.as_os_str().to_string_lossy())?; // TODO: implement bonked strings for windows
-			write!(f, "'")
+					// f.write_all(&*path.as_os_str().encode_wide().collect::<Vec<u16>>())?;
 					write!(f, "'''")?
 				}
 			}
 		}
 	}
 	Ok(())
 }
 pub trait Format {
@ -40,22 +67,19 @@ pub trait Format {
 	fn footer<W: Write>(&self, entries: &Entries, f: &mut W) -> io::Result<()>;
 	fn write_all<W: Write>(&self, entries: &Entries, f: &mut W) -> io::Result<()> {
-		// TODO: clean this up - it's horrifying
+		// TODO: clean this up - it's kinda messy
 		self.header(entries, f)?;
 		for entry in entries {
 			match entry {
 				Ok(finding) => {
 					// the file was successfully scanned, and a mimetype was detected
 					if !finding.valid {
 						// the file's extension is wrong - check for known extension
 					if let Some(ext) = finding.recommended_extension() {
 						self.rename(f, &finding.file, &finding.file.with_extension(ext.as_str()))?
 					} else {
 						self.no_known_extension(f, &finding.file)?
 					}
 				}
-				}
+
 				Err(error) => {
 					// something went wrong 0uo
 					match error.0 {
@ -81,30 +105,37 @@ impl Format for Script {
 	}
 	fn rename<W: Write>(&self, f: &mut W, from: &PathBuf, to: &PathBuf) -> io::Result<()> {
-		// TODO: surely there's a better way...
+		smart_write(f, &[
-		write!(f, "mv -v -i -- ")?;
+			"mv -v -i -- ".into(),
-		write_pathbuf(f, from)?;
+			from.into(),
-		write!(f, " ")?;
+			Writable::Space,
-		write_pathbuf(f, to)?;
+			to.into(),
-		writeln!(f,)
+			Writable::Newline
 		])
 	}
 	fn no_known_extension<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()> {
-		write!(f, "printf No known extension for ")?;
+		smart_write(f, &[
-		write_pathbuf(f, path)?;
+			"echo No known extension for ".into(),
-		writeln!(f,"\nprintf '\n'")
+			path.into(),
 			Writable::Newline
 		])
 	}
 	fn unreadable<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()> {
-		write!(f, "# Failed to read ")?;
+		smart_write(f, &[
-		write_pathbuf(f, path)?;
+			"# Failed to read ".into(),
-		writeln!(f,)
+			path.into(),
 			Writable::Newline
 		])
 	}
 	fn unknown_type<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()> {
-		write!(f, "# Failed to detect mime type for ")?;
+		smart_write(f, &[
-		write_pathbuf(f, path)?;
+			"# Failed to detect mime type for ".into(),
-		writeln!(f,)
+			path.into(),
 			Writable::Newline
 		])
 	}
 	fn header<W: Write>(&self, _: &Entries, f: &mut W) -> io::Result<()> {
@ -116,6 +147,6 @@ impl Format for Script {
 	}
 	fn footer<W: Write>(&self, _: &Entries, f: &mut W) -> io::Result<()> {
-		writeln!(f, "\nprintf 'Done.\\n'")
+		writeln!(f, "\necho 'Done.'")
 	}
 }
--- a/src/inspectors.rs
+++ b/src/inspectors.rs
@ -17,14 +17,19 @@ use crate::mimedb::MimeDb;
 // unfortunately, the format that requires 262 bytes for identification is tar, an extremely popular format (in the *nix
 // world, at least). however, tar files almost always appear wrapped in other formats (.tar.gz, .tar.zst, etc) anyway,
 // so maybe it's fine...? maybe this should be configurable by the user? i don't know.
-// empirical testing (or rather, starting from 256 and incrementing until it worked) reveals that mime_type requires
+// empirical testing (or rather, starting from 256 and incrementing until it worked) reveals that xdg-mime requires
 // at least 265 bytes to identify a tar file.
 // additionally, since many formats can be identified with ≤64 bytes, it's worth reading 64 bytes, checking for the mime
 // type, and then reading the full 512 bytes if necessary. in most cases, this will end up being faster on the whole,
 // even though two reads are needed for certain formats, unless the directory being scanned is predominantly made up of
 // such formats.
 const INITIAL_BUF_SIZE: usize = 64;
 const BUF_SIZE: usize = 512;
 pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
-	// attempt to read up to the BUF_SIZE bytes of the file
+	let mut buffer = [0; INITIAL_BUF_SIZE];
 	let mut buffer = [0; 64];
 	let mut file = File::open(path)?;
 	// this lint can be ignored: it's okay if the file isn't long enough to fill the buffer, as we only care about the
@ -33,14 +38,21 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
 	file.read(&mut buffer)?;
 	let r = db.get_type(&buffer);
 	if r.is_some() {
 		return Ok(r);
 	}
 	// attempt to read up to BUF_SIZE bytes of the file.
 	// we've already read the first 64 bytes into a buffer, but i can't see an obvious way to reuse those 64 bytes that's
 	// faster than simply moving the seek position back to the start of the file and re-reading the whole 512 bytes.
 	// for example, starting with a buffer of 64 bytes, then creating a new 512 byte buffer from the contents of the first
 	// buffer with (512 - 64) blank bytes, then finally reading the rest, is much slower than simply reading the file
 	// twice. i don't at all doubt that there IS a way to do this efficiently, and i can think of a way in principle, but
 	// i'm not sure how to express it in a way that is both idiomatic/safe and fast.
 	let mut buffer = [0; BUF_SIZE];
 	file.seek(SeekFrom::Start(0))?;
 	file.read(&mut buffer)?;
 	// warn!("dang");
 	Ok(db.get_type(&buffer))
 }
@ -49,13 +61,34 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
 cached! {
 	MIMEXT;
 	fn mime_extension_lookup(mime: Mime) -> Option<Vec<String>> = {
-		if mime == mime_guess::mime::IMAGE_JPEG {
+
-			// jpeg files are given the primary extension "jpe", due to the extension list being stored in alphabetical order.
+		// match on the mime's `essence_str` rather than the mime itself - mime_guess::get_mime_extensions ignores the type
-			// to handle this particular case, return a custom vector consisting of just "jpg" and "jpeg".
+		// suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. passing the essence_str
-			return Some(vec![String::from("jpg"), String::from("jpeg")]);
+		// (which includes the suffix) fixes this.
-		}
+		match mime_guess::get_mime_extensions_str(mime.essence_str()) {
-		match mime_guess::get_mime_extensions(&mime) { // get a list of possible extensions for this mime type
+			Some(exts) => {
-			Some(exts) => Some(exts.iter().map(|e| String::from(*e)).collect()),
+				let possible_exts: Vec<String> = exts.iter().map(|e| String::from(*e)).collect();
 				Some(if mime == mime_guess::mime::IMAGE_JPEG {
 					// possible_exts starts with "jpe", because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg are
 					// far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can
 					// add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif.
 					[vec![String::from("jpg")], possible_exts].concat()
 				} else if mime == mime_guess::mime::TEXT_XML {
 					// a somewhat similar case arises with XML files - the first suggested extension is "addin", when it should
 					// (in my opinion) be "xml".
 					// there's also another problem: SVG files can easily be misidentified as XML files, because they usually
 					// *are* valid XML - the more whitespace and comments an SVG file begins with, the more bytes must be read
 					// before it's possible to determine that it's an SVG rather than an XML file. to "fix" this, we can add "svg"
 					// as a valid extension for XML files, ensuring that SVG files misidentified as XML will still be considered
 					// to have valid extensions.
 					[vec![String::from("xml"), String::from("svg")], possible_exts].concat()
 				} else {
 					possible_exts
 				})
 			},
 			None => None
 		}
 	}
--- a/src/main.rs
+++ b/src/main.rs
@ -18,7 +18,7 @@ use std::io::{stdout, BufWriter};
 use std::path::{Path, PathBuf};
 use clap::Clap;
-use log::{debug, info, trace, warn};
+use log::{debug, error, info, trace, warn};
 use once_cell::sync::OnceCell;
 #[cfg(feature = "multi-threaded")]
 use rayon::prelude::*;
@ -30,14 +30,15 @@ use crate::formats::{Format, Script};
 use crate::mimedb::MimeDb;
 use crate::parameters::OutputFormat;
 use crate::scanerror::ScanError;
 use std::process::exit;
 mod extensionset;
 mod findings;
 mod formats;
 mod inspectors;
 mod mimedb;
 mod parameters;
 mod scanerror;
 mod extensionset;
 #[cfg(feature = "infer-backend")]
 static MIMEDB: OnceCell<mimedb::InferDb> = OnceCell::new();
@ -49,7 +50,7 @@ static MIMEDB: OnceCell<mimedb::XdgDb> = OnceCell::new();
 #[cfg(windows)]
 fn is_hidden(entry: &DirEntry) -> bool {
 	use std::os::windows::prelude::*;
-	std::fs::metadata(entry) // try to get metadata for file
+	std::fs::metadata(entry.path()) // try to get metadata for file
 		.map_or(
 			false,                             // if getting metadata/attributes fails, assume it's not hidden
 			|f| f.file_attributes() & 0x2 > 0, // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants
@ -116,20 +117,18 @@ fn scan_file(entry: &DirEntry) -> Result<Findings, (ScanError, PathBuf)> {
 	let valid = match known_exts {
 		// there is a known set of extensions for this mimetype, and the file has an extension
 		Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_lowercase().into()),
-		// there is a known set of extensions for this mimetype, but the file has no extension
+		// either this file has no extension, or there is no known set of extensions for this mimetype :(
-		Some(_) => false,
+		Some(_) | None => false,
 		// there is no known set of extensions for this mimetype -- assume it's correct
 		None => true,
 	};
 	Ok(Findings {
 		file: entry.path().to_path_buf(),
-		valid, // make this a function
+		valid,
 		mime: result,
 	})
 }
-fn scan_from_walkdir(entries: Vec<DirEntry>) -> Vec<Result<Findings, (ScanError, PathBuf)>> {
+fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, (ScanError, PathBuf)>> {
 	#[cfg(feature = "multi-threaded")]
 	{
 		// rather than using a standard par_iter, split the entries into chunks of 32 first.
@ -154,6 +153,7 @@ fn scan_from_walkdir(entries: Vec<DirEntry>) -> Vec<Result<Findings, (ScanError,
 fn main() {
 	let args = parameters::Parameters::parse();
 	let mut builder = env_logger::Builder::from_default_env();
 	builder
 		// .format(|buf, r| writeln!(buf, "{} - {}", r.level(), r.args()))
@ -165,22 +165,19 @@ fn main() {
 	#[cfg(feature = "infer-backend")]
 	MIMEDB
 		.set(mimedb::InferDb::init())
-		.or(Err("Failed to initialise MIMEDB"))
+		.or(Err("Failed to initialise Infer backend!"))
 		.unwrap();
 	#[cfg(feature = "xdg-mime-backend")]
 	MIMEDB
 		.set(mimedb::XdgDb::init())
-		.or(Err("Failed to initialise MIMEDB"))
+		.or(Err("Failed to initialise XDG Mime backend!"))
 		.unwrap();
 	debug!("Iterating directory: {:?}", args.dirs);
 	let extensions: Vec<&str> = if let Some(exts) = &args.exts {
-		exts
+		exts.iter().map(|s| s.as_str()).collect()
 			.iter()
 			.map(|s| s.as_str())
 			.collect()
 	} else if let Some(exts) = &args.ext_set {
 		exts.extensions().to_vec()
 	} else {
@ -190,38 +187,81 @@ fn main() {
 	debug!("Checking files with extensions: {:?}", extensions);
 	let stepper = WalkDir::new(&args.dirs).into_iter();
 	let mut probably_fatal_error = false;
 	let entries: Vec<DirEntry> = stepper
 		.filter_entry(|e| wanted_file(&args, &extensions, e)) // filter out unwanted files
-		.filter_map(|e| e.ok()) // ignore anything that fails, e.g. files we don't have read access on
+		.filter_map(|e| {
 			if let Err(err) = &e {
 				debug!("uh oh spaghettio!! {:#?}", e);
 				// log errors to stdout, and remove them from the iterator
 				let path = err.path().map_or("General error".into(), Path::to_string_lossy);
 				if err.depth() == 0 {
 					// if something goes wrong while trying to read the root directory, we're probably not going to get much done
 					probably_fatal_error = true;
 				}
 				// TODO: is there a way to just say `map_or(x, |y| y).thing()` instead of `map_or(x.thing(), |y| y.thing())`?
 				// i don't care whether i'm returning a walkdir error or an io error, i just care about whether or not it
 				// implements ToString (which they both do). map_or doesn't work on trait objects though :(
 				error!(
 					"{}: {}",
 					path,
 					err.io_error().map_or(err.to_string(), |e| e.to_string())
 				);
 				return None;
 			}
 			e.ok()
 		})
 		.filter(|e| !e.file_type().is_dir()) // remove directories from the final list
 		.collect();
 	if entries.is_empty() {
 		if probably_fatal_error {
 			// no need to log anything for fatal errors - fif will already have printed something obvious like
 			// "[ERROR] /fake/path: No such file or directory (os error 2)". we can assume that if this has happened, the dir
 			// given as input doesn't exist or is otherwise unreadable.
 			exit(exitcode::NOINPUT);
 		}
 		warn!("No files matching requested options found.");
 		exit(exitcode::DATAERR);
 	}
 	trace!("Found {} items to check", entries.len());
-	let results = scan_from_walkdir(entries);
+	let results: Vec<_> = scan_from_walkdir(&entries)
 		.into_iter()
 		.filter(
 			|result| result.is_err() || !result.as_ref().unwrap().valid,
 			// TODO: find a way to trace! the valid files without doing ↓
 			// || if result.as_ref().unwrap().valid { trace!("{:?} is fine", result.as_ref().unwrap().file); false } else { true }
 		)
 		.collect();
 	for result in &results {
 		match result {
 			Ok(r) => {
 				if !r.valid {
 				info!(
 					"{:?} should have file extension {}",
 					r.file,
-						r.recommended_extension().unwrap()
+					r.recommended_extension().unwrap_or_else(|| "???".into())
 				)
 				} else {
 					trace!("{:?} is totally fine", r.file)
 				}
 			}
 			Err(f) => warn!("{:#?}: Error 0uo - {}", f.1, f.0),
 		}
 	}
 	if results.is_empty() {
 		info!("All files have valid extensions!")
 	}
 	match args.output_format {
 		OutputFormat::Script => {
 			let s = Script::new();
-			s.write_all(&results, &mut BufWriter::new(stdout().lock()))
+			if s.write_all(&results, &mut BufWriter::new(stdout().lock())).is_err() {
-				.expect("failed to output");
+				exit(exitcode::IOERR);
 			}
 		}
 		OutputFormat::Text => todo!(),
 	}
--- a/src/mimedb.rs
+++ b/src/mimedb.rs
@ -17,25 +17,39 @@ pub struct InferDb {
 impl MimeDb for InferDb {
 	fn init() -> Self {
 		let mut info = infer::Infer::new();
-		// add a random file type just to make sure adding works and such
+
 		// jpeg2000 support because why the stinch not
 		info.add("image/jpeg2000", ".jp2", |buf| {
-			buf.len() > 23
+			buf.len() > 23 && buf[..23] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A\x6A\x70\x32\x20"[..]
-				&& buf[0] == 0x00
+		});
-				&& buf[1] == 0x00
+
-				&& buf[2] == 0x00
+		info.add("image/svg+xml", "svg", |buf| {
-				&& buf[3] == 0x0C
+			// before doing the moderately expensive SVG check, we should make sure that the input is actually SGML-ish
-				&& buf[4] == 0x6A
+			// by "SGML-ish", i mean starts with anywhere from zero to ∞-1 whitespace characters, and then a less than sign,
-				&& buf[5] == 0x50
+			// and then there's some other stuff we don't care about right now
-				&& buf[6] == 0x20
+
-				&& buf[7] == 0x20
+			// so, here comes our fancy pants """""SGML-ish validator"""""
-				&& buf[8] == 0x0D
+			for c in buf {
-				&& buf[9] == 0x0A
+				match c {
-				&& buf[10] == 0x87
+					// whitespace (according to https://www.w3.org/TR/xml/#NT-S)
-				&& buf[11] == 0x0A
+					b'\t' | b'\r' | b'\n' | b'\x20' => continue,
-				&& buf[20] == 0x6A
+					b'<' => break,
-				&& buf[21] == 0x70
+					_ => return false,
-				&& buf[22] == 0x32
+				}
-				&& buf[23] == 0x20
+			}
 			// finally, to check whether or not the file is an SVG:
 			// - split the buffer up into chunks separated by the less than sign
 			// - check to see if this chunk starts with any of these identifiers:
 			let identifiers: Vec<&[u8]> = vec![b"svg", b"SVG", b"!DOCTYPE svg", b"!DOCTYPE SVG"];
 			// - if it does, the nested `any` will short circuit and immediately return true, causing the parent `any` to do
 			//   the same
 			// - and finally, if none of the chunks match, we'll return false
 			// TODO: this is kind of messy, i'd like to clean it up somehow :(
 			buf
 				.split(|c| *c == b'<')
 				.any(|buf| identifiers.iter().any(|id| buf.starts_with(id)))
 		});
 		// unmut
--- a/src/parameters.rs
+++ b/src/parameters.rs
@ -1,8 +1,8 @@
 use std::path::PathBuf;
 use crate::extensionset::ExtensionSet;
 use clap::Clap;
 use smartstring::{LazyCompact, SmartString};
 use crate::extensionset::ExtensionSet;
 #[derive(Clap, PartialEq, Debug)]
 pub enum OutputFormat {
@ -13,7 +13,13 @@ pub enum OutputFormat {
 #[derive(Clap, Debug)]
 pub struct Parameters {
 	/// Only examine files with these extensions (Comma-separated list)
-	#[clap(short, long, use_delimiter = true, require_delimiter = true, required_unless_present = "ext-set")]
+	#[clap(
 		short,
 		long,
 		use_delimiter = true,
 		require_delimiter = true,
 		required_unless_present = "ext-set"
 	)]
 	pub exts: Option<Vec<SmartString<LazyCompact>>>,
 	/// write good docs 0uo
--- a/src/scanerror.rs
+++ b/src/scanerror.rs
@ -1,5 +1,6 @@
 use std::fmt::{Display, Formatter, Result};
 #[derive(Debug)]
 pub enum ScanError {
 	File,
 	Mime,
Author	SHA1	Message	Date
Lynne	19038d3493	slightly better readme	2021-02-22 02:08:08 +10:00
Lynne	c9209c2dbc	minor tweaks for what should/shouldn't be included in git/crate	2021-02-22 02:02:27 +10:00
Lynne	aedfd6d57f	retrieve extensions from mime_guess, minor code changes, added a readme and such	2021-02-22 01:55:27 +10:00
Lynne	b95c8ec85c	cleaned up formats.rs	2021-02-22 00:46:51 +10:00
Lynne	729576b803	thanks cargo update	2021-02-22 00:20:17 +10:00
Lynne	9091850ec5	thanks rustfmt	2021-02-22 00:15:09 +10:00
Lynne	7a2f009622	thanks clippy	2021-02-22 00:09:53 +10:00
Lynne	986717a6ae	nicer error handling, exit codes	2021-02-22 00:07:50 +10:00
Lynne	426e09fb05	SVG support, better and more comments, minor code cleanup	2021-02-21 21:30:58 +10:00
Lynne	21fb26e3da	removed invalid cargo specifier	2021-02-21 16:59:25 +10:00
Lynne	9e51525d27	hide xdg-mime-backend for windows users, cargo update	2021-02-20 04:02:54 +10:00
Lynne	7fdc75c9e9	windows support! and other stuff	2021-02-20 03:57:36 +10:00
		`@ -0,0 +1,2 @@`
							`[build.env]`
							`passthrough = ["RUST_BACKTRACE", "RUST_LOG"]`
editor.table_modal.label.rows
editor.table_modal.label.columns