diff --git a/CHANGELOG.md b/CHANGELOG.md index ae54e49..ef3ac05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ Dates are given in YYYY-MM-DD format - for example, the 15th of October 2021 is The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased +### Added +- When using the [`infer`] backend, fif is now able to detect [Mach-O](https://en.wikipedia.org/wiki/Mach-O) binaries + ## v0.5.0 - 2022-01-01 ### Changed - The Minimum Supported Rust Version (MSRV) is now **1.54.0**. diff --git a/Cargo.lock b/Cargo.lock index f8ab360..6ddf7c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -80,9 +80,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "3.0.7" +version = "3.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12e8611f9ae4e068fa3e56931fded356ff745e70987ff76924a6e0ab1c8ef2e3" +checksum = "8c506244a13c87262f84bf16369740d0b7c3850901b6a642aa41b031a710c473" dependencies = [ "atty", "bitflags", @@ -754,9 +754,9 @@ dependencies = [ [[package]] name = "siphasher" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba1eead9e94aa5a2e02de9e7839f96a007f686ae7a1d57c7797774810d24908a" +checksum = "a86232ab60fa71287d7f2ddae4a7073f6b7aac33631c3015abb556f08c6d0a3e" [[package]] name = "smallvec" diff --git a/src/mime_db.rs b/src/mime_db.rs index 39e68d4..54a05e7 100644 --- a/src/mime_db.rs +++ b/src/mime_db.rs @@ -42,28 +42,56 @@ cfg_if! { fn init() -> Self { let mut info = infer::Infer::new(); + // In addition to the file inferences provided by Infer, I've also added a few of my own below. 
Some of them + // replace Infer's existing ones, some of them are less than perfect, and still others are for relatively + // obscure formats, so I'm not really sure whether or not they should be contributed upstream. + + // OpenDocument Text (used by e.g. LibreOffice Writer) info.add("application/vnd.oasis.opendocument.text", "odt", |buf| { open_document_check(buf, "text") }); + // OpenDocument Spreadsheet (LibreOffice Calc) info.add("application/vnd.oasis.opendocument.spreadsheet", "ods", |buf| { open_document_check(buf, "spreadsheet") }); + // OpenDocument Presentation (LibreOffice Impress) info.add("application/vnd.oasis.opendocument.presentation", "odp", |buf| { open_document_check(buf, "presentation") }); + // Ren'Py Archive (Ren'Py: https://www.renpy.org/) info.add("application/x-rpa", "rpa", |buf| { buf.len() >= 34 && buf.starts_with(b"RPA-") && buf[7] == b' ' && buf[24] ==b' ' }); + // Mach-O Binaries (The executable format used by macOS) + // my source for most of this info is this article: https://h3adsh0tzz.com/2020/01/macho-file-format/ + // like linux's ELF binaries, mach-o binaries do not typically have an extension, but if they did, it'd + // probably be something like ".macho", so, that'll do i guess. fif doesn't actually use the extensions + // specified here anyway. + info.add("application/x-mach-binary", "macho", |buf| { + // a 32-bit mach-o header occupies 28 bytes of space, so any input smaller than that cannot be a mach-o + // binary, even if it starts with the magic numbers. + // the three magic numbers that can appear are 0xFEEDFACF, 0xFEEDFACE, and 0xCAFEBABE. the code below + // checks for all three of these, in both big and little endian order. + + // java class files also start with 0xCAFEBABE. since infer doesn't support detecting these files, + // collisions are not an issue. 
if, however, infer does gain support for identifying java class files, the + // 0xCAFEBABE check should be removed, as java bytecode files are far more prevalent than 32-bit universal + // mach-o binaries [citation needed]. + buf.len() >= 28 && [b"\xFE\xED\xFA\xCF", b"\xFE\xED\xFA\xCE", b"\xCA\xFE\xBA\xBE", b"\xCF\xFA\xED\xFE", + b"\xCE\xFA\xED\xFE", b"\xBE\xBA\xFE\xCA"].iter().any(|magic_numbers| buf.starts_with(&magic_numbers[..])) + }); + // info.add("application/x-msi", "msi", |buf| { // TODO: find a way to detect MSI files properly - this just detects those weird windows OLE files and therefore // also picks up on .doc files // buf.starts_with(b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1") // }); + // Scalable Vector Graphics info.add("image/svg+xml", "svg", |buf| { // before doing the moderately expensive SVG check, we should make sure that the input is actually SGML-ish, // by which i mean, starts with anywhere from zero to ∞-1 whitespace characters, and then a less than sign, @@ -83,8 +111,8 @@ cfg_if! 
{ // - split the buffer up into chunks separated by the less than sign // - check to see if this chunk starts with any of these identifiers: let identifiers: Vec<&[u8]> = vec![b"svg", b"SVG", b"!DOCTYPE svg", b"!DOCTYPE SVG"]; - // - if it does, the nested `any` will short circuit and immediately return true, causing the parent `any` to do - // the same + // - if it does, the nested `any` will short circuit and immediately return true, causing the parent `any` to + // do the same // - and finally, if none of the chunks match, we'll return false // TODO: this is kind of messy, i'd like to clean it up somehow :( diff --git a/src/parameters.rs b/src/parameters.rs index 40626d3..f4dea28 100644 --- a/src/parameters.rs +++ b/src/parameters.rs @@ -7,7 +7,7 @@ use std::collections::BTreeSet; use std::path::PathBuf; use cfg_if::cfg_if; -use clap::{Parser, ArgEnum}; +use clap::{ArgEnum, Parser}; use crate::utils::{CLAP_LONG_VERSION, CLAP_VERSION}; use crate::String as StringType; diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 82b2f24..6743f61 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -351,6 +351,7 @@ fn accepts_good_args() { /// Ensures that output from the `-V` and `--version` flags is formatted properly. fn check_version_output() { use std::string::String; + use assert_cmd::Command; use regex::Regex; @@ -364,7 +365,6 @@ fn check_version_output() { output ); - // test `--version` matches the format of "fif x.y.z (OS, example backend, commit #1234abc)" let mut cmd = Command::cargo_bin("fif").unwrap(); let output = cmd.arg("--version").ok().unwrap().stdout;