added comments and mach-o binary support to infer
This commit is contained in:
parent
a0396e2e1e
commit
330b273be6
5 changed files with 40 additions and 8 deletions
|
@ -4,6 +4,10 @@ Dates are given in YYYY-MM-DD format - for example, the 15th of October 2021 is
|
|||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to
|
||||
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## Unreleased
|
||||
### Added
|
||||
- When using the [`infer`] backend, fif is now able to detect [Mach-O](https://en.wikipedia.org/wiki/Mach-O) binaries.
|
||||
|
||||
## v0.5.0 - 2022-01-01
|
||||
### Changed
|
||||
- The Minimum Supported Rust Version (MSRV) is now **1.54.0**.
|
||||
|
|
8
Cargo.lock
generated
8
Cargo.lock
generated
|
@ -80,9 +80,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
|||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "3.0.7"
|
||||
version = "3.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12e8611f9ae4e068fa3e56931fded356ff745e70987ff76924a6e0ab1c8ef2e3"
|
||||
checksum = "8c506244a13c87262f84bf16369740d0b7c3850901b6a642aa41b031a710c473"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bitflags",
|
||||
|
@ -754,9 +754,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.8"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba1eead9e94aa5a2e02de9e7839f96a007f686ae7a1d57c7797774810d24908a"
|
||||
checksum = "a86232ab60fa71287d7f2ddae4a7073f6b7aac33631c3015abb556f08c6d0a3e"
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
|
|
|
@ -42,28 +42,56 @@ cfg_if! {
|
|||
fn init() -> Self {
|
||||
let mut info = infer::Infer::new();
|
||||
|
||||
// In addition to the file inferences provided by Infer, I've also added a few of my own below. Some of them
|
||||
// replace Infer's existing ones, some of them are less than perfect, and still others are for relatively
|
||||
// obscure formats, so I'm not really sure whether or not they should be contributed upstream.
|
||||
|
||||
// OpenDocument Text (used by e.g. LibreOffice Writer)
|
||||
info.add("application/vnd.oasis.opendocument.text", "odt", |buf| {
|
||||
open_document_check(buf, "text")
|
||||
});
|
||||
|
||||
// OpenDocument Spreadsheet (LibreOffice Calc)
|
||||
info.add("application/vnd.oasis.opendocument.spreadsheet", "ods", |buf| {
|
||||
open_document_check(buf, "spreadsheet")
|
||||
});
|
||||
|
||||
// OpenDocument Presentation (LibreOffice Impress)
|
||||
info.add("application/vnd.oasis.opendocument.presentation", "odp", |buf| {
|
||||
open_document_check(buf, "presentation")
|
||||
});
|
||||
|
||||
// Ren'Py Archive (Ren'Py: https://www.renpy.org/)
|
||||
info.add("application/x-rpa", "rpa", |buf| {
|
||||
buf.len() >= 34 && buf.starts_with(b"RPA-") && buf[7] == b' ' && buf[24] ==b' '
|
||||
});
|
||||
|
||||
// Mach-O Binaries (The executable format used by macOS)
|
||||
// my source for most of this info is this article: https://h3adsh0tzz.com/2020/01/macho-file-format/
|
||||
// like linux's ELF binaries, mach-o binaries do not typically have an extension, but if they did, it'd
|
||||
// probably be something like ".macho", so, that'll do i guess. fif doesn't actually use the extensions
|
||||
// specified here anyway.
|
||||
info.add("application/x-mach-binary", "macho", |buf| {
|
||||
// a 32-bit mach-o header occupies 28 bytes of space, so any input smaller than that cannot be a mach-o
|
||||
// binary, even if it starts with the magic numbers.
|
||||
// the three magic numbers that can appear are 0xFEEDFACF, 0xFEEDFACE, and 0xCAFEBABE. the code below
|
||||
// checks for all three of these, in both big and little endian order.
|
||||
|
||||
// java class files also start with 0xCAFEBABE. since infer doesn't support detecting these files,
|
||||
// collisions are not an issue. if, however, infer does gain support for identifying java class files, the
|
||||
// 0xCAFEBABE check should be removed, as java bytecode files are far more prevalent than 32-bit universal
|
||||
// mach-o binaries [citation needed].
|
||||
buf.len() >= 28 && [b"\xFE\xED\xFA\xCF", b"\xFE\xED\xFA\xCE", b"\xCA\xFE\xBA\xBE", b"\xCF\xFA\xED\xFE",
|
||||
b"\xCE\xFA\xED\xFE", b"\xBE\xBA\xFE\xCA"].iter().any(|magic_numbers| buf.starts_with(&magic_numbers[..]))
|
||||
});
|
||||
|
||||
// info.add("application/x-msi", "msi", |buf| {
|
||||
// TODO: find a way to detect MSI files properly - this just detects those weird windows OLE files and therefore
|
||||
// also picks up on .doc files
|
||||
// buf.starts_with(b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1")
|
||||
// });
|
||||
|
||||
// Scalable Vector Graphics
|
||||
info.add("image/svg+xml", "svg", |buf| {
|
||||
// before doing the moderately expensive SVG check, we should make sure that the input is actually SGML-ish,
|
||||
// by which i mean, starts with anywhere from zero to ∞-1 whitespace characters, and then a less than sign,
|
||||
|
@ -83,8 +111,8 @@ cfg_if! {
|
|||
// - split the buffer up into chunks separated by the less than sign
|
||||
// - check to see if this chunk starts with any of these identifiers:
|
||||
let identifiers: Vec<&[u8]> = vec![b"svg", b"SVG", b"!DOCTYPE svg", b"!DOCTYPE SVG"];
|
||||
// - if it does, the nested `any` will short circuit and immediately return true, causing the parent `any` to do
|
||||
// the same
|
||||
// - if it does, the nested `any` will short circuit and immediately return true, causing the parent `any` to
|
||||
// do the same
|
||||
// - and finally, if none of the chunks match, we'll return false
|
||||
|
||||
// TODO: this is kind of messy, i'd like to clean it up somehow :(
|
||||
|
|
|
@ -7,7 +7,7 @@ use std::collections::BTreeSet;
|
|||
use std::path::PathBuf;
|
||||
|
||||
use cfg_if::cfg_if;
|
||||
use clap::{Parser, ArgEnum};
|
||||
use clap::{ArgEnum, Parser};
|
||||
|
||||
use crate::utils::{CLAP_LONG_VERSION, CLAP_VERSION};
|
||||
use crate::String as StringType;
|
||||
|
|
|
@ -351,6 +351,7 @@ fn accepts_good_args() {
|
|||
/// Ensures that output from the `-V` and `--version` flags is formatted properly.
|
||||
fn check_version_output() {
|
||||
use std::string::String;
|
||||
|
||||
use assert_cmd::Command;
|
||||
use regex::Regex;
|
||||
|
||||
|
@ -364,7 +365,6 @@ fn check_version_output() {
|
|||
output
|
||||
);
|
||||
|
||||
|
||||
// test `--version` matches the format of "fif x.y.z (OS, example backend, commit #1234abc)"
|
||||
let mut cmd = Command::cargo_bin("fif").unwrap();
|
||||
let output = cmd.arg("--version").ok().unwrap().stdout;
|
||||
|
|
Loading…
Reference in a new issue