Compare commits

..

6 commits

9 changed files with 215 additions and 255 deletions

1
.gitignore vendored
View file

@ -5,3 +5,4 @@ fif_*
*.sh *.sh
!clippy.sh !clippy.sh
cargo-timing*.html cargo-timing*.html
todo.txt

77
Cargo.lock generated
View file

@ -39,12 +39,6 @@ dependencies = [
"once_cell", "once_cell",
] ]
[[package]]
name = "cc"
version = "1.0.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd"
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.0" version = "1.0.0"
@ -106,28 +100,26 @@ dependencies = [
[[package]] [[package]]
name = "crossbeam-epoch" name = "crossbeam-epoch"
version = "0.9.2" version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d60ab4a8dba064f2fbb5aa270c28da5cf4bbd0e72dae1140a6b0353a779dbe00" checksum = "2584f639eb95fea8c798496315b297cf81b9b58b6d30ab066a75455333cf4b12"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"crossbeam-utils", "crossbeam-utils",
"lazy_static", "lazy_static",
"loom",
"memoffset", "memoffset",
"scopeguard", "scopeguard",
] ]
[[package]] [[package]]
name = "crossbeam-utils" name = "crossbeam-utils"
version = "0.8.2" version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bae8f328835f8f5a6ceb6a7842a7f2d0c03692adb5c889347235d59194731fe3" checksum = "e7e9d99fa91428effe99c5c6d4634cdeba32b8cf784fc428a2a687f61a952c49"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"cfg-if", "cfg-if",
"lazy_static", "lazy_static",
"loom",
] ]
[[package]] [[package]]
@ -176,9 +168,10 @@ checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193"
[[package]] [[package]]
name = "fif" name = "fif"
version = "0.2.3" version = "0.2.5"
dependencies = [ dependencies = [
"cached", "cached",
"cfg-if",
"clap", "clap",
"env_logger", "env_logger",
"exitcode", "exitcode",
@ -193,19 +186,6 @@ dependencies = [
"xdg-mime", "xdg-mime",
] ]
[[package]]
name = "generator"
version = "0.6.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cdc09201b2e8ca1b19290cf7e65de2246b8e91fb6874279722189c4de7b94dc"
dependencies = [
"cc",
"libc",
"log",
"rustc_version",
"winapi",
]
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.2.2" version = "0.2.2"
@ -297,17 +277,6 @@ dependencies = [
"cfg-if", "cfg-if",
] ]
[[package]]
name = "loom"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d44c73b4636e497b4917eb21c33539efa3816741a2d3ff26c6316f1b529481a4"
dependencies = [
"cfg-if",
"generator",
"scoped-tls",
]
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.3.4" version = "2.3.4"
@ -362,9 +331,9 @@ dependencies = [
[[package]] [[package]]
name = "once_cell" name = "once_cell"
version = "1.5.2" version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13bd41f508810a131401606d54ac32a467c97172d74ba7662562ebba5ad07fa0" checksum = "10acf907b94fc1b1a152d08ef97e7759650268cf986bf127f387e602b02c7e5a"
[[package]] [[package]]
name = "os_str_bytes" name = "os_str_bytes"
@ -458,15 +427,6 @@ dependencies = [
"redox_syscall", "redox_syscall",
] ]
[[package]]
name = "rustc_version"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
dependencies = [
"semver",
]
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.5" version = "1.0.5"
@ -482,33 +442,12 @@ dependencies = [
"winapi-util", "winapi-util",
] ]
[[package]]
name = "scoped-tls"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2"
[[package]] [[package]]
name = "scopeguard" name = "scopeguard"
version = "1.1.0" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "semver"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
dependencies = [
"semver-parser",
]
[[package]]
name = "semver-parser"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]] [[package]]
name = "smartstring" name = "smartstring"
version = "0.2.6" version = "0.2.6"

View file

@ -1,7 +1,7 @@
[package] [package]
name = "fif" name = "fif"
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions." description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
version = "0.2.3" version = "0.2.5"
authors = ["Lynnesbian <lynne@bune.city>"] authors = ["Lynnesbian <lynne@bune.city>"]
edition = "2018" edition = "2018"
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"
@ -15,10 +15,10 @@ exclude = [".idea/", "Cross.toml", "*.sh"]
#license-file = "LICENSE" #license-file = "LICENSE"
[features] [features]
default = ["multi-threaded", "infer-backend"] default = ["multi-threaded"]
multi-threaded = ["rayon"] multi-threaded = ["rayon"]
infer-backend = ["infer"] infer-backend = []
xdg-mime-backend = ["xdg-mime"] xdg-mime-backend = []
[dependencies] [dependencies]
walkdir = "2.3.1" walkdir = "2.3.1"
@ -27,13 +27,14 @@ smartstring = "0.2.6"
mime_guess = "2.0.3" mime_guess = "2.0.3"
snailquote = "0.3.0" snailquote = "0.3.0"
once_cell = "1.5.2" once_cell = "1.5.2"
infer = { version = "0.3.4" }
rayon = { version = "1.5.0", optional = true } rayon = { version = "1.5.0", optional = true }
infer = { version = "0.3.4", optional = true }
exitcode = "1.1.2" exitcode = "1.1.2"
cfg-if = "1.0.0"
# use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd # use git version while waiting on a release incorporating https://github.com/ebassi/xdg-mime-rs/commit/de5a6dd
[target.'cfg(not(target_os = "windows"))'.dependencies] [target.'cfg(not(target_os = "windows"))'.dependencies]
xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd", optional = true } xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3", rev = "de5a6dd" }
[dependencies.clap] [dependencies.clap]
version = "3.0.0-beta.2" version = "3.0.0-beta.2"

View file

@ -16,13 +16,17 @@ impl ExtensionSet {
Self::Images => mime_guess::get_mime_extensions_str("image/*"), Self::Images => mime_guess::get_mime_extensions_str("image/*"),
Self::Videos => mime_guess::get_mime_extensions_str("video/*"), Self::Videos => mime_guess::get_mime_extensions_str("video/*"),
Self::Audio => mime_guess::get_mime_extensions_str("audio/*"), Self::Audio => mime_guess::get_mime_extensions_str("audio/*"),
Self::Documents => Some(&[ Self::Documents => Some(
"doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "pdf", "odt", "ods", "odp", &[
][..]), "pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
][..],
),
// many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used // many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used
// somehow to extract extensions for compressed files from mime_guess? // somehow to extract extensions for compressed files from mime_guess?
Self::Archives => Some(&["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2"][..]), Self::Archives => Some(&["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2"][..]),
_ => todo!(), _ => todo!(),
}.unwrap().to_vec() }
.unwrap()
.to_vec()
} }
} }

View file

@ -105,37 +105,38 @@ impl Format for Script {
} }
fn rename<W: Write>(&self, f: &mut W, from: &PathBuf, to: &PathBuf) -> io::Result<()> { fn rename<W: Write>(&self, f: &mut W, from: &PathBuf, to: &PathBuf) -> io::Result<()> {
smart_write(f, &[ smart_write(
f,
&[
"mv -v -i -- ".into(), "mv -v -i -- ".into(),
from.into(), from.into(),
Writable::Space, Writable::Space,
to.into(), to.into(),
Writable::Newline Writable::Newline,
]) ],
)
} }
fn no_known_extension<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()> { fn no_known_extension<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()> {
smart_write(f, &[ smart_write(
"echo No known extension for ".into(), f,
path.into(), &["echo No known extension for ".into(), path.into(), Writable::Newline],
Writable::Newline )
])
} }
fn unreadable<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()> { fn unreadable<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()> {
smart_write(f, &[ smart_write(f, &["# Failed to read ".into(), path.into(), Writable::Newline])
"# Failed to read ".into(),
path.into(),
Writable::Newline
])
} }
fn unknown_type<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()> { fn unknown_type<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()> {
smart_write(f, &[ smart_write(
f,
&[
"# Failed to detect mime type for ".into(), "# Failed to detect mime type for ".into(),
path.into(), path.into(),
Writable::Newline Writable::Newline,
]) ],
)
} }
fn header<W: Write>(&self, _: &Entries, f: &mut W) -> io::Result<()> { fn header<W: Write>(&self, _: &Entries, f: &mut W) -> io::Result<()> {

View file

@ -10,24 +10,12 @@ use smartstring::alias::String;
use crate::mimedb::MimeDb; use crate::mimedb::MimeDb;
// use log::{debug, warn}; // rather than reading once into a large buffer, it tends to be faster to first try identifying the file from a small
// chunk read from the top, and *then* proceeding with the large buffer. many file formats can be easily identified by
// the first 128 bytes. of course, not all formats can, and some (OOXML...) require reading a long ways in.
// from looking at the files in https://github.com/bojand/infer/tree/master/src/matchers, the format with the largest const INITIAL_BUF_SIZE: usize = 128;
// buffer size requirement for identification requires 262 bytes, and the next largest buffer necessary is only 131 const BUF_SIZE: usize = 4096;
// bytes. as only two formats need more than 128 bytes, it would be fairly reasonable to only read 128 bytes.
// unfortunately, the format that requires 262 bytes for identification is tar, an extremely popular format (in the *nix
// world, at least). however, tar files almost always appear wrapped in other formats (.tar.gz, .tar.zst, etc) anyway,
// so maybe it's fine...? maybe this should be configurable by the user? i don't know.
// empirical testing (or rather, starting from 256 and incrementing until it worked) reveals that xdg-mime requires
// at least 265 bytes to identify a tar file.
// additionally, since many formats can be identified with ≤64 bytes, it's worth reading 64 bytes, checking for the mime
// type, and then reading the full 512 bytes if necessary. in most cases, this will end up being faster on the whole,
// even though two reads are needed for certain formats, unless the directory being scanned is predominantly made up of
// such formats.
const INITIAL_BUF_SIZE: usize = 64;
const BUF_SIZE: usize = 512;
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> { pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
let mut buffer = [0; INITIAL_BUF_SIZE]; let mut buffer = [0; INITIAL_BUF_SIZE];
@ -38,19 +26,27 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
#[allow(clippy::unused_io_amount)] #[allow(clippy::unused_io_amount)]
file.read(&mut buffer)?; file.read(&mut buffer)?;
let r = db.get_type(&buffer); let r = db.get_type(&buffer).filter(|mime|
// some mime types should be investigated further, reading up to BUF_SIZE even if they've been determined already
// one such type is XML - there are many more specific types that can be determined by reading further (such as SVG)
mime != &mime_guess::mime::TEXT_XML
// another is ZIP - many file formats (DOCX, ODT, JAR...) are just ZIP files with particular data structures.
// determining that a file is in one of the MS office formats in particular requires looking quite far into the
// file.
&& mime != &Mime::from_str("application/zip").unwrap());
if r.is_some() { if r.is_some() {
return Ok(r); return Ok(r);
} }
// attempt to read up to the BUF_SIZE bytes of the file. // attempt to read up to the BUF_SIZE bytes of the file.
// we've already read the first 64 bytes into a buffer, but i can't see an obvious way to reuse those 64 bytes that's // we've already read the first 128 bytes into a buffer, but i can't see an obvious way to reuse them in a way that's
// faster than simply moving the seek position back to the start of the file and re-reading the whole 512 bytes. // faster than simply moving the seek position back to the start of the file and re-reading the whole BUF_SIZE bytes.
// for example, starting with a buffer of 64 bytes, then creating a new 512 byte buffer from the contents of the first // for example, reading (BUF_SIZE - INITIAL_BUF_SIZE) bytes, then concatenating the original INITIAL_BUF_SIZE buffer
// buffer with (512 - 64) blank bytes, then finally reading the rest, is much slower than simply reading the file // with this new one would presumably be faster - but it's not. i think it's more expensive to create the iterator,
// twice. i don't at all doubt that there IS a way to do this efficiently, and i can think of a way in principle, but // collect the contents, etc. i'll have to look into this more. i don't at all doubt that there IS a way to do this
// i'm not sure how to express it in a way that is both idiomatic/safe and fast. // efficiently, and i can think of a way in principle, but i'm not sure how to express it in a way that is both
// idiomatic/safe and fast.
let mut buffer = [0; BUF_SIZE]; let mut buffer = [0; BUF_SIZE];
file.seek(SeekFrom::Start(0))?; file.seek(SeekFrom::Start(0))?;
file.read(&mut buffer)?; file.read(&mut buffer)?;
@ -86,19 +82,14 @@ cached! {
// to have valid extensions. // to have valid extensions.
[vec![String::from("xml"), String::from("svg")], possible_exts].concat() [vec![String::from("xml"), String::from("svg")], possible_exts].concat()
} else if mime == Mime::from_str("application/msword").unwrap() {
// classic office files considered harmful
vec![String::from("doc"), String::from("xls"), String::from("ppt")]
} else if mime == Mime::from_str("application/zip").unwrap() { } else if mime == Mime::from_str("application/zip").unwrap() {
// until proper document support is added, treat all common document extensions as zips // neither xdg-mime nor infer seem to be able to detect office XML files properly...
[vec![ [vec![String::from("zip"), String::from("docx"), String::from("xlsx"), String::from("pptx")], possible_exts].concat()
String::from("zip"),
String::from("docx"),
String::from("pptx"),
String::from("xlsx"),
String::from("odt"),
String::from("ods"),
String::from("odp"),
String::from("pages"),
String::from("key"),
String::from("numbers")], possible_exts].concat()
} else { } else {
possible_exts possible_exts
}) })

View file

@ -17,6 +17,7 @@
use std::io::{stdout, BufWriter}; use std::io::{stdout, BufWriter};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use cfg_if::cfg_if;
use clap::Clap; use clap::Clap;
use log::{debug, error, info, trace, warn}; use log::{debug, error, info, trace, warn};
use once_cell::sync::OnceCell; use once_cell::sync::OnceCell;
@ -40,14 +41,16 @@ mod mimedb;
mod parameters; mod parameters;
mod scanerror; mod scanerror;
#[cfg(feature = "infer-backend")] cfg_if! {
if #[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] {
static MIMEDB: OnceCell<mimedb::InferDb> = OnceCell::new(); static MIMEDB: OnceCell<mimedb::InferDb> = OnceCell::new();
} else {
#[cfg(feature = "xdg-mime-backend")]
static MIMEDB: OnceCell<mimedb::XdgDb> = OnceCell::new(); static MIMEDB: OnceCell<mimedb::XdgDb> = OnceCell::new();
}
}
// TODO: test if this actually works on a windows machine cfg_if! {
#[cfg(windows)] if #[cfg(windows)] {
fn is_hidden(entry: &DirEntry) -> bool { fn is_hidden(entry: &DirEntry) -> bool {
use std::os::windows::prelude::*; use std::os::windows::prelude::*;
std::fs::metadata(entry.path()) // try to get metadata for file std::fs::metadata(entry.path()) // try to get metadata for file
@ -56,14 +59,15 @@ fn is_hidden(entry: &DirEntry) -> bool {
|f| f.file_attributes() & 0x2 > 0, // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants |f| f.file_attributes() & 0x2 > 0, // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants
) )
} }
} else {
#[cfg(not(windows))]
fn is_hidden(entry: &DirEntry) -> bool { fn is_hidden(entry: &DirEntry) -> bool {
entry entry
.file_name() .file_name()
.to_str() .to_str()
.map_or(false, |f| f.starts_with('.') && f != ".") .map_or(false, |f| f.starts_with('.') && f != ".")
} }
}
}
fn wanted_file(args: &parameters::Parameters, exts: &[&str], entry: &DirEntry) -> bool { fn wanted_file(args: &parameters::Parameters, exts: &[&str], entry: &DirEntry) -> bool {
if !args.scan_hidden && is_hidden(entry) { if !args.scan_hidden && is_hidden(entry) {
@ -129,8 +133,8 @@ fn scan_file(entry: &DirEntry) -> Result<Findings, (ScanError, PathBuf)> {
} }
fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, (ScanError, PathBuf)>> { fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, (ScanError, PathBuf)>> {
#[cfg(feature = "multi-threaded")] cfg_if! {
{ if #[cfg(feature = "multi-threaded")] {
// rather than using a standard par_iter, split the entries into chunks of 32 first. // rather than using a standard par_iter, split the entries into chunks of 32 first.
// this allows each spawned thread to handle 32 files before closing, rather than creating a new thread for // this allows each spawned thread to handle 32 files before closing, rather than creating a new thread for
// each file. this leads to a pretty substantial speedup that i'm pretty substantially happy about 0u0 // each file. this leads to a pretty substantial speedup that i'm pretty substantially happy about 0u0
@ -143,13 +147,11 @@ fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, (ScanError, P
.collect::<Vec<_>>() .collect::<Vec<_>>()
}) })
.collect() .collect()
} } else {
#[cfg(not(feature = "multi-threaded"))]
{
entries.iter().map(|entry: &DirEntry| scan_file(entry)).collect() entries.iter().map(|entry: &DirEntry| scan_file(entry)).collect()
} }
} }
}
fn main() { fn main() {
let args = parameters::Parameters::parse(); let args = parameters::Parameters::parse();
@ -162,17 +164,19 @@ fn main() {
// .target(env_logger::Target::Stdout) // log to stdout rather than stderr // .target(env_logger::Target::Stdout) // log to stdout rather than stderr
.init(); .init();
#[cfg(feature = "infer-backend")] cfg_if! {
if #[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] {
MIMEDB MIMEDB
.set(mimedb::InferDb::init()) .set(mimedb::InferDb::init())
.or(Err("Failed to initialise Infer backend!")) .or(Err("Failed to initialise Infer backend!"))
.unwrap(); .unwrap();
} else {
#[cfg(feature = "xdg-mime-backend")]
MIMEDB MIMEDB
.set(mimedb::XdgDb::init()) .set(mimedb::XdgDb::init())
.or(Err("Failed to initialise XDG Mime backend!")) .or(Err("Failed to initialise XDG Mime backend!"))
.unwrap(); .unwrap();
}
}
debug!("Iterating directory: {:?}", args.dirs); debug!("Iterating directory: {:?}", args.dirs);
@ -253,7 +257,8 @@ fn main() {
} }
if results.is_empty() { if results.is_empty() {
info!("All files have valid extensions!") info!("All files have valid extensions!");
exit(0);
} }
match args.output_format { match args.output_format {

View file

@ -1,6 +1,4 @@
#[cfg(feature = "infer-backend")] use cfg_if::cfg_if;
use std::str::FromStr;
use mime_guess::Mime; use mime_guess::Mime;
pub trait MimeDb { pub trait MimeDb {
@ -8,12 +6,21 @@ pub trait MimeDb {
fn get_type(&self, data: &[u8]) -> Option<Mime>; fn get_type(&self, data: &[u8]) -> Option<Mime>;
} }
#[cfg(feature = "infer-backend")] cfg_if! {
if #[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] {
use std::str::FromStr;
pub struct InferDb { pub struct InferDb {
db: infer::Infer, db: infer::Infer,
} }
#[cfg(feature = "infer-backend")] fn open_document_check(buf: &[u8], kind: &str) -> bool {
let mime = format!("application/vnd.oasis.opendocument.{}", kind);
let mime = mime.as_bytes();
buf.len() > 38 + mime.len() && buf.starts_with(b"PK\x03\x04") && buf[38..mime.len() + 38] == mime[..]
}
impl MimeDb for InferDb { impl MimeDb for InferDb {
fn init() -> Self { fn init() -> Self {
let mut info = infer::Infer::new(); let mut info = infer::Infer::new();
@ -23,6 +30,18 @@ impl MimeDb for InferDb {
buf.len() > 23 && buf[..23] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A\x6A\x70\x32\x20"[..] buf.len() > 23 && buf[..23] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A\x6A\x70\x32\x20"[..]
}); });
info.add("application/vnd.oasis.opendocument.text", "odt", |buf| {
open_document_check(buf, "text")
});
info.add("application/vnd.oasis.opendocument.spreadsheet", "ods", |buf| {
open_document_check(buf, "spreadsheet")
});
info.add("application/vnd.oasis.opendocument.presentation", "odp", |buf| {
open_document_check(buf, "presentation")
});
info.add("image/svg+xml", "svg", |buf| { info.add("image/svg+xml", "svg", |buf| {
// before doing the moderately expensive SVG check, we should make sure that the input is actually SGML-ish // before doing the moderately expensive SVG check, we should make sure that the input is actually SGML-ish
// by "SGML-ish", i mean starts with anywhere from zero to ∞-1 whitespace characters, and then a less than sign, // by "SGML-ish", i mean starts with anywhere from zero to ∞-1 whitespace characters, and then a less than sign,
@ -62,21 +81,19 @@ impl MimeDb for InferDb {
self.db.get(data).map(|f| Mime::from_str(f.mime_type()).unwrap()) self.db.get(data).map(|f| Mime::from_str(f.mime_type()).unwrap())
} }
} }
} else {
#[cfg(feature = "xdg-mime-backend")]
pub struct XdgDb { pub struct XdgDb {
db: xdg_mime::SharedMimeInfo, db: xdg_mime::SharedMimeInfo,
} }
#[cfg(feature = "xdg-mime-backend")]
impl MimeDb for XdgDb { impl MimeDb for XdgDb {
fn init() -> Self { fn init() -> Self {
Self { Self { db: xdg_mime::SharedMimeInfo::new() }
db: xdg_mime::SharedMimeInfo::new(),
}
} }
fn get_type(&self, data: &[u8]) -> Option<Mime> { fn get_type(&self, data: &[u8]) -> Option<Mime> {
self.db.get_mime_type_for_data(&data).map(|m| m.0) self.db.get_mime_type_for_data(data).map(|m| m.0)
}
}
} }
} }

View file

@ -11,6 +11,7 @@ pub enum OutputFormat {
} }
#[derive(Clap, Debug)] #[derive(Clap, Debug)]
#[clap(version = option_env!("CARGO_PKG_VERSION").unwrap_or("???"))]
pub struct Parameters { pub struct Parameters {
/// Only examine files with these extensions (Comma-separated list) /// Only examine files with these extensions (Comma-separated list)
#[clap( #[clap(