use cfg_if to make the code more readable
This commit is contained in:
parent
b141b85ea7
commit
ebc2bcfb41
4 changed files with 134 additions and 132 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -176,7 +176,7 @@ checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193"
|
|||
|
||||
[[package]]
|
||||
name = "fif"
|
||||
version = "0.2.4"
|
||||
version = "0.2.5"
|
||||
dependencies = [
|
||||
"cached",
|
||||
"cfg-if",
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "fif"
|
||||
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
|
||||
version = "0.2.4"
|
||||
version = "0.2.5"
|
||||
authors = ["Lynnesbian <lynne@bune.city>"]
|
||||
edition = "2018"
|
||||
license = "GPL-3.0-or-later"
|
||||
|
|
107
src/main.rs
107
src/main.rs
|
@ -17,6 +17,7 @@
|
|||
use std::io::{stdout, BufWriter};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use cfg_if::cfg_if;
|
||||
use clap::Clap;
|
||||
use log::{debug, error, info, trace, warn};
|
||||
use once_cell::sync::OnceCell;
|
||||
|
@ -40,29 +41,33 @@ mod mimedb;
|
|||
mod parameters;
|
||||
mod scanerror;
|
||||
|
||||
#[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))]
|
||||
static MIMEDB: OnceCell<mimedb::InferDb> = OnceCell::new();
|
||||
|
||||
#[cfg(any(all(target_os = "linux", not(feature = "infer-backend")), all(not(target_os = "linux"), not(feature = "xdg-mime-backend"))))]
|
||||
static MIMEDB: OnceCell<mimedb::XdgDb> = OnceCell::new();
|
||||
|
||||
// TODO: test if this actually works on a windows machine
|
||||
#[cfg(windows)]
|
||||
fn is_hidden(entry: &DirEntry) -> bool {
|
||||
use std::os::windows::prelude::*;
|
||||
std::fs::metadata(entry.path()) // try to get metadata for file
|
||||
.map_or(
|
||||
false, // if getting metadata/attributes fails, assume it's not hidden
|
||||
|f| f.file_attributes() & 0x2 > 0, // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants
|
||||
)
|
||||
cfg_if! {
|
||||
if #[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] {
|
||||
static MIMEDB: OnceCell<mimedb::InferDb> = OnceCell::new();
|
||||
} else {
|
||||
static MIMEDB: OnceCell<mimedb::XdgDb> = OnceCell::new();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
fn is_hidden(entry: &DirEntry) -> bool {
|
||||
entry
|
||||
.file_name()
|
||||
.to_str()
|
||||
.map_or(false, |f| f.starts_with('.') && f != ".")
|
||||
cfg_if! {
|
||||
// TODO: test if this actually works on a windows machine
|
||||
if #[cfg(windows)] {
|
||||
fn is_hidden(entry: &DirEntry) -> bool {
|
||||
use std::os::windows::prelude::*;
|
||||
std::fs::metadata(entry.path()) // try to get metadata for file
|
||||
.map_or(
|
||||
false, // if getting metadata/attributes fails, assume it's not hidden
|
||||
|f| f.file_attributes() & 0x2 > 0, // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants
|
||||
)
|
||||
}
|
||||
} else {
|
||||
fn is_hidden(entry: &DirEntry) -> bool {
|
||||
entry
|
||||
.file_name()
|
||||
.to_str()
|
||||
.map_or(false, |f| f.starts_with('.') && f != ".")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn wanted_file(args: &parameters::Parameters, exts: &[&str], entry: &DirEntry) -> bool {
|
||||
|
@ -129,25 +134,23 @@ fn scan_file(entry: &DirEntry) -> Result<Findings, (ScanError, PathBuf)> {
|
|||
}
|
||||
|
||||
fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, (ScanError, PathBuf)>> {
|
||||
#[cfg(feature = "multi-threaded")]
|
||||
{
|
||||
// rather than using a standard par_iter, split the entries into chunks of 32 first.
|
||||
// this allows each spawned thread to handle 32 files before closing, rather than creating a new thread for
|
||||
// each file. this leads to a pretty substantial speedup that i'm pretty substantially happy about 0u0
|
||||
entries
|
||||
.par_chunks(32) // split into chunks of 32
|
||||
.flat_map(|chunk| {
|
||||
chunk // return Vec<...> instead of Chunk<Vec<...>>
|
||||
.iter() // iter over the chunk, which is a slice of DirEntry structs
|
||||
.map(|entry| scan_file(entry))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "multi-threaded"))]
|
||||
{
|
||||
entries.iter().map(|entry: &DirEntry| scan_file(entry)).collect()
|
||||
cfg_if! {
|
||||
if #[cfg(feature = "multi-threaded")] {
|
||||
// rather than using a standard par_iter, split the entries into chunks of 32 first.
|
||||
// this allows each spawned thread to handle 32 files before closing, rather than creating a new thread for
|
||||
// each file. this leads to a pretty substantial speedup that i'm pretty substantially happy about 0u0
|
||||
entries
|
||||
.par_chunks(32) // split into chunks of 32
|
||||
.flat_map(|chunk| {
|
||||
chunk // return Vec<...> instead of Chunk<Vec<...>>
|
||||
.iter() // iter over the chunk, which is a slice of DirEntry structs
|
||||
.map(|entry| scan_file(entry))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
entries.iter().map(|entry: &DirEntry| scan_file(entry)).collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -162,17 +165,19 @@ fn main() {
|
|||
// .target(env_logger::Target::Stdout) // log to stdout rather than stderr
|
||||
.init();
|
||||
|
||||
#[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))]
|
||||
MIMEDB
|
||||
.set(mimedb::InferDb::init())
|
||||
.or(Err("Failed to initialise Infer backend!"))
|
||||
.unwrap();
|
||||
|
||||
#[cfg(any(all(target_os = "linux", not(feature = "infer-backend")), all(not(target_os = "linux"), not(feature = "xdg-mime-backend"))))]
|
||||
MIMEDB
|
||||
.set(mimedb::XdgDb::init())
|
||||
.or(Err("Failed to initialise XDG Mime backend!"))
|
||||
.unwrap();
|
||||
cfg_if! {
|
||||
if #[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] {
|
||||
MIMEDB
|
||||
.set(mimedb::InferDb::init())
|
||||
.or(Err("Failed to initialise Infer backend!"))
|
||||
.unwrap();
|
||||
} else {
|
||||
MIMEDB
|
||||
.set(mimedb::XdgDb::init())
|
||||
.or(Err("Failed to initialise XDG Mime backend!"))
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
debug!("Iterating directory: {:?}", args.dirs);
|
||||
|
||||
|
|
155
src/mimedb.rs
155
src/mimedb.rs
|
@ -1,6 +1,4 @@
|
|||
#[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))]
|
||||
use std::str::FromStr;
|
||||
|
||||
use cfg_if::cfg_if;
|
||||
use mime_guess::Mime;
|
||||
|
||||
pub trait MimeDb {
|
||||
|
@ -8,95 +6,94 @@ pub trait MimeDb {
|
|||
fn get_type(&self, data: &[u8]) -> Option<Mime>;
|
||||
}
|
||||
|
||||
#[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))]
|
||||
pub struct InferDb {
|
||||
db: infer::Infer,
|
||||
}
|
||||
cfg_if! {
|
||||
if #[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] {
|
||||
use std::str::FromStr;
|
||||
|
||||
#[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))]
|
||||
fn open_document_check(buf: &[u8], kind: &str) -> bool {
|
||||
let mime = format!("application/vnd.oasis.opendocument.{}", kind);
|
||||
let mime = mime.as_bytes();
|
||||
pub struct InferDb {
|
||||
db: infer::Infer,
|
||||
}
|
||||
|
||||
buf.len() > 38 + mime.len() && buf.starts_with(b"PK\x03\x04") && buf[38..mime.len() + 38] == mime[..]
|
||||
}
|
||||
fn open_document_check(buf: &[u8], kind: &str) -> bool {
|
||||
let mime = format!("application/vnd.oasis.opendocument.{}", kind);
|
||||
let mime = mime.as_bytes();
|
||||
|
||||
#[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))]
|
||||
impl MimeDb for InferDb {
|
||||
fn init() -> Self {
|
||||
let mut info = infer::Infer::new();
|
||||
buf.len() > 38 + mime.len() && buf.starts_with(b"PK\x03\x04") && buf[38..mime.len() + 38] == mime[..]
|
||||
}
|
||||
|
||||
// jpeg2000 support because why the stinch not
|
||||
info.add("image/jpeg2000", ".jp2", |buf| {
|
||||
buf.len() > 23 && buf[..23] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A\x6A\x70\x32\x20"[..]
|
||||
});
|
||||
impl MimeDb for InferDb {
|
||||
fn init() -> Self {
|
||||
let mut info = infer::Infer::new();
|
||||
|
||||
info.add("application/vnd.oasis.opendocument.text", "odt", |buf| {
|
||||
open_document_check(buf, "text")
|
||||
});
|
||||
// jpeg2000 support because why the stinch not
|
||||
info.add("image/jpeg2000", ".jp2", |buf| {
|
||||
buf.len() > 23 && buf[..23] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A\x6A\x70\x32\x20"[..]
|
||||
});
|
||||
|
||||
info.add("application/vnd.oasis.opendocument.spreadsheet", "ods", |buf| {
|
||||
open_document_check(buf, "spreadsheet")
|
||||
});
|
||||
info.add("application/vnd.oasis.opendocument.text", "odt", |buf| {
|
||||
open_document_check(buf, "text")
|
||||
});
|
||||
|
||||
info.add("application/vnd.oasis.opendocument.presentation", "odp", |buf| {
|
||||
open_document_check(buf, "presentation")
|
||||
});
|
||||
info.add("application/vnd.oasis.opendocument.spreadsheet", "ods", |buf| {
|
||||
open_document_check(buf, "spreadsheet")
|
||||
});
|
||||
|
||||
info.add("image/svg+xml", "svg", |buf| {
|
||||
// before doing the moderately expensive SVG check, we should make sure that the input is actually SGML-ish
|
||||
// by "SGML-ish", i mean starts with anywhere from zero to ∞-1 whitespace characters, and then a less than sign,
|
||||
// and then there's some other stuff we don't care about right now
|
||||
info.add("application/vnd.oasis.opendocument.presentation", "odp", |buf| {
|
||||
open_document_check(buf, "presentation")
|
||||
});
|
||||
|
||||
// so, here comes our fancy pants """""SGML-ish validator"""""
|
||||
for c in buf {
|
||||
match c {
|
||||
// whitespace (according to https://www.w3.org/TR/xml/#NT-S)
|
||||
b'\t' | b'\r' | b'\n' | b'\x20' => continue,
|
||||
b'<' => break,
|
||||
_ => return false,
|
||||
}
|
||||
info.add("image/svg+xml", "svg", |buf| {
|
||||
// before doing the moderately expensive SVG check, we should make sure that the input is actually SGML-ish
|
||||
// by "SGML-ish", i mean starts with anywhere from zero to ∞-1 whitespace characters, and then a less than sign,
|
||||
// and then there's some other stuff we don't care about right now
|
||||
|
||||
// so, here comes our fancy pants """""SGML-ish validator"""""
|
||||
for c in buf {
|
||||
match c {
|
||||
// whitespace (according to https://www.w3.org/TR/xml/#NT-S)
|
||||
b'\t' | b'\r' | b'\n' | b'\x20' => continue,
|
||||
b'<' => break,
|
||||
_ => return false,
|
||||
}
|
||||
}
|
||||
|
||||
// finally, to check whether or not the file is an SVG:
|
||||
// - split the buffer up into chunks separated by the less than sign
|
||||
// - check to see if this chunk starts with any of these identifiers:
|
||||
let identifiers: Vec<&[u8]> = vec![b"svg", b"SVG", b"!DOCTYPE svg", b"!DOCTYPE SVG"];
|
||||
// - if it does, the nested `any` will short circuit and immediately return true, causing the parent `any` to do
|
||||
// the same
|
||||
// - and finally, if none of the chunks match, we'll return false
|
||||
|
||||
// TODO: this is kind of messy, i'd like to clean it up somehow :(
|
||||
buf
|
||||
.split(|c| *c == b'<')
|
||||
.any(|buf| identifiers.iter().any(|id| buf.starts_with(id)))
|
||||
});
|
||||
|
||||
// unmut
|
||||
let info = info;
|
||||
|
||||
Self { db: info }
|
||||
}
|
||||
|
||||
// finally, to check whether or not the file is an SVG:
|
||||
// - split the buffer up into chunks separated by the less than sign
|
||||
// - check to see if this chunk starts with any of these identifiers:
|
||||
let identifiers: Vec<&[u8]> = vec![b"svg", b"SVG", b"!DOCTYPE svg", b"!DOCTYPE SVG"];
|
||||
// - if it does, the nested `any` will short circuit and immediately return true, causing the parent `any` to do
|
||||
// the same
|
||||
// - and finally, if none of the chunks match, we'll return false
|
||||
fn get_type(&self, data: &[u8]) -> Option<Mime> {
|
||||
self.db.get(data).map(|f| Mime::from_str(f.mime_type()).unwrap())
|
||||
}
|
||||
}
|
||||
} else {
|
||||
pub struct XdgDb {
|
||||
db: xdg_mime::SharedMimeInfo,
|
||||
}
|
||||
|
||||
// TODO: this is kind of messy, i'd like to clean it up somehow :(
|
||||
buf
|
||||
.split(|c| *c == b'<')
|
||||
.any(|buf| identifiers.iter().any(|id| buf.starts_with(id)))
|
||||
});
|
||||
impl MimeDb for XdgDb {
|
||||
fn init() -> Self {
|
||||
Self { db: xdg_mime::SharedMimeInfo::new() }
|
||||
}
|
||||
|
||||
// unmut
|
||||
let info = info;
|
||||
|
||||
Self { db: info }
|
||||
}
|
||||
|
||||
fn get_type(&self, data: &[u8]) -> Option<Mime> {
|
||||
self.db.get(data).map(|f| Mime::from_str(f.mime_type()).unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(any(all(target_os = "linux", not(feature = "infer-backend")), all(not(target_os = "linux"), not(feature = "xdg-mime-backend"))))]
|
||||
pub struct XdgDb {
|
||||
db: xdg_mime::SharedMimeInfo,
|
||||
}
|
||||
|
||||
#[cfg(any(all(target_os = "linux", not(feature = "infer-backend")), all(not(target_os = "linux"), not(feature = "xdg-mime-backend"))))]
|
||||
impl MimeDb for XdgDb {
|
||||
fn init() -> Self {
|
||||
Self {
|
||||
db: xdg_mime::SharedMimeInfo::new(),
|
||||
fn get_type(&self, data: &[u8]) -> Option<Mime> {
|
||||
self.db.get_mime_type_for_data(&data).map(|m| m.0)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_type(&self, data: &[u8]) -> Option<Mime> {
|
||||
self.db.get_mime_type_for_data(&data).map(|m| m.0)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue