From ebc2bcfb4180c47ad4b948c7521b480802cc15b8 Mon Sep 17 00:00:00 2001 From: Lynnesbian Date: Sat, 27 Feb 2021 12:13:57 +1000 Subject: [PATCH] use cfg_if to make the code more readable --- Cargo.lock | 2 +- Cargo.toml | 2 +- src/main.rs | 107 +++++++++++++++++----------------- src/mimedb.rs | 155 +++++++++++++++++++++++++------------------------- 4 files changed, 134 insertions(+), 132 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2876ad8..7971d4e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -176,7 +176,7 @@ checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193" [[package]] name = "fif" -version = "0.2.4" +version = "0.2.5" dependencies = [ "cached", "cfg-if", diff --git a/Cargo.toml b/Cargo.toml index a64126f..7945b58 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "fif" description = "A command-line tool for detecting and optionally correcting files with incorrect extensions." -version = "0.2.4" +version = "0.2.5" authors = ["Lynnesbian "] edition = "2018" license = "GPL-3.0-or-later" diff --git a/src/main.rs b/src/main.rs index e14d1c3..752f6cb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,6 +17,7 @@ use std::io::{stdout, BufWriter}; use std::path::{Path, PathBuf}; +use cfg_if::cfg_if; use clap::Clap; use log::{debug, error, info, trace, warn}; use once_cell::sync::OnceCell; @@ -40,29 +41,33 @@ mod mimedb; mod parameters; mod scanerror; -#[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] -static MIMEDB: OnceCell = OnceCell::new(); - -#[cfg(any(all(target_os = "linux", not(feature = "infer-backend")), all(not(target_os = "linux"), not(feature = "xdg-mime-backend"))))] -static MIMEDB: OnceCell = OnceCell::new(); - -// TODO: test if this actually works on a windows machine -#[cfg(windows)] -fn is_hidden(entry: &DirEntry) -> bool { - use std::os::windows::prelude::*; - std::fs::metadata(entry.path()) // try to get metadata for 
file - .map_or( - false, // if getting metadata/attributes fails, assume it's not hidden - |f| f.file_attributes() & 0x2 > 0, // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants - ) +cfg_if! { + if #[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] { + static MIMEDB: OnceCell = OnceCell::new(); + } else { + static MIMEDB: OnceCell = OnceCell::new(); + } } -#[cfg(not(windows))] -fn is_hidden(entry: &DirEntry) -> bool { - entry - .file_name() - .to_str() - .map_or(false, |f| f.starts_with('.') && f != ".") +cfg_if! { + // TODO: test if this actually works on a windows machine + if #[cfg(windows)] { + fn is_hidden(entry: &DirEntry) -> bool { + use std::os::windows::prelude::*; + std::fs::metadata(entry.path()) // try to get metadata for file + .map_or( + false, // if getting metadata/attributes fails, assume it's not hidden + |f| f.file_attributes() & 0x2 > 0, // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants + ) + } + } else { + fn is_hidden(entry: &DirEntry) -> bool { + entry + .file_name() + .to_str() + .map_or(false, |f| f.starts_with('.') && f != ".") + } + } } fn wanted_file(args: &parameters::Parameters, exts: &[&str], entry: &DirEntry) -> bool { @@ -129,25 +134,23 @@ fn scan_file(entry: &DirEntry) -> Result { } fn scan_from_walkdir(entries: &[DirEntry]) -> Vec> { - #[cfg(feature = "multi-threaded")] - { - // rather than using a standard par_iter, split the entries into chunks of 32 first. - // this allows each spawned thread to handle 32 files before before closing, rather than creating a new thread for - // each file. 
this leads to a pretty substantial speedup that i'm pretty substantially happy about 0u0 - entries - .par_chunks(32) // split into chunks of 32 - .flat_map(|chunk| { - chunk // return Vec<...> instead of Chunk> - .iter() // iter over the chunk, which is a slice of DirEntry structs - .map(|entry| scan_file(entry)) - .collect::>() - }) - .collect() - } - - #[cfg(not(feature = "multi-threaded"))] - { - entries.iter().map(|entry: &DirEntry| scan_file(entry)).collect() + cfg_if! { + if #[cfg(feature = "multi-threaded")] { + // rather than using a standard par_iter, split the entries into chunks of 32 first. + // this allows each spawned thread to handle 32 files before before closing, rather than creating a new thread for + // each file. this leads to a pretty substantial speedup that i'm pretty substantially happy about 0u0 + entries + .par_chunks(32) // split into chunks of 32 + .flat_map(|chunk| { + chunk // return Vec<...> instead of Chunk> + .iter() // iter over the chunk, which is a slice of DirEntry structs + .map(|entry| scan_file(entry)) + .collect::>() + }) + .collect() + } else { + entries.iter().map(|entry: &DirEntry| scan_file(entry)).collect() + } } } @@ -162,17 +165,19 @@ fn main() { // .target(env_logger::Target::Stdout) // log to stdout rather than stderr .init(); - #[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] - MIMEDB - .set(mimedb::InferDb::init()) - .or(Err("Failed to initialise Infer backend!")) - .unwrap(); - - #[cfg(any(all(target_os = "linux", not(feature = "infer-backend")), all(not(target_os = "linux"), not(feature = "xdg-mime-backend"))))] - MIMEDB - .set(mimedb::XdgDb::init()) - .or(Err("Failed to initialise XDG Mime backend!")) - .unwrap(); + cfg_if! 
{ + if #[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] { + MIMEDB + .set(mimedb::InferDb::init()) + .or(Err("Failed to initialise Infer backend!")) + .unwrap(); + } else { + MIMEDB + .set(mimedb::XdgDb::init()) + .or(Err("Failed to initialise XDG Mime backend!")) + .unwrap(); + } + } debug!("Iterating directory: {:?}", args.dirs); diff --git a/src/mimedb.rs b/src/mimedb.rs index a7c8e4b..1e668a1 100644 --- a/src/mimedb.rs +++ b/src/mimedb.rs @@ -1,6 +1,4 @@ -#[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] -use std::str::FromStr; - +use cfg_if::cfg_if; use mime_guess::Mime; pub trait MimeDb { @@ -8,95 +6,94 @@ pub trait MimeDb { fn get_type(&self, data: &[u8]) -> Option; } -#[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] -pub struct InferDb { - db: infer::Infer, -} +cfg_if! { + if #[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] { + use std::str::FromStr; -#[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] -fn open_document_check(buf: &[u8], kind: &str) -> bool { - let mime = format!("application/vnd.oasis.opendocument.{}", kind); - let mime = mime.as_bytes(); + pub struct InferDb { + db: infer::Infer, + } - buf.len() > 38 + mime.len() && buf.starts_with(b"PK\x03\x04") && buf[38..mime.len() + 38] == mime[..] 
-} + fn open_document_check(buf: &[u8], kind: &str) -> bool { + let mime = format!("application/vnd.oasis.opendocument.{}", kind); + let mime = mime.as_bytes(); -#[cfg(any(all(not(target_os = "linux"), not(feature = "xdg-mime-backend")), all(target_os = "linux", feature = "infer-backend")))] -impl MimeDb for InferDb { - fn init() -> Self { - let mut info = infer::Infer::new(); + buf.len() > 38 + mime.len() && buf.starts_with(b"PK\x03\x04") && buf[38..mime.len() + 38] == mime[..] + } - // jpeg2000 support because why the stinch not - info.add("image/jpeg2000", ".jp2", |buf| { - buf.len() > 23 && buf[..23] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A\x6A\x70\x32\x20"[..] - }); + impl MimeDb for InferDb { + fn init() -> Self { + let mut info = infer::Infer::new(); - info.add("application/vnd.oasis.opendocument.text", "odt", |buf| { - open_document_check(buf, "text") - }); + // jpeg2000 support because why the stinch not + info.add("image/jpeg2000", ".jp2", |buf| { + buf.len() > 23 && buf[..23] == b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A\x6A\x70\x32\x20"[..] 
+ }); - info.add("application/vnd.oasis.opendocument.spreadsheet", "ods", |buf| { - open_document_check(buf, "spreadsheet") - }); + info.add("application/vnd.oasis.opendocument.text", "odt", |buf| { + open_document_check(buf, "text") + }); - info.add("application/vnd.oasis.opendocument.presentation", "odp", |buf| { - open_document_check(buf, "presentation") - }); + info.add("application/vnd.oasis.opendocument.spreadsheet", "ods", |buf| { + open_document_check(buf, "spreadsheet") + }); - info.add("image/svg+xml", "svg", |buf| { - // before doing the moderately expensive SVG check, we should make sure that the input is actually SGML-ish - // by "SGML-ish", i mean starts with anywhere from zero to ∞-1 whitespace characters, and then a less than sign, - // and then there's some other stuff we don't care about right now + info.add("application/vnd.oasis.opendocument.presentation", "odp", |buf| { + open_document_check(buf, "presentation") + }); - // so, here comes our fancy pants """""SGML-ish validator""""" - for c in buf { - match c { - // whitespace (according to https://www.w3.org/TR/xml/#NT-S) - b'\t' | b'\r' | b'\n' | b'\x20' => continue, - b'<' => break, - _ => return false, - } + info.add("image/svg+xml", "svg", |buf| { + // before doing the moderately expensive SVG check, we should make sure that the input is actually SGML-ish + // by "SGML-ish", i mean starts with anywhere from zero to ∞-1 whitespace characters, and then a less than sign, + // and then there's some other stuff we don't care about right now + + // so, here comes our fancy pants """""SGML-ish validator""""" + for c in buf { + match c { + // whitespace (according to https://www.w3.org/TR/xml/#NT-S) + b'\t' | b'\r' | b'\n' | b'\x20' => continue, + b'<' => break, + _ => return false, + } + } + + // finally, to check whether or not the file is an SVG: + // - split the buffer up into chunks separated by the less than sign + // - check to see if this chunk starts with any of these identifiers: + let 
identifiers: Vec<&[u8]> = vec![b"svg", b"SVG", b"!DOCTYPE svg", b"!DOCTYPE SVG"]; + // - if it does, the nested `any` will short circuit and immediately return true, causing the parent `any` to do + // the same + // - and finally, if none of the chunks match, we'll return false + + // TODO: this is kind of messy, i'd like to clean it up somehow :( + buf + .split(|c| *c == b'<') + .any(|buf| identifiers.iter().any(|id| buf.starts_with(id))) + }); + + // unmut + let info = info; + + Self { db: info } } - // finally, to check whether or not the file is an SVG: - // - split the buffer up into chunks separated by the less than sign - // - check to see if this chunk starts with any of these identifiers: - let identifiers: Vec<&[u8]> = vec![b"svg", b"SVG", b"!DOCTYPE svg", b"!DOCTYPE SVG"]; - // - if it does, the nested `any` will short circuit and immediately return true, causing the parent `any` to do - // the same - // - and finally, if none of the chunks match, we'll return false + fn get_type(&self, data: &[u8]) -> Option { + self.db.get(data).map(|f| Mime::from_str(f.mime_type()).unwrap()) + } + } + } else { + pub struct XdgDb { + db: xdg_mime::SharedMimeInfo, + } - // TODO: this is kind of messy, i'd like to clean it up somehow :( - buf - .split(|c| *c == b'<') - .any(|buf| identifiers.iter().any(|id| buf.starts_with(id))) - }); + impl MimeDb for XdgDb { + fn init() -> Self { + Self { db: xdg_mime::SharedMimeInfo::new() } + } - // unmut - let info = info; - - Self { db: info } - } - - fn get_type(&self, data: &[u8]) -> Option { - self.db.get(data).map(|f| Mime::from_str(f.mime_type()).unwrap()) - } -} - -#[cfg(any(all(target_os = "linux", not(feature = "infer-backend")), all(not(target_os = "linux"), not(feature = "xdg-mime-backend"))))] -pub struct XdgDb { - db: xdg_mime::SharedMimeInfo, -} - -#[cfg(any(all(target_os = "linux", not(feature = "infer-backend")), all(not(target_os = "linux"), not(feature = "xdg-mime-backend"))))] -impl MimeDb for XdgDb { - fn init() 
-> Self { - Self { - db: xdg_mime::SharedMimeInfo::new(), + fn get_type(&self, data: &[u8]) -> Option { + self.db.get_mime_type_for_data(&data).map(|m| m.0) + } } } - - fn get_type(&self, data: &[u8]) -> Option { - self.db.get_mime_type_for_data(&data).map(|m| m.0) - } }