split fif into lib and bin
This commit is contained in:
parent
1f46bef10b
commit
d625fef106
7 changed files with 274 additions and 258 deletions
4
Cargo.lock
generated
4
Cargo.lock
generated
|
@ -311,9 +311,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "libc"
|
name = "libc"
|
||||||
version = "0.2.100"
|
version = "0.2.101"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a1fa8cddc8fbbee11227ef194b5317ed014b8acbf15139bd716a18ad3fe99ec5"
|
checksum = "3cb00336871be5ed2c8ed44b60ae9959dc5b9f08539422ed43f09e34ecaeba21"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "log"
|
name = "log"
|
||||||
|
|
10
clippy.sh
10
clippy.sh
|
@ -34,6 +34,9 @@ for backend in "${_backends[@]}"; do
|
||||||
-A clippy::multiple-crate-versions \
|
-A clippy::multiple-crate-versions \
|
||||||
-A clippy::cast-possible-truncation \
|
-A clippy::cast-possible-truncation \
|
||||||
-A clippy::cast-possible-wrap \
|
-A clippy::cast-possible-wrap \
|
||||||
|
-A clippy::must_use_candidate \
|
||||||
|
-A clippy::missing_panics_doc \
|
||||||
|
-A clippy::missing_errors_doc \
|
||||||
"$_extra"
|
"$_extra"
|
||||||
done
|
done
|
||||||
|
|
||||||
|
@ -43,5 +46,8 @@ done
|
||||||
# shadow_unrelated: sometimes things that seem unrelated are actually related ;)
|
# shadow_unrelated: sometimes things that seem unrelated are actually related ;)
|
||||||
# option_if_let_else: the suggested code is usually harder to read than the original
|
# option_if_let_else: the suggested code is usually harder to read than the original
|
||||||
# multiple_crate_versions: cached uses an old version of hashbrown :c
|
# multiple_crate_versions: cached uses an old version of hashbrown :c
|
||||||
# cast-possible-truncation: only ever used where it would be totally fine
|
# cast_possible_truncation: only ever used where it would be totally fine
|
||||||
# cast-possible-wrap: ditto
|
# cast_possible_wrap: ditto
|
||||||
|
# must_use_candidate: useless
|
||||||
|
# missing_panics_doc: the docs are just for me, fif isn't really intended to be used as a library, so this is unneeded
|
||||||
|
# missing_errors_doc: ditto
|
||||||
|
|
|
@ -17,12 +17,16 @@ use itertools::{Either, Itertools};
|
||||||
/// A macro for creating an array of `Writable`s without needing to pepper your code with `into()`s.
|
/// A macro for creating an array of `Writable`s without needing to pepper your code with `into()`s.
|
||||||
/// # Usage
|
/// # Usage
|
||||||
/// ```
|
/// ```
|
||||||
/// let f = std::io::stdout();
|
/// use crate::fif::writables;
|
||||||
|
/// use crate::fif::formats::{Writable, smart_write};
|
||||||
|
/// let mut f = std::io::stdout();
|
||||||
|
///
|
||||||
/// // Instead of...
|
/// // Instead of...
|
||||||
/// smart_write(f, &["hello".into(), Writable::Newline]);
|
/// smart_write(&mut f, &["hello".into(), Writable::Newline]);
|
||||||
/// // ...just use:
|
/// // ...just use:
|
||||||
/// smart_write(f, writables!["hello", Newline]);
|
/// smart_write(&mut f, writables!["hello", Newline]);
|
||||||
/// ```
|
/// ```
|
||||||
|
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
macro_rules! writables {
|
macro_rules! writables {
|
||||||
[$($args:tt),+] => {
|
[$($args:tt),+] => {
|
||||||
|
@ -71,7 +75,7 @@ impl<'a> From<&'a OsStr> for Writable<'a> {
|
||||||
|
|
||||||
fn generated_by() -> String { format!("Generated by fif {}", clap_long_version()) }
|
fn generated_by() -> String { format!("Generated by fif {}", clap_long_version()) }
|
||||||
|
|
||||||
fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
|
pub fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
|
||||||
// ehhhh
|
// ehhhh
|
||||||
for writeable in writeables {
|
for writeable in writeables {
|
||||||
match writeable {
|
match writeable {
|
||||||
|
|
|
@ -9,8 +9,8 @@ use std::str::FromStr;
|
||||||
use cached::cached;
|
use cached::cached;
|
||||||
use mime::Mime;
|
use mime::Mime;
|
||||||
|
|
||||||
use crate::mime_db::MimeDb;
|
|
||||||
use crate::string_type::String;
|
use crate::string_type::String;
|
||||||
|
use crate::MimeDb;
|
||||||
|
|
||||||
/// The number of bytes to read initially.
|
/// The number of bytes to read initially.
|
||||||
///
|
///
|
||||||
|
|
237
src/lib.rs
Normal file
237
src/lib.rs
Normal file
|
@ -0,0 +1,237 @@
|
||||||
|
#![forbid(unsafe_code)]
|
||||||
|
#![warn(trivial_casts, unused_lifetimes, unused_qualifications)]
|
||||||
|
|
||||||
|
pub mod mime_db;
|
||||||
|
pub mod findings;
|
||||||
|
pub mod formats;
|
||||||
|
pub mod inspectors;
|
||||||
|
pub mod parameters;
|
||||||
|
pub mod string_type;
|
||||||
|
pub mod utils;
|
||||||
|
|
||||||
|
use cfg_if::cfg_if;
|
||||||
|
use once_cell::sync::OnceCell;
|
||||||
|
use walkdir::{DirEntry, WalkDir};
|
||||||
|
use std::collections::BTreeSet;
|
||||||
|
use std::path::Path;
|
||||||
|
use log::{debug, error, warn};
|
||||||
|
use mime_guess::from_ext;
|
||||||
|
use crate::parameters::ScanOpts;
|
||||||
|
use crate::findings::{Findings, ScanError};
|
||||||
|
use crate::mime_db::MimeDb;
|
||||||
|
|
||||||
|
cfg_if! {
|
||||||
|
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
|
||||||
|
/// A [OnceCell] holding an instance of [mime_db::MimeDb].
|
||||||
|
pub static MIMEDB: OnceCell<mime_db::InferDb> = OnceCell::new();
|
||||||
|
} else {
|
||||||
|
/// A [OnceCell] holding an instance of [mime_db::MimeDb].
|
||||||
|
pub static MIMEDB: OnceCell<mime_db::XdgDb> = OnceCell::new();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg_if! {
|
||||||
|
if #[cfg(windows)] {
|
||||||
|
/// Determines whether or not a file is hidden by checking its win32 file attributes.
|
||||||
|
pub fn is_hidden(entry: &DirEntry) -> bool {
|
||||||
|
use std::os::windows::prelude::*;
|
||||||
|
std::fs::metadata(entry.path()) // try to get metadata for file
|
||||||
|
.map_or(
|
||||||
|
false, // if getting metadata/attributes fails, assume it's not hidden
|
||||||
|
|f| f.file_attributes() & 0x2 > 0, // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants
|
||||||
|
)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/// Determines whether or not a file is hidden by checking for a leading full stop.
|
||||||
|
pub fn is_hidden(entry: &DirEntry) -> bool {
|
||||||
|
entry
|
||||||
|
.file_name()
|
||||||
|
.to_str()
|
||||||
|
.map_or(false, |f| f.starts_with('.') && f != ".")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if a file matches the given criteria. This means checking whether the file's extension appears in
|
||||||
|
/// `exts` (if specified), potentially skipping over hidden files, and so on.
|
||||||
|
pub fn wanted_file(
|
||||||
|
entry: &DirEntry,
|
||||||
|
exts: Option<&BTreeSet<&str>>,
|
||||||
|
exclude: Option<&BTreeSet<&str>>,
|
||||||
|
scan_opts: &ScanOpts,
|
||||||
|
) -> bool {
|
||||||
|
if entry.depth() == 0 {
|
||||||
|
// the root directory should always be scanned.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if !scan_opts.hidden && is_hidden(entry) {
|
||||||
|
// skip hidden files and directories. this check is performed first because it's very lightweight.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if entry.file_type().is_dir() {
|
||||||
|
// always allow directories - there's no point doing file extension matching on something that isn't a file.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(ext) = entry.path().extension() {
|
||||||
|
// file has extension - discard invalid UTF-8 and normalise it to lowercase.
|
||||||
|
let ext = ext.to_string_lossy().to_lowercase();
|
||||||
|
let ext = ext.as_str();
|
||||||
|
|
||||||
|
if scan_opts.ignore_unknown_exts && from_ext(ext).is_empty() {
|
||||||
|
// unknown extension, skip.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(exts) = exts {
|
||||||
|
// only scan if the file has one of the specified extensions.
|
||||||
|
exts.contains(&ext)
|
||||||
|
} else {
|
||||||
|
// no extensions specified - the file should be scanned unless its extension is on the exclude list.
|
||||||
|
exclude.map_or(true, |exclude| !exclude.contains(&ext))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// no file extension
|
||||||
|
scan_opts.extensionless
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Inspects the given entry, returning a [`Findings`] on success and a [`ScanError`] on failure.
|
||||||
|
///
|
||||||
|
/// In the event of an IO error, the returned [`ScanError`] will be of type [`ScanError::File`]. Otherwise, a
|
||||||
|
/// [`ScanError::Mime`] will be returned, meaning that the file was scanned successfully, but a mimetype could not be
|
||||||
|
/// determined.
|
||||||
|
pub fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, ScanError> {
|
||||||
|
let path = entry.path();
|
||||||
|
// try to determine mimetype for this entry
|
||||||
|
let result = match inspectors::mime_type(MIMEDB.get().unwrap(), path) {
|
||||||
|
// an error occurred while trying to read the file
|
||||||
|
Err(_) => return Err(ScanError::File(path)),
|
||||||
|
// the file was read successfully, but we were unable to determine its mimetype
|
||||||
|
Ok(None) => return Err(ScanError::Mime(path)),
|
||||||
|
// a mimetype was found!
|
||||||
|
Ok(Some(result)) => result,
|
||||||
|
};
|
||||||
|
|
||||||
|
// set of known extensions for the given mimetype
|
||||||
|
let known_exts = inspectors::mime_extension_lookup(result.essence_str().into());
|
||||||
|
// file extension for this particular file
|
||||||
|
let entry_ext = path.extension();
|
||||||
|
|
||||||
|
let valid = match known_exts {
|
||||||
|
// there is a known set of extensions for this mimetype, and the file has an extension
|
||||||
|
Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_string_lossy().to_lowercase().into()),
|
||||||
|
// either this file has no extension, or there is no known set of extensions for this mimetype :(
|
||||||
|
Some(_) | None => false,
|
||||||
|
};
|
||||||
|
|
||||||
|
let path = if canonical_paths {
|
||||||
|
match std::fs::canonicalize(path) {
|
||||||
|
Ok(path) => path,
|
||||||
|
Err(_) => return Err(ScanError::File(entry.path())),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
path.to_path_buf() // :c
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Findings {
|
||||||
|
file: path,
|
||||||
|
valid,
|
||||||
|
mime: result,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Takes a slice of [`DirEntry`]s and calls [`scan_file`] on each one, returning the results in a vector.
|
||||||
|
pub fn scan_from_walkdir(entries: &[DirEntry], canonical_paths: bool) -> Vec<Result<Findings, ScanError>> {
|
||||||
|
cfg_if! {
|
||||||
|
if #[cfg(feature = "multi-threaded")] {
|
||||||
|
use rayon::prelude::*;
|
||||||
|
|
||||||
|
// split the entries into chunks of 32, and iterate over each chunk of entries in a separate thread
|
||||||
|
entries
|
||||||
|
.par_chunks(32)
|
||||||
|
.flat_map(|chunk| {
|
||||||
|
chunk
|
||||||
|
.iter() // iter over the chunk, which is a slice of DirEntry structs
|
||||||
|
.map(|entry| scan_file(entry, canonical_paths))
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
} else {
|
||||||
|
entries.iter().map(|entry: &DirEntry| scan_file(entry, canonical_paths)).collect()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
|
||||||
|
/// [DirEntry]s.
|
||||||
|
pub fn scan_directory(
|
||||||
|
dirs: &Path,
|
||||||
|
exts: Option<&BTreeSet<&str>>,
|
||||||
|
exclude: Option<&BTreeSet<&str>>,
|
||||||
|
scan_opts: &ScanOpts,
|
||||||
|
) -> Option<Vec<DirEntry>> {
|
||||||
|
let stepper = WalkDir::new(dirs).follow_links(scan_opts.follow_symlinks).into_iter();
|
||||||
|
let mut probably_fatal_error = false;
|
||||||
|
let entries: Vec<DirEntry> = stepper
|
||||||
|
.filter_entry(|e| wanted_file(e, exts, exclude, scan_opts)) // filter out unwanted files
|
||||||
|
.filter_map(|e| {
|
||||||
|
if let Err(err) = &e {
|
||||||
|
debug!("uh oh spaghettio!! {:#?}", e);
|
||||||
|
// log errors to stdout, and remove them from the iterator
|
||||||
|
let path = err.path().map_or("General error".into(), Path::to_string_lossy);
|
||||||
|
|
||||||
|
if err.depth() == 0 {
|
||||||
|
// if something goes wrong while trying to read the root directory, we're probably not going to get much done
|
||||||
|
probably_fatal_error = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: is there a way to just say `map_or(x, |y| y).thing()` instead of `map_or(x.thing(), |y| y.thing())`?
|
||||||
|
// i don't care whether i'm returning a walkdir error or an io error, i just care about whether or not it
|
||||||
|
// implements ToString (which they both do). map_or doesn't work on trait objects though :(
|
||||||
|
error!(
|
||||||
|
"{}: {}",
|
||||||
|
path,
|
||||||
|
err.io_error().map_or(err.to_string(), |e| e.to_string())
|
||||||
|
);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
e.ok()
|
||||||
|
})
|
||||||
|
// remove directories from the final list
|
||||||
|
.filter(|e| !e.file_type().is_dir())
|
||||||
|
// if fif is invoked without `-f` on a symlinked directory, it will recurse into the symlink (as desired) and ignore
|
||||||
|
// any symlinks inside the symlinked root directory. however, the root directory will still be added to `entries` as
|
||||||
|
// if it were a file to be scanned, and `scan_file` will fail to scan it, adding "Failed to read ~/whatever" to the
|
||||||
|
// output. to avoid this, we can remove all symlinks from `entries` if `-f` is not set. i know this is kind of
|
||||||
|
// confusing, but it's honestly kind of hard to explain... maybe a screenshot is better:
|
||||||
|
// https://i.imgur.com/DYG7jlB.png
|
||||||
|
// adding the symlink filter removes the line that's being pointed to in the image. 0u0
|
||||||
|
.filter(|e| scan_opts.follow_symlinks || !e.file_type().is_symlink())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if probably_fatal_error {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(entries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Initialises [`MIMEDB`] with a value dependent on the current backend.
|
||||||
|
pub fn init_db() {
|
||||||
|
cfg_if! {
|
||||||
|
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
|
||||||
|
MIMEDB
|
||||||
|
.set(crate::mime_db::InferDb::init())
|
||||||
|
.or(Err("Failed to initialise Infer backend!"))
|
||||||
|
.unwrap();
|
||||||
|
} else {
|
||||||
|
MIMEDB
|
||||||
|
.set(crate::mime_db::XdgDb::init())
|
||||||
|
.or(Err("Failed to initialise XDG Mime backend!"))
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
241
src/main.rs
241
src/main.rs
|
@ -18,44 +18,18 @@
|
||||||
#![warn(trivial_casts, unused_lifetimes, unused_qualifications)]
|
#![warn(trivial_casts, unused_lifetimes, unused_qualifications)]
|
||||||
|
|
||||||
use std::io::{stdout, BufWriter, Write};
|
use std::io::{stdout, BufWriter, Write};
|
||||||
use std::path::Path;
|
|
||||||
use std::process::exit;
|
use std::process::exit;
|
||||||
|
|
||||||
use cfg_if::cfg_if;
|
|
||||||
use clap::Clap;
|
use clap::Clap;
|
||||||
use log::{debug, error, info, trace, warn, Level};
|
use log::{debug, error, info, trace, warn, Level};
|
||||||
use once_cell::sync::OnceCell;
|
|
||||||
use walkdir::{DirEntry, WalkDir};
|
|
||||||
|
|
||||||
use crate::findings::Findings;
|
use fif::formats::Format;
|
||||||
use crate::findings::ScanError;
|
use fif::parameters::{OutputFormat};
|
||||||
use crate::formats::Format;
|
use fif::utils::{clap_long_version, os_name};
|
||||||
use crate::mime_db::MimeDb;
|
use fif::{init_db, scan_directory, parameters, formats};
|
||||||
use crate::parameters::{OutputFormat, ScanOpts};
|
|
||||||
use crate::utils::{clap_long_version, os_name};
|
|
||||||
use mime_guess::from_ext;
|
|
||||||
use std::collections::BTreeSet;
|
|
||||||
|
|
||||||
mod findings;
|
|
||||||
mod formats;
|
|
||||||
mod inspectors;
|
|
||||||
mod mime_db;
|
|
||||||
mod parameters;
|
|
||||||
pub(crate) mod string_type;
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
mod utils;
|
|
||||||
|
|
||||||
cfg_if! {
|
|
||||||
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
|
|
||||||
/// A [OnceCell] holding an instance of [mime_db::MimeDb].
|
|
||||||
static MIMEDB: OnceCell<mime_db::InferDb> = OnceCell::new();
|
|
||||||
} else {
|
|
||||||
/// A [OnceCell] holding an instance of [mime_db::MimeDb].
|
|
||||||
static MIMEDB: OnceCell<mime_db::XdgDb> = OnceCell::new();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
#[allow(clippy::cognitive_complexity)]
|
#[allow(clippy::cognitive_complexity)]
|
||||||
|
@ -115,7 +89,7 @@ fn main() {
|
||||||
|
|
||||||
trace!("Found {} items to check", entries.len());
|
trace!("Found {} items to check", entries.len());
|
||||||
|
|
||||||
let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths)
|
let results: Vec<_> = fif::scan_from_walkdir(&entries, args.canonical_paths)
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter(
|
.filter(
|
||||||
|result| result.is_err() || !result.as_ref().unwrap().valid,
|
|result| result.is_err() || !result.as_ref().unwrap().valid,
|
||||||
|
@ -168,208 +142,3 @@ fn main() {
|
||||||
debug!("Done");
|
debug!("Done");
|
||||||
}
|
}
|
||||||
|
|
||||||
cfg_if! {
|
|
||||||
if #[cfg(windows)] {
|
|
||||||
/// Determines whether or not a file is hidden by checking its win32 file attributes.
|
|
||||||
fn is_hidden(entry: &DirEntry) -> bool {
|
|
||||||
use std::os::windows::prelude::*;
|
|
||||||
std::fs::metadata(entry.path()) // try to get metadata for file
|
|
||||||
.map_or(
|
|
||||||
false, // if getting metadata/attributes fails, assume it's not hidden
|
|
||||||
|f| f.file_attributes() & 0x2 > 0, // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/// Determines whether or not a file is hidden by checking for a leading full stop.
|
|
||||||
fn is_hidden(entry: &DirEntry) -> bool {
|
|
||||||
entry
|
|
||||||
.file_name()
|
|
||||||
.to_str()
|
|
||||||
.map_or(false, |f| f.starts_with('.') && f != ".")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `true` if a file matches the given criteria. This means checking whether the file's extension appears in
|
|
||||||
/// `exts` (if specified), potentially skipping over hidden files, and so on.
|
|
||||||
fn wanted_file(
|
|
||||||
entry: &DirEntry,
|
|
||||||
exts: Option<&BTreeSet<&str>>,
|
|
||||||
exclude: Option<&BTreeSet<&str>>,
|
|
||||||
scan_opts: &ScanOpts,
|
|
||||||
) -> bool {
|
|
||||||
if entry.depth() == 0 {
|
|
||||||
// the root directory should always be scanned.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if !scan_opts.hidden && is_hidden(entry) {
|
|
||||||
// skip hidden files and directories. this check is performed first because it's very lightweight.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if entry.file_type().is_dir() {
|
|
||||||
// always allow directories - there's no point doing file extension matching on something that isn't a file.
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(ext) = entry.path().extension() {
|
|
||||||
// file has extension - discard invalid UTF-8 and normalise it to lowercase.
|
|
||||||
let ext = ext.to_string_lossy().to_lowercase();
|
|
||||||
let ext = ext.as_str();
|
|
||||||
|
|
||||||
if scan_opts.ignore_unknown_exts && from_ext(ext).is_empty() {
|
|
||||||
// unknown extension, skip.
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(exts) = exts {
|
|
||||||
// only scan if the file has one of the specified extensions.
|
|
||||||
exts.contains(&ext)
|
|
||||||
} else {
|
|
||||||
// no extensions specified - the file should be scanned unless its extension is on the exclude list.
|
|
||||||
exclude.map_or(true, |exclude| !exclude.contains(&ext))
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// no file extension
|
|
||||||
scan_opts.extensionless
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Inspects the given entry, returning a [`Findings`] on success and a [`ScanError`] on failure.
|
|
||||||
///
|
|
||||||
/// In the event of an IO error, the returned [`ScanError`] will be of type [`ScanError::File`]. Otherwise, a
|
|
||||||
/// [`ScanError::Mime`] will be returned, meaning that the file was scanned successfully, but a mimetype could not be
|
|
||||||
/// determined.
|
|
||||||
fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, ScanError> {
|
|
||||||
let path = entry.path();
|
|
||||||
// try to determine mimetype for this entry
|
|
||||||
let result = match inspectors::mime_type(MIMEDB.get().unwrap(), path) {
|
|
||||||
// an error occurred while trying to read the file
|
|
||||||
Err(_) => return Err(ScanError::File(path)),
|
|
||||||
// the file was read successfully, but we were unable to determine its mimetype
|
|
||||||
Ok(None) => return Err(ScanError::Mime(path)),
|
|
||||||
// a mimetype was found!
|
|
||||||
Ok(Some(result)) => result,
|
|
||||||
};
|
|
||||||
|
|
||||||
// set of known extensions for the given mimetype
|
|
||||||
let known_exts = inspectors::mime_extension_lookup(result.essence_str().into());
|
|
||||||
// file extension for this particular file
|
|
||||||
let entry_ext = path.extension();
|
|
||||||
|
|
||||||
let valid = match known_exts {
|
|
||||||
// there is a known set of extensions for this mimetype, and the file has an extension
|
|
||||||
Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_string_lossy().to_lowercase().into()),
|
|
||||||
// either this file has no extension, or there is no known set of extensions for this mimetype :(
|
|
||||||
Some(_) | None => false,
|
|
||||||
};
|
|
||||||
|
|
||||||
let path = if canonical_paths {
|
|
||||||
match std::fs::canonicalize(path) {
|
|
||||||
Ok(path) => path,
|
|
||||||
Err(_) => return Err(ScanError::File(entry.path())),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
path.to_path_buf() // :c
|
|
||||||
};
|
|
||||||
|
|
||||||
Ok(Findings {
|
|
||||||
file: path,
|
|
||||||
valid,
|
|
||||||
mime: result,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Takes a slice of [`DirEntry`]s and calls [`scan_file`] on each one, returning the results in a vector.
|
|
||||||
fn scan_from_walkdir(entries: &[DirEntry], canonical_paths: bool) -> Vec<Result<Findings, ScanError>> {
|
|
||||||
cfg_if! {
|
|
||||||
if #[cfg(feature = "multi-threaded")] {
|
|
||||||
use rayon::prelude::*;
|
|
||||||
|
|
||||||
// split the entries into chunks of 32, and iterate over each chunk of entries in a separate thread
|
|
||||||
entries
|
|
||||||
.par_chunks(32)
|
|
||||||
.flat_map(|chunk| {
|
|
||||||
chunk
|
|
||||||
.iter() // iter over the chunk, which is a slice of DirEntry structs
|
|
||||||
.map(|entry| scan_file(entry, canonical_paths))
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
} else {
|
|
||||||
entries.iter().map(|entry: &DirEntry| scan_file(entry, canonical_paths)).collect()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
|
|
||||||
/// [DirEntry]s.
|
|
||||||
fn scan_directory(
|
|
||||||
dirs: &Path,
|
|
||||||
exts: Option<&BTreeSet<&str>>,
|
|
||||||
exclude: Option<&BTreeSet<&str>>,
|
|
||||||
scan_opts: &ScanOpts,
|
|
||||||
) -> Option<Vec<DirEntry>> {
|
|
||||||
let stepper = WalkDir::new(dirs).follow_links(scan_opts.follow_symlinks).into_iter();
|
|
||||||
let mut probably_fatal_error = false;
|
|
||||||
let entries: Vec<DirEntry> = stepper
|
|
||||||
.filter_entry(|e| wanted_file(e, exts, exclude, scan_opts)) // filter out unwanted files
|
|
||||||
.filter_map(|e| {
|
|
||||||
if let Err(err) = &e {
|
|
||||||
debug!("uh oh spaghettio!! {:#?}", e);
|
|
||||||
// log errors to stdout, and remove them from the iterator
|
|
||||||
let path = err.path().map_or("General error".into(), Path::to_string_lossy);
|
|
||||||
|
|
||||||
if err.depth() == 0 {
|
|
||||||
// if something goes wrong while trying to read the root directory, we're probably not going to get much done
|
|
||||||
probably_fatal_error = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: is there a way to just say `map_or(x, |y| y).thing()` instead of `map_or(x.thing(), |y| y.thing())`?
|
|
||||||
// i don't care whether i'm returning a walkdir error or an io error, i just care about whether or not it
|
|
||||||
// implements ToString (which they both do). map_or doesn't work on trait objects though :(
|
|
||||||
error!(
|
|
||||||
"{}: {}",
|
|
||||||
path,
|
|
||||||
err.io_error().map_or(err.to_string(), |e| e.to_string())
|
|
||||||
);
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
e.ok()
|
|
||||||
})
|
|
||||||
// remove directories from the final list
|
|
||||||
.filter(|e| !e.file_type().is_dir())
|
|
||||||
// if fif is invoked without `-f` on a symlinked directory, it will recurse into the symlink (as desired) and ignore
|
|
||||||
// any symlinks inside the symlinked root directory. however, the root directory will still be added to `entries` as
|
|
||||||
// if it were a file to be scanned, and `scan_file` will fail to scan it, adding "Failed to read ~/whatever" to the
|
|
||||||
// output. to avoid this, we can remove all symlinks from `entries` if `-f` is not set. i know this is kind of
|
|
||||||
// confusing, but it's honestly kind of hard to explain... maybe a screenshot is better:
|
|
||||||
// https://i.imgur.com/DYG7jlB.png
|
|
||||||
// adding the symlink filter removes the line that's being pointed to in the image. 0u0
|
|
||||||
.filter(|e| scan_opts.follow_symlinks || !e.file_type().is_symlink())
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
if probably_fatal_error {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(entries)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Initialises [`MIMEDB`] with a value dependent on the current backend.
|
|
||||||
fn init_db() {
|
|
||||||
cfg_if! {
|
|
||||||
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
|
|
||||||
MIMEDB
|
|
||||||
.set(mime_db::InferDb::init())
|
|
||||||
.or(Err("Failed to initialise Infer backend!"))
|
|
||||||
.unwrap();
|
|
||||||
} else {
|
|
||||||
MIMEDB
|
|
||||||
.set(mime_db::XdgDb::init())
|
|
||||||
.or(Err("Failed to initialise XDG Mime backend!"))
|
|
||||||
.unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
use crate::findings::Findings;
|
use fif::findings::Findings;
|
||||||
use crate::formats::{Format, PowerShell, Shell};
|
use fif::formats::{Format, PowerShell, Shell};
|
||||||
use crate::inspectors::{mime_extension_lookup, BUF_SIZE};
|
use fif::inspectors::{mime_extension_lookup, BUF_SIZE};
|
||||||
use crate::mime_db::MimeDb;
|
use fif::mime_db::MimeDb;
|
||||||
use crate::string_type::String;
|
use fif::string_type::String;
|
||||||
use crate::{scan_directory, scan_from_walkdir};
|
use fif::{scan_directory, scan_from_walkdir};
|
||||||
|
|
||||||
use crate::parameters::Parameters;
|
use crate::parameters::Parameters;
|
||||||
use clap::Clap;
|
use clap::Clap;
|
||||||
|
@ -21,12 +21,12 @@ const ZIP_BYTES: &[u8] = b"PK\x03\x04";
|
||||||
|
|
||||||
cfg_if::cfg_if! {
|
cfg_if::cfg_if! {
|
||||||
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
|
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
|
||||||
fn get_mime_db() -> crate::mime_db::InferDb {
|
fn get_mime_db() -> fif::mime_db::InferDb {
|
||||||
crate::mime_db::InferDb::init()
|
fif::mime_db::InferDb::init()
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
fn get_mime_db() -> crate::mime_db::XdgDb {
|
fn get_mime_db() -> fif::mime_db::XdgDb {
|
||||||
crate::mime_db::XdgDb::init()
|
fif::mime_db::XdgDb::init()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -335,7 +335,7 @@ fn identify_random_bytes() {
|
||||||
}
|
}
|
||||||
println!(
|
println!(
|
||||||
"No type found:\t{} counts",
|
"No type found:\t{} counts",
|
||||||
results.values().len() as i32 - results.values().sum::<i32>()
|
1000 - results.values().sum::<i32>()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -432,8 +432,8 @@ fn media_contains_audio_video_images() {
|
||||||
#[test]
|
#[test]
|
||||||
/// Ensure that the `writables!` macro produces the output it should.
|
/// Ensure that the `writables!` macro produces the output it should.
|
||||||
fn writables_is_correct() {
|
fn writables_is_correct() {
|
||||||
use crate::formats::Writable;
|
use fif::formats::Writable;
|
||||||
use crate::writables;
|
use fif::writables;
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
&["henlo".into(), Path::new("henlo").into(), Writable::Newline,],
|
&["henlo".into(), Path::new("henlo").into(), Writable::Newline,],
|
||||||
|
|
Loading…
Reference in a new issue