// fif - a command-line tool for detecting and optionally correcting files with incorrect extensions. // Copyright (C) 2021 Lynnesbian // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . #![forbid(unsafe_code)] #![warn(trivial_casts, unused_lifetimes, unused_qualifications)] use std::ffi::OsStr; use std::io::{stdout, BufWriter, Write}; use std::path::Path; use std::process::exit; use cfg_if::cfg_if; use clap::Clap; use log::{debug, error, info, trace, warn, Level}; use once_cell::sync::OnceCell; use walkdir::{DirEntry, WalkDir}; use crate::findings::Findings; use crate::findings::ScanError; use crate::formats::Format; use crate::mime_db::MimeDb; use crate::parameters::{OutputFormat, ScanOpts}; use crate::utils::{clap_long_version, os_name}; use std::collections::BTreeSet; mod findings; mod formats; mod inspectors; mod mime_db; mod parameters; pub(crate) mod string_type; #[cfg(test)] mod tests; mod utils; cfg_if! { if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] { /// A [OnceCell] holding an instance of [mime_db::MimeDb]. static MIMEDB: OnceCell = OnceCell::new(); } else { /// A [OnceCell] holding an instance of [mime_db::MimeDb]. static MIMEDB: OnceCell = OnceCell::new(); } } #[doc(hidden)] #[allow(clippy::cognitive_complexity)] fn main() { let args: parameters::Parameters = parameters::Parameters::parse(); let mut builder = env_logger::Builder::new(); builder .filter_level(args.default_verbosity()) // set default log level .parse_default_env() // set log level from RUST_LOG .parse_env("FIF_LOG") // set log level from FIF_LOG .format(|buf, r| { let mut style = buf.default_level_style(r.level()); // use bold for warnings and errors style.set_bold(r.level() <= Level::Warn); // only use the first character of the log level name let abbreviation = style.value(r.level().to_string().chars().next().unwrap()); // e.g. [D] Debug message writeln!(buf, "[{}] {}", abbreviation, r.args()) }) .init(); trace!( "fif {}, running on {} {}", clap_long_version(), std::env::consts::ARCH, os_name() ); trace!("Initialising mimetype database"); init_db(); debug!("Iterating directory: {:?}", args.dir); let extensions = args.extensions(); let excludes = args.excluded_extensions(); if let Some(extensions) = &extensions { debug!("Checking files with extensions: {:?}", extensions); } else if let Some(excludes) = &excludes { debug!("Skipping files with extensions: {:?}", excludes); } else { debug!("Checking files regardless of extensions"); } let entries = match scan_directory(&args.dir, extensions.as_ref(), excludes.as_ref(), &args.get_scan_opts()) { // no need to log anything for fatal errors - fif will already have printed something obvious like // "[ERROR] /fake/path: No such file or directory (os error 2)". we can assume that if this has happened, the dir // given as input doesn't exist or is otherwise unreadable. None => exit(exitcode::NOINPUT), Some(e) => e, }; if entries.is_empty() { warn!("No files matching requested options found."); exit(exitcode::OK); } trace!("Found {} items to check", entries.len()); let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths) .into_iter() .filter( |result| result.is_err() || !result.as_ref().unwrap().valid, // TODO: find a way to trace! the valid files without doing ↓ // || if result.as_ref().unwrap().valid { trace!("{:?} ok", result.as_ref().unwrap().file); false } else { true } ) .collect(); trace!("Scanning complete"); for result in &results { match result { Ok(r) => { debug!( "{:?} is of type {}, should have extension \"{}\"", r.file, r.mime, r.recommended_extension().unwrap_or_else(|| "???".into()) ); } Err(f) => warn!("{}", f), } } if results.is_empty() { info!("All files have valid extensions!"); exit(exitcode::OK); } let mut buffered_stdout = BufWriter::new(stdout()); let result = match args.output_format { OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &results), OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &results), #[cfg(feature = "json")] OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &results), OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &results), }; if result.is_err() { error!("Failed to write to stdout."); exit(exitcode::IOERR); } if buffered_stdout.flush().is_err() { error!("Failed to flush stdout."); exit(exitcode::IOERR); } debug!("Done"); } cfg_if! { if #[cfg(windows)] { /// Determines whether or not a file is hidden by checking its win32 file attributes. fn is_hidden(entry: &DirEntry) -> bool { use std::os::windows::prelude::*; std::fs::metadata(entry.path()) // try to get metadata for file .map_or( false, // if getting metadata/attributes fails, assume it's not hidden |f| f.file_attributes() & 0x2 > 0, // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants ) } } else { /// Determines whether or not a file is hidden by checking for a leading full stop. fn is_hidden(entry: &DirEntry) -> bool { entry .file_name() .to_str() .map_or(false, |f| f.starts_with('.') && f != ".") } } } /// Returns `true` if a file matches the given criteria. This means checking whether the file's extension appears in /// `exts` (if specified), potentially skipping over hidden files, and so on. fn wanted_file( entry: &DirEntry, exts: Option<&BTreeSet<&str>>, exclude: Option<&BTreeSet<&str>>, scan_opts: &ScanOpts, ) -> bool { if entry.depth() == 0 { // the root directory should always be scanned. return true; } if !scan_opts.hidden && is_hidden(entry) { // skip hidden files and directories. this check is performed first because it's very lightweight. return false; } if entry.file_type().is_dir() { // always allow directories - there's no point doing file extension matching on something that isn't a file. return true; } if let Some(ext) = extension_from_path(entry.path()) { // file has extension - discard invalid UTF-8 and normalise it to lowercase. let ext = ext.to_string_lossy().to_lowercase(); let ext = ext.as_str(); if let Some(exts) = exts { // only scan if the file has one of the specified extensions. exts.contains(&ext) } else { // no extensions specified - the file should be scanned unless its extension is on the exclude list. exclude.map_or(true, |exclude| !exclude.contains(&ext)) } } else { // no file extension scan_opts.extensionless } } /// Given a file path, returns its extension, using [`std::path::Path::extension`]. fn extension_from_path(path: &Path) -> Option<&OsStr> { path.extension() } /// Inspects the given entry, returning a [`Findings`] on success and a [`ScanError`] on failure. /// /// In the event of an IO error, the returned [`ScanError`] will be of type [`ScanError::File`]. Otherwise, a /// [`ScanError::Mime`] will be returned, meaning that the file was scanned successfully, but a mimetype could not be /// determined. fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result { let path = entry.path(); // try to determine mimetype for this entry let result = match inspectors::mime_type(MIMEDB.get().unwrap(), path) { // an error occurred while trying to read the file Err(_) => return Err(ScanError::File(path)), // the file was read successfully, but we were unable to determine its mimetype Ok(None) => return Err(ScanError::Mime(path)), // a mimetype was found! Ok(Some(result)) => result, }; // set of known extensions for the given mimetype let known_exts = inspectors::mime_extension_lookup(result.essence_str().into()); // file extension for this particular file let entry_ext = extension_from_path(path); let valid = match known_exts { // there is a known set of extensions for this mimetype, and the file has an extension Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_string_lossy().to_lowercase().into()), // either this file has no extension, or there is no known set of extensions for this mimetype :( Some(_) | None => false, }; let path = if canonical_paths { match std::fs::canonicalize(path) { Ok(path) => path, Err(_) => return Err(ScanError::File(entry.path())), } } else { path.to_path_buf() // :c }; Ok(Findings { file: path, valid, mime: result, }) } /// Takes a slice of [`DirEntry`]s and calls [`scan_file`] on each one, returning the results in a vector. fn scan_from_walkdir(entries: &[DirEntry], canonical_paths: bool) -> Vec> { cfg_if! { if #[cfg(feature = "multi-threaded")] { use rayon::prelude::*; // split the entries into chunks of 32, and iterate over each chunk of entries in a separate thread entries .par_chunks(32) .flat_map(|chunk| { chunk .iter() // iter over the chunk, which is a slice of DirEntry structs .map(|entry| scan_file(entry, canonical_paths)) .collect::>() }) .collect() } else { entries.iter().map(|entry: &DirEntry| scan_file(entry, canonical_paths)).collect() } } } /// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of /// [DirEntry]s. fn scan_directory( dirs: &Path, exts: Option<&BTreeSet<&str>>, exclude: Option<&BTreeSet<&str>>, scan_opts: &ScanOpts, ) -> Option> { let stepper = WalkDir::new(dirs).follow_links(scan_opts.follow_symlinks).into_iter(); let mut probably_fatal_error = false; let entries: Vec = stepper .filter_entry(|e| wanted_file(e, exts, exclude, scan_opts)) // filter out unwanted files .filter_map(|e| { if let Err(err) = &e { debug!("uh oh spaghettio!! {:#?}", e); // log errors to stdout, and remove them from the iterator let path = err.path().map_or("General error".into(), Path::to_string_lossy); if err.depth() == 0 { // if something goes wrong while trying to read the root directory, we're probably not going to get much done probably_fatal_error = true; } // TODO: is there a way to just say `map_or(x, |y| y).thing()` instead of `map_or(x.thing(), |y| y.thing())`? // i don't care whether i'm returning a walkdir error or an io error, i just care about whether or not it // implements ToString (which they both do). map_or doesn't work on trait objects though :( error!( "{}: {}", path, err.io_error().map_or(err.to_string(), |e| e.to_string()) ); return None; } e.ok() }) // remove directories from the final list .filter(|e| !e.file_type().is_dir()) // if fif is invoked without `-f` on a symlinked directory, it will recurse into the symlink (as desired) and ignore // any symlinks inside the symlinked root directory. however, the root directory will still be added to `entries` as // if it were a file to be scanned, and `scan_file` will fail to scan it, adding "Failed to read ~/whatever" to the // output. to avoid this, we can remove all symlinks from `entries` if `-f` is not set. i know this is kind of // confusing, but it's honestly kind of hard to explain... maybe a screenshot is better: // https://i.imgur.com/DYG7jlB.png // adding the symlink filter removes the line that's being pointed to in the image. 0u0 .filter(|e| scan_opts.follow_symlinks || !e.file_type().is_symlink()) .collect(); if probably_fatal_error { None } else { Some(entries) } } /// Initialises [`MIMEDB`] with a value dependent on the current backend. fn init_db() { cfg_if! { if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] { MIMEDB .set(mime_db::InferDb::init()) .or(Err("Failed to initialise Infer backend!")) .unwrap(); } else { MIMEDB .set(mime_db::XdgDb::init()) .or(Err("Failed to initialise XDG Mime backend!")) .unwrap(); } } }