// fif - File Info Fixer // Copyright (C) 2021 Lynnesbian // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . mod parameters; mod inspectors; use std::path::{Path, PathBuf}; use walkdir::{WalkDir, DirEntry}; use mime_guess::Mime; use smartstring::alias::String; use clap::Clap; use log::{debug, trace, info, warn, error}; use rayon::prelude::*; struct Findings { file: PathBuf, valid: bool, mime: Mime, } impl Findings { fn recommended_extension(&self) -> Option { inspectors::mime_extension_lookup(self.mime.clone()) .map(|extensions| extensions[0].to_owned()) } } // TODO: test if this actually works on a windows machine #[cfg(windows)] fn is_hidden(entry: &DirEntry) -> bool { use std::os::windows::prelude::*; use std::fs; fs::metadata(entry) // try to get metadata for file .map_or( false, // if getting metadata/attributes fails, assume it's not hidden |f| f.file_attributes() & 0x2 // flag for hidden - https://docs.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants ) } #[cfg(not(windows))] fn is_hidden(entry: &DirEntry) -> bool { entry.file_name().to_str().map_or(false, |f| f.starts_with('.') && f != ".") } fn wanted_file(args: ¶meters::Parameters, entry: &DirEntry) -> bool { if !args.scan_hidden && is_hidden(entry) { // skip hidden files and directories. this check is performed first because it's very lightweight. return false; } if entry.file_type().is_dir() { // always allow directories - there's no point doing file extension matching on something that isn't a file. return true; } let ext = extension_from_path(entry.path()); if ext.is_none() { return false } // don't scan files without extensions. TODO - this should be configurable if let Some(extensions) = &args.extensions { // if the user has specified a list of extensions to check against, make sure this file ends in one of them. return extensions.contains(&ext.unwrap().to_lowercase().into()) } true } fn extension_from_path(path: &Path) -> Option { path.extension(). // Get the path's extension map(|e| String::from(e.to_string_lossy())) // Convert from OsStr to String } fn main() { let args = parameters::Parameters::parse(); let mut builder = env_logger::Builder::from_default_env(); builder // .format(|buf, r| writeln!(buf, "{} - {}", r.level(), r.args())) .format_module_path(false) .format_timestamp(None) .init(); let db = xdg_mime::SharedMimeInfo::new(); debug!("Iterating directory: {:?}", args.dirs); let stepper = WalkDir::new(&args.dirs).into_iter(); let entries: Vec = stepper .filter_entry(|e| wanted_file(&args, e)) // filter out unwanted files .filter_map(|e| e.ok()) // ignore anything that fails, e.g. files we don't have read access on .filter(|e| !e.file_type().is_dir()) // remove directories from the final list .collect(); info!("Found {} items to check", entries.len()); let results: Vec> = entries .par_iter() .map(|entry: &DirEntry | { // try to determine mimetype for this entry let result = inspectors::mime_type(&db, entry.path()); if let Err(error) = result { // an error occurred while trying to read the file error!("{}: {}", entry.path().to_string_lossy(), error); return Err(entry.path().to_path_buf()); } let result = result.unwrap(); if result.is_none() { // the file was read successfully, but we were unable to determine its mimetype warn!("Couldn't determine mimetype for {}", entry.path().to_string_lossy()); return Err(entry.path().to_path_buf()); } let result = result.unwrap(); // set of known extensions for the given mimetype let known_exts = inspectors::mime_extension_lookup(result.clone()); // file extension for this particular file let entry_ext = extension_from_path(entry.path()); let valid = match known_exts { // there is a known set of extensions for this mimetype, and the file has an extension Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_lowercase().into()), // there is a known set of extensions for this mimetype, but the file has no extension Some(_) => false, // there is no known set of extensions for this mimetype -- assume it's correct None => true }; Ok(Findings { file: entry.path().to_path_buf(), valid, // make this a function mime: result, }) }) .collect(); for result in results { match result { Ok(r) => trace!("{:#?}: {:#?} - {:?} - {:?}", r.file, r.mime, r.valid, r.recommended_extension()), Err(f) => warn!("{:#?}: Error 0uo", f) } } debug!("Done"); }