
206 lines
6.6 KiB
Raw Normal View History

// fif - File Info Fixer
// Copyright (C) 2021 Lynnesbian
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
2021-02-04 11:22:19 +00:00
mod parameters;
2021-02-05 09:15:12 +00:00
mod inspectors;
mod formats;
2021-02-04 11:22:19 +00:00
use std::path::{Path, PathBuf};
2021-02-04 11:22:19 +00:00
use walkdir::{WalkDir, DirEntry};
2021-02-05 13:34:02 +00:00
use mime_guess::Mime;
2021-02-04 11:22:19 +00:00
use smartstring::alias::String;
2021-02-05 09:24:08 +00:00
use clap::Clap;
use log::{debug, trace, info, warn};
use rayon::prelude::*;
2021-02-06 03:24:13 +00:00
use std::fmt::{self, Display};
use xdg_mime::SharedMimeInfo;
struct Findings {
file: PathBuf,
valid: bool,
mime: Mime,
impl Findings {
fn recommended_extension(&self) -> Option<String> {
.map(|extensions| extensions[0].to_owned())
2021-02-04 11:22:19 +00:00
2021-02-06 03:24:13 +00:00
/// The reasons a file scan can fail.
#[derive(Debug)]
enum ScanError {
    /// The file could not be read.
    File,
    /// The file was read, but its mime type could not be determined.
    Mime,
}

impl Display for ScanError {
    /// Writes a short, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::File => "Couldn't read file",
            Self::Mime => "Couldn't determine mime type",
        };
        f.write_str(message)
    }
}
2021-02-04 11:22:19 +00:00
/// Reports whether `entry` is hidden, using the Windows "hidden" file attribute.
///
/// If the file's metadata cannot be read, the entry is treated as not hidden.
// TODO: test if this actually works on a windows machine
#[cfg(windows)]
fn is_hidden(entry: &DirEntry) -> bool {
    use std::os::windows::prelude::*;

    // FILE_ATTRIBUTE_HIDDEN, as listed in the Win32 file attribute constants.
    const HIDDEN: u32 = 0x2;

    // `std::fs::metadata` needs a path, so hand it the entry's path rather than the entry itself.
    match std::fs::metadata(entry.path()) {
        Ok(metadata) => metadata.file_attributes() & HIDDEN > 0,
        // if getting metadata/attributes fails, assume it's not hidden
        Err(_) => false,
    }
}
2021-02-04 11:22:19 +00:00
fn is_hidden(entry: &DirEntry) -> bool {
entry.file_name().to_str().map_or(false, |f| f.starts_with('.') && f != ".")
2021-02-04 11:22:19 +00:00
2021-02-05 09:15:12 +00:00
fn wanted_file(args: &parameters::Parameters, entry: &DirEntry) -> bool {
if !args.scan_hidden && is_hidden(entry) {
// skip hidden files and directories. this check is performed first because it's very lightweight.
return false;
2021-02-04 11:22:19 +00:00
if entry.file_type().is_dir() {
// always allow directories - there's no point doing file extension matching on something that isn't a file.
2021-02-04 11:22:19 +00:00
return true;
let ext = extension_from_path(entry.path());
2021-02-04 11:22:19 +00:00
if ext.is_none() { return false } // don't scan files without extensions. TODO - this should be configurable
2021-02-04 11:22:19 +00:00
if let Some(extensions) = &args.extensions {
// if the user has specified a list of extensions to check against, make sure this file ends in one of them.
return extensions.contains(&ext.unwrap().to_lowercase().into())
2021-02-04 11:22:19 +00:00
2021-02-04 11:22:19 +00:00
/// Returns the extension of `path` as an owned string, or `None` if the path has no extension.
///
/// Extensions that are not valid Unicode are converted lossily.
fn extension_from_path(path: &Path) -> Option<String> {
    path.extension() // Get the path's extension
        .map(|e| String::from(e.to_string_lossy())) // Convert from OsStr to String
}
fn scan_file(db: &SharedMimeInfo, entry: &DirEntry) -> Result<Findings, (ScanError, PathBuf)> {
// try to determine mimetype for this entry
let result = inspectors::mime_type(&db, entry.path());
if let Err(_) = result {
// an error occurred while trying to read the file
// error!("{}: {}", entry.path().to_string_lossy(), error);
return Err((ScanError::File, entry.path().to_path_buf()));
let result = result.unwrap();
if result.is_none() {
// the file was read successfully, but we were unable to determine its mimetype
// warn!("Couldn't determine mimetype for {}", entry.path().to_string_lossy());
return Err((ScanError::Mime, entry.path().to_path_buf()));
let result = result.unwrap();
// set of known extensions for the given mimetype
let known_exts = inspectors::mime_extension_lookup(result.clone());
// file extension for this particular file
let entry_ext = extension_from_path(entry.path());
let valid = match known_exts {
// there is a known set of extensions for this mimetype, and the file has an extension
Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_lowercase().into()),
// there is a known set of extensions for this mimetype, but the file has no extension
Some(_) => false,
// there is no known set of extensions for this mimetype -- assume it's correct
None => true
Ok(Findings {
file: entry.path().to_path_buf(),
valid, // make this a function
mime: result,
fn scan_from_walkdir(db: &SharedMimeInfo, entries: Vec<DirEntry>) -> Vec<Result<Findings, (ScanError, PathBuf)>> {
#[cfg(feature = "multi-threaded")] {
// rather than using a standard par_iter, split the entries into chunks of 16 first.
// this allows each spawned thread to handle 16 files before before closing, rather than creating a new thread for
// each file. this leads to a pretty substantial speedup that i'm pretty substantially happy about 0u0
.par_chunks(16) // split into chunks of 16
.flat_map(|chunk| chunk // return Vec<...> instead of Chunk<Vec<...>>
.iter() // iter over the chunk, which is a slice of DirEntry structs
.map(|entry| scan_file(db, entry))
#[cfg(not(feature = "multi-threaded"))] {
.map(|entry: &DirEntry | scan_file(db, entry))
2021-02-04 11:22:19 +00:00
fn main() {
2021-02-05 09:24:08 +00:00
let args = parameters::Parameters::parse();
2021-02-05 13:34:02 +00:00
let mut builder = env_logger::Builder::from_default_env();
// .format(|buf, r| writeln!(buf, "{} - {}", r.level(), r.args()))
2021-02-06 03:24:13 +00:00
.format_module_path(false) // don't include module in logs, as it's not necessary
.format_timestamp(None) // don't include timestamps (unnecessary, and the feature flag is disabled anyway)
.target(env_logger::Target::Stdout) // log to stdout rather than stderr
2021-02-05 13:34:02 +00:00
2021-02-04 11:22:19 +00:00
let db = xdg_mime::SharedMimeInfo::new();
2021-02-05 13:34:02 +00:00
debug!("Iterating directory: {:?}", args.dirs);
2021-02-04 11:22:19 +00:00
let stepper = WalkDir::new(&args.dirs).into_iter();
2021-02-05 09:15:12 +00:00
let entries: Vec<DirEntry> = stepper
.filter_entry(|e| wanted_file(&args, e)) // filter out unwanted files
.filter_map(|e| e.ok()) // ignore anything that fails, e.g. files we don't have read access on
.filter(|e| !e.file_type().is_dir()) // remove directories from the final list
2021-02-05 09:15:12 +00:00
2021-02-05 13:49:36 +00:00
trace!("Found {} items to check", entries.len());
let results = scan_from_walkdir(&db, entries);
for result in results {
match result {
2021-02-05 13:49:36 +00:00
Ok(r) => {
if !r.valid {
info!("{:?} should have file extension {}", r.file, r.recommended_extension().unwrap())
} else {
trace!("{:?} is totally fine", r.file)
2021-02-06 03:24:13 +00:00
Err(f) => warn!("{:#?}: Error 0uo - {}", f.1, f.0)
2021-02-04 11:22:19 +00:00