Compare commits

..

No commits in common. "741048839cf8688d2a8694fda40876118b7a3647" and "da998d019a6119967234c1d3f102728d6d0c2d47" have entirely different histories.

15 changed files with 216 additions and 180 deletions

View file

@ -6,7 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
## Unreleased ## Unreleased
### Added ### Added
- AIFF (Audio Interchange File Format, a PCM audio format like WAV) detection to [`infer`] backend - AIFF (Audio Interchange File Format, a PCM audio format like WAV) detection to [`infer`]
- `--version` output now includes the (short) hash of the git commit fif was built from - `--version` output now includes the (short) hash of the git commit fif was built from
### Other ### Other
- Refactoring - split fif into `main.rs` and `lib.rs`, moved file-related functionality (directory scanning, etc.) into - Refactoring - split fif into `main.rs` and `lib.rs`, moved file-related functionality (directory scanning, etc.) into
@ -16,8 +16,6 @@ files module, removed string module, etc.
"Features" heading into "Added" and "Changed" sections, renaming "Bugfixes" to "Fixed", and removing the "Features" heading into "Added" and "Changed" sections, renaming "Bugfixes" to "Fixed", and removing the
headings that (pointlessly?) previously divided the changelog into v0.3, v0.2, and v0.1 headings that (pointlessly?) previously divided the changelog into v0.3, v0.2, and v0.1
- A few minor grammar tweaks and reorganisations - A few minor grammar tweaks and reorganisations
- Replaced [`cached`] dependency with a simple HashMap-backed store
- Replace all occurrences of [`once_cell`]'s `OnceCell` with equivalent `Lazy`-based implementations
## v0.3.6 - 2021-08-16 ## v0.3.6 - 2021-08-16
### Other ### Other
@ -246,12 +244,10 @@ Initial commit!
- Only supported flags are `-e` (specify extensions) and `-s` (scan hidden files) - Only supported flags are `-e` (specify extensions) and `-s` (scan hidden files)
<!-- links --> <!-- links -->
[`cached`]: https://crates.io/crates/cached [`xdg-mime`]: https://crates.io/crates/xdg-mime
[`structopt`]: https://crates.io/crates/structopt
[`clap`]: https://crates.io/crates/clap [`clap`]: https://crates.io/crates/clap
[`infer`]: https://crates.io/crates/infer [`infer`]: https://crates.io/crates/infer
[`mime_guess`]: https://crates.io/crates/mime_guess [`mime_guess`]: https://crates.io/crates/mime_guess
[`new_mime_guess`]: https://crates.io/crates/new_mime_guess [`new_mime_guess`]: https://crates.io/crates/new_mime_guess
[`once_cell`]: https://crates.io/crates/once_cell
[`snailquote`]: https://crates.io/crates/snailquote [`snailquote`]: https://crates.io/crates/snailquote
[`structopt`]: https://crates.io/crates/structopt
[`xdg-mime`]: https://crates.io/crates/xdg-mime

21
Cargo.lock generated
View file

@ -1,5 +1,7 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3
[[package]] [[package]]
name = "arrayvec" name = "arrayvec"
version = "0.5.2" version = "0.5.2"
@ -35,6 +37,16 @@ version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "cached"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b99e696f7b2696ed5eae0d462a9eeafaea111d99e39b2c8ceb418afe1013bcfc"
dependencies = [
"hashbrown 0.9.1",
"once_cell",
]
[[package]] [[package]]
name = "cfb" name = "cfb"
version = "0.4.0" version = "0.4.0"
@ -176,6 +188,7 @@ name = "fif"
version = "0.3.6" version = "0.3.6"
dependencies = [ dependencies = [
"bitflags", "bitflags",
"cached",
"cfg-if", "cfg-if",
"clap", "clap",
"clap_derive", "clap_derive",
@ -215,6 +228,12 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "hashbrown"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
[[package]] [[package]]
name = "hashbrown" name = "hashbrown"
version = "0.11.2" version = "0.11.2"
@ -246,7 +265,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"hashbrown", "hashbrown 0.11.2",
] ]
[[package]] [[package]]

View file

@ -5,12 +5,14 @@ version = "0.3.6"
authors = ["Lynnesbian <lynne@bune.city>"] authors = ["Lynnesbian <lynne@bune.city>"]
edition = "2018" edition = "2018"
license = "GPL-3.0-or-later" license = "GPL-3.0-or-later"
rust-version = "1.43.0" # this can actually go as low as 1.41.0 after removing cached, but i'll leave it 1.43.0 rust-version = "1.43.0" # cached breaks on 1.42.0, i think it needs https://github.com/rust-lang/rust/pull/67642/
repository = "https://gitlab.com/Lynnesbian/fif" repository = "https://gitlab.com/Lynnesbian/fif"
readme = "README.md" readme = "README.md"
keywords = ["mime", "mimetype", "utilities", "tools"] keywords = ["mime", "mimetype", "utilities", "tools"]
categories = ["command-line-utilities"] categories = ["command-line-utilities"]
exclude = [".idea/", "*.toml", "!Cargo.toml", "*.sh", "*.py", "*.yml", "*.md", ".mailmap", "pkg/"] exclude = [".idea/", "*.toml", "!Cargo.toml", "*.sh", "*.py", "*.yml", "*.md", ".mailmap", "pkg/"]
#resolver = "2"
#license-file = "LICENSE"
[badges] [badges]
maintenance = { status = "experimental" } maintenance = { status = "experimental" }
@ -73,6 +75,10 @@ version = "0.9.0"
default-features = false default-features = false
features = ["termcolor", "atty"] features = ["termcolor", "atty"]
[dependencies.cached]
version = "0.25.0"
default-features = false
[dev-dependencies] [dev-dependencies]
tempfile = "3.2.0" tempfile = "3.2.0"
rand = "0.8.3" rand = "0.8.3"

View file

@ -57,10 +57,10 @@ cargo install --locked fif
To update, simply re-run the `install` command, or use a tool like [cargo-update To update, simply re-run the `install` command, or use a tool like [cargo-update
](https://github.com/nabijaczleweli/cargo-update), which can update crates installed via `cargo install`. ](https://github.com/nabijaczleweli/cargo-update), which can update crates installed via `cargo install`.
On macOS, `fif` can be installed through [MacPorts](https://www.macports.org): On macOS, `fif` can now be installed using [MacPorts](https://www.macports.org), via the following steps:
```bash ```bash
sudo port selfupdate $ sudo port selfupdate
sudo port install fif $ sudo port install fif
``` ```
### Cargo Features ### Cargo Features

Binary file not shown.

Before

Width:  |  Height:  |  Size: 61 KiB

After

Width:  |  Height:  |  Size: 12 KiB

View file

@ -3,4 +3,3 @@ fn_single_line = true
hard_tabs = true hard_tabs = true
tab_spaces = 2 tab_spaces = 2
newline_style = "Unix" newline_style = "Unix"
group_imports = "StdExternalCrate"

View file

@ -1,26 +1,21 @@
//! File handling - scanning, detecting MIME types, and so on.
use std::collections::{BTreeSet, HashMap};
use std::fs::File;
use std::io::{self, Read, Seek, SeekFrom};
use std::path::Path;
use std::str::FromStr;
use std::sync::RwLock;
use cfg_if::cfg_if;
use log::{debug, error};
use mime::Mime;
use mime_guess::from_ext;
use once_cell::sync::Lazy;
use walkdir::{DirEntry, WalkDir};
use crate::findings::{Findings, ScanError}; use crate::findings::{Findings, ScanError};
use crate::mime_db::MimeDb; use crate::mime_db::MimeDb;
use crate::parameters::ScanOpts; use crate::parameters::ScanOpts;
use crate::{String, MIMEDB}; use crate::{String, MIMEDB};
/// Cache of mimetypes and their associated extensions, used by [`mime_extension_lookup()`] use std::collections::BTreeSet;
static MIMEXT: Lazy<RwLock<HashMap<String, Option<Vec<String>>>>> = Lazy::new(|| RwLock::new(HashMap::new())); use std::fs::File;
use std::io;
use std::io::{Read, Seek, SeekFrom};
use std::path::Path;
use std::str::FromStr;
use cached::cached;
use cfg_if::cfg_if;
use log::{debug, error};
use mime::Mime;
use mime_guess::from_ext;
use walkdir::{DirEntry, WalkDir};
cfg_if! { cfg_if! {
if #[cfg(windows)] { if #[cfg(windows)] {
@ -98,7 +93,7 @@ pub fn wanted_file(
pub fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, ScanError> { pub fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, ScanError> {
let path = entry.path(); let path = entry.path();
// try to determine mimetype for this entry // try to determine mimetype for this entry
let result = match mime_type(&*MIMEDB, path) { let result = match mime_type(MIMEDB.get().unwrap(), path) {
// an error occurred while trying to read the file // an error occurred while trying to read the file
Err(_) => return Err(ScanError::File(path)), Err(_) => return Err(ScanError::File(path)),
// the file was read successfully, but we were unable to determine its mimetype // the file was read successfully, but we were unable to determine its mimetype
@ -260,85 +255,73 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
Ok(db.get_type(&buffer)) Ok(db.get_type(&buffer))
} }
// Returns a list of known extensions for this mime type, if any. cached! {
// This function uses the [Mime]'s "essence" rather than the [Mime] itself - mime_guess::get_mime_extensions ignores MIMEXT;
// the type suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. Passing the fn mime_extension_lookup(essence: String) -> Option<Vec<String>> = {
// essence_str (which includes the suffix) fixes this. // Returns a list of known extensions for this mime type, if any.
pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> { // This function uses the [Mime]'s "essence" rather than the [Mime] itself - mime_guess::get_mime_extensions ignores
if let Ok(cache) = MIMEXT.read() { // the type suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. Passing the
if let Some(exts) = cache.get(&essence) { // essence_str (which includes the suffix) fixes this.
return exts.clone(); // ↑ this is supposed to be a doc comment, but the cached! macro doesn't support that... i would switch to the
} // proc_macro version of cached, but it has a huge number of deps :c
}
let essence = essence; let essence = essence.as_str();
let mut exts = mime_guess::get_mime_extensions_str(essence.as_str()); let mut exts = mime_guess::get_mime_extensions_str(essence);
if exts.is_none() { if exts.is_none() {
// no matches :c // no matches :c
// mime_guess' database isn't exactly perfect... there are a lot of times where the db will return "some/x-thing" // mime_guess' database isn't exactly perfect... there are a lot of times where the db will return "some/x-thing"
// but mime_guess only understands "some/thing", or vice-versa. // but mime_guess only understands "some/thing", or vice-versa.
// so, if there appear to be no extensions, try replacing "some/x-thing" with "some/thing", or "some/thing" with // so, if there appear to be no extensions, try replacing "some/x-thing" with "some/thing", or "some/thing" with
// "some/x-thing". // "some/x-thing".
if essence.contains("/x-") { if essence.contains("/x-") {
// replace e.g. "application/x-gzip" with "application/gzip" // replace e.g. "application/x-gzip" with "application/gzip"
exts = mime_guess::get_mime_extensions_str(&essence.replace("/x-", "/")); exts = mime_guess::get_mime_extensions_str(&essence.replace("/x-", "/"));
} else {
// replace e.g. "video/mp2t" with "video/x-mp2t"
exts = mime_guess::get_mime_extensions_str(&essence.replace("/", "/x-"));
}
}
let exts = match exts {
Some(exts) => {
let possible_exts: Vec<String> = exts.iter().map(|e| String::from(*e)).collect();
Some(if essence == mime::IMAGE_JPEG.essence_str() {
// possible_exts starts with "jpe", because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg are
// far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can
// add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif.
[vec![String::from("jpg")], possible_exts].concat()
} else if essence == mime::TEXT_XML.essence_str() || essence == "application/xml" {
// a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should
// (in my opinion) be "xml".
// there's also another problem: SVG files can easily be misidentified as XML files, because they usually
// *are* valid XML - the more whitespace and comments an SVG file begins with, the more bytes must be read
// before it's possible to determine that it's an SVG rather than an XML file. to "fix" this, we can add "svg"
// as a valid extension for XML files, ensuring that SVG files misidentified as XML will still be considered
// to have valid extensions.
// TODO: if a file is detected as application/xml, but it has an extension like "xht" which corresponds to
// "application/xhtml+xml", let it through - in other words, if it's identified as application/xml, but its
// extension is classed as application/*+xml, consider it OK
[vec![String::from("xml"), String::from("svg")], possible_exts].concat()
} else if essence == "application/msword" {
// classic office files considered harmful
vec![String::from("doc"), String::from("xls"), String::from("ppt")]
} else if essence == "application/zip" {
// neither xdg-mime nor infer seem to be able to detect office XML files properly...
[
vec![
String::from("zip"),
String::from("docx"),
String::from("xlsx"),
String::from("pptx"),
],
possible_exts,
]
.concat()
} else if essence == "application/x-ms-dos-executable" {
// both .dll and .exe files are given the same mime type... but you definitely don't want to rename one to the
// other!
[vec![String::from("dll"), String::from("exe")], possible_exts].concat()
} else { } else {
possible_exts // replace e.g. "video/mp2t" with "video/x-mp2t"
}) exts = mime_guess::get_mime_extensions_str(&essence.replace("/", "/x-"));
}
} }
None => None,
};
if let Ok(mut cache) = MIMEXT.write() { match exts {
cache.insert(essence, exts.clone()); Some(exts) => {
exts let possible_exts: Vec<String> = exts.iter().map(|e| String::from(*e)).collect();
} else {
unreachable!() Some(if essence == mime::IMAGE_JPEG.essence_str() {
// possible_exts starts with "jpe", because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg are
// far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can
// add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif.
[vec![String::from("jpg")], possible_exts].concat()
} else if essence == mime::TEXT_XML.essence_str() || essence == "application/xml" {
// a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should
// (in my opinion) be "xml".
// there's also another problem: SVG files can easily be misidentified as XML files, because they usually
// *are* valid XML - the more whitespace and comments an SVG file begins with, the more bytes must be read
// before it's possible to determine that it's an SVG rather than an XML file. to "fix" this, we can add "svg"
// as a valid extension for XML files, ensuring that SVG files misidentified as XML will still be considered
// to have valid extensions.
// TODO: if a file is detected as application/xml, but it has an extension like "xht" which corresponds to
// "application/xhtml+xml", let it through - in other words, if it's identified as application/xml, but its
// extension is classed as application/*+xml, consider it OK
[vec![String::from("xml"), String::from("svg")], possible_exts].concat()
} else if essence == "application/msword" {
// classic office files considered harmful
vec![String::from("doc"), String::from("xls"), String::from("ppt")]
} else if essence == "application/zip" {
// neither xdg-mime nor infer seem to be able to detect office XML files properly...
[vec![String::from("zip"), String::from("docx"), String::from("xlsx"), String::from("pptx")], possible_exts].concat()
} else if essence == "application/x-ms-dos-executable" {
// both .dll and .exe files are given the same mime type... but you definitely don't want to rename one to the
// other!
[vec![String::from("dll"), String::from("exe")], possible_exts].concat()
} else {
possible_exts
})
},
None => None
}
} }
} }

View file

@ -1,16 +1,14 @@
//! The [`Findings`] and [`ScanError`] structs, used for conveying whether a given file was able to be scanned and
//! whether its MIME type could be inferred.
use std::fmt::{Display, Formatter};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use mime::Mime; use mime::Mime;
#[cfg(feature = "json")]
use serde::{ser::SerializeStruct, Serializer};
use crate::files::mime_extension_lookup; use crate::files::mime_extension_lookup;
use crate::String; use crate::String;
#[cfg(feature = "json")]
use serde::{ser::SerializeStruct, Serializer};
use std::fmt::{Display, Formatter};
/// Information about a scanned file. /// Information about a scanned file.
#[derive(Ord, PartialOrd, Eq, PartialEq)] #[derive(Ord, PartialOrd, Eq, PartialEq)]
pub struct Findings { pub struct Findings {

View file

@ -11,7 +11,7 @@ use itertools::{Either, Itertools};
use snailquote::escape; use snailquote::escape;
use crate::findings::ScanError; use crate::findings::ScanError;
use crate::utils::CLAP_LONG_VERSION; use crate::utils::clap_long_version;
use crate::Findings; use crate::Findings;
use crate::String; use crate::String;
@ -74,7 +74,7 @@ impl<'a> From<&'a OsStr> for Writable<'a> {
fn from(p: &'a OsStr) -> Writable<'a> { Writable::Path(p.as_ref()) } fn from(p: &'a OsStr) -> Writable<'a> { Writable::Path(p.as_ref()) }
} }
fn generated_by() -> String { format!("Generated by fif {}", CLAP_LONG_VERSION.as_str()).into() } fn generated_by() -> String { format!("Generated by fif {}", clap_long_version()).into() }
pub fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> { pub fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
// ehhhh // ehhhh

View file

@ -1,8 +1,5 @@
#![forbid(unsafe_code)] #![forbid(unsafe_code)]
#![warn(trivial_casts, unused_lifetimes, unused_qualifications)] #![warn(trivial_casts, unused_lifetimes, unused_qualifications)]
//! This library consists of all of the things fif needs to run. It only exists as a library to separate code, and to
//! make testing a bit easier. I don't recommend using this as a library for your crate, as it may have breaking
//! changes without incrementing the major version, as it's really only meant to be a place for fif's internals to live.
pub mod files; pub mod files;
pub mod findings; pub mod findings;
@ -11,12 +8,12 @@ pub mod mime_db;
pub mod parameters; pub mod parameters;
pub mod utils; pub mod utils;
use cfg_if::cfg_if;
use once_cell::sync::Lazy;
use crate::findings::Findings; use crate::findings::Findings;
use crate::mime_db::MimeDb; use crate::mime_db::MimeDb;
use cfg_if::cfg_if;
use once_cell::sync::OnceCell;
cfg_if! { cfg_if! {
if #[cfg(not(all(target_endian = "big", target_pointer_width = "32")))] { if #[cfg(not(all(target_endian = "big", target_pointer_width = "32")))] {
// most architectures // most architectures
@ -29,10 +26,27 @@ cfg_if! {
cfg_if! { cfg_if! {
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] { if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
/// A [Lazy] holding an instance of [mime_db::MimeDb]. /// A [OnceCell] holding an instance of [mime_db::MimeDb].
pub static MIMEDB: Lazy<mime_db::InferDb> = Lazy::new(crate::mime_db::InferDb::init); pub static MIMEDB: OnceCell<mime_db::InferDb> = OnceCell::new();
} else { } else {
/// A [Lazy] holding an instance of [mime_db::MimeDb]. /// A [OnceCell] holding an instance of [mime_db::MimeDb].
pub static MIMEDB: Lazy<mime_db::XdgDb> = Lazy::new(crate::mime_db::XdgDb::init); pub static MIMEDB: OnceCell<mime_db::XdgDb> = OnceCell::new();
}
}
/// Initialises [`MIMEDB`] with a value dependent on the current backend.
pub fn init_db() {
cfg_if! {
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
MIMEDB
.set(crate::mime_db::InferDb::init())
.or(Err("Failed to initialise Infer backend!"))
.unwrap();
} else {
MIMEDB
.set(crate::mime_db::XdgDb::init())
.or(Err("Failed to initialise XDG Mime backend!"))
.unwrap();
}
} }
} }

View file

@ -21,12 +21,13 @@ use std::io::{stdout, BufWriter, Write};
use std::process::exit; use std::process::exit;
use clap::Clap; use clap::Clap;
use log::{debug, error, info, trace, warn, Level};
use fif::files::{scan_directory, scan_from_walkdir}; use fif::files::{scan_directory, scan_from_walkdir};
use fif::formats::Format; use fif::formats::Format;
use fif::parameters::OutputFormat; use fif::parameters::OutputFormat;
use fif::utils::{os_name, CLAP_LONG_VERSION}; use fif::utils::{clap_long_version, os_name};
use fif::{formats, parameters}; use fif::{formats, init_db, parameters};
use log::{debug, error, info, trace, warn, Level};
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
@ -54,10 +55,12 @@ fn main() {
trace!( trace!(
"fif {}, running on {} {}", "fif {}, running on {} {}",
CLAP_LONG_VERSION.as_str(), clap_long_version(),
std::env::consts::ARCH, std::env::consts::ARCH,
os_name() os_name()
); );
trace!("Initialising mimetype database");
init_db();
debug!("Iterating directory: {:?}", args.dir); debug!("Iterating directory: {:?}", args.dir);

View file

@ -4,9 +4,7 @@ use cfg_if::cfg_if;
use mime::Mime; use mime::Mime;
pub trait MimeDb { pub trait MimeDb {
/// Initialise the database.
fn init() -> Self; fn init() -> Self;
/// Given a slice of bytes, returns the inferred mimetype, if any.
fn get_type(&self, data: &[u8]) -> Option<Mime>; fn get_type(&self, data: &[u8]) -> Option<Mime>;
} }

View file

@ -1,13 +1,11 @@
//! [Clap] struct used to parse command line arguments. //! [Clap] struct used to parse command line arguments.
use std::collections::BTreeSet; use crate::utils::{clap_long_version, clap_version};
use std::path::PathBuf; use crate::String as StringType;
use cfg_if::cfg_if; use cfg_if::cfg_if;
use clap::{AppSettings, Clap}; use clap::{AppSettings, Clap};
use std::collections::BTreeSet;
use crate::utils::{CLAP_LONG_VERSION, CLAP_VERSION}; use std::path::PathBuf;
use crate::String as StringType;
cfg_if! { cfg_if! {
if #[cfg(windows)] { if #[cfg(windows)] {
@ -35,12 +33,12 @@ pub enum OutputFormat {
#[derive(Clap, Debug)] #[derive(Clap, Debug)]
#[allow(clippy::struct_excessive_bools)] #[allow(clippy::struct_excessive_bools)]
#[clap( #[clap(
version = CLAP_VERSION.as_str(), version = clap_version(),
long_version = CLAP_LONG_VERSION.as_str(), long_version = clap_long_version(),
author = option_env!("CARGO_PKG_AUTHORS").unwrap_or("Lynnesbian"), author = option_env!("CARGO_PKG_AUTHORS").unwrap_or("Lynnesbian"),
about = option_env!("CARGO_PKG_DESCRIPTION").unwrap_or("File Info Fixer"), about = option_env!("CARGO_PKG_DESCRIPTION").unwrap_or("File Info Fixer"),
before_help = "Copyright © 2021 Lynnesbian under the GPL3 (or later) License.", before_help = "Copyright © 2021 Lynnesbian under the GPL3 (or later) License.",
after_long_help = "Copyright © 2021 Lynnesbian\n\ before_long_help = "Copyright © 2021 Lynnesbian\n\
This program is free software: you can redistribute it and/or modify \ This program is free software: you can redistribute it and/or modify \
it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 \ it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 \
of the License, or (at your option) any later version.", of the License, or (at your option) any later version.",
@ -258,7 +256,7 @@ pub enum ExtensionSet {
#[clap(alias = "videos")] #[clap(alias = "videos")]
Video, Video,
/// Extensions used for media file formats. This acts as a combination of the [Images](ExtensionSet::Images), /// Extensions used for media file formats. This acts as a combination of the [Images](ExtensionSet::Images),
/// [Audio](ExtensionSet::Audio) and [Video](ExtensionSet::Video) variants. /// [Audio](ExtensionSet::Audio) and [Videos](ExtensionSet::Videos) variants.
Media, Media,
/// Extensions used for document file formats, such as `pdf`, `odt`, `docx`, etc. /// Extensions used for document file formats, such as `pdf`, `odt`, `docx`, etc.
Documents, Documents,

View file

@ -1,23 +1,36 @@
use std::collections::HashMap; use fif::files::{mime_extension_lookup, BUF_SIZE};
use std::ffi::OsStr; use fif::files::{scan_directory, scan_from_walkdir};
use std::path::{Path, PathBuf};
use clap::Clap;
use fif::files::{mime_extension_lookup, scan_directory, scan_from_walkdir, BUF_SIZE};
use fif::findings::Findings; use fif::findings::Findings;
use fif::formats::{Format, PowerShell, Shell}; use fif::formats::{Format, PowerShell, Shell};
use fif::mime_db::MimeDb; use fif::mime_db::MimeDb;
use fif::{String, MIMEDB}; use fif::String;
use crate::parameters::Parameters;
use clap::Clap;
use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG}; use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use crate::parameters::ExtensionSet; use crate::parameters::ExtensionSet;
use crate::parameters::Parameters; use std::collections::HashMap;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF"; const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
const PNG_BYTES: &[u8] = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"; const PNG_BYTES: &[u8] = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
const PDF_BYTES: &[u8] = b"%PDF-"; const PDF_BYTES: &[u8] = b"%PDF-";
const ZIP_BYTES: &[u8] = b"PK\x03\x04"; const ZIP_BYTES: &[u8] = b"PK\x03\x04";
cfg_if::cfg_if! {
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
fn get_mime_db() -> fif::mime_db::InferDb {
fif::mime_db::InferDb::init()
}
} else {
fn get_mime_db() -> fif::mime_db::XdgDb {
fif::mime_db::XdgDb::init()
}
}
}
fn application_zip() -> Mime { fn application_zip() -> Mime {
use std::str::FromStr; use std::str::FromStr;
Mime::from_str("application/zip").unwrap() Mime::from_str("application/zip").unwrap()
@ -42,10 +55,11 @@ fn get_ext() {
#[test] #[test]
/// Ensure that the mime types for JPEG, PNG, PDF, and ZIP are detected from their magic numbers. /// Ensure that the mime types for JPEG, PNG, PDF, and ZIP are detected from their magic numbers.
fn detect_type() { fn detect_type() {
assert_eq!(MIMEDB.get_type(JPEG_BYTES), Some(IMAGE_JPEG)); let db = get_mime_db();
assert_eq!(MIMEDB.get_type(PNG_BYTES), Some(IMAGE_PNG)); assert_eq!(db.get_type(JPEG_BYTES), Some(IMAGE_JPEG));
assert_eq!(MIMEDB.get_type(PDF_BYTES), Some(APPLICATION_PDF)); assert_eq!(db.get_type(PNG_BYTES), Some(IMAGE_PNG));
assert_eq!(MIMEDB.get_type(ZIP_BYTES), Some(application_zip())); assert_eq!(db.get_type(PDF_BYTES), Some(APPLICATION_PDF));
assert_eq!(db.get_type(ZIP_BYTES), Some(application_zip()));
} }
#[test] #[test]
@ -70,15 +84,13 @@ fn recommend_ext() {
/// Create a simple directory with some files, run `scan_directory` on it, and ensure that the files have their /// Create a simple directory with some files, run `scan_directory` on it, and ensure that the files have their
/// associated mime types correctly deduced. /// associated mime types correctly deduced.
fn simple_directory() { fn simple_directory() {
use crate::parameters::ScanOpts;
use std::borrow::Borrow; use std::borrow::Borrow;
use std::env::set_current_dir; use std::env::set_current_dir;
use std::fs::{canonicalize, File}; use std::fs::{canonicalize, File};
use std::io::Write; use std::io::Write;
use tempfile::tempdir; use tempfile::tempdir;
use crate::parameters::ScanOpts;
// set of files to scan. all but the last files have magic numbers corresponding to their extension, except for // set of files to scan. all but the last files have magic numbers corresponding to their extension, except for
// "wrong.jpg", which is actually a png. // "wrong.jpg", which is actually a png.
let mut files = HashMap::new(); let mut files = HashMap::new();
@ -114,6 +126,9 @@ fn simple_directory() {
// there should be one file missing: "ignore.fake_ext" // there should be one file missing: "ignore.fake_ext"
assert_eq!(entries.len(), files.len() - 1); assert_eq!(entries.len(), files.len() - 1);
// initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present.
crate::init_db();
let results = scan_from_walkdir(&entries, false); let results = scan_from_walkdir(&entries, false);
let canonical_results = scan_from_walkdir(&entries, true); let canonical_results = scan_from_walkdir(&entries, true);
assert_eq!(results.len(), canonical_results.len()); assert_eq!(results.len(), canonical_results.len());
@ -303,13 +318,14 @@ fn rejects_bad_args() {
/// mime database somehow panics or hangs. /// mime database somehow panics or hangs.
fn identify_random_bytes() { fn identify_random_bytes() {
use rand::RngCore; use rand::RngCore;
let db = get_mime_db();
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
let mut bytes: [u8; BUF_SIZE * 2] = [0; BUF_SIZE * 2]; let mut bytes: [u8; BUF_SIZE * 2] = [0; BUF_SIZE * 2];
let mut results: HashMap<Mime, i32> = HashMap::new(); let mut results: HashMap<Mime, i32> = HashMap::new();
for _ in 1..1000 { for _ in 1..1000 {
rng.fill_bytes(&mut bytes); rng.fill_bytes(&mut bytes);
if let Some(detected_type) = MIMEDB.get_type(&bytes) { if let Some(detected_type) = db.get_type(&bytes) {
*results.entry(detected_type).or_insert(0) += 1; *results.entry(detected_type).or_insert(0) += 1;
} }
} }
@ -363,9 +379,8 @@ fn outputs_move_commands() {
#[cfg(feature = "json")] #[cfg(feature = "json")]
/// Ensure JSON output is valid. /// Ensure JSON output is valid.
fn test_json() { fn test_json() {
use std::io::Read;
use crate::formats::Json; use crate::formats::Json;
use std::io::Read;
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
let entries = vec![Ok(Findings { let entries = vec![Ok(Findings {
file: Path::new("misnamed_file.png").to_path_buf(), file: Path::new("misnamed_file.png").to_path_buf(),
@ -451,9 +466,8 @@ fn verbosity() {
#[test] #[test]
/// Ensures that smart strings don't deviate from std's Strings /// Ensures that smart strings don't deviate from std's Strings
fn validate_string_type() { fn validate_string_type() {
use std::string::String as StdString;
use fif::String as SmartString; use fif::String as SmartString;
use std::string::String as StdString;
assert_eq!(SmartString::new(), StdString::new()); assert_eq!(SmartString::new(), StdString::new());
assert_eq!(SmartString::from("smol"), StdString::from("smol")); assert_eq!(SmartString::from("smol"), StdString::from("smol"));
assert_eq!( assert_eq!(

View file

@ -1,7 +1,5 @@
//! Various minor utilities.
use cfg_if::cfg_if; use cfg_if::cfg_if;
use once_cell::sync::Lazy; use once_cell::sync::OnceCell;
use crate::String; use crate::String;
@ -18,20 +16,30 @@ cfg_if! {
} }
} }
/// The version defined in Cargo.toml, prefixed with a v (e.g. "v0.3.1") // the version and long_version given to clap need to be a &str, but we want to use format!, which returns a String.
pub(crate) static CLAP_VERSION: Lazy<String> = Lazy::new(|| String::from("v") + VERSION.unwrap_or("???")); // we can't just do something like `version = format!(...).as_str()`, because clap needs to know that the version will
// live for a given lifetime, which we need to satisfy by making our String static. of course, you can't use format!
// statically, so we need to use a OnceCell or similar to get around this.
static CLAP_VERSION: OnceCell<String> = OnceCell::new();
static CLAP_LONG_VERSION: OnceCell<String> = OnceCell::new();
/// Similar to [`CLAP_VERSION`], followed by the chosen backend and abbreviated git commit hash in parentheses - For /// Sets [`CLAP_VERSION`] to be the version defined in Cargo.toml, prefixed with a v (e.g. "v0.3.1"), then returns it as
/// example, "v0.3.6 (XDG-Mime backend, commit #043e097)" /// an str.
pub static CLAP_LONG_VERSION: Lazy<String> = Lazy::new(|| { pub fn clap_version() -> &'static str { CLAP_VERSION.get_or_init(|| String::from("v") + VERSION.unwrap_or("???")) }
format!(
"v{} ({} backend, commit #{})", /// Sets [`CLAP_LONG_VERSION`] to be similar to [`CLAP_VERSION`], followed by the chosen backend and abbreviated git
VERSION.unwrap_or("???"), /// commit hash in parentheses (e.g. "v0.3.6 (XDG-Mime backend, commit #043e097)"), then returns it as an str.
BACKEND, pub fn clap_long_version() -> &'static str {
option_env!("GIT_SHA").unwrap_or("???") CLAP_LONG_VERSION.get_or_init(|| {
) format!(
.into() "v{} ({} backend, commit #{})",
}); VERSION.unwrap_or("???"),
BACKEND,
option_env!("GIT_SHA").unwrap_or("???")
)
.into()
})
}
/// Returns the name of the target operating system with proper casing, like "Windows" or "macOS". /// Returns the name of the target operating system with proper casing, like "Windows" or "macOS".
#[allow(clippy::option_map_unit_fn)] #[allow(clippy::option_map_unit_fn)]