Compare commits

...

9 Commits

Author SHA1 Message Date
Lynne Megido 741048839c
replace remaining OnceCells w/ Lazys, simplifying
i prefer Lazy because it simplifies initialisation by a lot, allowing for the removal of stuff like `init_db()` and the `clap_version()` functions
2021-09-25 00:54:52 +10:00
Lynne Megido 0c31277191
new screenshot, minor readme changes 2021-09-25 00:24:01 +10:00
Lynne Megido 6718f879f7
use Lazy instead of OnceCell for MIMEDB 2021-09-24 23:57:01 +10:00
Lynne Megido c2e6738f6b
use Lazy instead of OnceCell for MIMEDB 2021-09-24 23:53:02 +10:00
Lynne Megido b368be3e6b
reorder links in CHANGELOG.md 2021-09-24 22:53:33 +10:00
Lynne Megido 3e753c2a0b
replace cached dep with custom hashmap thing
i benchmarked it with hyperfine and in terms of performance it's pretty much identical, with a slight (fraction of a percent) advantage to my implementation
2021-09-24 22:08:28 +10:00
Lynne Megido 97b0a6edaa
replace cached dep with custom hashmap thing
i benchmarked it with hyperfine and in terms of performance it's pretty much identical, with a slight (fraction of a percent) advantage to my implementation
2021-09-24 22:05:12 +10:00
Lynne Megido 3d41183f1c
move license to the end of `--help` output 2021-09-24 18:12:27 +10:00
Lynne Megido 8a7cbca461
cargo fmt, rearranging use statements
by setting `group_imports = "StdExternalCrate"` in `rustfmt.toml`, cargo fmt/rustfmt rearranges your use statements into three groups:
- std/core/alloc,
- followed by external crates,
- followed by self/super/crate
2021-09-24 18:11:25 +10:00
15 changed files with 186 additions and 222 deletions

View File

@ -6,7 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
## Unreleased
### Added
- AIFF (Audio Interchange File Format, a PCM audio format like WAV) detection to [`infer`]
- AIFF (Audio Interchange File Format, a PCM audio format like WAV) detection to [`infer`] backend
- `--version` output now includes the (short) hash of the git commit fif was built from
### Other
- Refactoring - split fif into `main.rs` and `lib.rs`, moved file-related functionality (directory scanning, etc.) into
@ -16,6 +16,8 @@ files module, removed string module, etc.
"Features" heading into "Added" and "Changed" sections, renaming "Bugfixes" to "Fixed", and removing the
headings that (pointlessly?) previously divided the changelog into v0.3, v0.2, and v0.1
- A few minor grammar tweaks and reorganisations
- Replaced [`cached`] dependency with a simple HashMap-backed store
- Replaced all occurrences of [`once_cell`]'s `OnceCell` with equivalent `Lazy`-based implementations
## v0.3.6 - 2021-08-16
### Other
@ -244,10 +246,12 @@ Initial commit!
- Only supported flags are `-e` (specify extensions) and `-s` (scan hidden files)
<!-- links -->
[`xdg-mime`]: https://crates.io/crates/xdg-mime
[`structopt`]: https://crates.io/crates/structopt
[`cached`]: https://crates.io/crates/cached
[`clap`]: https://crates.io/crates/clap
[`infer`]: https://crates.io/crates/infer
[`mime_guess`]: https://crates.io/crates/mime_guess
[`new_mime_guess`]: https://crates.io/crates/new_mime_guess
[`once_cell`]: https://crates.io/crates/once_cell
[`snailquote`]: https://crates.io/crates/snailquote
[`structopt`]: https://crates.io/crates/structopt
[`xdg-mime`]: https://crates.io/crates/xdg-mime

21
Cargo.lock generated
View File

@ -1,7 +1,5 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "arrayvec"
version = "0.5.2"
@ -37,16 +35,6 @@ version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"
[[package]]
name = "cached"
version = "0.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b99e696f7b2696ed5eae0d462a9eeafaea111d99e39b2c8ceb418afe1013bcfc"
dependencies = [
"hashbrown 0.9.1",
"once_cell",
]
[[package]]
name = "cfb"
version = "0.4.0"
@ -188,7 +176,6 @@ name = "fif"
version = "0.3.6"
dependencies = [
"bitflags",
"cached",
"cfg-if",
"clap",
"clap_derive",
@ -228,12 +215,6 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "hashbrown"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
[[package]]
name = "hashbrown"
version = "0.11.2"
@ -265,7 +246,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5"
dependencies = [
"autocfg",
"hashbrown 0.11.2",
"hashbrown",
]
[[package]]

View File

@ -5,14 +5,12 @@ version = "0.3.6"
authors = ["Lynnesbian <lynne@bune.city>"]
edition = "2018"
license = "GPL-3.0-or-later"
rust-version = "1.43.0" # cached breaks on 1.42.0, i think it needs https://github.com/rust-lang/rust/pull/67642/
rust-version = "1.43.0" # this can actually go as low as 1.41.0 after removing cached, but i'll leave it 1.43.0
repository = "https://gitlab.com/Lynnesbian/fif"
readme = "README.md"
keywords = ["mime", "mimetype", "utilities", "tools"]
categories = ["command-line-utilities"]
exclude = [".idea/", "*.toml", "!Cargo.toml", "*.sh", "*.py", "*.yml", "*.md", ".mailmap", "pkg/"]
#resolver = "2"
#license-file = "LICENSE"
[badges]
maintenance = { status = "experimental" }
@ -75,10 +73,6 @@ version = "0.9.0"
default-features = false
features = ["termcolor", "atty"]
[dependencies.cached]
version = "0.25.0"
default-features = false
[dev-dependencies]
tempfile = "3.2.0"
rand = "0.8.3"
@ -94,4 +88,4 @@ opt-level = 3
opt-level = 3
[package.metadata]
msrv = "1.43.0"
msrv = "1.43.0"

View File

@ -57,10 +57,10 @@ cargo install --locked fif
To update, simply re-run the `install` command, or use a tool like [cargo-update
](https://github.com/nabijaczleweli/cargo-update), which can update crates installed via `cargo install`.
On macOS, `fif` can now be installed using [MacPorts](https://www.macports.org), via the following steps:
On macOS, `fif` can be installed through [MacPorts](https://www.macports.org):
```bash
$ sudo port selfupdate
$ sudo port install fif
sudo port selfupdate
sudo port install fif
```
### Cargo Features

Binary file not shown.

Before

Width:  |  Height:  |  Size: 12 KiB

After

Width:  |  Height:  |  Size: 61 KiB

View File

@ -2,4 +2,5 @@ max_width = 120
fn_single_line = true
hard_tabs = true
tab_spaces = 2
newline_style = "Unix"
newline_style = "Unix"
group_imports = "StdExternalCrate"

View File

@ -1,21 +1,26 @@
//! File handling - scanning, detecting MIME types, and so on.
use std::collections::{BTreeSet, HashMap};
use std::fs::File;
use std::io::{self, Read, Seek, SeekFrom};
use std::path::Path;
use std::str::FromStr;
use std::sync::RwLock;
use cfg_if::cfg_if;
use log::{debug, error};
use mime::Mime;
use mime_guess::from_ext;
use once_cell::sync::Lazy;
use walkdir::{DirEntry, WalkDir};
use crate::findings::{Findings, ScanError};
use crate::mime_db::MimeDb;
use crate::parameters::ScanOpts;
use crate::{String, MIMEDB};
use std::collections::BTreeSet;
use std::fs::File;
use std::io;
use std::io::{Read, Seek, SeekFrom};
use std::path::Path;
use std::str::FromStr;
use cached::cached;
use cfg_if::cfg_if;
use log::{debug, error};
use mime::Mime;
use mime_guess::from_ext;
use walkdir::{DirEntry, WalkDir};
/// Cache of mimetypes and their associated extensions, used by [`mime_extension_lookup()`]
///
/// Keys are MIME essence strings. A value of `None` records that no extensions were found for that essence, so
/// unsuccessful lookups are remembered as well as successful ones.
static MIMEXT: Lazy<RwLock<HashMap<String, Option<Vec<String>>>>> = Lazy::new(|| RwLock::new(HashMap::new()));
cfg_if! {
if #[cfg(windows)] {
@ -93,7 +98,7 @@ pub fn wanted_file(
pub fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, ScanError> {
let path = entry.path();
// try to determine mimetype for this entry
let result = match mime_type(MIMEDB.get().unwrap(), path) {
let result = match mime_type(&*MIMEDB, path) {
// an error occurred while trying to read the file
Err(_) => return Err(ScanError::File(path)),
// the file was read successfully, but we were unable to determine its mimetype
@ -255,73 +260,85 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
Ok(db.get_type(&buffer))
}
cached! {
MIMEXT;
fn mime_extension_lookup(essence: String) -> Option<Vec<String>> = {
// Returns a list of known extensions for this mime type, if any.
// This function uses the [Mime]'s "essence" rather than the [Mime] itself - mime_guess::get_mime_extensions ignores
// the type suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. Passing the
// essence_str (which includes the suffix) fixes this.
// ↑ this is supposed to be a doc comment, but the cached! macro doesn't support that... i would switch to the
// proc_macro version of cached, but it has a huge number of deps :c
let essence = essence.as_str();
let mut exts = mime_guess::get_mime_extensions_str(essence);
if exts.is_none() {
// no matches :c
// mime_guess' database isn't exactly perfect... there are a lot of times where the db will return "some/x-thing"
// but mime_guess only understands "some/thing", or vice-versa.
// so, if there appear to be no extensions, try replacing "some/x-thing" with "some/thing", or "some/thing" with
// "some/x-thing".
if essence.contains("/x-") {
// replace e.g. "application/x-gzip" with "application/gzip"
exts = mime_guess::get_mime_extensions_str(&essence.replace("/x-", "/"));
} else {
// replace e.g. "video/mp2t" with "video/x-mp2t"
exts = mime_guess::get_mime_extensions_str(&essence.replace("/", "/x-"));
}
}
match exts {
Some(exts) => {
let possible_exts: Vec<String> = exts.iter().map(|e| String::from(*e)).collect();
Some(if essence == mime::IMAGE_JPEG.essence_str() {
// possible_exts starts with "jpe", because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg are
// far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can
// add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif.
[vec![String::from("jpg")], possible_exts].concat()
} else if essence == mime::TEXT_XML.essence_str() || essence == "application/xml" {
// a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should
// (in my opinion) be "xml".
// there's also another problem: SVG files can easily be misidentified as XML files, because they usually
// *are* valid XML - the more whitespace and comments an SVG file begins with, the more bytes must be read
// before it's possible to determine that it's an SVG rather than an XML file. to "fix" this, we can add "svg"
// as a valid extension for XML files, ensuring that SVG files misidentified as XML will still be considered
// to have valid extensions.
// TODO: if a file is detected as application/xml, but it has an extension like "xht" which corresponds to
// "application/xhtml+xml", let it through - in other words, if it's identified as application/xml, but its
// extension is classed as application/*+xml, consider it OK
[vec![String::from("xml"), String::from("svg")], possible_exts].concat()
} else if essence == "application/msword" {
// classic office files considered harmful
vec![String::from("doc"), String::from("xls"), String::from("ppt")]
} else if essence == "application/zip" {
// neither xdg-mime nor infer seem to be able to detect office XML files properly...
[vec![String::from("zip"), String::from("docx"), String::from("xlsx"), String::from("pptx")], possible_exts].concat()
} else if essence == "application/x-ms-dos-executable" {
// both .dll and .exe files are given the same mime type... but you definitely don't want to rename one to the
// other!
[vec![String::from("dll"), String::from("exe")], possible_exts].concat()
} else {
possible_exts
})
},
None => None
/// Returns a list of known extensions for this mime type, if any.
///
/// This function uses the [`Mime`]'s "essence" rather than the [`Mime`] itself - `mime_guess::get_mime_extensions`
/// ignores the type suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. Passing
/// the `essence_str` (which includes the suffix) fixes this.
///
/// Results - including "no known extensions" (`None`) - are memoised in the `MIMEXT` cache. The cache is strictly
/// best-effort: if its lock has been poisoned, the lookup is still performed and returned, it just isn't read from or
/// written to the cache.
pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
	// builds a list consisting of `preferred` followed by everything in `known`, without cloning `known`'s elements
	fn prepend(preferred: &[&str], known: Vec<String>) -> Vec<String> {
		preferred.iter().copied().map(String::from).chain(known).collect()
	}

	// fast path: this essence has been looked up before
	if let Ok(cache) = MIMEXT.read() {
		if let Some(exts) = cache.get(&essence) {
			return exts.clone();
		}
	}

	let mut exts = mime_guess::get_mime_extensions_str(essence.as_str());
	if exts.is_none() {
		// no matches :c
		// mime_guess' database isn't exactly perfect... there are a lot of times where the db will return "some/x-thing"
		// but mime_guess only understands "some/thing", or vice-versa.
		// so, if there appear to be no extensions, try replacing "some/x-thing" with "some/thing", or "some/thing" with
		// "some/x-thing".
		if essence.contains("/x-") {
			// replace e.g. "application/x-gzip" with "application/gzip"
			exts = mime_guess::get_mime_extensions_str(&essence.replace("/x-", "/"));
		} else {
			// replace e.g. "video/mp2t" with "video/x-mp2t"
			exts = mime_guess::get_mime_extensions_str(&essence.replace("/", "/x-"));
		}
	}

	let exts = exts.map(|exts| {
		let possible_exts: Vec<String> = exts.iter().map(|e| String::from(*e)).collect();

		if essence == mime::IMAGE_JPEG.essence_str() {
			// possible_exts starts with "jpe", because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg are
			// far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can
			// add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif.
			prepend(&["jpg"], possible_exts)
		} else if essence == mime::TEXT_XML.essence_str() || essence == "application/xml" {
			// a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should
			// (in my opinion) be "xml".
			// there's also another problem: SVG files can easily be misidentified as XML files, because they usually
			// *are* valid XML - the more whitespace and comments an SVG file begins with, the more bytes must be read
			// before it's possible to determine that it's an SVG rather than an XML file. to "fix" this, we can add "svg"
			// as a valid extension for XML files, ensuring that SVG files misidentified as XML will still be considered
			// to have valid extensions.
			// TODO: if a file is detected as application/xml, but it has an extension like "xht" which corresponds to
			// "application/xhtml+xml", let it through - in other words, if it's identified as application/xml, but its
			// extension is classed as application/*+xml, consider it OK
			prepend(&["xml", "svg"], possible_exts)
		} else if essence == "application/msword" {
			// classic office files considered harmful
			vec![String::from("doc"), String::from("xls"), String::from("ppt")]
		} else if essence == "application/zip" {
			// neither xdg-mime nor infer seem to be able to detect office XML files properly...
			prepend(&["zip", "docx", "xlsx", "pptx"], possible_exts)
		} else if essence == "application/x-ms-dos-executable" {
			// both .dll and .exe files are given the same mime type... but you definitely don't want to rename one to the
			// other!
			prepend(&["dll", "exe"], possible_exts)
		} else {
			possible_exts
		}
	});

	// remember the result (even if it's `None`) for next time. a poisoned lock only means that another thread panicked
	// while holding it - that's no reason to panic here too, so just skip caching in that case, mirroring how the read
	// path above treats a poisoned lock.
	if let Ok(mut cache) = MIMEXT.write() {
		cache.insert(essence, exts.clone());
	}

	exts
}

View File

@ -1,14 +1,16 @@
//! The [`Findings`] and [`ScanError`] structs, used for conveying whether a given file was able to be scanned and
//! whether its MIME type could be inferred.
use std::fmt::{Display, Formatter};
use std::path::{Path, PathBuf};
use mime::Mime;
#[cfg(feature = "json")]
use serde::{ser::SerializeStruct, Serializer};
use crate::files::mime_extension_lookup;
use crate::String;
#[cfg(feature = "json")]
use serde::{ser::SerializeStruct, Serializer};
use std::fmt::{Display, Formatter};
/// Information about a scanned file.
#[derive(Ord, PartialOrd, Eq, PartialEq)]
pub struct Findings {

View File

@ -11,7 +11,7 @@ use itertools::{Either, Itertools};
use snailquote::escape;
use crate::findings::ScanError;
use crate::utils::clap_long_version;
use crate::utils::CLAP_LONG_VERSION;
use crate::Findings;
use crate::String;
@ -74,7 +74,7 @@ impl<'a> From<&'a OsStr> for Writable<'a> {
fn from(p: &'a OsStr) -> Writable<'a> { Writable::Path(p.as_ref()) }
}
fn generated_by() -> String { format!("Generated by fif {}", clap_long_version()).into() }
/// Builds the "Generated by fif <version>" string, using [`CLAP_LONG_VERSION`] as the version text.
fn generated_by() -> String { format!("Generated by fif {}", CLAP_LONG_VERSION.as_str()).into() }
pub fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
// ehhhh

View File

@ -1,5 +1,8 @@
#![forbid(unsafe_code)]
#![warn(trivial_casts, unused_lifetimes, unused_qualifications)]
//! This library consists of all of the things fif needs to run. It only exists as a library to separate code, and to
//! make testing a bit easier. I don't recommend using this as a library for your crate, as it may have breaking
//! changes without incrementing the major version, as it's really only meant to be a place for fif's internals to live.
pub mod files;
pub mod findings;
@ -8,12 +11,12 @@ pub mod mime_db;
pub mod parameters;
pub mod utils;
use cfg_if::cfg_if;
use once_cell::sync::Lazy;
use crate::findings::Findings;
use crate::mime_db::MimeDb;
use cfg_if::cfg_if;
use once_cell::sync::OnceCell;
cfg_if! {
if #[cfg(not(all(target_endian = "big", target_pointer_width = "32")))] {
// most architectures
@ -26,27 +29,10 @@ cfg_if! {
cfg_if! {
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
/// A [OnceCell] holding an instance of [mime_db::MimeDb].
pub static MIMEDB: OnceCell<mime_db::InferDb> = OnceCell::new();
/// A [Lazy] holding an instance of [mime_db::MimeDb].
pub static MIMEDB: Lazy<mime_db::InferDb> = Lazy::new(crate::mime_db::InferDb::init);
} else {
/// A [OnceCell] holding an instance of [mime_db::MimeDb].
pub static MIMEDB: OnceCell<mime_db::XdgDb> = OnceCell::new();
}
}
/// Initialises [`MIMEDB`] with a value dependent on the current backend.
pub fn init_db() {
cfg_if! {
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
MIMEDB
.set(crate::mime_db::InferDb::init())
.or(Err("Failed to initialise Infer backend!"))
.unwrap();
} else {
MIMEDB
.set(crate::mime_db::XdgDb::init())
.or(Err("Failed to initialise XDG Mime backend!"))
.unwrap();
}
/// A [Lazy] holding an instance of [mime_db::MimeDb].
pub static MIMEDB: Lazy<mime_db::XdgDb> = Lazy::new(crate::mime_db::XdgDb::init);
}
}

View File

@ -21,13 +21,12 @@ use std::io::{stdout, BufWriter, Write};
use std::process::exit;
use clap::Clap;
use log::{debug, error, info, trace, warn, Level};
use fif::files::{scan_directory, scan_from_walkdir};
use fif::formats::Format;
use fif::parameters::OutputFormat;
use fif::utils::{clap_long_version, os_name};
use fif::{formats, init_db, parameters};
use fif::utils::{os_name, CLAP_LONG_VERSION};
use fif::{formats, parameters};
use log::{debug, error, info, trace, warn, Level};
#[cfg(test)]
mod tests;
@ -55,12 +54,10 @@ fn main() {
trace!(
"fif {}, running on {} {}",
clap_long_version(),
CLAP_LONG_VERSION.as_str(),
std::env::consts::ARCH,
os_name()
);
trace!("Initialising mimetype database");
init_db();
debug!("Iterating directory: {:?}", args.dir);

View File

@ -4,7 +4,9 @@ use cfg_if::cfg_if;
use mime::Mime;
/// Common interface for a MIME type database: something that can be constructed with [`MimeDb::init`] and then asked
/// to infer a mimetype from a file's leading bytes with [`MimeDb::get_type`].
pub trait MimeDb {
/// Initialise the database.
fn init() -> Self;
/// Given a slice of bytes, returns the inferred mimetype, if any.
fn get_type(&self, data: &[u8]) -> Option<Mime>;
}

View File

@ -1,12 +1,14 @@
//! [Clap] struct used to parse command line arguments.
use crate::utils::{clap_long_version, clap_version};
use crate::String as StringType;
use cfg_if::cfg_if;
use clap::{AppSettings, Clap};
use std::collections::BTreeSet;
use std::path::PathBuf;
use cfg_if::cfg_if;
use clap::{AppSettings, Clap};
use crate::utils::{CLAP_LONG_VERSION, CLAP_VERSION};
use crate::String as StringType;
cfg_if! {
if #[cfg(windows)] {
const DEFAULT_FORMAT: &str = "powershell";
@ -33,12 +35,12 @@ pub enum OutputFormat {
#[derive(Clap, Debug)]
#[allow(clippy::struct_excessive_bools)]
#[clap(
version = clap_version(),
long_version = clap_long_version(),
version = CLAP_VERSION.as_str(),
long_version = CLAP_LONG_VERSION.as_str(),
author = option_env!("CARGO_PKG_AUTHORS").unwrap_or("Lynnesbian"),
about = option_env!("CARGO_PKG_DESCRIPTION").unwrap_or("File Info Fixer"),
before_help = "Copyright © 2021 Lynnesbian under the GPL3 (or later) License.",
before_long_help = "Copyright © 2021 Lynnesbian\n\
after_long_help = "Copyright © 2021 Lynnesbian\n\
This program is free software: you can redistribute it and/or modify \
it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 \
of the License, or (at your option) any later version.",
@ -256,7 +258,7 @@ pub enum ExtensionSet {
#[clap(alias = "videos")]
Video,
/// Extensions used for media file formats. This acts as a combination of the [Images](ExtensionSet::Images),
/// [Audio](ExtensionSet::Audio) and [Videos](ExtensionSet::Videos) variants.
/// [Audio](ExtensionSet::Audio) and [Video](ExtensionSet::Video) variants.
Media,
/// Extensions used for document file formats, such as `pdf`, `odt`, `docx`, etc.
Documents,

View File

@ -1,36 +1,23 @@
use fif::files::{mime_extension_lookup, BUF_SIZE};
use fif::files::{scan_directory, scan_from_walkdir};
use fif::findings::Findings;
use fif::formats::{Format, PowerShell, Shell};
use fif::mime_db::MimeDb;
use fif::String;
use crate::parameters::Parameters;
use clap::Clap;
use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use crate::parameters::ExtensionSet;
use std::collections::HashMap;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use clap::Clap;
use fif::files::{mime_extension_lookup, scan_directory, scan_from_walkdir, BUF_SIZE};
use fif::findings::Findings;
use fif::formats::{Format, PowerShell, Shell};
use fif::mime_db::MimeDb;
use fif::{String, MIMEDB};
use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use crate::parameters::ExtensionSet;
use crate::parameters::Parameters;
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
const PNG_BYTES: &[u8] = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
const PDF_BYTES: &[u8] = b"%PDF-";
const ZIP_BYTES: &[u8] = b"PK\x03\x04";
cfg_if::cfg_if! {
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
fn get_mime_db() -> fif::mime_db::InferDb {
fif::mime_db::InferDb::init()
}
} else {
fn get_mime_db() -> fif::mime_db::XdgDb {
fif::mime_db::XdgDb::init()
}
}
}
fn application_zip() -> Mime {
use std::str::FromStr;
Mime::from_str("application/zip").unwrap()
@ -55,11 +42,10 @@ fn get_ext() {
#[test]
/// Ensure that the mime types for JPEG, PNG, PDF, and ZIP are detected from their magic numbers.
fn detect_type() {
let db = get_mime_db();
assert_eq!(db.get_type(JPEG_BYTES), Some(IMAGE_JPEG));
assert_eq!(db.get_type(PNG_BYTES), Some(IMAGE_PNG));
assert_eq!(db.get_type(PDF_BYTES), Some(APPLICATION_PDF));
assert_eq!(db.get_type(ZIP_BYTES), Some(application_zip()));
assert_eq!(MIMEDB.get_type(JPEG_BYTES), Some(IMAGE_JPEG));
assert_eq!(MIMEDB.get_type(PNG_BYTES), Some(IMAGE_PNG));
assert_eq!(MIMEDB.get_type(PDF_BYTES), Some(APPLICATION_PDF));
assert_eq!(MIMEDB.get_type(ZIP_BYTES), Some(application_zip()));
}
#[test]
@ -84,13 +70,15 @@ fn recommend_ext() {
/// Create a simple directory with some files, run `scan_directory` on it, and ensure that the files have their
/// associated mime types correctly deduced.
fn simple_directory() {
use crate::parameters::ScanOpts;
use std::borrow::Borrow;
use std::env::set_current_dir;
use std::fs::{canonicalize, File};
use std::io::Write;
use tempfile::tempdir;
use crate::parameters::ScanOpts;
// set of files to scan. all but the last files have magic numbers corresponding to their extension, except for
// "wrong.jpg", which is actually a png.
let mut files = HashMap::new();
@ -126,9 +114,6 @@ fn simple_directory() {
// there should be one file missing: "ignore.fake_ext"
assert_eq!(entries.len(), files.len() - 1);
// initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present.
crate::init_db();
let results = scan_from_walkdir(&entries, false);
let canonical_results = scan_from_walkdir(&entries, true);
assert_eq!(results.len(), canonical_results.len());
@ -318,14 +303,13 @@ fn rejects_bad_args() {
/// mime database somehow panics or hangs.
fn identify_random_bytes() {
use rand::RngCore;
let db = get_mime_db();
let mut rng = rand::thread_rng();
let mut bytes: [u8; BUF_SIZE * 2] = [0; BUF_SIZE * 2];
let mut results: HashMap<Mime, i32> = HashMap::new();
for _ in 1..1000 {
rng.fill_bytes(&mut bytes);
if let Some(detected_type) = db.get_type(&bytes) {
if let Some(detected_type) = MIMEDB.get_type(&bytes) {
*results.entry(detected_type).or_insert(0) += 1;
}
}
@ -379,8 +363,9 @@ fn outputs_move_commands() {
#[cfg(feature = "json")]
/// Ensure JSON output is valid.
fn test_json() {
use crate::formats::Json;
use std::io::Read;
use crate::formats::Json;
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
let entries = vec![Ok(Findings {
file: Path::new("misnamed_file.png").to_path_buf(),
@ -466,8 +451,9 @@ fn verbosity() {
#[test]
/// Ensures that smart strings don't deviate from std's Strings
fn validate_string_type() {
use fif::String as SmartString;
use std::string::String as StdString;
use fif::String as SmartString;
assert_eq!(SmartString::new(), StdString::new());
assert_eq!(SmartString::from("smol"), StdString::from("smol"));
assert_eq!(

View File

@ -1,5 +1,7 @@
//! Various minor utilities.
use cfg_if::cfg_if;
use once_cell::sync::OnceCell;
use once_cell::sync::Lazy;
use crate::String;
@ -16,30 +18,20 @@ cfg_if! {
}
}
// the version and long_version given to clap need to be a &str, but we want to use format!, which returns a String.
// we can't just do something like `version = format!(...).as_str()`, because clap needs to know that the version will
// live for a given lifetime, which we need to satisfy by making our String static. of course, you can't use format!
// statically, so we need to use a OnceCell or similar to get around this.
static CLAP_VERSION: OnceCell<String> = OnceCell::new();
static CLAP_LONG_VERSION: OnceCell<String> = OnceCell::new();
/// The version defined in Cargo.toml, prefixed with a v (e.g. "v0.3.1"). If `VERSION` is `None`, this is "v???".
///
/// The version given to clap needs to be a `&str` that lives long enough, but the text has to be assembled at runtime,
/// which can't be done in a plain `static` initialiser. Storing it in a [`Lazy`] builds the string on first access and
/// keeps it alive for the rest of the program.
pub(crate) static CLAP_VERSION: Lazy<String> = Lazy::new(|| String::from("v") + VERSION.unwrap_or("???"));
/// Sets [`CLAP_VERSION`] to be the version defined in Cargo.toml, prefixed with a v (e.g. "v0.3.1"), then returns it as
/// an str.
pub fn clap_version() -> &'static str { CLAP_VERSION.get_or_init(|| String::from("v") + VERSION.unwrap_or("???")) }
/// Sets [`CLAP_LONG_VERSION`] to be similar to [`CLAP_VERSION`], followed by the chosen backend and abbreviated git
/// commit hash in parentheses (e.g. "v0.3.6 (XDG-Mime backend, commit #043e097)"), then returns it as an str.
pub fn clap_long_version() -> &'static str {
CLAP_LONG_VERSION.get_or_init(|| {
format!(
"v{} ({} backend, commit #{})",
VERSION.unwrap_or("???"),
BACKEND,
option_env!("GIT_SHA").unwrap_or("???")
)
.into()
})
}
/// Similar to [`CLAP_VERSION`], followed by the chosen backend and abbreviated git commit hash in parentheses - for
/// example, "v0.3.6 (XDG-Mime backend, commit #043e097)".
///
/// Both the version and the commit hash fall back to "???" when unavailable: the former when `VERSION` is `None`, the
/// latter when the `GIT_SHA` environment variable wasn't set at compile time.
pub static CLAP_LONG_VERSION: Lazy<String> = Lazy::new(|| {
format!(
"v{} ({} backend, commit #{})",
VERSION.unwrap_or("???"),
BACKEND,
option_env!("GIT_SHA").unwrap_or("???")
)
// `format!` produces a std `String`; convert it into the `String` type this static is declared with
.into()
});
/// Returns the name of the target operating system with proper casing, like "Windows" or "macOS".
#[allow(clippy::option_map_unit_fn)]