Compare commits

...

10 Commits

Author SHA1 Message Date
Lynne Megido b6c2d75bdb
clippy: replace deref() with &*, remove into_iter 2021-11-25 08:45:20 +10:00
Lynne Megido 65a560d5f4
update new_mime_guess 2021-11-25 08:40:57 +10:00
Lynne Megido 313afe7cc1
test improvements
- fix tests on big endian 32 bit archs
- condense the recommend_ext test
- also test writablesln!
- general engoodening
2021-11-25 06:40:34 +10:00
Lynne Megido 33f4eb4135
replace occurrences of "mimetype" w/ "MIME type" 2021-11-25 06:29:27 +10:00
Lynne Megido ec10b58482
reorganise files.rs, use lazy Mimes in mime_type 2021-11-25 06:26:57 +10:00
Lynne Megido 667ee440e0
use parking_lot's rwlock, cargo update 2021-11-23 08:38:43 +10:00
Lynne Megido 44e14fbfba
updated formatting options 2021-11-06 02:43:08 +10:00
Lynne Megido 28aa3ad783
updated formatting options 2021-11-06 01:30:34 +10:00
Lynne Megido 855211f458
clean up windows hidden file code 2021-11-06 01:11:09 +10:00
Lynne Megido 5df8545906
cargo update, ignore sonarlint 2021-10-28 21:53:28 +10:00
15 changed files with 222 additions and 160 deletions

1
.gitignore vendored
View File

@ -19,3 +19,4 @@ cargo-timing*.html
/fif.exe
.idea/workspace.xml
*.sync-conflict*
.idea/sonarlint

View File

@ -4,6 +4,31 @@
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/target" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/bitflags-23acb0a9d3874edf/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/crossbeam-epoch-bbe747f70e6c82f8/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/crossbeam-utils-9309f4d80e34d8b7/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/fif-a01af1abc82b3550/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/indexmap-f70d5e1948c750ec/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/lexical-core-3a59a0b45542312b/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/libc-21832a2a55a1017a/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/log-21f459ebfa013554/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/memchr-a5ad9a9ced7a185e/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/memoffset-1f9c809cb66f47ad/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/new_mime_guess-5a19c37b0debe30e/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/nom-f20626721c72cba3/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/proc-macro-error-97f2fc25d9b7fc89/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/proc-macro-error-attr-db3651ef459829ce/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/proc-macro2-8803fbc7990cf7aa/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/rayon-137998806e417f6b/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/rayon-core-d3b7d50c097a4664/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/ryu-84c31e8e8ee65998/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/serde-8fd409d40cb77b1e/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/serde_derive-ed28d1546b1c3ad0/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/serde_json-5bd3c27125506bb8/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/syn-311c4ed26db329d6/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/unicase-1bc56b5db838db2b/out" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/parking_lot_core-f8d2e72482af1f8b/out" isTestSource="false" />
<excludeFolder url="file://$MODULE_DIR$/target" />
<excludeFolder url="file://$MODULE_DIR$/imgs" />
<excludeFolder url="file://$MODULE_DIR$/old" />

View File

@ -4,6 +4,14 @@ Dates are given in YYYY-MM-DD format - for example, the 15th of October 2021 is
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
### Changed
- Updated [`new_mime_guess`] to 4.0.0
### Fixed
- Disabled [`smartstring`] test on unsupported architectures
### Other
- Use [`parking_lot`]'s `RwLock` instead of the built-in one for a slight performance increase
## v0.4.0 - 2021-10-14
### Added
- `--fix` mode - instead of outputting a shell script or text file, fif will rename the misnamed files for you!
@ -128,7 +136,7 @@ this version of `clap`, which caused the build to fail. Also, `clap` 3 beta 4 de
- System extension set (`.dll`, `.so`, `.exe`...)
- [`infer`] backend now supports Ren'Py archive (`.rpa`) files
### Changed
- Output is now sorted: Files that couldn't be read, then files with no known mimetype, then files with no known
- Output is now sorted: Files that couldn't be read, then files with no known MIME type, then files with no known
extensions, then files with the wrong extension
- Added Apple iWork document formats to Documents extension set (`.pages`, `.key`, `.numbers`)
### Fixed
@ -160,7 +168,7 @@ this version of `clap`, which caused the build to fail. Also, `clap` 3 beta 4 de
### Added
- fif can now traverse symlinks with the `-f`/`--follow-symlinks` flag
### Changed
- Better mime type detection:
- Better MIME type detection:
- Consider "some/x-thing" and "some/thing" to be identical
- Use a patched version of mime_guess (which took a while to make 0u0;) with many more extension/type mappings
- Extensions are no longer mandatory - running fif without `-e` or `-E` will scan all files, regardless of extension
@ -278,6 +286,8 @@ Initial commit!
[`mime_guess`]: https://crates.io/crates/mime_guess
[`new_mime_guess`]: https://crates.io/crates/new_mime_guess
[`once_cell`]: https://crates.io/crates/once_cell
[`parking_lot`]: https://crates.io/crates/parking_lot
[`smartstring]: https://crates.io/crates/smartstring
[`snailquote`]: https://crates.io/crates/snailquote
[`structopt`]: https://crates.io/crates/structopt
[`xdg-mime`]: https://crates.io/crates/xdg-mime

79
Cargo.lock generated
View File

@ -190,6 +190,7 @@ dependencies = [
"new_mime_guess",
"num_cpus",
"once_cell",
"parking_lot",
"rand",
"rayon",
"serde",
@ -261,6 +262,15 @@ dependencies = [
"cfb",
]
[[package]]
name = "instant"
version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
dependencies = [
"cfg-if",
]
[[package]]
name = "itertools"
version = "0.10.1"
@ -297,9 +307,18 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.103"
version = "0.2.108"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8f7255a17a627354f321ef0055d63b898c6fb27eff628af4d1b66b7331edf6"
checksum = "8521a1b57e76b1ec69af7599e75e38e7b7fad6610f037db8c79b127201b5d119"
[[package]]
name = "lock_api"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712a4d093c9976e24e7dbca41db895dabcbac38eb5f4045393d17a95bdfb1109"
dependencies = [
"scopeguard",
]
[[package]]
name = "log"
@ -339,11 +358,10 @@ checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
[[package]]
name = "new_mime_guess"
version = "3.0.1"
version = "4.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b101054f57b10576d116bc0da7ce8b05362b4490b0d21c7748395eaaf9359047"
checksum = "bb3253391c5e7939360b2f7f27c7e7821dbc1a0bdab65884dde484fd71132764"
dependencies = [
"bitflags",
"mime",
"phf",
"phf_codegen",
@ -383,6 +401,31 @@ version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85"
[[package]]
name = "parking_lot"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
dependencies = [
"instant",
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216"
dependencies = [
"cfg-if",
"instant",
"libc",
"redox_syscall",
"smallvec",
"winapi",
]
[[package]]
name = "phf"
version = "0.10.0"
@ -424,9 +467,9 @@ dependencies = [
[[package]]
name = "ppv-lite86"
version = "0.2.10"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"
checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba"
[[package]]
name = "proc-macro-error"
@ -454,9 +497,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.30"
version = "1.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edc3358ebc67bc8b7fa0c007f945b0b18226f78437d61bec735a9eb96b61ee70"
checksum = "ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43"
dependencies = [
"unicode-xid",
]
@ -606,9 +649,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.68"
version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f690853975602e1bfe1ccbf50504d67174e3bcf340f23b5ea9992e0587a52d8"
checksum = "063bf466a64011ac24040a49009724ee60a57da1b437617ceb32e53ad61bfb19"
dependencies = [
"itoa",
"ryu",
@ -621,6 +664,12 @@ version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b"
[[package]]
name = "smallvec"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309"
[[package]]
name = "smartstring"
version = "0.2.9"
@ -632,9 +681,9 @@ dependencies = [
[[package]]
name = "snailquote"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f34b729d802f52194598858ac852c3fb3b33f6e026cd03195072ccb7bf3fc810"
checksum = "ec62a949bda7f15800481a711909f946e1204f2460f89210eaf7f57730f88f86"
dependencies = [
"thiserror",
"unicode_categories",
@ -648,9 +697,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "syn"
version = "1.0.80"
version = "1.0.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d010a1623fbd906d51d650a9916aaefc05ffa0e4053ff7fe601167f3e715d194"
checksum = "f2afee18b8beb5a596ecb4a2dce128c719b4ba399d34126b9e4396e3f9860966"
dependencies = [
"proc-macro2",
"quote",

View File

@ -29,7 +29,7 @@ json = ["serde", "serde_json"]
walkdir = "~2.3.2"
log = "0.4.14"
mime = "0.3.16"
mime_guess = { package = "new_mime_guess", features = ["phf-map"], version = "3.0.0" }
mime_guess = { package = "new_mime_guess", features = ["phf-map"], version = "4.0.0" }
snailquote = "0.3.0"
once_cell = "1.8.0"
rayon = { version = "1.5.0", optional = true }
@ -40,6 +40,7 @@ serde = { version = "1.0", features = ["derive"], optional = true }
serde_json = { version = "1.0", optional = true }
num_cpus = { version = "1.13.0", optional = true }
maplit = "1.0.2"
parking_lot = "0.11.2"
[target.'cfg(not(unix))'.dependencies]
xdg-mime = { version = "0.3.3", optional = true }

View File

@ -176,7 +176,7 @@ The five logging levels are used as follows:
| Level | Description | Example |
|-|-|-|
| error | Errors that cause fif to stop running | fif was unable to open the provided directory |
| warn | Warnings that don't cause fif to stop running | fif was unable to determine the mime type of a given file |
| warn | Warnings that don't cause fif to stop running | fif was unable to determine the MIME type of a given file |
| info | Information pertaining to fif's status | The provided directory was scanned without issue, and no files are in need of renaming |
| debug | Debug information - usually not important to end users | The list of extensions fif will consider |
| trace | Trace info - usually not important to end users | "Found 15 items to check", "Scan successful", etc. |
@ -199,10 +199,19 @@ number will be updated as follows:
when adding the ability to exclude extensions, and when fif gained the ability to output a bash script rather than a
list of invalid filenames). The MINOR version will also be bumped when increasing the MSRV.
- The PATCH version will be bumped in all other cases, including minor feature additions (in the past, this has occurred
when adding features such as more output formats and the ignore flag).
when adding features such as more output formats and the "ignore unknown extensions" flag).
If/when fif hits version 1.0, these rules will likely remain the same as they are now.
## Code style
fif is formatted with `rustfmt` using a nightly toolchain, specifically with the command `cargo +nightly fmt`. Tabs are
used for indentation, and are assumed to be two spaces wide. Lines are 120 characters wide. Braces are placed on the
same line (["OTBS"](https://en.wikipedia.org/wiki/Indentation_style#Variant:_1TBS_(OTBS)) format), except in the case of
`where` clauses in generic parameters.
For more detailed information on the formatting rules used by this project, see the configured options in
[`rustfmt.toml`](https://gitlab.com/Lynnesbian/fif/-/blob/master/rustfmt.toml).
## License
Copyright (C) 2021 Lynnesbian

View File

@ -7,3 +7,5 @@ hard_tabs = true
tab_spaces = 2
newline_style = "Unix"
group_imports = "StdExternalCrate"
array_width = 100
fn_call_width = 80

View File

@ -8,7 +8,6 @@ use std::fs::File;
use std::io::{self, Read, Seek, SeekFrom};
use std::path::Path;
use std::str::FromStr;
use std::sync::RwLock;
use cfg_if::cfg_if;
use itertools::{Either, Itertools};
@ -16,26 +15,43 @@ use log::{debug, error};
use mime::Mime;
use mime_guess::from_ext;
use once_cell::sync::Lazy;
use parking_lot::RwLock;
use walkdir::{DirEntry, WalkDir};
use crate::findings::{Findings, ScanError};
use crate::mime_db::MimeDb;
use crate::parameters::ScanOpts;
use crate::utils::APPLICATION_ZIP;
use crate::{String, MIMEDB};
/// Cache of mimetypes and their associated extensions, used by [`mime_extension_lookup()`]
/// Cache of MIME types and their associated extensions, used by [`mime_extension_lookup()`]
static MIMEXT: Lazy<RwLock<HashMap<String, Option<Vec<String>>>>> = Lazy::new(|| RwLock::new(HashMap::new()));
/// The number of bytes to read initially when identifying a file's MIME type. Used in the [`mime_type`] function.
///
/// Rather than reading the entire file all at once into a [`BUF_SIZE`] buffer, it tends to be faster to read a small
/// chunk of the file and trying to identify that, proceeding with the larger buffer if that fails. Many file formats
/// can be identified with the first few dozen bytes, so the "happy path" will likely be taken in the majority of cases.
pub const INITIAL_BUF_SIZE: usize = 128;
/// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes. Used in
/// the [`mime_type`] function.
pub const BUF_SIZE: usize = 8192;
/// A [`Mime`] representing the "application/x-ole-storage" MIME type.
static APPLICATION_X_OLE_STORAGE: Lazy<Mime> = Lazy::new(|| Mime::from_str("application/x-ole-storage").unwrap());
cfg_if! {
if #[cfg(windows)] {
/// Determines whether or not a file is hidden by checking its win32 file attributes.
pub fn is_hidden(entry: &DirEntry) -> bool {
use std::os::windows::prelude::*;
std::fs::metadata(entry.path()) // try to get metadata for file
.map_or(
false, // if getting metadata/attributes fails, assume it's not hidden
|f| f.file_attributes() & 0x2 > 0, // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants
)
use std::os::windows::prelude::*;
const FILE_ATTRIBUTE_HIDDEN: u32 = 0x2; // http://docs.microsoft.com/windows/win32/fileio/file-attribute-constants
std::fs::metadata(entry.path()) // try to get metadata for file
.map_or(
false, // if getting metadata/attributes fails, assume it's not hidden
|f| f.file_attributes() & FILE_ATTRIBUTE_HIDDEN > 0,
)
}
} else {
/// Determines whether or not a file is hidden by checking for a leading full stop.
@ -97,29 +113,29 @@ pub fn wanted_file(
/// Inspects the given entry, returning a [`Findings`] on success and a [`ScanError`] on failure.
///
/// In the event of an IO error, the returned [`ScanError`] will be of type [`ScanError::File`]. Otherwise, a
/// [`ScanError::Mime`] will be returned, meaning that the file was scanned successfully, but a mimetype could not be
/// [`ScanError::Mime`] will be returned, meaning that the file was scanned successfully, but a MIME type could not be
/// determined.
pub fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, ScanError> {
let path = entry.path();
// try to determine mimetype for this entry
// try to determine MIME type for this entry
let result = match mime_type(&*MIMEDB, path) {
// an error occurred while trying to read the file
Err(_) => return Err(ScanError::File(path)),
// the file was read successfully, but we were unable to determine its mimetype
// the file was read successfully, but we were unable to determine its MIME type
Ok(None) => return Err(ScanError::Mime(path)),
// a mimetype was found!
// a MIME type was found!
Ok(Some(result)) => result,
};
// set of known extensions for the given mimetype
// set of known extensions for the given MIME type
let known_exts = mime_extension_lookup(result.essence_str().into());
// file extension for this particular file
let entry_ext = path.extension();
let valid = match known_exts {
// there is a known set of extensions for this mimetype, and the file has an extension
// there is a known set of extensions for this MIME type, and the file has an extension
Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_string_lossy().to_lowercase().into()),
// either this file has no extension, or there is no known set of extensions for this mimetype :(
// either this file has no extension, or there is no known set of extensions for this MIME type :(
Some(_) | None => false,
};
@ -158,7 +174,7 @@ pub fn scan_from_walkdir(
chunk
.iter() // iter over the chunk, which is a slice of DirEntry structs
.map(|entry| scan_file(entry, canonical_paths))
.collect::<Vec<_>>() // TODO: is there a way to avoid having to collect here?
.collect::<Vec<_>>()
}).partition_map(|result| match result {
Ok(f) => Either::Left(f),
Err(e) => Either::Right(e),
@ -208,11 +224,7 @@ pub fn scan_directory(
// TODO: is there a way to just say `map_or(x, |y| y).thing()` instead of `map_or(x.thing(), |y| y.thing())`?
// i don't care whether i'm returning a walkdir error or an io error, i just care about whether or not it
// implements ToString (which they both do). map_or doesn't work on trait objects though :(
error!(
"{}: {}",
path,
err.io_error().map_or(err.to_string(), |e| e.to_string())
);
error!("{}: {}", path, err.io_error().map_or(err.to_string(), |e| e.to_string()));
return None;
}
e.ok()
@ -236,17 +248,7 @@ pub fn scan_directory(
}
}
/// The number of bytes to read initially.
///
/// Rather than reading the entire file all at once into a [`BUF_SIZE`] buffer, it tends to be faster to read a small
/// chunk of the file and trying to identify that, proceeding with the larger buffer if that fails. Many file formats
/// can be identified with the first few dozen bytes, so the "happy path" will likely be taken in the majority of cases.
pub const INITIAL_BUF_SIZE: usize = 128;
/// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes.
pub const BUF_SIZE: usize = 8192;
/// Tries to identify the mimetype of a file from a given path.
/// Tries to identify the MIME type of a file from a given path.
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
let mut buffer = [0; INITIAL_BUF_SIZE];
let mut file = File::open(path)?;
@ -275,17 +277,17 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
}
let r = r.filter(|mime|
// some mime types should be investigated further, reading up to BUF_SIZE even if they've been determined already
// some MIME types should be investigated further, reading up to BUF_SIZE even if they've been determined already
// one such type is XML - there's many more specific types that can be determined by reading further (such as SVG)
mime != &mime::TEXT_XML
// another is ZIP - many file formats (DOCX, ODT, JAR...) are just ZIP files with particular data structures.
// determining that a file is in one of the MS office formats in particular requires looking quite far into the
// file.
&& mime != &Mime::from_str("application/zip").unwrap()
&& mime != &*APPLICATION_ZIP
// doc/ppt/xls files are a subset of what's known as an "OLE2 compound document storage", at least according to
// shared-mime-info. if a pre-OOXML era MS office file is scanned and identified as x-ole-storage, reading further
// will allow it to be detected correctly as the appropriate filetype.
&& mime != &Mime::from_str("application/x-ole-storage").unwrap());
&& mime != &*APPLICATION_X_OLE_STORAGE);
if r.is_some() {
return Ok(r);
@ -298,15 +300,13 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
Ok(db.get_type(&buffer))
}
/// Returns a list of known extensions for this mime type, if any.
/// Returns a list of known extensions for this MIME type, if any.
/// This function uses the [`Mime`]'s "essence" rather than the [`Mime`] itself - [`mime_guess::get_mime_extensions`]
/// ignores the type suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. Passing the
/// `essence_str` (which includes the suffix) fixes this.
pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
if let Ok(cache) = MIMEXT.read() {
if let Some(exts) = cache.get(&essence) {
return exts.clone();
}
if let Some(exts) = MIMEXT.read().get(&essence) {
return exts.clone();
}
let mut exts = mime_guess::get_mime_extensions_str(essence.as_str());
@ -352,23 +352,14 @@ pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
} else if essence == "application/zip" {
// neither xdg-mime nor infer seem to be able to detect office XML files properly...
[
vec![
String::from("zip"),
String::from("docx"),
String::from("xlsx"),
String::from("pptx"),
],
vec![String::from("zip"), String::from("docx"), String::from("xlsx"), String::from("pptx")],
possible_exts,
]
.concat()
} else if essence == "application/x-ms-dos-executable" {
// .dll, .exe, and .scr files are given the same mime type... but you definitely don't want to rename one to the
// .dll, .exe, and .scr files are given the same MIME type... but you definitely don't want to rename one to the
// other!
[
vec![String::from("dll"), String::from("exe"), String::from("scr")],
possible_exts,
]
.concat()
[vec![String::from("dll"), String::from("exe"), String::from("scr")], possible_exts].concat()
} else {
possible_exts
})
@ -376,10 +367,6 @@ pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
None => None,
};
if let Ok(mut cache) = MIMEXT.write() {
cache.insert(essence, exts.clone());
exts
} else {
unreachable!()
}
MIMEXT.write().insert(essence, exts.clone());
exts
}

View File

@ -20,9 +20,9 @@ use crate::String;
pub struct Findings {
/// The location of the scanned file.
pub file: PathBuf,
/// Whether or not the file's extension is valid for its mimetype.
/// Whether or not the file's extension is valid for its MIME type.
pub valid: bool,
/// The file's mimetype.
/// The file's MIME type.
pub mime: Mime,
}
@ -79,7 +79,7 @@ impl serde::Serialize for Findings {
pub enum ScanError<'a> {
/// Something went wrong while trying to read the given file.
File(&'a Path),
/// Failed to determine the mimetype of the given file.
/// Failed to determine the MIME type of the given file.
Mime(&'a Path),
}
@ -90,7 +90,7 @@ impl<'a> Display for ScanError<'a> {
"Couldn't {} file: {}",
match self {
Self::File(_) => "read",
Self::Mime(_) => "determine mime type of",
Self::Mime(_) => "determine MIME type of",
},
match self {
Self::File(f) | Self::Mime(f) => f.to_string_lossy(),

View File

@ -136,7 +136,7 @@ pub trait FormatSteps {
match error {
// failed to read the file
ScanError::File(path) => self.unreadable(f, path)?,
// file was read successfully, but we couldn't determine a mimetype
// file was read successfully, but we couldn't determine a MIME type
ScanError::Mime(path) => self.unknown_type(f, path)?,
}
}
@ -179,14 +179,7 @@ impl FormatSteps for Shell {
fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
smart_write(
f,
writablesln![
"cat <<- '???'",
Newline,
"No known extension for ",
path,
Newline,
"???"
],
writablesln!["cat <<- '???'", Newline, "No known extension for ", path, Newline, "???"],
)
}
@ -195,14 +188,11 @@ impl FormatSteps for Shell {
}
fn unknown_type<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
smart_write(f, writablesln!["# Failed to detect mime type for ", path])
smart_write(f, writablesln!["# Failed to detect MIME type for ", path])
}
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(
f,
writablesln!["#!/usr/bin/env sh", Newline, "# ", (generated_by().as_str())],
)?;
smart_write(f, writablesln!["#!/usr/bin/env sh", Newline, "# ", (generated_by().as_str())])?;
if let Ok(working_directory) = std::env::current_dir() {
smart_write(f, writablesln!["# Run from ", (working_directory.as_path())])?;
@ -232,26 +222,14 @@ impl FormatSteps for PowerShell {
// there doesn't seem to be a way to rename the file, prompting only if the target already exists.
smart_write(
f,
writablesln![
"Rename-Item -Verbose -Path ",
from,
" -NewName ",
(to.file_name().unwrap())
],
writablesln!["Rename-Item -Verbose -Path ", from, " -NewName ", (to.file_name().unwrap())],
)
}
fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
smart_write(
f,
writablesln![
"Write-Output @'",
Newline,
"No known extension for ",
path,
Newline,
"'@"
],
writablesln!["Write-Output @'", Newline, "No known extension for ", path, Newline, "'@"],
)
}
@ -263,7 +241,7 @@ impl FormatSteps for PowerShell {
}
fn unknown_type<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
smart_write(f, writablesln!["<# Failed to detect mime type for ", path, " #>"])
smart_write(f, writablesln!["<# Failed to detect MIME type for ", path, " #>"])
}
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {

View File

@ -215,16 +215,16 @@ fn main() {
} else {
let mut buffered_stdout = BufWriter::new(stdout());
let result = match args.output_format {
if match args.output_format {
// TODO: simplify this to something like formats::write_all(args.output_format, ...)
OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &findings, &errors),
OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &findings, &errors),
#[cfg(feature = "json")]
OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &findings, &errors),
OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &findings, &errors),
};
if result.is_err() {
}
.is_err()
{
error!("Failed to write to stdout.");
exit(exitcode::IOERR);
}

View File

@ -18,7 +18,7 @@ use mime::Mime;
pub trait MimeDb {
/// Initialise the database.
fn init() -> Self;
/// Given a slice of bytes, returns the inferred mimetype, if any.
/// Given a slice of bytes, returns the inferred MIME type, if any.
fn get_type(&self, data: &[u8]) -> Option<Mime>;
}

View File

@ -319,12 +319,7 @@ impl ExtensionSet {
Self::Images => mime_guess::get_mime_extensions_str("image/*").unwrap().to_vec(),
Self::Audio => mime_guess::get_mime_extensions_str("audio/*").unwrap().to_vec(),
Self::Video => mime_guess::get_mime_extensions_str("video/*").unwrap().to_vec(),
Self::Media => [
Self::Images.extensions(),
Self::Audio.extensions(),
Self::Video.extensions(),
]
.concat(),
Self::Media => [Self::Images.extensions(), Self::Audio.extensions(), Self::Video.extensions()].concat(),
Self::Documents => vec![
"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
"pages", "key", "numbers",

View File

@ -29,13 +29,13 @@ const ZIP_BYTES: &[u8] = b"PK\x03\x04";
/// Ensure that `extension_from_path` successfully returns the extension from a set of paths.
fn get_ext() {
let ext_checks: HashMap<_, Option<&OsStr>> = hashmap![
Path::new("test.txt") => Some(OsStr::new("txt")),
Path::new("test.zip") => Some(OsStr::new("zip")),
Path::new("test.tar.gz") => Some(OsStr::new("gz")),
Path::new("test.") => Some(OsStr::new("")),
Path::new("test") => None,
Path::new(".hidden") => None,
];
Path::new("test.txt") => Some(OsStr::new("txt")),
Path::new("test.zip") => Some(OsStr::new("zip")),
Path::new("test.tar.gz") => Some(OsStr::new("gz")),
Path::new("test.") => Some(OsStr::new("")),
Path::new("test") => None,
Path::new(".hidden") => None,
];
for (path, ext) in ext_checks {
assert_eq!(path.extension(), ext);
@ -43,7 +43,7 @@ fn get_ext() {
}
#[test]
/// Ensure that the mime types for JPEG, PNG, PDF, and ZIP are detected from their magic numbers.
/// Ensure that the MIME types for JPEG, PNG, PDF, and ZIP are detected from their magic numbers.
fn detect_type() {
assert_eq!(MIMEDB.get_type(JPEG_BYTES), Some(IMAGE_JPEG));
assert_eq!(MIMEDB.get_type(PNG_BYTES), Some(IMAGE_PNG));
@ -55,23 +55,28 @@ fn detect_type() {
/// Ensure that `mime_extension_lookup` works as expected, and that the set of extensions for JPEG, PNG, PDF, and ZIP
/// contain "jpg", "png", "pdf", and "zip", respectively.
fn recommend_ext() {
assert!(mime_extension_lookup(IMAGE_JPEG.essence_str().into())
.unwrap()
.contains(&String::from("jpg")));
assert!(mime_extension_lookup(IMAGE_PNG.essence_str().into())
.unwrap()
.contains(&String::from("png")));
assert!(mime_extension_lookup(APPLICATION_PDF.essence_str().into())
.unwrap()
.contains(&String::from("pdf")));
assert!(mime_extension_lookup(APPLICATION_ZIP.essence_str().into())
.unwrap()
.contains(&String::from("zip")));
let tests = hashmap![
&IMAGE_JPEG => "jpg",
&IMAGE_PNG => "png",
&APPLICATION_PDF => "pdf",
&*APPLICATION_ZIP => "zip",
];
for (mime, ext) in tests {
assert!(
mime_extension_lookup(mime.essence_str().into())
.unwrap()
.contains(&String::from(ext)),
"mime_extension_lookup for {} didn't contain {}!",
mime.essence_str(),
ext
);
}
}
#[test]
/// Create a simple directory with some files, run `scan_directory` on it, and ensure that the files have their
/// associated mime types correctly deduced.
/// associated MIME types correctly deduced.
fn simple_directory() {
use std::borrow::Borrow;
use std::env::set_current_dir;
@ -136,19 +141,16 @@ fn simple_directory() {
// the only invalid file detected should be "wrong.jpg", which is a misnamed png file
// 1. ensure detected extension is "jpg"
assert_eq!(result.file.as_path().extension().unwrap(), OsStr::new("jpg"));
// 2. ensure detected mime type is IMAGE_PNG
// 2. ensure detected MIME type is IMAGE_PNG
assert_eq!(result.mime, IMAGE_PNG);
// 3. ensure the recommended extension for "wrong.jpg" is "png"
assert_eq!(&result.recommended_extension().unwrap(), &String::from("png"));
// 4. ensure the recommended filename for "wrong.jpg" is "wrong.png"
assert_eq!(
result.recommended_path().unwrap().file_name(),
Some(OsStr::new("wrong.png"))
);
assert_eq!(result.recommended_path().unwrap().file_name(), Some(OsStr::new("wrong.png")));
continue;
}
// check if the recommended extension for this file is in the list of known extensions for its mimetype - for
// check if the recommended extension for this file is in the list of known extensions for its MIME type - for
// example, if the file is determined to be an IMAGE_PNG, its recommended extension should be one of the extensions
// returned by `mime_extension_lookup(IMAGE_PNG)`.
assert!(mime_extension_lookup(result.mime.essence_str().into())
@ -164,8 +166,8 @@ fn simple_directory() {
.to_string_lossy()
.starts_with("test"));
// make sure the guessed mimetype is correct based on the extension of the scanned file
// because we already know that the extensions match the mimetype (as we created these files ourselves earlier in
// make sure the guessed MIME type is correct based on the extension of the scanned file
// because we already know that the extensions match the MIME type (as we created these files ourselves earlier in
// the test), all files with the "jpg" extension should be IMAGE_JPEGs, etc.
let ext = result.file.as_path().extension().unwrap();
assert_eq!(
@ -403,7 +405,7 @@ fn test_json() {
.read_to_string(&mut contents)
.expect("Failed to read from cursor to string");
// the output should contain the file's mime type
// the output should contain the file's MIME type
assert!(
contents.contains(IMAGE_JPEG.essence_str()),
"JSON output doesn't contain move command!\n===\n{}",
@ -430,15 +432,20 @@ fn media_contains_audio_video_images() {
}
#[test]
/// Ensure that the `writables!` macro produces the output it should.
/// Ensure that the `writables!` and `writablesln!` macros produce the output they should.
fn writables_is_correct() {
use fif::formats::Writable;
use fif::writables;
use fif::{writables, writablesln};
assert_eq!(
&["henlo".into(), Path::new("henlo").into(), Writable::Newline,],
writables!["henlo", (Path::new("henlo")), Newline]
);
assert_eq!(
&["henlo".into(), Path::new("henlo").into(), Writable::Newline, Writable::Newline],
writablesln!["henlo", (Path::new("henlo")), Newline]
);
}
#[test]
@ -470,10 +477,7 @@ fn verbosity() {
#[test]
/// Ensures `os_name()`'s output is the same as [`std::env::consts::OS`], capitalisation notwithstanding
fn validate_os_name() {
assert_eq!(
fif::utils::os_name().to_lowercase(),
std::env::consts::OS.to_lowercase()
);
assert_eq!(fif::utils::os_name().to_lowercase(), std::env::consts::OS.to_lowercase());
}
#[test]
@ -505,6 +509,7 @@ fn sort_findings() {
}
#[test]
#[cfg(not(all(target_endian = "big", target_pointer_width = "32")))]
/// Ensures that [`SmartString`]s don't deviate from std's Strings
fn validate_string_type() {
use std::string::String as StdString;
@ -516,6 +521,6 @@ fn validate_string_type() {
SmartString::from("A long and therefore heap-allocated string"),
StdString::from("A long and therefore heap-allocated string")
);
// uncomment if i ever update to smartstring >= 0.2.9
smartstring::validate();
}

View File

@ -39,7 +39,7 @@ pub static CLAP_LONG_VERSION: Lazy<String> = Lazy::new(|| {
.into()
});
/// A [`Mime`] representing the "application/zip" mimetype.
/// A [`Mime`] representing the "application/zip" MIME type.
pub static APPLICATION_ZIP: Lazy<Mime> = Lazy::new(|| Mime::from_str("application/zip").unwrap());
/// Returns the name of the target operating system with proper casing, like "Windows" or "macOS".