Compare commits
No commits in common. "b6c2d75bdb555542e8f6e0d7313602fbec77f209" and "672cc8da908146f62029125e1d4deec58725c8d7" have entirely different histories.
b6c2d75bdb
...
672cc8da90
15 changed files with 160 additions and 222 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -19,4 +19,3 @@ cargo-timing*.html
|
|||
/fif.exe
|
||||
.idea/workspace.xml
|
||||
*.sync-conflict*
|
||||
.idea/sonarlint
|
||||
|
|
|
@ -4,31 +4,6 @@
|
|||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/bitflags-23acb0a9d3874edf/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/crossbeam-epoch-bbe747f70e6c82f8/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/crossbeam-utils-9309f4d80e34d8b7/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/fif-a01af1abc82b3550/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/indexmap-f70d5e1948c750ec/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/lexical-core-3a59a0b45542312b/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/libc-21832a2a55a1017a/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/log-21f459ebfa013554/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/memchr-a5ad9a9ced7a185e/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/memoffset-1f9c809cb66f47ad/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/new_mime_guess-5a19c37b0debe30e/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/nom-f20626721c72cba3/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/proc-macro-error-97f2fc25d9b7fc89/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/proc-macro-error-attr-db3651ef459829ce/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/proc-macro2-8803fbc7990cf7aa/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/rayon-137998806e417f6b/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/rayon-core-d3b7d50c097a4664/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/ryu-84c31e8e8ee65998/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/serde-8fd409d40cb77b1e/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/serde_derive-ed28d1546b1c3ad0/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/serde_json-5bd3c27125506bb8/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/syn-311c4ed26db329d6/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/unicase-1bc56b5db838db2b/out" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/target/debug/build/parking_lot_core-f8d2e72482af1f8b/out" isTestSource="false" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/imgs" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/old" />
|
||||
|
|
14
CHANGELOG.md
14
CHANGELOG.md
|
@ -4,14 +4,6 @@ Dates are given in YYYY-MM-DD format - for example, the 15th of October 2021 is
|
|||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to
|
||||
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## Unreleased
|
||||
### Changed
|
||||
- Updated [`new_mime_guess`] to 4.0.0
|
||||
### Fixed
|
||||
- Disabled [`smartstring`] test on unsupported architectures
|
||||
### Other
|
||||
- Use [`parking_lot`]'s `RwLock` instead of the built-in one for a slight performance increase
|
||||
|
||||
## v0.4.0 - 2021-10-14
|
||||
### Added
|
||||
- `--fix` mode - instead of outputting a shell script or text file, fif will rename the misnamed files for you!
|
||||
|
@ -136,7 +128,7 @@ this version of `clap`, which caused the build to fail. Also, `clap` 3 beta 4 de
|
|||
- System extension set (`.dll`, `.so`, `.exe`...)
|
||||
- [`infer`] backend now supports Ren'Py archive (`.rpa`) files
|
||||
### Changed
|
||||
- Output is now sorted: Files that couldn't be read, then files with no known MIME type, then files with no known
|
||||
- Output is now sorted: Files that couldn't be read, then files with no known mimetype, then files with no known
|
||||
extensions, then files with the wrong extension
|
||||
- Added Apple iWork document formats to Documents extension set (`.pages`, `.key`, `.numbers`)
|
||||
### Fixed
|
||||
|
@ -168,7 +160,7 @@ this version of `clap`, which caused the build to fail. Also, `clap` 3 beta 4 de
|
|||
### Added
|
||||
- fif can now traverse symlinks with the `-f`/`--follow-symlinks` flag
|
||||
### Changed
|
||||
- Better MIME type detection:
|
||||
- Better mime type detection:
|
||||
- Consider "some/x-thing" and "some/thing" to be identical
|
||||
- Use a patched version of mime_guess (which took a while to make 0u0;) with many more extension/type mappings
|
||||
- Extensions are no longer mandatory - running fif without `-e` or `-E` will scan all files, regardless of extension
|
||||
|
@ -286,8 +278,6 @@ Initial commit!
|
|||
[`mime_guess`]: https://crates.io/crates/mime_guess
|
||||
[`new_mime_guess`]: https://crates.io/crates/new_mime_guess
|
||||
[`once_cell`]: https://crates.io/crates/once_cell
|
||||
[`parking_lot`]: https://crates.io/crates/parking_lot
|
||||
[`smartstring`]: https://crates.io/crates/smartstring
|
||||
[`snailquote`]: https://crates.io/crates/snailquote
|
||||
[`structopt`]: https://crates.io/crates/structopt
|
||||
[`xdg-mime`]: https://crates.io/crates/xdg-mime
|
||||
|
|
79
Cargo.lock
generated
79
Cargo.lock
generated
|
@ -190,7 +190,6 @@ dependencies = [
|
|||
"new_mime_guess",
|
||||
"num_cpus",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
"rand",
|
||||
"rayon",
|
||||
"serde",
|
||||
|
@ -262,15 +261,6 @@ dependencies = [
|
|||
"cfb",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "instant"
|
||||
version = "0.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.10.1"
|
||||
|
@ -307,18 +297,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.108"
|
||||
version = "0.2.103"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8521a1b57e76b1ec69af7599e75e38e7b7fad6610f037db8c79b127201b5d119"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "712a4d093c9976e24e7dbca41db895dabcbac38eb5f4045393d17a95bdfb1109"
|
||||
dependencies = [
|
||||
"scopeguard",
|
||||
]
|
||||
checksum = "dd8f7255a17a627354f321ef0055d63b898c6fb27eff628af4d1b66b7331edf6"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
|
@ -358,10 +339,11 @@ checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
|
|||
|
||||
[[package]]
|
||||
name = "new_mime_guess"
|
||||
version = "4.0.0"
|
||||
version = "3.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb3253391c5e7939360b2f7f27c7e7821dbc1a0bdab65884dde484fd71132764"
|
||||
checksum = "b101054f57b10576d116bc0da7ce8b05362b4490b0d21c7748395eaaf9359047"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"mime",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
|
@ -401,31 +383,6 @@ version = "2.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85"
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.11.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
|
||||
dependencies = [
|
||||
"instant",
|
||||
"lock_api",
|
||||
"parking_lot_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking_lot_core"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"instant",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
"smallvec",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.10.0"
|
||||
|
@ -467,9 +424,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.15"
|
||||
version = "0.2.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba"
|
||||
checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-error"
|
||||
|
@ -497,9 +454,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.32"
|
||||
version = "1.0.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43"
|
||||
checksum = "edc3358ebc67bc8b7fa0c007f945b0b18226f78437d61bec735a9eb96b61ee70"
|
||||
dependencies = [
|
||||
"unicode-xid",
|
||||
]
|
||||
|
@ -649,9 +606,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.71"
|
||||
version = "1.0.68"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "063bf466a64011ac24040a49009724ee60a57da1b437617ceb32e53ad61bfb19"
|
||||
checksum = "0f690853975602e1bfe1ccbf50504d67174e3bcf340f23b5ea9992e0587a52d8"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
|
@ -664,12 +621,6 @@ version = "0.3.7"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b"
|
||||
|
||||
[[package]]
|
||||
name = "smallvec"
|
||||
version = "1.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309"
|
||||
|
||||
[[package]]
|
||||
name = "smartstring"
|
||||
version = "0.2.9"
|
||||
|
@ -681,9 +632,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "snailquote"
|
||||
version = "0.3.1"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec62a949bda7f15800481a711909f946e1204f2460f89210eaf7f57730f88f86"
|
||||
checksum = "f34b729d802f52194598858ac852c3fb3b33f6e026cd03195072ccb7bf3fc810"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
"unicode_categories",
|
||||
|
@ -697,9 +648,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
|||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.81"
|
||||
version = "1.0.80"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2afee18b8beb5a596ecb4a2dce128c719b4ba399d34126b9e4396e3f9860966"
|
||||
checksum = "d010a1623fbd906d51d650a9916aaefc05ffa0e4053ff7fe601167f3e715d194"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
|
@ -29,7 +29,7 @@ json = ["serde", "serde_json"]
|
|||
walkdir = "~2.3.2"
|
||||
log = "0.4.14"
|
||||
mime = "0.3.16"
|
||||
mime_guess = { package = "new_mime_guess", features = ["phf-map"], version = "4.0.0" }
|
||||
mime_guess = { package = "new_mime_guess", features = ["phf-map"], version = "3.0.0" }
|
||||
snailquote = "0.3.0"
|
||||
once_cell = "1.8.0"
|
||||
rayon = { version = "1.5.0", optional = true }
|
||||
|
@ -40,7 +40,6 @@ serde = { version = "1.0", features = ["derive"], optional = true }
|
|||
serde_json = { version = "1.0", optional = true }
|
||||
num_cpus = { version = "1.13.0", optional = true }
|
||||
maplit = "1.0.2"
|
||||
parking_lot = "0.11.2"
|
||||
|
||||
[target.'cfg(not(unix))'.dependencies]
|
||||
xdg-mime = { version = "0.3.3", optional = true }
|
||||
|
|
13
README.md
13
README.md
|
@ -176,7 +176,7 @@ The five logging levels are used as follows:
|
|||
| Level | Description | Example |
|
||||
|-|-|-|
|
||||
| error | Errors that cause fif to stop running | fif was unable to open the provided directory |
|
||||
| warn | Warnings that don't cause fif to stop running | fif was unable to determine the MIME type of a given file |
|
||||
| warn | Warnings that don't cause fif to stop running | fif was unable to determine the mime type of a given file |
|
||||
| info | Information pertaining to fif's status | The provided directory was scanned without issue, and no files are in need of renaming |
|
||||
| debug | Debug information - usually not important to end users | The list of extensions fif will consider |
|
||||
| trace | Trace info - usually not important to end users | "Found 15 items to check", "Scan successful", etc. |
|
||||
|
@ -199,19 +199,10 @@ number will be updated as follows:
|
|||
when adding the ability to exclude extensions, and when fif gained the ability to output a bash script rather than a
|
||||
list of invalid filenames). The MINOR version will also be bumped when increasing the MSRV.
|
||||
- The PATCH version will be bumped in all other cases, including minor feature additions (in the past, this has occurred
|
||||
when adding features such as more output formats and the "ignore unknown extensions" flag).
|
||||
when adding features such as more output formats and the ignore flag).
|
||||
|
||||
If/when fif hits version 1.0, these rules will likely remain the same as they are now.
|
||||
|
||||
## Code style
|
||||
fif is formatted with `rustfmt` using a nightly toolchain, specifically with the command `cargo +nightly fmt`. Tabs are
|
||||
used for indentation, and are assumed to be two spaces wide. Lines are 120 characters wide. Braces are placed on the
|
||||
same line (["OTBS"](https://en.wikipedia.org/wiki/Indentation_style#Variant:_1TBS_(OTBS)) format), except in the case of
|
||||
`where` clauses in generic parameters.
|
||||
|
||||
For more detailed information on the formatting rules used by this project, see the configured options in
|
||||
[`rustfmt.toml`](https://gitlab.com/Lynnesbian/fif/-/blob/master/rustfmt.toml).
|
||||
|
||||
## License
|
||||
Copyright (C) 2021 Lynnesbian
|
||||
|
||||
|
|
|
@ -7,5 +7,3 @@ hard_tabs = true
|
|||
tab_spaces = 2
|
||||
newline_style = "Unix"
|
||||
group_imports = "StdExternalCrate"
|
||||
array_width = 100
|
||||
fn_call_width = 80
|
||||
|
|
103
src/files.rs
103
src/files.rs
|
@ -8,6 +8,7 @@ use std::fs::File;
|
|||
use std::io::{self, Read, Seek, SeekFrom};
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
use std::sync::RwLock;
|
||||
|
||||
use cfg_if::cfg_if;
|
||||
use itertools::{Either, Itertools};
|
||||
|
@ -15,43 +16,26 @@ use log::{debug, error};
|
|||
use mime::Mime;
|
||||
use mime_guess::from_ext;
|
||||
use once_cell::sync::Lazy;
|
||||
use parking_lot::RwLock;
|
||||
use walkdir::{DirEntry, WalkDir};
|
||||
|
||||
use crate::findings::{Findings, ScanError};
|
||||
use crate::mime_db::MimeDb;
|
||||
use crate::parameters::ScanOpts;
|
||||
use crate::utils::APPLICATION_ZIP;
|
||||
use crate::{String, MIMEDB};
|
||||
|
||||
/// Cache of MIME types and their associated extensions, used by [`mime_extension_lookup()`]
|
||||
/// Cache of mimetypes and their associated extensions, used by [`mime_extension_lookup()`]
|
||||
static MIMEXT: Lazy<RwLock<HashMap<String, Option<Vec<String>>>>> = Lazy::new(|| RwLock::new(HashMap::new()));
|
||||
|
||||
/// The number of bytes to read initially when identifying a file's MIME type. Used in the [`mime_type`] function.
|
||||
///
|
||||
/// Rather than reading the entire file all at once into a [`BUF_SIZE`] buffer, it tends to be faster to read a small
|
||||
/// chunk of the file and try to identify that, proceeding with the larger buffer if that fails. Many file formats
|
||||
/// can be identified with the first few dozen bytes, so the "happy path" will likely be taken in the majority of cases.
|
||||
pub const INITIAL_BUF_SIZE: usize = 128;
|
||||
|
||||
/// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes. Used in
|
||||
/// the [`mime_type`] function.
|
||||
pub const BUF_SIZE: usize = 8192;
|
||||
|
||||
/// A [`Mime`] representing the "application/x-ole-storage" MIME type.
|
||||
static APPLICATION_X_OLE_STORAGE: Lazy<Mime> = Lazy::new(|| Mime::from_str("application/x-ole-storage").unwrap());
|
||||
|
||||
cfg_if! {
|
||||
if #[cfg(windows)] {
|
||||
/// Determines whether or not a file is hidden by checking its win32 file attributes.
|
||||
pub fn is_hidden(entry: &DirEntry) -> bool {
|
||||
use std::os::windows::prelude::*;
|
||||
const FILE_ATTRIBUTE_HIDDEN: u32 = 0x2; // http://docs.microsoft.com/windows/win32/fileio/file-attribute-constants
|
||||
std::fs::metadata(entry.path()) // try to get metadata for file
|
||||
.map_or(
|
||||
false, // if getting metadata/attributes fails, assume it's not hidden
|
||||
|f| f.file_attributes() & FILE_ATTRIBUTE_HIDDEN > 0,
|
||||
)
|
||||
use std::os::windows::prelude::*;
|
||||
std::fs::metadata(entry.path()) // try to get metadata for file
|
||||
.map_or(
|
||||
false, // if getting metadata/attributes fails, assume it's not hidden
|
||||
|f| f.file_attributes() & 0x2 > 0, // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants
|
||||
)
|
||||
}
|
||||
} else {
|
||||
/// Determines whether or not a file is hidden by checking for a leading full stop.
|
||||
|
@ -113,29 +97,29 @@ pub fn wanted_file(
|
|||
/// Inspects the given entry, returning a [`Findings`] on success and a [`ScanError`] on failure.
|
||||
///
|
||||
/// In the event of an IO error, the returned [`ScanError`] will be of type [`ScanError::File`]. Otherwise, a
|
||||
/// [`ScanError::Mime`] will be returned, meaning that the file was scanned successfully, but a MIME type could not be
|
||||
/// [`ScanError::Mime`] will be returned, meaning that the file was scanned successfully, but a mimetype could not be
|
||||
/// determined.
|
||||
pub fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, ScanError> {
|
||||
let path = entry.path();
|
||||
// try to determine MIME type for this entry
|
||||
// try to determine mimetype for this entry
|
||||
let result = match mime_type(&*MIMEDB, path) {
|
||||
// an error occurred while trying to read the file
|
||||
Err(_) => return Err(ScanError::File(path)),
|
||||
// the file was read successfully, but we were unable to determine its MIME type
|
||||
// the file was read successfully, but we were unable to determine its mimetype
|
||||
Ok(None) => return Err(ScanError::Mime(path)),
|
||||
// a MIME type was found!
|
||||
// a mimetype was found!
|
||||
Ok(Some(result)) => result,
|
||||
};
|
||||
|
||||
// set of known extensions for the given MIME type
|
||||
// set of known extensions for the given mimetype
|
||||
let known_exts = mime_extension_lookup(result.essence_str().into());
|
||||
// file extension for this particular file
|
||||
let entry_ext = path.extension();
|
||||
|
||||
let valid = match known_exts {
|
||||
// there is a known set of extensions for this MIME type, and the file has an extension
|
||||
// there is a known set of extensions for this mimetype, and the file has an extension
|
||||
Some(e) if entry_ext.is_some() => e.contains(&entry_ext.unwrap().to_string_lossy().to_lowercase().into()),
|
||||
// either this file has no extension, or there is no known set of extensions for this MIME type :(
|
||||
// either this file has no extension, or there is no known set of extensions for this mimetype :(
|
||||
Some(_) | None => false,
|
||||
};
|
||||
|
||||
|
@ -174,7 +158,7 @@ pub fn scan_from_walkdir(
|
|||
chunk
|
||||
.iter() // iter over the chunk, which is a slice of DirEntry structs
|
||||
.map(|entry| scan_file(entry, canonical_paths))
|
||||
.collect::<Vec<_>>()
|
||||
.collect::<Vec<_>>() // TODO: is there a way to avoid having to collect here?
|
||||
}).partition_map(|result| match result {
|
||||
Ok(f) => Either::Left(f),
|
||||
Err(e) => Either::Right(e),
|
||||
|
@ -224,7 +208,11 @@ pub fn scan_directory(
|
|||
// TODO: is there a way to just say `map_or(x, |y| y).thing()` instead of `map_or(x.thing(), |y| y.thing())`?
|
||||
// i don't care whether i'm returning a walkdir error or an io error, i just care about whether or not it
|
||||
// implements ToString (which they both do). map_or doesn't work on trait objects though :(
|
||||
error!("{}: {}", path, err.io_error().map_or(err.to_string(), |e| e.to_string()));
|
||||
error!(
|
||||
"{}: {}",
|
||||
path,
|
||||
err.io_error().map_or(err.to_string(), |e| e.to_string())
|
||||
);
|
||||
return None;
|
||||
}
|
||||
e.ok()
|
||||
|
@ -248,7 +236,17 @@ pub fn scan_directory(
|
|||
}
|
||||
}
|
||||
|
||||
/// Tries to identify the MIME type of a file from a given path.
|
||||
/// The number of bytes to read initially.
|
||||
///
|
||||
/// Rather than reading the entire file all at once into a [`BUF_SIZE`] buffer, it tends to be faster to read a small
|
||||
/// chunk of the file and try to identify that, proceeding with the larger buffer if that fails. Many file formats
|
||||
/// can be identified with the first few dozen bytes, so the "happy path" will likely be taken in the majority of cases.
|
||||
pub const INITIAL_BUF_SIZE: usize = 128;
|
||||
|
||||
/// The number of bytes to read if the file couldn't be identified from its first [`INITIAL_BUF_SIZE`] bytes.
|
||||
pub const BUF_SIZE: usize = 8192;
|
||||
|
||||
/// Tries to identify the mimetype of a file from a given path.
|
||||
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
|
||||
let mut buffer = [0; INITIAL_BUF_SIZE];
|
||||
let mut file = File::open(path)?;
|
||||
|
@ -277,17 +275,17 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
|
|||
}
|
||||
|
||||
let r = r.filter(|mime|
|
||||
// some MIME types should be investigated further, reading up to BUF_SIZE even if they've been determined already
|
||||
// some mime types should be investigated further, reading up to BUF_SIZE even if they've been determined already
|
||||
// one such type is XML - there are many more specific types that can be determined by reading further (such as SVG)
|
||||
mime != &mime::TEXT_XML
|
||||
// another is ZIP - many file formats (DOCX, ODT, JAR...) are just ZIP files with particular data structures.
|
||||
// determining that a file is in one of the MS office formats in particular requires looking quite far into the
|
||||
// file.
|
||||
&& mime != &*APPLICATION_ZIP
|
||||
&& mime != &Mime::from_str("application/zip").unwrap()
|
||||
// doc/ppt/xls files are a subset of what's known as an "OLE2 compound document storage", at least according to
|
||||
// shared-mime-info. if a pre-OOXML era MS office file is scanned and identified as x-ole-storage, reading further
|
||||
// will allow it to be detected correctly as the appropriate filetype.
|
||||
&& mime != &*APPLICATION_X_OLE_STORAGE);
|
||||
&& mime != &Mime::from_str("application/x-ole-storage").unwrap());
|
||||
|
||||
if r.is_some() {
|
||||
return Ok(r);
|
||||
|
@ -300,13 +298,15 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
|
|||
Ok(db.get_type(&buffer))
|
||||
}
|
||||
|
||||
/// Returns a list of known extensions for this MIME type, if any.
|
||||
/// Returns a list of known extensions for this mime type, if any.
|
||||
/// This function uses the [`Mime`]'s "essence" rather than the [`Mime`] itself - [`mime_guess::get_mime_extensions`]
|
||||
/// ignores the type suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. Passing the
|
||||
/// `essence_str` (which includes the suffix) fixes this.
|
||||
pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
|
||||
if let Some(exts) = MIMEXT.read().get(&essence) {
|
||||
return exts.clone();
|
||||
if let Ok(cache) = MIMEXT.read() {
|
||||
if let Some(exts) = cache.get(&essence) {
|
||||
return exts.clone();
|
||||
}
|
||||
}
|
||||
|
||||
let mut exts = mime_guess::get_mime_extensions_str(essence.as_str());
|
||||
|
@ -352,14 +352,23 @@ pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
|
|||
} else if essence == "application/zip" {
|
||||
// neither xdg-mime nor infer seem to be able to detect office XML files properly...
|
||||
[
|
||||
vec![String::from("zip"), String::from("docx"), String::from("xlsx"), String::from("pptx")],
|
||||
vec![
|
||||
String::from("zip"),
|
||||
String::from("docx"),
|
||||
String::from("xlsx"),
|
||||
String::from("pptx"),
|
||||
],
|
||||
possible_exts,
|
||||
]
|
||||
.concat()
|
||||
} else if essence == "application/x-ms-dos-executable" {
|
||||
// .dll, .exe, and .scr files are given the same MIME type... but you definitely don't want to rename one to the
|
||||
// .dll, .exe, and .scr files are given the same mime type... but you definitely don't want to rename one to the
|
||||
// other!
|
||||
[vec![String::from("dll"), String::from("exe"), String::from("scr")], possible_exts].concat()
|
||||
[
|
||||
vec![String::from("dll"), String::from("exe"), String::from("scr")],
|
||||
possible_exts,
|
||||
]
|
||||
.concat()
|
||||
} else {
|
||||
possible_exts
|
||||
})
|
||||
|
@ -367,6 +376,10 @@ pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
|
|||
None => None,
|
||||
};
|
||||
|
||||
MIMEXT.write().insert(essence, exts.clone());
|
||||
exts
|
||||
if let Ok(mut cache) = MIMEXT.write() {
|
||||
cache.insert(essence, exts.clone());
|
||||
exts
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,9 +20,9 @@ use crate::String;
|
|||
pub struct Findings {
|
||||
/// The location of the scanned file.
|
||||
pub file: PathBuf,
|
||||
/// Whether or not the file's extension is valid for its MIME type.
|
||||
/// Whether or not the file's extension is valid for its mimetype.
|
||||
pub valid: bool,
|
||||
/// The file's MIME type.
|
||||
/// The file's mimetype.
|
||||
pub mime: Mime,
|
||||
}
|
||||
|
||||
|
@ -79,7 +79,7 @@ impl serde::Serialize for Findings {
|
|||
pub enum ScanError<'a> {
|
||||
/// Something went wrong while trying to read the given file.
|
||||
File(&'a Path),
|
||||
/// Failed to determine the MIME type of the given file.
|
||||
/// Failed to determine the mimetype of the given file.
|
||||
Mime(&'a Path),
|
||||
}
|
||||
|
||||
|
@ -90,7 +90,7 @@ impl<'a> Display for ScanError<'a> {
|
|||
"Couldn't {} file: {}",
|
||||
match self {
|
||||
Self::File(_) => "read",
|
||||
Self::Mime(_) => "determine MIME type of",
|
||||
Self::Mime(_) => "determine mime type of",
|
||||
},
|
||||
match self {
|
||||
Self::File(f) | Self::Mime(f) => f.to_string_lossy(),
|
||||
|
|
|
@ -136,7 +136,7 @@ pub trait FormatSteps {
|
|||
match error {
|
||||
// failed to read the file
|
||||
ScanError::File(path) => self.unreadable(f, path)?,
|
||||
// file was read successfully, but we couldn't determine a MIME type
|
||||
// file was read successfully, but we couldn't determine a mimetype
|
||||
ScanError::Mime(path) => self.unknown_type(f, path)?,
|
||||
}
|
||||
}
|
||||
|
@ -179,7 +179,14 @@ impl FormatSteps for Shell {
|
|||
fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
|
||||
smart_write(
|
||||
f,
|
||||
writablesln!["cat <<- '???'", Newline, "No known extension for ", path, Newline, "???"],
|
||||
writablesln![
|
||||
"cat <<- '???'",
|
||||
Newline,
|
||||
"No known extension for ",
|
||||
path,
|
||||
Newline,
|
||||
"???"
|
||||
],
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -188,11 +195,14 @@ impl FormatSteps for Shell {
|
|||
}
|
||||
|
||||
fn unknown_type<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
|
||||
smart_write(f, writablesln!["# Failed to detect MIME type for ", path])
|
||||
smart_write(f, writablesln!["# Failed to detect mime type for ", path])
|
||||
}
|
||||
|
||||
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
|
||||
smart_write(f, writablesln!["#!/usr/bin/env sh", Newline, "# ", (generated_by().as_str())])?;
|
||||
smart_write(
|
||||
f,
|
||||
writablesln!["#!/usr/bin/env sh", Newline, "# ", (generated_by().as_str())],
|
||||
)?;
|
||||
|
||||
if let Ok(working_directory) = std::env::current_dir() {
|
||||
smart_write(f, writablesln!["# Run from ", (working_directory.as_path())])?;
|
||||
|
@ -222,14 +232,26 @@ impl FormatSteps for PowerShell {
|
|||
// there doesn't seem to be a way to rename the file, prompting only if the target already exists.
|
||||
smart_write(
|
||||
f,
|
||||
writablesln!["Rename-Item -Verbose -Path ", from, " -NewName ", (to.file_name().unwrap())],
|
||||
writablesln![
|
||||
"Rename-Item -Verbose -Path ",
|
||||
from,
|
||||
" -NewName ",
|
||||
(to.file_name().unwrap())
|
||||
],
|
||||
)
|
||||
}
|
||||
|
||||
fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
|
||||
smart_write(
|
||||
f,
|
||||
writablesln!["Write-Output @'", Newline, "No known extension for ", path, Newline, "'@"],
|
||||
writablesln![
|
||||
"Write-Output @'",
|
||||
Newline,
|
||||
"No known extension for ",
|
||||
path,
|
||||
Newline,
|
||||
"'@"
|
||||
],
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -241,7 +263,7 @@ impl FormatSteps for PowerShell {
|
|||
}
|
||||
|
||||
fn unknown_type<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
|
||||
smart_write(f, writablesln!["<# Failed to detect MIME type for ", path, " #>"])
|
||||
smart_write(f, writablesln!["<# Failed to detect mime type for ", path, " #>"])
|
||||
}
|
||||
|
||||
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
|
||||
|
|
|
@ -215,16 +215,16 @@ fn main() {
|
|||
} else {
|
||||
let mut buffered_stdout = BufWriter::new(stdout());
|
||||
|
||||
if match args.output_format {
|
||||
let result = match args.output_format {
|
||||
// TODO: simplify this to something like formats::write_all(args.output_format, ...)
|
||||
OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &findings, &errors),
|
||||
OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &findings, &errors),
|
||||
#[cfg(feature = "json")]
|
||||
OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &findings, &errors),
|
||||
OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &findings, &errors),
|
||||
}
|
||||
.is_err()
|
||||
{
|
||||
};
|
||||
|
||||
if result.is_err() {
|
||||
error!("Failed to write to stdout.");
|
||||
exit(exitcode::IOERR);
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@ use mime::Mime;
|
|||
pub trait MimeDb {
|
||||
/// Initialise the database.
|
||||
fn init() -> Self;
|
||||
/// Given a slice of bytes, returns the inferred MIME type, if any.
|
||||
/// Given a slice of bytes, returns the inferred mimetype, if any.
|
||||
fn get_type(&self, data: &[u8]) -> Option<Mime>;
|
||||
}
|
||||
|
||||
|
|
|
@ -319,7 +319,12 @@ impl ExtensionSet {
|
|||
Self::Images => mime_guess::get_mime_extensions_str("image/*").unwrap().to_vec(),
|
||||
Self::Audio => mime_guess::get_mime_extensions_str("audio/*").unwrap().to_vec(),
|
||||
Self::Video => mime_guess::get_mime_extensions_str("video/*").unwrap().to_vec(),
|
||||
Self::Media => [Self::Images.extensions(), Self::Audio.extensions(), Self::Video.extensions()].concat(),
|
||||
Self::Media => [
|
||||
Self::Images.extensions(),
|
||||
Self::Audio.extensions(),
|
||||
Self::Video.extensions(),
|
||||
]
|
||||
.concat(),
|
||||
Self::Documents => vec![
|
||||
"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
|
||||
"pages", "key", "numbers",
|
||||
|
|
|
@ -29,13 +29,13 @@ const ZIP_BYTES: &[u8] = b"PK\x03\x04";
|
|||
/// Ensure that `extension_from_path` successfully returns the extension from a set of paths.
|
||||
fn get_ext() {
|
||||
let ext_checks: HashMap<_, Option<&OsStr>> = hashmap![
|
||||
Path::new("test.txt") => Some(OsStr::new("txt")),
|
||||
Path::new("test.zip") => Some(OsStr::new("zip")),
|
||||
Path::new("test.tar.gz") => Some(OsStr::new("gz")),
|
||||
Path::new("test.") => Some(OsStr::new("")),
|
||||
Path::new("test") => None,
|
||||
Path::new(".hidden") => None,
|
||||
];
|
||||
Path::new("test.txt") => Some(OsStr::new("txt")),
|
||||
Path::new("test.zip") => Some(OsStr::new("zip")),
|
||||
Path::new("test.tar.gz") => Some(OsStr::new("gz")),
|
||||
Path::new("test.") => Some(OsStr::new("")),
|
||||
Path::new("test") => None,
|
||||
Path::new(".hidden") => None,
|
||||
];
|
||||
|
||||
for (path, ext) in ext_checks {
|
||||
assert_eq!(path.extension(), ext);
|
||||
|
@ -43,7 +43,7 @@ fn get_ext() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
/// Ensure that the MIME types for JPEG, PNG, PDF, and ZIP are detected from their magic numbers.
|
||||
/// Ensure that the mime types for JPEG, PNG, PDF, and ZIP are detected from their magic numbers.
|
||||
fn detect_type() {
|
||||
assert_eq!(MIMEDB.get_type(JPEG_BYTES), Some(IMAGE_JPEG));
|
||||
assert_eq!(MIMEDB.get_type(PNG_BYTES), Some(IMAGE_PNG));
|
||||
|
@ -55,28 +55,23 @@ fn detect_type() {
|
|||
/// Ensure that `mime_extension_lookup` works as expected, and that the set of extensions for JPEG, PNG, PDF, and ZIP
|
||||
/// contain "jpg", "png", "pdf", and "zip", respectively.
|
||||
fn recommend_ext() {
|
||||
let tests = hashmap![
|
||||
&IMAGE_JPEG => "jpg",
|
||||
&IMAGE_PNG => "png",
|
||||
&APPLICATION_PDF => "pdf",
|
||||
&*APPLICATION_ZIP => "zip",
|
||||
];
|
||||
|
||||
for (mime, ext) in tests {
|
||||
assert!(
|
||||
mime_extension_lookup(mime.essence_str().into())
|
||||
.unwrap()
|
||||
.contains(&String::from(ext)),
|
||||
"mime_extension_lookup for {} didn't contain {}!",
|
||||
mime.essence_str(),
|
||||
ext
|
||||
);
|
||||
}
|
||||
assert!(mime_extension_lookup(IMAGE_JPEG.essence_str().into())
|
||||
.unwrap()
|
||||
.contains(&String::from("jpg")));
|
||||
assert!(mime_extension_lookup(IMAGE_PNG.essence_str().into())
|
||||
.unwrap()
|
||||
.contains(&String::from("png")));
|
||||
assert!(mime_extension_lookup(APPLICATION_PDF.essence_str().into())
|
||||
.unwrap()
|
||||
.contains(&String::from("pdf")));
|
||||
assert!(mime_extension_lookup(APPLICATION_ZIP.essence_str().into())
|
||||
.unwrap()
|
||||
.contains(&String::from("zip")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// Create a simple directory with some files, run `scan_directory` on it, and ensure that the files have their
|
||||
/// associated MIME types correctly deduced.
|
||||
/// associated mime types correctly deduced.
|
||||
fn simple_directory() {
|
||||
use std::borrow::Borrow;
|
||||
use std::env::set_current_dir;
|
||||
|
@ -141,16 +136,19 @@ fn simple_directory() {
|
|||
// the only invalid file detected should be "wrong.jpg", which is a misnamed png file
|
||||
// 1. ensure detected extension is "jpg"
|
||||
assert_eq!(result.file.as_path().extension().unwrap(), OsStr::new("jpg"));
|
||||
// 2. ensure detected MIME type is IMAGE_PNG
|
||||
// 2. ensure detected mime type is IMAGE_PNG
|
||||
assert_eq!(result.mime, IMAGE_PNG);
|
||||
// 3. ensure the recommended extension for "wrong.jpg" is "png"
|
||||
assert_eq!(&result.recommended_extension().unwrap(), &String::from("png"));
|
||||
// 4. ensure the recommended filename for "wrong.jpg" is "wrong.png"
|
||||
assert_eq!(result.recommended_path().unwrap().file_name(), Some(OsStr::new("wrong.png")));
|
||||
assert_eq!(
|
||||
result.recommended_path().unwrap().file_name(),
|
||||
Some(OsStr::new("wrong.png"))
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
// check if the recommended extension for this file is in the list of known extensions for its MIME type - for
|
||||
// check if the recommended extension for this file is in the list of known extensions for its mimetype - for
|
||||
// example, if the file is determined to be an IMAGE_PNG, its recommended extension should be one of the extensions
|
||||
// returned by `mime_extension_lookup(IMAGE_PNG)`.
|
||||
assert!(mime_extension_lookup(result.mime.essence_str().into())
|
||||
|
@ -166,8 +164,8 @@ fn simple_directory() {
|
|||
.to_string_lossy()
|
||||
.starts_with("test"));
|
||||
|
||||
// make sure the guessed MIME type is correct based on the extension of the scanned file
|
||||
// because we already know that the extensions match the MIME type (as we created these files ourselves earlier in
|
||||
// make sure the guessed mimetype is correct based on the extension of the scanned file
|
||||
// because we already know that the extensions match the mimetype (as we created these files ourselves earlier in
|
||||
// the test), all files with the "jpg" extension should be IMAGE_JPEGs, etc.
|
||||
let ext = result.file.as_path().extension().unwrap();
|
||||
assert_eq!(
|
||||
|
@ -405,7 +403,7 @@ fn test_json() {
|
|||
.read_to_string(&mut contents)
|
||||
.expect("Failed to read from cursor to string");
|
||||
|
||||
// the output should contain the file's MIME type
|
||||
// the output should contain the file's mime type
|
||||
assert!(
|
||||
contents.contains(IMAGE_JPEG.essence_str()),
|
||||
"JSON output doesn't contain move command!\n===\n{}",
|
||||
|
@ -432,20 +430,15 @@ fn media_contains_audio_video_images() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
/// Ensure that the `writables!` and `writablesln!` macros produce the output they should.
|
||||
/// Ensure that the `writables!` macro produces the output it should.
|
||||
fn writables_is_correct() {
|
||||
use fif::formats::Writable;
|
||||
use fif::{writables, writablesln};
|
||||
use fif::writables;
|
||||
|
||||
assert_eq!(
|
||||
&["henlo".into(), Path::new("henlo").into(), Writable::Newline,],
|
||||
writables!["henlo", (Path::new("henlo")), Newline]
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
&["henlo".into(), Path::new("henlo").into(), Writable::Newline, Writable::Newline],
|
||||
writablesln!["henlo", (Path::new("henlo")), Newline]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -477,7 +470,10 @@ fn verbosity() {
|
|||
#[test]
|
||||
/// Ensures `os_name()`'s output is the same as [`std::env::consts::OS`], capitalisation notwithstanding
|
||||
fn validate_os_name() {
|
||||
assert_eq!(fif::utils::os_name().to_lowercase(), std::env::consts::OS.to_lowercase());
|
||||
assert_eq!(
|
||||
fif::utils::os_name().to_lowercase(),
|
||||
std::env::consts::OS.to_lowercase()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -509,7 +505,6 @@ fn sort_findings() {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(not(all(target_endian = "big", target_pointer_width = "32")))]
|
||||
/// Ensures that [`SmartString`]s don't deviate from std's Strings
|
||||
fn validate_string_type() {
|
||||
use std::string::String as StdString;
|
||||
|
@ -521,6 +516,6 @@ fn validate_string_type() {
|
|||
SmartString::from("A long and therefore heap-allocated string"),
|
||||
StdString::from("A long and therefore heap-allocated string")
|
||||
);
|
||||
|
||||
// uncomment if i ever update to smartstring >= 0.2.9
|
||||
smartstring::validate();
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ pub static CLAP_LONG_VERSION: Lazy<String> = Lazy::new(|| {
|
|||
.into()
|
||||
});
|
||||
|
||||
/// A [`Mime`] representing the "application/zip" MIME type.
|
||||
/// A [`Mime`] representing the "application/zip" mimetype.
|
||||
pub static APPLICATION_ZIP: Lazy<Mime> = Lazy::new(|| Mime::from_str("application/zip").unwrap());
|
||||
|
||||
/// Returns the name of the target operating system with proper casing, like "Windows" or "macOS".
|
||||
|
|
Loading…
Reference in a new issue