Compare commits

...

3 commits

12 changed files with 103 additions and 33 deletions

View file

@ -2,10 +2,16 @@
Dates are given in YYYY-MM-DD format.
## v0.3
### v0.3.4 (2021-mm-dd)
### v0.3.4 (2021-08-07)
#### Features
- Added `-I`/`--ignore-unknown-exts` flag for ignoring files with unknown extensions - for example, if fif doesn't know
what a ".fake" file is, setting this flag will prevent it from renaming "photo.fake" to "photo.jpg". This is useful
for avoiding the case where fif mislabels an obscure format it isn't aware of as something else.
#### Other
- Refactored `formats.rs`
- More accurate dependency versions in `Cargo.toml` to ensure that the MSRV stays supported
- Sister project (?) [`new-mime-guess`] is now v3.0.0, has been updated to the 2018 edition of Rust, and has some new
file and MIME types added
### v0.3.3 (2021-07-07)
#### Features

58
Cargo.lock generated
View file

@ -183,7 +183,7 @@ checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193"
[[package]]
name = "fif"
version = "0.3.3"
version = "0.3.4"
dependencies = [
"cached",
"cfg-if",
@ -193,6 +193,7 @@ dependencies = [
"infer",
"itertools",
"log",
"mime",
"new_mime_guess",
"once_cell",
"rand",
@ -344,11 +345,13 @@ checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
[[package]]
name = "new_mime_guess"
version = "2.1.1"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "991337b97f81dff759c3edabb0bc01ceac92bff6f54852853824bbe1acd969f7"
checksum = "c8c8e990db7ba41ce2b4b3a38380a0f3385d61c55b3d4999d6e00389562a79a2"
dependencies = [
"mime",
"phf",
"phf_codegen",
"unicase",
]
@ -385,6 +388,45 @@ version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85"
[[package]]
name = "phf"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b21b531851d1d86a90bd62dd79be87ce2d90b7bcb6afbae07813921d6156696a"
dependencies = [
"phf_shared",
]
[[package]]
name = "phf_codegen"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f8c56073a14f772740bd86231763732f7559635215bf75df9f26f1d713d99e0"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]]
name = "phf_generator"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d43f3220d96e0080cc9ea234978ccd80d904eafb17be31bb0f76daaea6493082"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_shared"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a68318426de33640f02be62b4ae8eb1261be2efbc337b60c54d845bf4484e0d9"
dependencies = [
"siphasher",
"unicase",
]
[[package]]
name = "ppv-lite86"
version = "0.2.10"
@ -500,9 +542,9 @@ dependencies = [
[[package]]
name = "redox_syscall"
version = "0.2.9"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee"
checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
dependencies = [
"bitflags",
]
@ -578,6 +620,12 @@ dependencies = [
"serde",
]
[[package]]
name = "siphasher"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "729a25c17d72b06c68cb47955d44fda88ad2d3e7d77e025663fdd69b93dd71a1"
[[package]]
name = "smartstring"
version = "0.2.7"

View file

@ -1,7 +1,7 @@
[package]
name = "fif"
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
version = "0.3.3"
version = "0.3.4"
authors = ["Lynnesbian <lynne@bune.city>"]
edition = "2018"
license = "GPL-3.0-or-later"
@ -27,7 +27,8 @@ json = ["serde", "serde_json"]
[dependencies]
walkdir = "2.3.2"
log = "0.4.14"
mime_guess = { package = "new_mime_guess", version = "2.1.1" }
mime = "0.3.16"
mime_guess = { package = "new_mime_guess", features = ["phf-map"], version = "3.0.0" }
snailquote = "0.3.0"
once_cell = "1.8.0"
rayon = { version = "1.5.0", optional = true }

View file

@ -32,6 +32,8 @@ for backend in "${_backends[@]}"; do
-A clippy::shadow_unrelated \
-A clippy::option_if_let_else \
-A clippy::multiple-crate-versions \
-A clippy::cast-possible-truncation \
-A clippy::cast-possible-wrap \
"$_extra"
done
@ -41,3 +43,5 @@ done
# shadow_unrelated: sometimes things that seem unrelated are actually related ;)
# option_if_let_else: the suggested code is usually harder to read than the original
# multiple_crate_versions: cached uses an old version of hashbrown :c
# cast-possible-truncation: only ever used where it would be totally fine
# cast-possible-wrap: ditto

View file

@ -1,6 +1,6 @@
use std::path::{Path, PathBuf};
use mime_guess::Mime;
use mime::Mime;
use crate::inspectors::mime_extension_lookup;
use crate::string_type::String;

View file

@ -33,10 +33,6 @@ macro_rules! writables {
$crate::formats::Writable::Newline
};
(@do Space) => {
$crate::formats::Writable::Space
};
(@do $arg:expr) => {
$arg.into()
}
@ -57,7 +53,6 @@ type Entries<'a> = [Result<Findings, ScanError<'a>>];
pub enum Writable<'a> {
String(&'a str),
Path(&'a Path),
Space,
Newline,
}
@ -80,7 +75,6 @@ fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
// ehhhh
for writeable in writeables {
match writeable {
Writable::Space => write!(f, " ")?,
Writable::Newline => {
cfg_if! {
if #[cfg(windows)] {
@ -189,7 +183,7 @@ impl Format for Shell {
impl FormatSteps for Shell {
fn rename<W: Write>(&self, f: &mut W, from: &Path, to: &Path) -> io::Result<()> {
smart_write(f, writablesln!("mv -v -i -- ", from, Space, to))
smart_write(f, writablesln!("mv -v -i -- ", from, "\t", to))
}
fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {

View file

@ -7,7 +7,7 @@ use std::path::Path;
use std::str::FromStr;
use cached::cached;
use mime_guess::Mime;
use mime::Mime;
use crate::mime_db::MimeDb;
use crate::string_type::String;
@ -33,7 +33,7 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
let r = db.get_type(&buffer).filter(|mime|
// some mime types should be investigated further, reading up to BUF_SIZE even if they've been determined already
// one such type is XML - there's many more specific types that can be determined by reading further (such as SVG)
mime != &mime_guess::mime::TEXT_XML
mime != &mime::TEXT_XML
// another is ZIP - many file formats (DOCX, ODT, JAR...) are just ZIP files with particular data structures.
// determining that a file is in one of the MS office formats in particular requires looking quite far into the
// file.
@ -92,13 +92,13 @@ cached! {
Some(exts) => {
let possible_exts: Vec<String> = exts.iter().map(|e| String::from(*e)).collect();
Some(if essence == mime_guess::mime::IMAGE_JPEG.essence_str() {
Some(if essence == mime::IMAGE_JPEG.essence_str() {
// possible_exts starts with "jpe", because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg are
// far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can
// add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif.
[vec![String::from("jpg")], possible_exts].concat()
} else if essence == mime_guess::mime::TEXT_XML.essence_str() || essence == "application/xml" {
} else if essence == mime::TEXT_XML.essence_str() || essence == "application/xml" {
// a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should
// (in my opinion) be "xml".
// there's also another problem: SVG files can easily be misidentified as XML files, because they usually

View file

@ -33,6 +33,7 @@ use crate::formats::Format;
use crate::mime_db::MimeDb;
use crate::parameters::{OutputFormat, ScanOpts};
use crate::utils::{clap_long_version, os_name};
use mime_guess::from_ext;
use std::collections::BTreeSet;
mod findings;
@ -217,6 +218,11 @@ fn wanted_file(
let ext = ext.to_string_lossy().to_lowercase();
let ext = ext.as_str();
if scan_opts.ignore_unknown_exts && from_ext(ext).is_empty() {
// unknown extension, skip.
return false;
}
if let Some(exts) = exts {
// only scan if the file has one of the specified extensions.
exts.contains(&ext)

View file

@ -1,7 +1,7 @@
//! Backend-neutral Mime database implementation.
use cfg_if::cfg_if;
use mime_guess::Mime;
use mime::Mime;
pub trait MimeDb {
fn init() -> Self;

View file

@ -128,6 +128,11 @@ pub struct Parameters {
/// The directory to process.
#[clap(name = "DIR", default_value = ".", parse(from_os_str))]
pub dir: PathBuf,
/// Don't rename files with extensions unknown to fif.
/// For example, with this option, fif will not rename "image.unknown" to "image.jpg"
#[clap(short = 'I', long)]
pub ignore_unknown_exts: bool,
}
fn lowercase_exts(exts: &str) -> Result<(), String> {
@ -140,6 +145,7 @@ fn lowercase_exts(exts: &str) -> Result<(), String> {
/// Further options relating to scanning.
#[derive(PartialEq, Debug)]
#[allow(clippy::struct_excessive_bools)]
pub struct ScanOpts {
/// Whether hidden files and directories should be scanned.
pub hidden: bool,
@ -147,6 +153,8 @@ pub struct ScanOpts {
pub extensionless: bool,
/// Should symlinks be followed?
pub follow_symlinks: bool,
/// Whether to skip files whose extensions are unknown to fif, rather than renaming them.
pub ignore_unknown_exts: bool,
}
impl Parameters {
@ -214,6 +222,7 @@ impl Parameters {
hidden: self.scan_hidden,
extensionless: self.scan_extensionless,
follow_symlinks: self.follow_symlinks,
ignore_unknown_exts: self.ignore_unknown_exts,
}
}

View file

@ -7,8 +7,7 @@ use crate::{scan_directory, scan_from_walkdir};
use crate::parameters::Parameters;
use clap::Clap;
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use mime_guess::Mime;
use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use crate::parameters::ExtensionSet;
use std::collections::HashMap;
@ -101,6 +100,7 @@ fn simple_directory() {
files.insert("test.pdf", PDF_BYTES);
files.insert("test.zip", ZIP_BYTES);
files.insert("wrong.jpg", PNG_BYTES);
files.insert("ignore.fake_ext", ZIP_BYTES);
let dir = tempdir().expect("Failed to create temporary directory.");
set_current_dir(dir.path()).expect("Failed to change directory.");
@ -118,11 +118,13 @@ fn simple_directory() {
hidden: true,
extensionless: false,
follow_symlinks: false,
ignore_unknown_exts: true,
};
let entries = scan_directory(dir.path(), None, None, &scan_opts).expect("Directory scan failed.");
assert_eq!(entries.len(), files.len());
// there should be one file missing: "ignore.fake_ext"
assert_eq!(entries.len(), files.len() - 1);
// initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present.
crate::init_db();
@ -210,6 +212,7 @@ fn argument_parsing() {
hidden: false,
extensionless: false,
follow_symlinks: true,
ignore_unknown_exts: false,
},
"ScanOpts are incorrect"
);
@ -330,7 +333,10 @@ fn identify_random_bytes() {
for (mime, count) in &results {
println!("{}:\t{} counts", mime, count);
}
println!("No type found:\t{} counts", 500 - results.values().sum::<i32>());
println!(
"No type found:\t{} counts",
results.values().len() as i32 - results.values().sum::<i32>()
);
}
#[test]
@ -364,7 +370,7 @@ fn outputs_move_commands() {
// the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg"
assert!(
contents.contains("misnamed_file.jpg"),
contents.contains("misnamed_file.jpg") && contents.contains("misnamed_file.png"),
"{} output doesn't contain move command!\n===\n{}",
format,
contents
@ -430,13 +436,8 @@ fn writables_is_correct() {
use crate::writables;
assert_eq!(
&[
"henlo".into(),
Path::new("henlo").into(),
Writable::Newline,
Writable::Space
],
writables!["henlo", (Path::new("henlo")), Newline, Space]
&["henlo".into(), Path::new("henlo").into(), Writable::Newline,],
writables!["henlo", (Path::new("henlo")), Newline]
);
}

View file

@ -54,6 +54,7 @@ def main():
sys.exit(2)
print("Done! You might want to run cargo clean...")
subprocess.run(["du", "-sh", "target"])
if __name__ == "__main__":
main()