Compare commits
3 commits
7a7e6c9bc6
...
0404cff8b7
Author | SHA1 | Date | |
---|---|---|---|
0404cff8b7 | |||
ddde6acdd6 | |||
b48b8d5a65 |
12 changed files with 103 additions and 33 deletions
|
@ -2,10 +2,16 @@
|
|||
Dates are given in YYYY-MM-DD format.
|
||||
|
||||
## v0.3
|
||||
### v0.3.4 (2021-mm-dd)
|
||||
### v0.3.4 (2021-08-07)
|
||||
#### Features
|
||||
- Added `-I`/`--ignore-unknown-exts` flag for ignoring files with unknown extensions - for example, if fif doesn't know
|
||||
what a ".fake" file is, setting this flag will prevent it from renaming "photo.fake" to "photo.jpg". This is useful
|
||||
for avoiding the case where fif mislabels an obscure format it isn't aware of as something else.
|
||||
#### Other
|
||||
- Refactored `formats.rs`
|
||||
- More accurate dependency versions in `Cargo.toml` to ensure that the MSRV stays supported
|
||||
- Sister project (?) [`new-mime-guess`] is now v3.0.0, updated to 2018 edition Rust, and with some new file and MIME
|
||||
types added
|
||||
|
||||
### v0.3.3 (2021-07-07)
|
||||
#### Features
|
||||
|
|
58
Cargo.lock
generated
58
Cargo.lock
generated
|
@ -183,7 +183,7 @@ checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193"
|
|||
|
||||
[[package]]
|
||||
name = "fif"
|
||||
version = "0.3.3"
|
||||
version = "0.3.4"
|
||||
dependencies = [
|
||||
"cached",
|
||||
"cfg-if",
|
||||
|
@ -193,6 +193,7 @@ dependencies = [
|
|||
"infer",
|
||||
"itertools",
|
||||
"log",
|
||||
"mime",
|
||||
"new_mime_guess",
|
||||
"once_cell",
|
||||
"rand",
|
||||
|
@ -344,11 +345,13 @@ checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
|
|||
|
||||
[[package]]
|
||||
name = "new_mime_guess"
|
||||
version = "2.1.1"
|
||||
version = "3.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "991337b97f81dff759c3edabb0bc01ceac92bff6f54852853824bbe1acd969f7"
|
||||
checksum = "c8c8e990db7ba41ce2b4b3a38380a0f3385d61c55b3d4999d6e00389562a79a2"
|
||||
dependencies = [
|
||||
"mime",
|
||||
"phf",
|
||||
"phf_codegen",
|
||||
"unicase",
|
||||
]
|
||||
|
||||
|
@ -385,6 +388,45 @@ version = "2.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85"
|
||||
|
||||
[[package]]
|
||||
name = "phf"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b21b531851d1d86a90bd62dd79be87ce2d90b7bcb6afbae07813921d6156696a"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_codegen"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f8c56073a14f772740bd86231763732f7559635215bf75df9f26f1d713d99e0"
|
||||
dependencies = [
|
||||
"phf_generator",
|
||||
"phf_shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_generator"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d43f3220d96e0080cc9ea234978ccd80d904eafb17be31bb0f76daaea6493082"
|
||||
dependencies = [
|
||||
"phf_shared",
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "phf_shared"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a68318426de33640f02be62b4ae8eb1261be2efbc337b60c54d845bf4484e0d9"
|
||||
dependencies = [
|
||||
"siphasher",
|
||||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.10"
|
||||
|
@ -500,9 +542,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.9"
|
||||
version = "0.2.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee"
|
||||
checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
@ -578,6 +620,12 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "729a25c17d72b06c68cb47955d44fda88ad2d3e7d77e025663fdd69b93dd71a1"
|
||||
|
||||
[[package]]
|
||||
name = "smartstring"
|
||||
version = "0.2.7"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "fif"
|
||||
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
|
||||
version = "0.3.3"
|
||||
version = "0.3.4"
|
||||
authors = ["Lynnesbian <lynne@bune.city>"]
|
||||
edition = "2018"
|
||||
license = "GPL-3.0-or-later"
|
||||
|
@ -27,7 +27,8 @@ json = ["serde", "serde_json"]
|
|||
[dependencies]
|
||||
walkdir = "2.3.2"
|
||||
log = "0.4.14"
|
||||
mime_guess = { package = "new_mime_guess", version = "2.1.1" }
|
||||
mime = "0.3.16"
|
||||
mime_guess = { package = "new_mime_guess", features = ["phf-map"], version = "3.0.0" }
|
||||
snailquote = "0.3.0"
|
||||
once_cell = "1.8.0"
|
||||
rayon = { version = "1.5.0", optional = true }
|
||||
|
|
|
@ -32,6 +32,8 @@ for backend in "${_backends[@]}"; do
|
|||
-A clippy::shadow_unrelated \
|
||||
-A clippy::option_if_let_else \
|
||||
-A clippy::multiple-crate-versions \
|
||||
-A clippy::cast-possible-truncation \
|
||||
-A clippy::cast-possible-wrap \
|
||||
"$_extra"
|
||||
done
|
||||
|
||||
|
@ -41,3 +43,5 @@ done
|
|||
# shadow_unrelated: sometimes things that seem unrelated are actually related ;)
|
||||
# option_if_let_else: the suggested code is usually harder to read than the original
|
||||
# multiple_crate_versions: cached uses an old version of hashbrown :c
|
||||
# cast-possible-truncation: only ever used where it would be totally fine
|
||||
# cast-possible-wrap: ditto
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use std::path::{Path, PathBuf};
|
||||
|
||||
use mime_guess::Mime;
|
||||
use mime::Mime;
|
||||
|
||||
use crate::inspectors::mime_extension_lookup;
|
||||
use crate::string_type::String;
|
||||
|
|
|
@ -33,10 +33,6 @@ macro_rules! writables {
|
|||
$crate::formats::Writable::Newline
|
||||
};
|
||||
|
||||
(@do Space) => {
|
||||
$crate::formats::Writable::Space
|
||||
};
|
||||
|
||||
(@do $arg:expr) => {
|
||||
$arg.into()
|
||||
}
|
||||
|
@ -57,7 +53,6 @@ type Entries<'a> = [Result<Findings, ScanError<'a>>];
|
|||
pub enum Writable<'a> {
|
||||
String(&'a str),
|
||||
Path(&'a Path),
|
||||
Space,
|
||||
Newline,
|
||||
}
|
||||
|
||||
|
@ -80,7 +75,6 @@ fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
|
|||
// ehhhh
|
||||
for writeable in writeables {
|
||||
match writeable {
|
||||
Writable::Space => write!(f, " ")?,
|
||||
Writable::Newline => {
|
||||
cfg_if! {
|
||||
if #[cfg(windows)] {
|
||||
|
@ -189,7 +183,7 @@ impl Format for Shell {
|
|||
|
||||
impl FormatSteps for Shell {
|
||||
fn rename<W: Write>(&self, f: &mut W, from: &Path, to: &Path) -> io::Result<()> {
|
||||
smart_write(f, writablesln!("mv -v -i -- ", from, Space, to))
|
||||
smart_write(f, writablesln!("mv -v -i -- ", from, "\t", to))
|
||||
}
|
||||
|
||||
fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
|
||||
|
|
|
@ -7,7 +7,7 @@ use std::path::Path;
|
|||
use std::str::FromStr;
|
||||
|
||||
use cached::cached;
|
||||
use mime_guess::Mime;
|
||||
use mime::Mime;
|
||||
|
||||
use crate::mime_db::MimeDb;
|
||||
use crate::string_type::String;
|
||||
|
@ -33,7 +33,7 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
|
|||
let r = db.get_type(&buffer).filter(|mime|
|
||||
// some mime types should be investigated further, reading up to BUF_SIZE even if they've been determined already
|
||||
// one such type is XML - there are many more specific types that can be determined by reading further (such as SVG)
|
||||
mime != &mime_guess::mime::TEXT_XML
|
||||
mime != &mime::TEXT_XML
|
||||
// another is ZIP - many file formats (DOCX, ODT, JAR...) are just ZIP files with particular data structures.
|
||||
// determining that a file is in one of the MS office formats in particular requires looking quite far into the
|
||||
// file.
|
||||
|
@ -92,13 +92,13 @@ cached! {
|
|||
Some(exts) => {
|
||||
let possible_exts: Vec<String> = exts.iter().map(|e| String::from(*e)).collect();
|
||||
|
||||
Some(if essence == mime_guess::mime::IMAGE_JPEG.essence_str() {
|
||||
Some(if essence == mime::IMAGE_JPEG.essence_str() {
|
||||
// possible_exts starts with "jpe", because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg are
|
||||
// far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can
|
||||
// add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif.
|
||||
[vec![String::from("jpg")], possible_exts].concat()
|
||||
|
||||
} else if essence == mime_guess::mime::TEXT_XML.essence_str() || essence == "application/xml" {
|
||||
} else if essence == mime::TEXT_XML.essence_str() || essence == "application/xml" {
|
||||
// a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should
|
||||
// (in my opinion) be "xml".
|
||||
// there's also another problem: SVG files can easily be misidentified as XML files, because they usually
|
||||
|
|
|
@ -33,6 +33,7 @@ use crate::formats::Format;
|
|||
use crate::mime_db::MimeDb;
|
||||
use crate::parameters::{OutputFormat, ScanOpts};
|
||||
use crate::utils::{clap_long_version, os_name};
|
||||
use mime_guess::from_ext;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
mod findings;
|
||||
|
@ -217,6 +218,11 @@ fn wanted_file(
|
|||
let ext = ext.to_string_lossy().to_lowercase();
|
||||
let ext = ext.as_str();
|
||||
|
||||
if scan_opts.ignore_unknown_exts && from_ext(ext).is_empty() {
|
||||
// unknown extension, skip.
|
||||
return false;
|
||||
}
|
||||
|
||||
if let Some(exts) = exts {
|
||||
// only scan if the file has one of the specified extensions.
|
||||
exts.contains(&ext)
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
//! Backend-neutral Mime database implementation.
|
||||
|
||||
use cfg_if::cfg_if;
|
||||
use mime_guess::Mime;
|
||||
use mime::Mime;
|
||||
|
||||
pub trait MimeDb {
|
||||
fn init() -> Self;
|
||||
|
|
|
@ -128,6 +128,11 @@ pub struct Parameters {
|
|||
/// The directory to process.
|
||||
#[clap(name = "DIR", default_value = ".", parse(from_os_str))]
|
||||
pub dir: PathBuf,
|
||||
|
||||
/// Don't rename files with extensions unknown to fif.
|
||||
/// For example, with this option, fif will not rename "image.unknown" to "image.jpg"
|
||||
#[clap(short = 'I', long)]
|
||||
pub ignore_unknown_exts: bool,
|
||||
}
|
||||
|
||||
fn lowercase_exts(exts: &str) -> Result<(), String> {
|
||||
|
@ -140,6 +145,7 @@ fn lowercase_exts(exts: &str) -> Result<(), String> {
|
|||
|
||||
/// Further options relating to scanning.
|
||||
#[derive(PartialEq, Debug)]
|
||||
#[allow(clippy::struct_excessive_bools)]
|
||||
pub struct ScanOpts {
|
||||
/// Whether hidden files and directories should be scanned.
|
||||
pub hidden: bool,
|
||||
|
@ -147,6 +153,8 @@ pub struct ScanOpts {
|
|||
pub extensionless: bool,
|
||||
/// Should symlinks be followed?
|
||||
pub follow_symlinks: bool,
|
||||
/// Whether to skip (i.e. never rename) files whose extensions are unknown to fif.
|
||||
pub ignore_unknown_exts: bool,
|
||||
}
|
||||
|
||||
impl Parameters {
|
||||
|
@ -214,6 +222,7 @@ impl Parameters {
|
|||
hidden: self.scan_hidden,
|
||||
extensionless: self.scan_extensionless,
|
||||
follow_symlinks: self.follow_symlinks,
|
||||
ignore_unknown_exts: self.ignore_unknown_exts,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -7,8 +7,7 @@ use crate::{scan_directory, scan_from_walkdir};
|
|||
|
||||
use crate::parameters::Parameters;
|
||||
use clap::Clap;
|
||||
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
|
||||
use mime_guess::Mime;
|
||||
use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
|
||||
|
||||
use crate::parameters::ExtensionSet;
|
||||
use std::collections::HashMap;
|
||||
|
@ -101,6 +100,7 @@ fn simple_directory() {
|
|||
files.insert("test.pdf", PDF_BYTES);
|
||||
files.insert("test.zip", ZIP_BYTES);
|
||||
files.insert("wrong.jpg", PNG_BYTES);
|
||||
files.insert("ignore.fake_ext", ZIP_BYTES);
|
||||
|
||||
let dir = tempdir().expect("Failed to create temporary directory.");
|
||||
set_current_dir(dir.path()).expect("Failed to change directory.");
|
||||
|
@ -118,11 +118,13 @@ fn simple_directory() {
|
|||
hidden: true,
|
||||
extensionless: false,
|
||||
follow_symlinks: false,
|
||||
ignore_unknown_exts: true,
|
||||
};
|
||||
|
||||
let entries = scan_directory(dir.path(), None, None, &scan_opts).expect("Directory scan failed.");
|
||||
|
||||
assert_eq!(entries.len(), files.len());
|
||||
// there should be one file missing: "ignore.fake_ext"
|
||||
assert_eq!(entries.len(), files.len() - 1);
|
||||
|
||||
// initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present.
|
||||
crate::init_db();
|
||||
|
@ -210,6 +212,7 @@ fn argument_parsing() {
|
|||
hidden: false,
|
||||
extensionless: false,
|
||||
follow_symlinks: true,
|
||||
ignore_unknown_exts: false,
|
||||
},
|
||||
"ScanOpts are incorrect"
|
||||
);
|
||||
|
@ -330,7 +333,10 @@ fn identify_random_bytes() {
|
|||
for (mime, count) in &results {
|
||||
println!("{}:\t{} counts", mime, count);
|
||||
}
|
||||
println!("No type found:\t{} counts", 500 - results.values().sum::<i32>());
|
||||
println!(
|
||||
"No type found:\t{} counts",
|
||||
results.values().len() as i32 - results.values().sum::<i32>()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -364,7 +370,7 @@ fn outputs_move_commands() {
|
|||
|
||||
// the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg"
|
||||
assert!(
|
||||
contents.contains("misnamed_file.jpg"),
|
||||
contents.contains("misnamed_file.jpg") && contents.contains("misnamed_file.png"),
|
||||
"{} output doesn't contain move command!\n===\n{}",
|
||||
format,
|
||||
contents
|
||||
|
@ -430,13 +436,8 @@ fn writables_is_correct() {
|
|||
use crate::writables;
|
||||
|
||||
assert_eq!(
|
||||
&[
|
||||
"henlo".into(),
|
||||
Path::new("henlo").into(),
|
||||
Writable::Newline,
|
||||
Writable::Space
|
||||
],
|
||||
writables!["henlo", (Path::new("henlo")), Newline, Space]
|
||||
&["henlo".into(), Path::new("henlo").into(), Writable::Newline,],
|
||||
writables!["henlo", (Path::new("henlo")), Newline]
|
||||
);
|
||||
}
|
||||
|
||||
|
|
1
test.py
1
test.py
|
@ -54,6 +54,7 @@ def main():
|
|||
sys.exit(2)
|
||||
|
||||
print("Done! You might want to run cargo clean...")
|
||||
subprocess.run(["du", "-sh", "target"])
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
Loading…
Reference in a new issue