Compare commits

..

No commits in common. "534101db8b1033371dbb35cdd6f1c242729d9acf" and "7e3efbed5c93dbcb58d20ae606712627680a2109" have entirely different histories.

9 changed files with 96 additions and 264 deletions

View file

@ -10,70 +10,24 @@ cache:
default:
before_script:
- rustc --version
- cargo version
- rustc --version
stages:
- lint
- build
- test
- version
# TEMPLATE
.cargo-build:
build:
stage: build
parallel:
matrix:
- FEATURES: [ 'xdg-mime-backend', 'infer-backend', 'multi-threaded xdg-mime-backend',
'multi-threaded infer-backend' ]
script:
cargo build --no-default-features --locked --features="$FEATURES"
- cargo build --verbose --locked
.cargo-test:
cargo-test:
stage: test
parallel:
matrix:
- FEATURES: [ 'xdg-mime-backend', 'infer-backend', 'multi-threaded xdg-mime-backend',
'multi-threaded infer-backend' ]
script:
cargo test --no-default-features --locked --verbose --features="$FEATURES"
cargo test --verbose --locked
clippy:
stage: lint
stage: test
script:
- rustup component add clippy
- cargo clippy --version
- ./clippy.sh ci
# BUILD
build-stable:
extends: .cargo-build
build-msrv:
extends: build-stable
image: "rust:1.43.0"
build-nightly:
extends: build-stable
image: "rustlang/rust:nightly"
# TEST
test-stable:
extends: .cargo-test
test-msrv:
extends: test-stable
image: "rust:1.43.0"
test-nightly:
extends: test-stable
image: "rustlang/rust:nightly"
# VERSION
fif-version:
stage: version
script:
cargo run -- -V
- ./clippy.sh

View file

@ -4,19 +4,7 @@ Dates are given in YYYY-MM-DD format.
## v0.2
### v0.2.14 (2021-xx-yy)
#### Features
- Added `-x`/`--exclude` flag for excluding file extensions (overrides `-e` or `-E` - `-E images -x jpg` scans all image
files, except ".jpg" files)
- Added `-X`/`--exclude-set` flag for excluding sets of files, with the same syntax and sets as `-E`
- In addition to supplying included extensions as a comma separated list (like `-e jpg,png`), it is now possible to
supply them through multiple uses of the `-e` flag (like `-e jpg -e png`). This also applies to `-x`
- `-e` and `-E` no longer conflict with each other, and can now be used together. For example, `-E images -e mp3`
will scan all images *and* all MP3 files
- It is now possible to specify multiple extension sets at once: `-E images,system` will scan all images and archives
#### Other
- Published my fork of [`mime_guess`] as [`new_mime_guess`], allowing it to be used properly with
[crates.io](https://crates.io)
- The `videos` extension set has been renamed to `video`, in line with `audio`. `fif --help` has actually mistakenly
referred to the set as `video` since v0.2.12! 0uo
- Added `-x`/`--exclude` flag for excluding file extensions (overrides `-e` or `-E`)
### v0.2.13 (2021-04-26)
#### Features
@ -160,5 +148,4 @@ Initial commit!
[`clap`]: https://crates.io/crates/clap
[`infer`]: https://crates.io/crates/infer
[`mime_guess`]: https://crates.io/crates/mime_guess
[`new_mime_guess`]: https://crates.io/crates/new_mime_guess
[`snailquote`]: https://crates.io/crates/snailquote
[`snailquote`]: https://crates.io/crates/snailquote

13
Cargo.lock generated
View file

@ -204,7 +204,7 @@ dependencies = [
"infer",
"itertools",
"log",
"new_mime_guess",
"mime_guess",
"once_cell",
"rayon",
"smartstring",
@ -348,10 +348,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
[[package]]
name = "new_mime_guess"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e714f72c691c7d2b344ec8dd57d7f52b59651f46b9de477fb68363f097d694ae"
name = "mime_guess"
version = "2.0.4"
source = "git+https://github.com/Lynnesbian/mime_guess#5432b3c1991372291a5e67457cc9307c85f77bd9"
dependencies = [
"mime",
"unicase",
@ -579,9 +578,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "syn"
version = "1.0.71"
version = "1.0.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad184cc9470f9117b2ac6817bfe297307418819ba40552f9b3846f05c33d5373"
checksum = "b9505f307c872bab8eb46f77ae357c8eba1fdacead58ee5a850116b1d7f82883"
dependencies = [
"proc-macro2",
"quote",

View file

@ -26,7 +26,7 @@ xdg-mime-backend = []
[dependencies]
walkdir = "2.3.2"
log = "0.4.14"
mime_guess = { package = "new_mime_guess", version = "2.1.0" }
mime_guess = "2.0.3"
snailquote = "0.3.0"
once_cell = "1.7.2"
infer = "0.4.0"
@ -41,6 +41,10 @@ xdg-mime = "0.3.3"
[target.'cfg(not(all(target_endian = "big", target_pointer_width = "32")))'.dependencies]
smartstring = "0.2.6"
[patch.crates-io]
# forked version with many more mime types
mime_guess = { git = "https://github.com/Lynnesbian/mime_guess", version = "2.0.4" }
[dependencies.clap]
version = "3.0.0-beta.2"
default-features = false

View file

@ -1,15 +1,5 @@
#!/bin/bash
set -e
_extra=""
if [ "$1" == "ci" ]; then
# deny on warnings when running in CI
_extra="-Dwarnings"
fi
# allow find to fail
find . -name '*.rs' -exec touch "{}" \; || true
fd -e rs -x touch {}
cargo clippy --all-features --tests -- \
-W clippy::nursery \
-W clippy::perf \
@ -24,8 +14,7 @@ cargo clippy --all-features --tests -- \
-A clippy::unused_io_amount \
-A clippy::redundant_closure_for_method_calls \
-A clippy::shadow_unrelated \
-A clippy::option_if_let_else \
"$_extra"
-A clippy::option_if_let_else
# ALLOWS:
# unused_io_amount: there are two places where i want to read up to X bytes and i'm fine with getting less than that

View file

@ -101,17 +101,14 @@ cached! {
// add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif.
[vec![String::from("jpg")], possible_exts].concat()
} else if mime == mime_guess::mime::TEXT_XML || mime == Mime::from_str("application/xml").unwrap() {
// a somewhat similar case arises with XML files - the first suggested extension is "asa", when it should
} else if mime == mime_guess::mime::TEXT_XML {
// a somewhat similar case arises with XML files - the first suggested extension is "addin", when it should
// (in my opinion) be "xml".
// there's also another problem: SVG files can easily be misidentified as XML files, because they usually
// *are* valid XML - the more whitespace and comments an SVG file begins with, the more bytes must be read
// before it's possible to determine that it's an SVG rather than an XML file. to "fix" this, we can add "svg"
// as a valid extension for XML files, ensuring that SVG files misidentified as XML will still be considered
// to have valid extensions.
// TODO: if a file is detected as application/xml, but it has an extension like "xht" which corresponds to
// "application/xhtml+xml", let it through - in other words, if it's identified as application/xml, but its
// extension is classed as application/*+xml, consider it OK
[vec![String::from("xml"), String::from("svg")], possible_exts].concat()
} else if mime == Mime::from_str("application/msword").unwrap() {

View file

@ -34,7 +34,6 @@ use crate::formats::{Format, PowerShell, Shell};
use crate::mime_db::MimeDb;
use crate::parameters::{OutputFormat, ScanOpts};
use crate::scan_error::ScanError;
use std::collections::BTreeSet;
mod findings;
mod formats;
@ -89,12 +88,7 @@ fn main() {
debug!("Checking files regardless of extensions");
}
let entries = scan_directory(
&args.dirs,
extensions.as_ref(),
excludes.as_ref(),
&args.get_scan_opts(),
);
let entries = scan_directory(&args.dirs, extensions.as_ref(), excludes.as_ref(), &args.get_scan_opts());
if entries.is_none() {
// no need to log anything for fatal errors - fif will already have printed something obvious like
@ -144,7 +138,7 @@ fn main() {
let result = match args.output_format {
OutputFormat::Sh => Shell::new().write_all(&results, &mut buffered_stdout),
OutputFormat::PowerShell => PowerShell::new().write_all(&results, &mut buffered_stdout),
OutputFormat::PowerShell | OutputFormat::Powershell => PowerShell::new().write_all(&results, &mut buffered_stdout),
OutputFormat::Text => todo!(),
};
@ -185,12 +179,7 @@ cfg_if! {
/// Returns `true` if a file matches the given criteria. This means checking whether the file's extension appears in
/// `exts` (if specified), potentially skipping over hidden files, and so on.
fn wanted_file(
entry: &DirEntry,
exts: Option<&BTreeSet<&str>>,
exclude: Option<&BTreeSet<&str>>,
scan_opts: &ScanOpts,
) -> bool {
fn wanted_file(entry: &DirEntry, exts: Option<&Vec<&str>>, exclude: Option<&Vec<&str>>, scan_opts: &ScanOpts) -> bool {
if entry.depth() == 0 {
// the root directory should always be scanned.
return true;
@ -296,12 +285,7 @@ fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, ScanError>> {
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
/// [DirEntry]s.
fn scan_directory(
dirs: &Path,
exts: Option<&BTreeSet<&str>>,
exclude: Option<&BTreeSet<&str>>,
scan_opts: &ScanOpts,
) -> Option<Vec<DirEntry>> {
fn scan_directory(dirs: &Path, exts: Option<&Vec<&str>>, exclude: Option<&Vec<&str>>, scan_opts: &ScanOpts) -> Option<Vec<DirEntry>> {
let stepper = WalkDir::new(dirs).follow_links(scan_opts.follow_symlinks).into_iter();
let mut probably_fatal_error = false;
let entries: Vec<DirEntry> = stepper

View file

@ -3,7 +3,6 @@
use crate::string_type::String as StringType;
use cfg_if::cfg_if;
use clap::{AppSettings, Clap};
use std::collections::BTreeSet;
use std::path::PathBuf;
cfg_if! {
@ -19,8 +18,9 @@ pub enum OutputFormat {
/// A Bourne shell compatible script.
Sh,
/// A PowerShell script.
#[clap(alias = "powershell")]
PowerShell,
/// Also a PowerShell script, with different casing to allow for `fif -o powershell`.
Powershell,
/// Plain text.
Text,
}
@ -40,32 +40,26 @@ pub enum OutputFormat {
setting(AppSettings::ColoredHelp)
)]
pub struct Parameters {
// NOTE: clap's comma-separated argument parser makes it impossible to specify extensions with commas in their name -
// `-e sil\,ly` is treated as ["sil", "ly"] rather than as ["silly"], no matter how i escape the comma (in bash,
// anyway). is this really an issue? it does technically exclude some perfectly valid extensions, but i've never seen
// a file extension with a comma in its name before.
/// Only examine files with these extensions.
/// Multiple extensions can be specified by either using the flag multiple times (`-e jpg -e png -e gif`), or by
/// separating them with commas (`-e jpg,png,gif`).
#[clap(short, long, use_delimiter = true, require_delimiter = true)]
/// Only examine files with these extensions (comma-separated list).
/// This argument conflicts with `-E`.
#[clap(short, long, use_delimiter = true, require_delimiter = true, group = "extensions")]
pub exts: Option<Vec<StringType>>,
/// Use these preset lists of extensions as the search filter (comma-separated list).
/// `media` includes all extensions from the `audio`, `video`, and `images` sets, making `-E media` equivalent to
/// `-E audio,video,images`.
#[clap(short = 'E', long, arg_enum, use_delimiter = true, require_delimiter = true)]
pub ext_set: Vec<ExtensionSet>,
/// Use a preset list of extensions as the search filter.
/// `media` includes all extensions from the `audio`, `video`, and `images` sets. This argument conflicts with `-e`.
#[clap(short = 'E', long, arg_enum, group = "extensions")]
pub ext_set: Option<ExtensionSet>,
/// Don't scan files with these extensions.
/// This option takes precedence over extensions specified with `-e` or `-E`.
#[clap(short = 'x', long, use_delimiter = true, require_delimiter = true)]
/// Don't scan files with these extensions (comma-separated list).
/// This option takes preference over files specified with -e or -E.
#[clap(
short = 'x',
long,
use_delimiter = true,
require_delimiter = true,
)]
pub exclude: Option<Vec<StringType>>,
/// Exclude files using a preset list of extensions.
/// This option takes precedence over extensions specified with `-e` or `-E`.
#[clap(short = 'X', long, arg_enum, use_delimiter = true, require_delimiter = true)]
pub exclude_set: Vec<ExtensionSet>,
/// Don't skip hidden files and directories.
/// Even if this flag is not present, fif will still recurse into a hidden root directory - for example, `fif
/// ~/.hidden` will recurse into `~/.hidden` regardless of whether or not -s was passed as an argument.
@ -111,62 +105,38 @@ pub struct ScanOpts {
impl Parameters {
/// Returns an optional vec of the extensions to be scanned - i.e., extensions specified via the `-e` or `-E` flag,
/// minus the extensions excluded with the `-x` flag; i.e., the difference between the included and excluded sets.
pub fn extensions(&self) -> Option<BTreeSet<&str>> {
if let Some(included) = self.included_extensions() {
if let Some(excluded) = self.excluded_extensions() {
// return included extensions without excluded extensions
// ...maybe i should have called them "suffixes" instead of extensions...
Some(included.into_iter().filter(|ext| !excluded.contains(ext)).collect())
} else {
// no extensions excluded - just return all included
Some(included)
}
/// minus the extensions excluded with the `-x` flag.
pub fn extensions(&self) -> Option<Vec<&str>> {
let empty_vec = vec![];
let exclude = &self.excluded_extensions().unwrap_or(empty_vec);
// TODO: bleugh
if let Some(exts) = &self.exts {
// extensions supplied like "-e png,jpg,jpeg"
Some(
exts
.iter()
.map(|ext| ext.as_str())
.filter(|ext| !exclude.contains(ext))
.collect(),
)
} else if let Some(exts) = &self.ext_set {
// extensions supplied like "-E images"
Some(
exts
.extensions()
.into_iter()
.filter(|ext| !exclude.contains(ext))
.collect(),
)
} else {
// no extensions included - return none
// neither -E nor -e was passed
None
}
}
/// Returns an optional set of extensions that were specified by `-e` or `-E`. Note that this doesn't account for
/// extensions excluded by the exclusion flags.
pub fn included_extensions(&self) -> Option<BTreeSet<&str>> {
let mut included = BTreeSet::new();
if let Some(exts) = self.exts.as_ref() {
// -e
included.extend(exts.iter().map(|ext| ext.as_str()));
}
if !&self.ext_set.is_empty() {
// -E
included.extend(self.ext_set.iter().flat_map(|set| set.extensions()));
}
match included {
x if x.is_empty() => None,
x => Some(x),
}
}
/// Returns an optional vec of extensions that were specified by `-x` or `-X`.
pub fn excluded_extensions(&self) -> Option<BTreeSet<&str>> {
let mut excluded = BTreeSet::new();
if let Some(exclude) = self.exclude.as_ref() {
// -x
excluded.extend(exclude.iter().map(|ext| ext.as_str()));
}
if !&self.exclude_set.is_empty() {
// -X
excluded.extend(self.exclude_set.iter().flat_map(|set| set.extensions()));
}
// excluded doesn't sound like a word anymore
// tongue twister: enter X-options' excellent extension exclusion
match excluded {
x if x.is_empty() => None,
x => Some(x),
}
pub fn excluded_extensions(&self) -> Option<Vec<&str>> {
self.exclude.as_ref().map(|exclude| exclude.iter().map(|ext| ext.as_str()).collect())
}
pub const fn get_scan_opts(&self) -> ScanOpts {
@ -198,7 +168,7 @@ pub enum ExtensionSet {
/// Extensions used for audio file formats, such as `mp3`, `ogg`, `flac`, etc.
Audio,
/// Extensions used for video file formats, such as `mkv`, `mp4`, `mov`, etc.
Video,
Videos,
/// Extensions used for media file formats. This acts as a combination of the [Images](ExtensionSet::Images),
/// [Audio](ExtensionSet::Audio) and [Videos](ExtensionSet::Videos) variants.
Media,
@ -218,27 +188,24 @@ impl ExtensionSet {
match self {
Self::Images => mime_guess::get_mime_extensions_str("image/*").unwrap().to_vec(),
Self::Audio => mime_guess::get_mime_extensions_str("audio/*").unwrap().to_vec(),
Self::Video => mime_guess::get_mime_extensions_str("video/*").unwrap().to_vec(),
Self::Videos => mime_guess::get_mime_extensions_str("video/*").unwrap().to_vec(),
Self::Media => [
Self::Images.extensions(),
Self::Audio.extensions(),
Self::Video.extensions(),
Self::Videos.extensions(),
]
.concat(),
Self::Documents => vec![
"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
"pages", "key", "numbers",
],
Self::Text => [mime_guess::get_mime_extensions_str("text/*").unwrap(), &["js", "pl", "csh", "sh", "bash", "zsh", "fish", "bat", "php"]].concat(),
Self::Text => mime_guess::get_mime_extensions_str("text/*").unwrap().to_vec(),
// many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used
// somehow to extract extensions for compressed files from mime_guess?
Self::Archives => vec![
"zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2", "tgz", "rpa", "txz", "tz2", "sea", "sitx", "z",
"cpio",
],
Self::Archives => vec!["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2", "tgz", "rpa"],
Self::System => vec![
"com", "dll", "exe", "sys", "reg", "nt", "cpl", "msi", "efi", "bio", "rcv", "mbr", "sbf", "grub", "ko",
"dylib", "pdb", "hdmp", "crash", "cab",
"dylib", "pdb", "hdmp", "crash",
],
}
}

View file

@ -5,15 +5,12 @@ use crate::mime_db::MimeDb;
use crate::string_type::String;
use crate::{extension_from_path, scan_directory, scan_from_walkdir};
use crate::parameters::Parameters;
use clap::Clap;
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use mime_guess::Mime;
use crate::parameters::ExtensionSet;
use std::collections::HashMap;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
use std::path::Path;
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
const PNG_BYTES: &[u8] = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
@ -170,17 +167,17 @@ fn simple_directory() {
#[test]
/// Ensure that command line argument parsing works correctly - flags are interpreted, booleans are set, and so on.
fn argument_parsing() {
use crate::parameters::ScanOpts;
use crate::parameters::{Parameters, ScanOpts};
use clap::Clap;
// pass `-f`, which enables following symlinks, and `-E images`, which scans files with image extensions
let args: Parameters = Parameters::parse_from(vec!["fif", "-f", "-E", "images"]);
// check if "jpg" is in the list of extensions to be scanned
assert!(
args
.extensions()
.expect("args.extensions() should be Some(_)!")
.contains(&"jpg"),
assert!(args
.extensions()
.expect("args.extensions() should be Some(_)!")
.contains(&"jpg"),
"args.extensions() should contain the `images` set!"
);
@ -206,19 +203,11 @@ fn argument_parsing() {
}
#[test]
/// Ensure that `fif -e jpg dir` is interpreted as "scan for jpg files in dir" and not "scan for jpg and dir files"
fn positional_args() {
for flag in &["-x", "-e", "-X", "-E"] {
assert_eq!(
Parameters::parse_from(vec!["fif", flag, "images", "directory"]).dirs,
PathBuf::from("directory")
)
}
}
#[test]
/// Ensure the `exclude` flag (`-x`) overrides `-e` and `-E`.
/// Ensure exclude overrides `-e` and `-E`.
fn exclude_overrides() {
use crate::parameters::{Parameters};
use clap::Clap;
// pass `-E images`, which includes many image extensions, and `-x jpg,png`, which should remove "jpg" and "png" from
// the extensions list
let args: Parameters = Parameters::parse_from(vec!["fif", "-x", "jpg,png", "-E", "images"]);
@ -242,46 +231,13 @@ fn exclude_overrides() {
assert!(extensions.contains(&"jkl"));
}
#[test]
/// Ensure the `exclude_set` flag (`-X`) overrides `-e`.
fn exclude_set_overrides_includes() {
// pass `-e jpg,flac` and `-X images` -- which should produce the equivalent of `-e flac`
let args: Parameters = Parameters::parse_from(vec!["fif", "-e", "jpg,flac", "-X", "images"]);
let extensions = args.extensions();
assert!(extensions.is_some(), "Extensions should be set!");
let mut extensions = extensions.unwrap().into_iter();
assert_eq!(extensions.next(), Some("flac"), "Extensions should contain flac!");
assert_eq!(extensions.next(), None, "Too many extensions!");
}
#[test]
/// Ensure the `exclude_set` flag (`-X`) overrides `-E`.
fn exclude_set_overrides_include_set() {
// pass `-E media` and `-X images` -- which should produce the equivalent of `-E audio,video`
let args: Parameters = Parameters::parse_from(vec!["fif", "-E", "media", "-X", "images"]);
let extensions = args.extensions();
assert!(extensions.is_some(), "Extensions should be set!");
let extensions = extensions.unwrap();
// ensure all of audio and video's extensions are here
for &ext in ExtensionSet::Audio
.extensions()
.iter()
.chain(ExtensionSet::Video.extensions().iter())
{
assert!(extensions.contains(&ext), "Extensions should contain {}!", ext)
}
// ensure all of images' extensions are excluded
for ext in ExtensionSet::Images.extensions() {
assert!(!extensions.contains(&ext), "Extensions should not contain {}!", ext)
}
}
#[test]
/// Ensure that badly formed command line arguments are rejected.
fn rejects_bad_args() {
use crate::parameters::Parameters;
use clap::Clap;
let tests = [
// Non-existent flags:
vec!["fif", "-abcdefghijklmnopqrstuvwxyz"],
@ -289,8 +245,8 @@ fn rejects_bad_args() {
vec!["fif", "-E"],
// `-E` with an invalid set:
vec!["fif", "-E", "pebis"],
// `-X` with an invalid set:
vec!["fif", "-X", "pebis"],
// `-E` and `-e`:
vec!["fif", "-E", "media", "-e", "jpg"],
// `-e` with nothing but commas:
vec!["fif", "-e", ",,,,,"],
];
@ -356,19 +312,14 @@ fn outputs_move_commands() {
#[test]
/// Ensure that the Media extension set contains all (is a superset) of Audio, Video, and Images.
fn media_contains_audio_video_images() {
use crate::parameters::ExtensionSet::{Audio, Images, Media, Video};
use crate::parameters::ExtensionSet::{Audio, Images, Media, Videos};
let media_exts = Media.extensions();
// assert every extension in the audio/video/image sets is contained in the media set
[Audio.extensions(), Video.extensions(), Images.extensions()]
[Audio.extensions(), Videos.extensions(), Images.extensions()]
.concat()
.into_iter()
.for_each(|ext| assert!(media_exts.contains(&ext)));
assert_eq!(
Parameters::parse_from(&["fif", "-E", "media"]).extensions(),
Parameters::parse_from(&["fif", "-E", "audio,video,images"]).extensions()
)
}
#[test]