Compare commits
No commits in common. "e511d215beec0c1f501604fa317b721937bd3b8c" and "8a91ca3ef4cf8de390dcab918a9478b45cf97057" have entirely different histories.
e511d215be
...
8a91ca3ef4
8 changed files with 59 additions and 93 deletions
|
@ -15,12 +15,12 @@ default:
|
|||
- cargo version
|
||||
|
||||
stages:
|
||||
- lint # KNEE-DEEP IN THE DEBUG
|
||||
- cache-cleanup # THE CACHES OF HELL
|
||||
- build-base # INFERNAL build times
|
||||
- build # THY CREDITS CONSUMED
|
||||
- test # SIGKILL
|
||||
# - release
|
||||
- lint
|
||||
- cache-cleanup
|
||||
- build-base
|
||||
- build
|
||||
- test
|
||||
- release
|
||||
|
||||
# TEMPLATES
|
||||
|
||||
|
@ -176,11 +176,6 @@ test-nightly:
|
|||
- target
|
||||
- .cargo
|
||||
|
||||
# RELEASE
|
||||
# this is currently all disabled because
|
||||
# a) it's gross
|
||||
# b) it probably won't work and i really don't want to partake in CI tomfoolery right now, or ever
|
||||
|
||||
build-release:
|
||||
stage: release
|
||||
cache: { }
|
||||
|
|
|
@ -2,11 +2,6 @@
|
|||
Dates are given in YYYY-MM-DD format.
|
||||
|
||||
## v0.3
|
||||
### v0.3.3 (2021-mm-dd)
|
||||
#### Features
|
||||
- Added `--canonical-paths` flag for using canonical paths in output - for example,
|
||||
`mv /home/lynne/file.jpg /home/lynne/file.mp3` instead of the default `mv file.jpg file.mp3`
|
||||
|
||||
### v0.3.2 (2021-06-14)
|
||||
#### Bugfixes
|
||||
- Fixed PowerShell output regression introduced in v0.2.13, whoops
|
||||
|
|
16
Cargo.lock
generated
16
Cargo.lock
generated
|
@ -429,9 +429,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.8.4"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
|
||||
checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"rand_chacha",
|
||||
|
@ -451,18 +451,18 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.6.3"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
|
||||
checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand_hc"
|
||||
version = "0.3.1"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7"
|
||||
checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73"
|
||||
dependencies = [
|
||||
"rand_core",
|
||||
]
|
||||
|
@ -494,9 +494,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.2.9"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee"
|
||||
checksum = "742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::path::Path;
|
||||
|
||||
use mime_guess::Mime;
|
||||
|
||||
|
@ -11,9 +11,9 @@ use std::fmt::{Display, Formatter};
|
|||
|
||||
/// Information about a scanned file.
|
||||
#[derive(Ord, PartialOrd, Eq, PartialEq)]
|
||||
pub struct Findings {
|
||||
pub struct Findings<'a> {
|
||||
/// The location of the scanned file.
|
||||
pub file: PathBuf,
|
||||
pub file: &'a Path,
|
||||
/// Whether or not the file's extension is valid for its mimetype.
|
||||
pub valid: bool,
|
||||
/// The file's mimetype.
|
||||
|
@ -21,7 +21,7 @@ pub struct Findings {
|
|||
}
|
||||
|
||||
#[cfg(feature = "json")]
|
||||
impl serde::Serialize for Findings {
|
||||
impl<'a> serde::Serialize for Findings<'a> {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
|
@ -36,7 +36,7 @@ impl serde::Serialize for Findings {
|
|||
}
|
||||
}
|
||||
|
||||
impl Findings {
|
||||
impl<'a> Findings<'a> {
|
||||
pub fn recommended_extension(&self) -> Option<String> {
|
||||
mime_extension_lookup(self.mime.essence_str().into()).map(|extensions| extensions[0].clone())
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ macro_rules! writablesln {
|
|||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
type Entries<'a> = [Result<Findings, ScanError<'a>>];
|
||||
type Entries<'a> = [Result<Findings<'a>, ScanError<'a>>];
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum Writable<'a> {
|
||||
|
@ -167,17 +167,11 @@ pub trait Format {
|
|||
}
|
||||
}
|
||||
|
||||
if findings.len() != entries.len() {
|
||||
// if these lengths aren't the same, there was at least one error
|
||||
// add a blank line between the errors and commands
|
||||
smart_write(f, writables![Newline])?;
|
||||
}
|
||||
|
||||
for finding in findings {
|
||||
if let Some(ext) = finding.recommended_extension() {
|
||||
self.rename(f, finding.file.as_path(), &finding.file.with_extension(ext.as_str()))?;
|
||||
self.rename(f, finding.file, &finding.file.with_extension(ext.as_str()))?;
|
||||
} else {
|
||||
self.no_known_extension(f, finding.file.as_path())?;
|
||||
self.no_known_extension(f, finding.file)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -196,7 +190,7 @@ impl Format for Shell {
|
|||
}
|
||||
|
||||
fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
|
||||
smart_write(f, writablesln!["cat <<- '???'", Newline, "No known extension for ", path, Newline, "???"])
|
||||
smart_write(f, writablesln!["echo No known extension for ", path])
|
||||
}
|
||||
|
||||
fn unreadable<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
|
||||
|
@ -239,7 +233,7 @@ impl Format for PowerShell {
|
|||
// there doesn't seem to be a way to rename the file, prompting only if the target already exists.
|
||||
smart_write(
|
||||
f,
|
||||
writablesln!["Rename-Item -Verbose -Path ", from, " -NewName ", (to.file_name().unwrap())],
|
||||
writablesln!["Rename-Item -Path ", from, " -NewName ", (to.file_name().unwrap())],
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -328,7 +322,7 @@ impl Format for Json {
|
|||
#[derive(serde::Serialize)]
|
||||
struct SerdeEntries<'a> {
|
||||
errors: &'a Vec<&'a ScanError<'a>>,
|
||||
findings: &'a Vec<&'a Findings>,
|
||||
findings: &'a Vec<&'a Findings<'a>>,
|
||||
}
|
||||
|
||||
let result = serde_json::to_writer_pretty(
|
||||
|
|
53
src/main.rs
53
src/main.rs
|
@ -94,13 +94,16 @@ fn main() {
|
|||
debug!("Checking files regardless of extensions");
|
||||
}
|
||||
|
||||
let entries = match scan_directory(&args.dir, extensions.as_ref(), excludes.as_ref(), &args.get_scan_opts()) {
|
||||
let entries = scan_directory(&args.dir, extensions.as_ref(), excludes.as_ref(), &args.get_scan_opts());
|
||||
|
||||
if entries.is_none() {
|
||||
// no need to log anything for fatal errors - fif will already have printed something obvious like
|
||||
// "[ERROR] /fake/path: No such file or directory (os error 2)". we can assume that if this has happened, the dir
|
||||
// given as input doesn't exist or is otherwise unreadable.
|
||||
None => exit(exitcode::NOINPUT),
|
||||
Some(e) => e,
|
||||
};
|
||||
exit(exitcode::NOINPUT);
|
||||
}
|
||||
|
||||
let entries = entries.unwrap();
|
||||
|
||||
if entries.is_empty() {
|
||||
warn!("No files matching requested options found.");
|
||||
|
@ -109,7 +112,7 @@ fn main() {
|
|||
|
||||
trace!("Found {} items to check", entries.len());
|
||||
|
||||
let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths)
|
||||
let results: Vec<_> = scan_from_walkdir(&entries)
|
||||
.into_iter()
|
||||
.filter(
|
||||
|result| result.is_err() || !result.as_ref().unwrap().valid,
|
||||
|
@ -233,22 +236,27 @@ fn extension_from_path(path: &Path) -> Option<&OsStr> { path.extension() }
|
|||
/// In the event of an IO error, the returned [`ScanError`] will be of type [`ScanError::File`]. Otherwise, a
|
||||
/// [`ScanError::Mime`] will be returned, meaning that the file was scanned successfully, but a mimetype could not be
|
||||
/// determined.
|
||||
fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, ScanError> {
|
||||
let path = entry.path();
|
||||
fn scan_file(entry: &DirEntry) -> Result<Findings, ScanError> {
|
||||
// try to determine mimetype for this entry
|
||||
let result = match inspectors::mime_type(MIMEDB.get().unwrap(), path) {
|
||||
let result = inspectors::mime_type(MIMEDB.get().unwrap(), entry.path());
|
||||
|
||||
if result.is_err() {
|
||||
// an error occurred while trying to read the file
|
||||
Err(_) => return Err(ScanError::File(path)),
|
||||
return Err(ScanError::File(entry.path()));
|
||||
}
|
||||
|
||||
let result = result.unwrap();
|
||||
if result.is_none() {
|
||||
// the file was read successfully, but we were unable to determine its mimetype
|
||||
Ok(None) => return Err(ScanError::Mime(path)),
|
||||
// a mimetype was found!
|
||||
Ok(Some(result)) => result,
|
||||
};
|
||||
return Err(ScanError::Mime(entry.path()));
|
||||
}
|
||||
|
||||
let result = result.unwrap();
|
||||
|
||||
// set of known extensions for the given mimetype
|
||||
let known_exts = inspectors::mime_extension_lookup(result.essence_str().into());
|
||||
// file extension for this particular file
|
||||
let entry_ext = extension_from_path(path);
|
||||
let entry_ext = extension_from_path(entry.path());
|
||||
|
||||
let valid = match known_exts {
|
||||
// there is a known set of extensions for this mimetype, and the file has an extension
|
||||
|
@ -257,24 +265,15 @@ fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, ScanEr
|
|||
Some(_) | None => false,
|
||||
};
|
||||
|
||||
let path = if canonical_paths {
|
||||
match std::fs::canonicalize(path) {
|
||||
Ok(path) => path,
|
||||
Err(_) => return Err(ScanError::File(entry.path())),
|
||||
}
|
||||
} else {
|
||||
path.to_path_buf() // :c
|
||||
};
|
||||
|
||||
Ok(Findings {
|
||||
file: path,
|
||||
file: entry.path(),
|
||||
valid,
|
||||
mime: result,
|
||||
})
|
||||
}
|
||||
|
||||
/// Takes a slice of [`DirEntry`]s and calls [`scan_file`] on each one, returning the results in a vector.
|
||||
fn scan_from_walkdir(entries: &[DirEntry], canonical_paths: bool) -> Vec<Result<Findings, ScanError>> {
|
||||
fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, ScanError>> {
|
||||
cfg_if! {
|
||||
if #[cfg(feature = "multi-threaded")] {
|
||||
use rayon::prelude::*;
|
||||
|
@ -285,12 +284,12 @@ fn scan_from_walkdir(entries: &[DirEntry], canonical_paths: bool) -> Vec<Result<
|
|||
.flat_map(|chunk| {
|
||||
chunk
|
||||
.iter() // iter over the chunk, which is a slice of DirEntry structs
|
||||
.map(|entry| scan_file(entry, canonical_paths))
|
||||
.map(|entry| scan_file(entry))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
entries.iter().map(|entry: &DirEntry| scan_file(entry, canonical_paths)).collect()
|
||||
entries.iter().map(|entry: &DirEntry| scan_file(entry)).collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,7 +33,6 @@ pub enum OutputFormat {
|
|||
// TODO: convert this to macro style?: https://docs.rs/clap/3.0.0-beta.2/clap/index.html#using-macros
|
||||
|
||||
#[derive(Clap, Debug)]
|
||||
#[allow(clippy::struct_excessive_bools)]
|
||||
#[clap(
|
||||
version = clap_version(),
|
||||
long_version = clap_long_version(),
|
||||
|
@ -117,13 +116,6 @@ pub struct Parameters {
|
|||
#[clap(short, long, parse(from_occurrences), group = "verbosity")]
|
||||
pub quiet: u8,
|
||||
|
||||
/// Use canonical (absolute) paths in output.
|
||||
/// A canonical path is the "one true path" to a given file, and is always an absolute path. While a file may have
|
||||
/// many absolute paths (for example, on Windows, '\\?\C:\file.txt' and 'C:\file.txt' are both absolute paths to the
|
||||
/// same file), it has only one canonical path. This does not affect logged output.
|
||||
#[clap(long)]
|
||||
pub canonical_paths: bool,
|
||||
|
||||
/// The directory to process.
|
||||
#[clap(name = "DIR", default_value = ".", parse(from_os_str))]
|
||||
pub dir: PathBuf,
|
||||
|
|
|
@ -87,8 +87,7 @@ fn recommend_ext() {
|
|||
fn simple_directory() {
|
||||
use crate::parameters::ScanOpts;
|
||||
use std::borrow::Borrow;
|
||||
use std::env::set_current_dir;
|
||||
use std::fs::{canonicalize, File};
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use tempfile::tempdir;
|
||||
|
||||
|
@ -103,7 +102,6 @@ fn simple_directory() {
|
|||
files.insert("wrong.jpg", PNG_BYTES);
|
||||
|
||||
let dir = tempdir().expect("Failed to create temporary directory.");
|
||||
set_current_dir(dir.path()).expect("Failed to change directory.");
|
||||
|
||||
for (name, bytes) in &files {
|
||||
let mut file = File::create(dir.path().join(name)).expect(&*format!("Failed to create file: {}", name));
|
||||
|
@ -127,22 +125,15 @@ fn simple_directory() {
|
|||
// initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present.
|
||||
crate::init_db();
|
||||
|
||||
let results = scan_from_walkdir(&entries, false);
|
||||
let canonical_results = scan_from_walkdir(&entries, true);
|
||||
assert_eq!(results.len(), canonical_results.len());
|
||||
|
||||
for (result, canonical_result) in results.iter().zip(canonical_results.iter()) {
|
||||
let results = scan_from_walkdir(&entries);
|
||||
for result in results {
|
||||
// there should be no IO errors during this test. any IO errors encountered are outside the scope of this test.
|
||||
let result = result.as_ref().expect("Error while scanning file");
|
||||
let canonical_result = canonical_result.as_ref().expect("Error while scanning file");
|
||||
|
||||
// paths should be canonical
|
||||
assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file);
|
||||
let result = result.expect("Error while scanning file");
|
||||
|
||||
if !result.valid {
|
||||
// the only invalid file detected should be "wrong.jpg", which is a misnamed png file
|
||||
// 1. ensure detected extension is "jpg"
|
||||
assert_eq!(extension_from_path(result.file.as_path()).unwrap(), OsStr::new("jpg"));
|
||||
assert_eq!(extension_from_path(&*result.file).unwrap(), OsStr::new("jpg"));
|
||||
// 2. ensure detected mime type is IMAGE_PNG
|
||||
assert_eq!(result.mime, IMAGE_PNG);
|
||||
// 3. ensure the recommended extension for "wrong.jpg" is "png"
|
||||
|
@ -160,7 +151,7 @@ fn simple_directory() {
|
|||
// make sure the guessed mimetype is correct based on the extension of the scanned file
|
||||
// because we already know that the extensions match the mimetype (as we created these files ourselves earlier in
|
||||
// the test), all files with the "jpg" extension should be IMAGE_JPEGs, etc.
|
||||
let ext = extension_from_path(result.file.as_path());
|
||||
let ext = extension_from_path(result.file);
|
||||
assert!(ext.is_some());
|
||||
assert_eq!(
|
||||
result.mime,
|
||||
|
@ -342,7 +333,7 @@ fn outputs_move_commands() {
|
|||
|
||||
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
|
||||
let entries = vec![Ok(Findings {
|
||||
file: Path::new("misnamed_file.png").to_path_buf(),
|
||||
file: Path::new("misnamed_file.png"),
|
||||
valid: false,
|
||||
mime: IMAGE_JPEG,
|
||||
})];
|
||||
|
@ -381,7 +372,7 @@ fn test_json() {
|
|||
use std::io::Read;
|
||||
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
|
||||
let entries = vec![Ok(Findings {
|
||||
file: Path::new("misnamed_file.png").to_path_buf(),
|
||||
file: Path::new("misnamed_file.png"),
|
||||
valid: false,
|
||||
mime: IMAGE_JPEG,
|
||||
})];
|
||||
|
|
Loading…
Reference in a new issue