Compare commits

...

4 commits

Author SHA1 Message Date
e511d215be
minor bash/pwsh output changes
- bash now uses a heredoc to ensure that cheeky filenames containing linebreaks don't escape the echo string
- powershell rename-item now has the verbose flag that i didn't know existed
- there is now a newline between the error and successful output lines
2021-06-18 17:42:16 +10:00
4f78d93975
code cleanup 2021-06-18 15:36:05 +10:00
3b731a7c61
added --canonical-paths flag
Findings now takes a PathBuf instead of a reference to a Path but there's no noticeable performance change
2021-06-18 15:17:30 +10:00
3b7a26961b
i'm disabling the release job because it is probably haunted and, to my knowledge at least, nobody has ever died from being TOO paranoid about CI 2021-06-18 13:26:22 +10:00
8 changed files with 93 additions and 59 deletions

View file

@ -15,12 +15,12 @@ default:
- cargo version - cargo version
stages: stages:
- lint - lint # KNEE-DEEP IN THE DEBUG
- cache-cleanup - cache-cleanup # THE CACHES OF HELL
- build-base - build-base # INFERNAL build times
- build - build # THY CREDITS CONSUMED
- test - test # SIGKILL
- release # - release
# TEMPLATES # TEMPLATES
@ -176,6 +176,11 @@ test-nightly:
- target - target
- .cargo - .cargo
# RELEASE
# this is currently all disabled because
# a) it's gross
# b) it probably won't work and i really don't want to partake in CI tomfoolery right now, or ever
build-release: build-release:
stage: release stage: release
cache: { } cache: { }

View file

@ -2,6 +2,11 @@
Dates are given in YYYY-MM-DD format. Dates are given in YYYY-MM-DD format.
## v0.3 ## v0.3
### v0.3.3 (2021-mm-dd)
#### Features
- Added `--canonical-paths` flag for using canonical paths in output - for example,
`mv /home/lynne/file.jpg /home/lynne/file.mp3` instead of the default `mv file.jpg file.mp3`
### v0.3.2 (2021-06-14) ### v0.3.2 (2021-06-14)
#### Bugfixes #### Bugfixes
- Fixed PowerShell output regression introduced in v0.2.13, whoops - Fixed PowerShell output regression introduced in v0.2.13, whoops

16
Cargo.lock generated
View file

@ -429,9 +429,9 @@ dependencies = [
[[package]] [[package]]
name = "rand" name = "rand"
version = "0.8.3" version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
dependencies = [ dependencies = [
"libc", "libc",
"rand_chacha", "rand_chacha",
@ -451,18 +451,18 @@ dependencies = [
[[package]] [[package]]
name = "rand_core" name = "rand_core"
version = "0.6.2" version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
dependencies = [ dependencies = [
"getrandom", "getrandom",
] ]
[[package]] [[package]]
name = "rand_hc" name = "rand_hc"
version = "0.3.0" version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7"
dependencies = [ dependencies = [
"rand_core", "rand_core",
] ]
@ -494,9 +494,9 @@ dependencies = [
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.2.8" version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc" checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee"
dependencies = [ dependencies = [
"bitflags", "bitflags",
] ]

View file

@ -1,4 +1,4 @@
use std::path::Path; use std::path::{Path, PathBuf};
use mime_guess::Mime; use mime_guess::Mime;
@ -11,9 +11,9 @@ use std::fmt::{Display, Formatter};
/// Information about a scanned file. /// Information about a scanned file.
#[derive(Ord, PartialOrd, Eq, PartialEq)] #[derive(Ord, PartialOrd, Eq, PartialEq)]
pub struct Findings<'a> { pub struct Findings {
/// The location of the scanned file. /// The location of the scanned file.
pub file: &'a Path, pub file: PathBuf,
/// Whether or not the file's extension is valid for its mimetype. /// Whether or not the file's extension is valid for its mimetype.
pub valid: bool, pub valid: bool,
/// The file's mimetype. /// The file's mimetype.
@ -21,7 +21,7 @@ pub struct Findings<'a> {
} }
#[cfg(feature = "json")] #[cfg(feature = "json")]
impl<'a> serde::Serialize for Findings<'a> { impl serde::Serialize for Findings {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where where
S: Serializer, S: Serializer,
@ -36,7 +36,7 @@ impl<'a> serde::Serialize for Findings<'a> {
} }
} }
impl<'a> Findings<'a> { impl Findings {
pub fn recommended_extension(&self) -> Option<String> { pub fn recommended_extension(&self) -> Option<String> {
mime_extension_lookup(self.mime.essence_str().into()).map(|extensions| extensions[0].clone()) mime_extension_lookup(self.mime.essence_str().into()).map(|extensions| extensions[0].clone())
} }

View file

@ -51,7 +51,7 @@ macro_rules! writablesln {
} }
#[doc(hidden)] #[doc(hidden)]
type Entries<'a> = [Result<Findings<'a>, ScanError<'a>>]; type Entries<'a> = [Result<Findings, ScanError<'a>>];
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum Writable<'a> { pub enum Writable<'a> {
@ -167,11 +167,17 @@ pub trait Format {
} }
} }
if findings.len() != entries.len() {
// if these lengths aren't the same, there was at least one error
// add a blank line between the errors and commands
smart_write(f, writables![Newline])?;
}
for finding in findings { for finding in findings {
if let Some(ext) = finding.recommended_extension() { if let Some(ext) = finding.recommended_extension() {
self.rename(f, finding.file, &finding.file.with_extension(ext.as_str()))?; self.rename(f, finding.file.as_path(), &finding.file.with_extension(ext.as_str()))?;
} else { } else {
self.no_known_extension(f, finding.file)?; self.no_known_extension(f, finding.file.as_path())?;
} }
} }
@ -190,7 +196,7 @@ impl Format for Shell {
} }
fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> { fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
smart_write(f, writablesln!["echo No known extension for ", path]) smart_write(f, writablesln!["cat <<- '???'", Newline, "No known extension for ", path, Newline, "???"])
} }
fn unreadable<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> { fn unreadable<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()> {
@ -233,7 +239,7 @@ impl Format for PowerShell {
// there doesn't seem to be a way to rename the file, prompting only if the target already exists. // there doesn't seem to be a way to rename the file, prompting only if the target already exists.
smart_write( smart_write(
f, f,
writablesln!["Rename-Item -Path ", from, " -NewName ", (to.file_name().unwrap())], writablesln!["Rename-Item -Verbose -Path ", from, " -NewName ", (to.file_name().unwrap())],
) )
} }
@ -322,7 +328,7 @@ impl Format for Json {
#[derive(serde::Serialize)] #[derive(serde::Serialize)]
struct SerdeEntries<'a> { struct SerdeEntries<'a> {
errors: &'a Vec<&'a ScanError<'a>>, errors: &'a Vec<&'a ScanError<'a>>,
findings: &'a Vec<&'a Findings<'a>>, findings: &'a Vec<&'a Findings>,
} }
let result = serde_json::to_writer_pretty( let result = serde_json::to_writer_pretty(

View file

@ -94,16 +94,13 @@ fn main() {
debug!("Checking files regardless of extensions"); debug!("Checking files regardless of extensions");
} }
let entries = scan_directory(&args.dir, extensions.as_ref(), excludes.as_ref(), &args.get_scan_opts()); let entries = match scan_directory(&args.dir, extensions.as_ref(), excludes.as_ref(), &args.get_scan_opts()) {
if entries.is_none() {
// no need to log anything for fatal errors - fif will already have printed something obvious like // no need to log anything for fatal errors - fif will already have printed something obvious like
// "[ERROR] /fake/path: No such file or directory (os error 2)". we can assume that if this has happened, the dir // "[ERROR] /fake/path: No such file or directory (os error 2)". we can assume that if this has happened, the dir
// given as input doesn't exist or is otherwise unreadable. // given as input doesn't exist or is otherwise unreadable.
exit(exitcode::NOINPUT); None => exit(exitcode::NOINPUT),
} Some(e) => e,
};
let entries = entries.unwrap();
if entries.is_empty() { if entries.is_empty() {
warn!("No files matching requested options found."); warn!("No files matching requested options found.");
@ -112,7 +109,7 @@ fn main() {
trace!("Found {} items to check", entries.len()); trace!("Found {} items to check", entries.len());
let results: Vec<_> = scan_from_walkdir(&entries) let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths)
.into_iter() .into_iter()
.filter( .filter(
|result| result.is_err() || !result.as_ref().unwrap().valid, |result| result.is_err() || !result.as_ref().unwrap().valid,
@ -236,27 +233,22 @@ fn extension_from_path(path: &Path) -> Option<&OsStr> { path.extension() }
/// In the event of an IO error, the returned [`ScanError`] will be of type [`ScanError::File`]. Otherwise, a /// In the event of an IO error, the returned [`ScanError`] will be of type [`ScanError::File`]. Otherwise, a
/// [`ScanError::Mime`] will be returned, meaning that the file was scanned successfully, but a mimetype could not be /// [`ScanError::Mime`] will be returned, meaning that the file was scanned successfully, but a mimetype could not be
/// determined. /// determined.
fn scan_file(entry: &DirEntry) -> Result<Findings, ScanError> { fn scan_file(entry: &DirEntry, canonical_paths: bool) -> Result<Findings, ScanError> {
let path = entry.path();
// try to determine mimetype for this entry // try to determine mimetype for this entry
let result = inspectors::mime_type(MIMEDB.get().unwrap(), entry.path()); let result = match inspectors::mime_type(MIMEDB.get().unwrap(), path) {
if result.is_err() {
// an error occurred while trying to read the file // an error occurred while trying to read the file
return Err(ScanError::File(entry.path())); Err(_) => return Err(ScanError::File(path)),
}
let result = result.unwrap();
if result.is_none() {
// the file was read successfully, but we were unable to determine its mimetype // the file was read successfully, but we were unable to determine its mimetype
return Err(ScanError::Mime(entry.path())); Ok(None) => return Err(ScanError::Mime(path)),
} // a mimetype was found!
Ok(Some(result)) => result,
let result = result.unwrap(); };
// set of known extensions for the given mimetype // set of known extensions for the given mimetype
let known_exts = inspectors::mime_extension_lookup(result.essence_str().into()); let known_exts = inspectors::mime_extension_lookup(result.essence_str().into());
// file extension for this particular file // file extension for this particular file
let entry_ext = extension_from_path(entry.path()); let entry_ext = extension_from_path(path);
let valid = match known_exts { let valid = match known_exts {
// there is a known set of extensions for this mimetype, and the file has an extension // there is a known set of extensions for this mimetype, and the file has an extension
@ -265,15 +257,24 @@ fn scan_file(entry: &DirEntry) -> Result<Findings, ScanError> {
Some(_) | None => false, Some(_) | None => false,
}; };
let path = if canonical_paths {
match std::fs::canonicalize(path) {
Ok(path) => path,
Err(_) => return Err(ScanError::File(entry.path())),
}
} else {
path.to_path_buf() // :c
};
Ok(Findings { Ok(Findings {
file: entry.path(), file: path,
valid, valid,
mime: result, mime: result,
}) })
} }
/// Takes a slice of [`DirEntry`]s and calls [`scan_file`] on each one, returning the results in a vector. /// Takes a slice of [`DirEntry`]s and calls [`scan_file`] on each one, returning the results in a vector.
fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, ScanError>> { fn scan_from_walkdir(entries: &[DirEntry], canonical_paths: bool) -> Vec<Result<Findings, ScanError>> {
cfg_if! { cfg_if! {
if #[cfg(feature = "multi-threaded")] { if #[cfg(feature = "multi-threaded")] {
use rayon::prelude::*; use rayon::prelude::*;
@ -284,12 +285,12 @@ fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, ScanError>> {
.flat_map(|chunk| { .flat_map(|chunk| {
chunk chunk
.iter() // iter over the chunk, which is a slice of DirEntry structs .iter() // iter over the chunk, which is a slice of DirEntry structs
.map(|entry| scan_file(entry)) .map(|entry| scan_file(entry, canonical_paths))
.collect::<Vec<_>>() .collect::<Vec<_>>()
}) })
.collect() .collect()
} else { } else {
entries.iter().map(|entry: &DirEntry| scan_file(entry)).collect() entries.iter().map(|entry: &DirEntry| scan_file(entry, canonical_paths)).collect()
} }
} }
} }

View file

@ -33,6 +33,7 @@ pub enum OutputFormat {
// TODO: convert this to macro style?: https://docs.rs/clap/3.0.0-beta.2/clap/index.html#using-macros // TODO: convert this to macro style?: https://docs.rs/clap/3.0.0-beta.2/clap/index.html#using-macros
#[derive(Clap, Debug)] #[derive(Clap, Debug)]
#[allow(clippy::struct_excessive_bools)]
#[clap( #[clap(
version = clap_version(), version = clap_version(),
long_version = clap_long_version(), long_version = clap_long_version(),
@ -116,6 +117,13 @@ pub struct Parameters {
#[clap(short, long, parse(from_occurrences), group = "verbosity")] #[clap(short, long, parse(from_occurrences), group = "verbosity")]
pub quiet: u8, pub quiet: u8,
/// Use canonical (absolute) paths in output.
/// A canonical path is the "one true path" to a given file, and is always an absolute path. While a file may have
/// many absolute paths (for example, on Windows, '\\?\C:\file.txt' and 'C:\file.txt' are both absolute paths to the
/// same file), it has only one canonical path. This does not affect logged output.
#[clap(long)]
pub canonical_paths: bool,
/// The directory to process. /// The directory to process.
#[clap(name = "DIR", default_value = ".", parse(from_os_str))] #[clap(name = "DIR", default_value = ".", parse(from_os_str))]
pub dir: PathBuf, pub dir: PathBuf,

View file

@ -87,7 +87,8 @@ fn recommend_ext() {
fn simple_directory() { fn simple_directory() {
use crate::parameters::ScanOpts; use crate::parameters::ScanOpts;
use std::borrow::Borrow; use std::borrow::Borrow;
use std::fs::File; use std::env::set_current_dir;
use std::fs::{canonicalize, File};
use std::io::Write; use std::io::Write;
use tempfile::tempdir; use tempfile::tempdir;
@ -102,6 +103,7 @@ fn simple_directory() {
files.insert("wrong.jpg", PNG_BYTES); files.insert("wrong.jpg", PNG_BYTES);
let dir = tempdir().expect("Failed to create temporary directory."); let dir = tempdir().expect("Failed to create temporary directory.");
set_current_dir(dir.path()).expect("Failed to change directory.");
for (name, bytes) in &files { for (name, bytes) in &files {
let mut file = File::create(dir.path().join(name)).expect(&*format!("Failed to create file: {}", name)); let mut file = File::create(dir.path().join(name)).expect(&*format!("Failed to create file: {}", name));
@ -125,15 +127,22 @@ fn simple_directory() {
// initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present. // initialise global mime DB - this is needed because `scan_from_walkdir` expects it to be present.
crate::init_db(); crate::init_db();
let results = scan_from_walkdir(&entries); let results = scan_from_walkdir(&entries, false);
for result in results { let canonical_results = scan_from_walkdir(&entries, true);
assert_eq!(results.len(), canonical_results.len());
for (result, canonical_result) in results.iter().zip(canonical_results.iter()) {
// there should be no IO errors during this test. any IO errors encountered are outside the scope of this test. // there should be no IO errors during this test. any IO errors encountered are outside the scope of this test.
let result = result.expect("Error while scanning file"); let result = result.as_ref().expect("Error while scanning file");
let canonical_result = canonical_result.as_ref().expect("Error while scanning file");
// paths should be canonical
assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file);
if !result.valid { if !result.valid {
// the only invalid file detected should be "wrong.jpg", which is a misnamed png file // the only invalid file detected should be "wrong.jpg", which is a misnamed png file
// 1. ensure detected extension is "jpg" // 1. ensure detected extension is "jpg"
assert_eq!(extension_from_path(&*result.file).unwrap(), OsStr::new("jpg")); assert_eq!(extension_from_path(result.file.as_path()).unwrap(), OsStr::new("jpg"));
// 2. ensure detected mime type is IMAGE_PNG // 2. ensure detected mime type is IMAGE_PNG
assert_eq!(result.mime, IMAGE_PNG); assert_eq!(result.mime, IMAGE_PNG);
// 3. ensure the recommended extension for "wrong.jpg" is "png" // 3. ensure the recommended extension for "wrong.jpg" is "png"
@ -151,7 +160,7 @@ fn simple_directory() {
// make sure the guessed mimetype is correct based on the extension of the scanned file // make sure the guessed mimetype is correct based on the extension of the scanned file
// because we already know that the extensions match the mimetype (as we created these files ourselves earlier in // because we already know that the extensions match the mimetype (as we created these files ourselves earlier in
// the test), all files with the "jpg" extension should be IMAGE_JPEGs, etc. // the test), all files with the "jpg" extension should be IMAGE_JPEGs, etc.
let ext = extension_from_path(result.file); let ext = extension_from_path(result.file.as_path());
assert!(ext.is_some()); assert!(ext.is_some());
assert_eq!( assert_eq!(
result.mime, result.mime,
@ -333,7 +342,7 @@ fn outputs_move_commands() {
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
let entries = vec![Ok(Findings { let entries = vec![Ok(Findings {
file: Path::new("misnamed_file.png"), file: Path::new("misnamed_file.png").to_path_buf(),
valid: false, valid: false,
mime: IMAGE_JPEG, mime: IMAGE_JPEG,
})]; })];
@ -372,7 +381,7 @@ fn test_json() {
use std::io::Read; use std::io::Read;
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
let entries = vec![Ok(Findings { let entries = vec![Ok(Findings {
file: Path::new("misnamed_file.png"), file: Path::new("misnamed_file.png").to_path_buf(),
valid: false, valid: false,
mime: IMAGE_JPEG, mime: IMAGE_JPEG,
})]; })];