rudimentary script output, lots of small stuff, performance improvements
This commit is contained in:
parent
9be33cd90f
commit
82bdbebec5
6 changed files with 155 additions and 172 deletions
175
Cargo.lock
generated
175
Cargo.lock
generated
|
@ -15,17 +15,6 @@ dependencies = [
|
|||
"event-listener",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-trait"
|
||||
version = "0.1.42"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8d3a45e77e34375a7923b1e8febb049bb011f064714a8e17a1a616fef01da13d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
|
@ -56,10 +45,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "5e2afe73808fbaac302e39c9754bfc3c4b4d0f99c9c240b9f4e4efc841ad1b74"
|
||||
dependencies = [
|
||||
"async-mutex",
|
||||
"async-trait",
|
||||
"cached_proc_macro",
|
||||
"cached_proc_macro_types",
|
||||
"futures",
|
||||
"hashbrown",
|
||||
"once_cell",
|
||||
]
|
||||
|
@ -270,6 +257,7 @@ dependencies = [
|
|||
"mime_guess",
|
||||
"rayon",
|
||||
"smartstring",
|
||||
"snailquote",
|
||||
"walkdir",
|
||||
"xdg-mime",
|
||||
]
|
||||
|
@ -280,101 +268,6 @@ version = "1.0.7"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "futures"
|
||||
version = "0.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da9052a1a50244d8d5aa9bf55cbc2fb6f357c86cc52e46c62ed390a7180cf150"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-executor",
|
||||
"futures-io",
|
||||
"futures-sink",
|
||||
"futures-task",
|
||||
"futures-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-channel"
|
||||
version = "0.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f2d31b7ec7efab6eefc7c57233bb10b847986139d88cc2f5a02a1ae6871a1846"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-core"
|
||||
version = "0.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "79e5145dde8da7d1b3892dad07a9c98fc04bc39892b1ecc9692cf53e2b780a65"
|
||||
|
||||
[[package]]
|
||||
name = "futures-executor"
|
||||
version = "0.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e9e59fdc009a4b3096bf94f740a0f2424c082521f20a9b08c5c07c48d90fd9b9"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-task",
|
||||
"futures-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-io"
|
||||
version = "0.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "28be053525281ad8259d47e4de5de657b25e7bac113458555bb4b70bc6870500"
|
||||
|
||||
[[package]]
|
||||
name = "futures-macro"
|
||||
version = "0.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c287d25add322d9f9abdcdc5927ca398917996600182178774032e9f8258fedd"
|
||||
dependencies = [
|
||||
"proc-macro-hack",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-sink"
|
||||
version = "0.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "caf5c69029bda2e743fddd0582d1083951d65cc9539aebf8812f36c3491342d6"
|
||||
|
||||
[[package]]
|
||||
name = "futures-task"
|
||||
version = "0.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13de07eb8ea81ae445aca7b69f5f7bf15d7bf4912d8ca37d6645c77ae8a58d86"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "futures-util"
|
||||
version = "0.3.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "632a8cd0f2a4b3fdea1657f08bde063848c3bd00f9bbf6e256b8be78802e624b"
|
||||
dependencies = [
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-io",
|
||||
"futures-macro",
|
||||
"futures-sink",
|
||||
"futures-task",
|
||||
"memchr",
|
||||
"pin-project-lite",
|
||||
"pin-utils",
|
||||
"proc-macro-hack",
|
||||
"proc-macro-nested",
|
||||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.2"
|
||||
|
@ -530,18 +423,6 @@ version = "2.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85"
|
||||
|
||||
[[package]]
|
||||
name = "pin-project-lite"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "439697af366c49a6d0a010c56a0d97685bc140ce0d377b13a2ea2aa42d64a827"
|
||||
|
||||
[[package]]
|
||||
name = "pin-utils"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-error"
|
||||
version = "1.0.4"
|
||||
|
@ -566,18 +447,6 @@ dependencies = [
|
|||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-hack"
|
||||
version = "0.5.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-nested"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc881b2c22681370c6a780e47af9840ef841837bc98118431d4e1868bd0c1086"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.24"
|
||||
|
@ -661,12 +530,6 @@ version = "1.1.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
|
||||
|
||||
[[package]]
|
||||
name = "slab"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
|
||||
|
||||
[[package]]
|
||||
name = "smartstring"
|
||||
version = "0.2.6"
|
||||
|
@ -676,6 +539,16 @@ dependencies = [
|
|||
"static_assertions",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "snailquote"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f34b729d802f52194598858ac852c3fb3b33f6e026cd03195072ccb7bf3fc810"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
"unicode_categories",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "static_assertions"
|
||||
version = "1.1.0"
|
||||
|
@ -734,6 +607,26 @@ dependencies = [
|
|||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146"
|
||||
dependencies = [
|
||||
"thiserror-impl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror-impl"
|
||||
version = "1.0.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicase"
|
||||
version = "2.6.0"
|
||||
|
@ -761,6 +654,12 @@ version = "0.2.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
|
||||
|
||||
[[package]]
|
||||
name = "unicode_categories"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
|
||||
|
||||
[[package]]
|
||||
name = "vec_map"
|
||||
version = "0.8.2"
|
||||
|
|
12
Cargo.toml
12
Cargo.toml
|
@ -19,7 +19,7 @@ smartstring = "0.2.6"
|
|||
xdg-mime = {git = "https://github.com/ebassi/xdg-mime-rs", version = "0.3"}
|
||||
mime_guess = "2.0.3"
|
||||
rayon = "1.5.0"
|
||||
cached = "0.23.0"
|
||||
snailquote = "0.3.0"
|
||||
|
||||
[dependencies.clap]
|
||||
version = "3.0.0-beta.2"
|
||||
|
@ -30,5 +30,15 @@ version = "0.8.2"
|
|||
default-features = false
|
||||
features = ["termcolor", "atty"]
|
||||
|
||||
[dependencies.cached]
|
||||
version = "0.23.0"
|
||||
default-features = false
|
||||
features = ["proc_macro"]
|
||||
|
||||
|
||||
[profile.release]
|
||||
lto = "thin"
|
||||
|
||||
# optimise dependencies, even when producing debug builds
|
||||
[profile.dev.package."*"]
|
||||
opt-level = 3
|
||||
|
|
|
@ -1,23 +1,79 @@
|
|||
use std::fmt;
|
||||
use std::fmt::Formatter;
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use crate::Findings;
|
||||
use crate::scanerror::ScanError;
|
||||
use std::path::PathBuf;
|
||||
use snailquote::escape;
|
||||
|
||||
trait Format {
|
||||
fn rename(f: &mut fmt::Formatter<'_>, from: &str, to: &str) -> fmt::Result;
|
||||
fn unreadable(f: &mut fmt::Formatter<'_>, path: &str) -> fmt::Result;
|
||||
fn unknown_type(f: &mut fmt::Formatter<'_>, path: &str) -> fmt::Result;
|
||||
// A slice of per-file scan outcomes: `Ok(Findings)` for a scanned file, or `Err` carrying the
// `ScanError` kind together with the path of the file it concerns.
type Entries = [Result<Findings, (ScanError, PathBuf)>];
|
||||
|
||||
/// An output format for scan results.
///
/// Implementors provide one writer method per kind of outcome; the provided
/// `write_all` method walks a list of scan results and calls the matching one.
pub trait Format {
|
||||
/// Creates a new formatter.
fn new() -> Self;
|
||||
/// Writes the entry for a file at `from` whose recommended path is `to`.
fn rename<T: Write>(&self, f: &mut T, from: &str, to: &str) -> io::Result<()>;
|
||||
/// Writes the entry for a file at `path` whose extension is not valid, but for which
/// no recommended extension is available.
fn no_known_extension<T: Write>(&self, f: &mut T, path: &str) -> io::Result<()>;
|
||||
/// Writes the entry for a file at `path` that failed with `ScanError::File`.
fn unreadable<T: Write>(&self, f: &mut T, path: &str) -> io::Result<()>;
|
||||
/// Writes the entry for a file at `path` that failed with `ScanError::Mime`.
fn unknown_type<T: Write>(&self, f: &mut T, path: &str) -> io::Result<()>;
|
||||
|
||||
/// Writes every entry of `entries` to `f` by dispatching to the methods above.
///
/// Findings marked as valid produce no output. Paths are converted with
/// `to_string_lossy` before being handed to the writer methods.
///
/// # Errors
///
/// Stops at, and returns, the first `io::Error` reported by a writer method.
fn write_all<T: Write>(&self, entries: &Entries, f: &mut T) -> io::Result<()> {
|
||||
// TODO: clean this up - it's horrifying
|
||||
for entry in entries {
|
||||
match entry {
|
||||
Ok(finding) => {
|
||||
// the file was successfully scanned, and a mimetype was detected
|
||||
if !finding.valid {
|
||||
// the file's extension is wrong!
|
||||
match finding.recommended_extension() {
|
||||
Some(ext) => {
|
||||
// there's a known extension for this mimetype!!
|
||||
// `to` is the original path with its extension swapped for the recommended one
|
||||
self.rename(
|
||||
f,
|
||||
&finding.file.to_string_lossy(),
|
||||
&finding.file.with_extension(ext.as_str()).to_string_lossy()
|
||||
)?
|
||||
}
|
||||
None => {
|
||||
// unfortunately, there's no known extension for this mimetype :(
|
||||
self.no_known_extension(f, &finding.file.to_string_lossy())?
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
Err(error) => {
|
||||
// something went wrong 0uo
|
||||
// `error.0` is the error kind, `error.1` the path of the affected file
|
||||
match error.0 {
|
||||
// failed to read the file
|
||||
ScanError::File => self.unreadable(f, &error.1.to_string_lossy())?,
|
||||
// file was read successfully, but we couldn't determine a mimetype
|
||||
ScanError::Mime => self.unknown_type(f, &error.1.to_string_lossy())?
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
struct Script {}
|
||||
/// Field-less formatter type; its `Format` implementation writes `mv`/`echo` command lines
/// and `#`-prefixed comment lines.
pub struct Script {}
|
||||
// `Format` implementation that writes one line per entry: `mv` commands for renames, an
// `echo` for files without a known extension, and `#` comment lines for failures.
// NOTE(review): this span contains both `fmt::Formatter`-based and `io::Write`-based versions
// of the same methods - presumably the removed and added lines of a diff; confirm which set
// is current before relying on it.
impl Format for Script {
|
||||
fn rename(f: &mut Formatter<'_>, from: &str, to: &str) -> fmt::Result {
|
||||
write!(f, "mv {} {}", from, to)
|
||||
// Creates the (field-less) formatter.
fn new() -> Self {
|
||||
return Script {}
|
||||
}
|
||||
|
||||
fn unreadable(f: &mut Formatter<'_>, path: &str) -> fmt::Result {
|
||||
write!(f, "# Failed to read {}", path)
|
||||
// Writes an `mv -v -i --` line; both paths are passed through `escape` first.
fn rename<T: Write>(&self, f: &mut T, from: &str, to: &str) -> io::Result<()> {
|
||||
// TODO: string escaping aaaaaaAAAAAAAAAA
|
||||
writeln!(f, "mv -v -i -- {} {}", escape(from), escape(to))
|
||||
}
|
||||
|
||||
fn unknown_type(f: &mut Formatter<'_>, path: &str) -> fmt::Result {
|
||||
write!(f, "# Failed to detect mime type for {}", path)
|
||||
// Writes an `echo` line naming the escaped path.
fn no_known_extension<T: Write>(&self, f: &mut T, path: &str) -> io::Result<()> {
|
||||
writeln!(f, "echo No known extension for {}!", escape(path))
|
||||
}
|
||||
|
||||
// Writes a `#`-prefixed line naming the escaped path of the unreadable file.
fn unreadable<T: Write>(&self, f: &mut T, path: &str) -> io::Result<()> {
|
||||
writeln!(f, "# Failed to read {}", escape(path))
|
||||
}
|
||||
|
||||
// Writes a `#`-prefixed line naming the escaped path whose mime type was not detected.
fn unknown_type<T: Write>(&self, f: &mut T, path: &str) -> io::Result<()> {
|
||||
writeln!(f, "# Failed to detect mime type for {}", escape(path))
|
||||
}
|
||||
}
|
38
src/main.rs
38
src/main.rs
|
@ -17,6 +17,7 @@
|
|||
mod parameters;
|
||||
mod inspectors;
|
||||
mod formats;
|
||||
mod scanerror;
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use walkdir::{WalkDir, DirEntry};
|
||||
|
@ -27,8 +28,12 @@ use log::{debug, trace, info, warn};
|
|||
use rayon::prelude::*;
|
||||
use std::fmt::{self, Display};
|
||||
use xdg_mime::SharedMimeInfo;
|
||||
use crate::parameters::OutputFormat;
|
||||
use crate::scanerror::ScanError;
|
||||
use crate::formats::{Script, Format};
|
||||
use std::io::stdout;
|
||||
|
||||
struct Findings {
|
||||
pub struct Findings {
|
||||
file: PathBuf,
|
||||
valid: bool,
|
||||
mime: Mime,
|
||||
|
@ -41,11 +46,6 @@ impl Findings {
|
|||
}
|
||||
}
|
||||
|
||||
enum ScanError {
|
||||
File,
|
||||
Mime
|
||||
}
|
||||
|
||||
impl Display for ScanError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}",
|
||||
|
@ -57,14 +57,15 @@ impl Display for ScanError {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: test if this actually works on a windows machine
|
||||
// TODO: test if this actually works on a windows machine - not that there's much of a point right now, considering
|
||||
// xdg-mime-rs doesn't support windows
|
||||
// Windows-only check for whether a directory entry is hidden: returns true when the
// 0x2 bit of the file attributes is set, and false when the metadata cannot be read.
// NOTE(review): `std::fs::metadata` expects an `AsRef<Path>` argument - confirm that
// `&DirEntry` satisfies this, or whether `entry.path()` is needed here.
// NOTE(review): the closure line appears twice below, differing only in the URL of its
// trailing comment - presumably the old and new versions of one line; confirm.
#[cfg(windows)]
|
||||
fn is_hidden(entry: &DirEntry) -> bool {
|
||||
use std::os::windows::prelude::*;
|
||||
std::fs::metadata(entry) // try to get metadata for file
|
||||
.map_or(
|
||||
false, // if getting metadata/attributes fails, assume it's not hidden
|
||||
|f| f.file_attributes() & 0x2 > 0 // flag for hidden - https://docs.microsoft.com/en-us/windows/win32/fileio/file-attribute-constants
|
||||
|f| f.file_attributes() & 0x2 > 0 // flag for hidden - https://docs.microsoft.com/windows/win32/fileio/file-attribute-constants
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -102,9 +103,9 @@ fn extension_from_path(path: &Path) -> Option<String> {
|
|||
|
||||
fn scan_file(db: &SharedMimeInfo, entry: &DirEntry) -> Result<Findings, (ScanError, PathBuf)> {
|
||||
// try to determine mimetype for this entry
|
||||
let result = inspectors::mime_type(&db, entry.path());
|
||||
let result = inspectors::mime_type(db, entry.path());
|
||||
|
||||
if let Err(_) = result {
|
||||
if result.is_err() {
|
||||
// an error occurred while trying to read the file
|
||||
// error!("{}: {}", entry.path().to_string_lossy(), error);
|
||||
return Err((ScanError::File, entry.path().to_path_buf()));
|
||||
|
@ -138,16 +139,15 @@ fn scan_file(db: &SharedMimeInfo, entry: &DirEntry) -> Result<Findings, (ScanErr
|
|||
valid, // make this a function
|
||||
mime: result,
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
fn scan_from_walkdir(db: &SharedMimeInfo, entries: Vec<DirEntry>) -> Vec<Result<Findings, (ScanError, PathBuf)>> {
|
||||
#[cfg(feature = "multi-threaded")] {
|
||||
// rather than using a standard par_iter, split the entries into chunks of 16 first.
|
||||
// rather than using a standard par_iter, split the entries into chunks of 32 first.
|
||||
// this allows each spawned thread to handle an entire chunk of files before closing, rather than creating a new thread for
|
||||
// each file. this leads to a pretty substantial speedup that i'm pretty substantially happy about 0u0
|
||||
entries
|
||||
.par_chunks(16) // split into chunks of 16
|
||||
.par_chunks(32) // split into chunks of 32
|
||||
.flat_map(|chunk| chunk // return Vec<...> instead of Chunk<Vec<...>>
|
||||
.iter() // iter over the chunk, which is a slice of DirEntry structs
|
||||
.map(|entry| scan_file(db, entry))
|
||||
|
@ -171,7 +171,7 @@ fn main() {
|
|||
// .format(|buf, r| writeln!(buf, "{} - {}", r.level(), r.args()))
|
||||
.format_module_path(false) // don't include module in logs, as it's not necessary
|
||||
.format_timestamp(None) // don't include timestamps (unnecessary, and the feature flag is disabled anyway)
|
||||
.target(env_logger::Target::Stdout) // log to stdout rather than stderr
|
||||
// .target(env_logger::Target::Stdout) // log to stdout rather than stderr
|
||||
.init();
|
||||
|
||||
let db = xdg_mime::SharedMimeInfo::new();
|
||||
|
@ -188,7 +188,7 @@ fn main() {
|
|||
|
||||
let results = scan_from_walkdir(&db, entries);
|
||||
|
||||
for result in results {
|
||||
for result in &results {
|
||||
match result {
|
||||
Ok(r) => {
|
||||
if !r.valid {
|
||||
|
@ -201,5 +201,13 @@ fn main() {
|
|||
}
|
||||
}
|
||||
|
||||
match args.output_format {
|
||||
OutputFormat::Script => {
|
||||
let s = Script::new();
|
||||
s.write_all(&results, &mut stdout().lock()).expect("failed to ouptput");
|
||||
},
|
||||
OutputFormat::Text => debug!("eewr")
|
||||
}
|
||||
|
||||
debug!("Done");
|
||||
}
|
||||
|
|
|
@ -1,20 +1,26 @@
|
|||
use clap::Clap;
|
||||
use clap::{Clap};
|
||||
use std::path::PathBuf;
|
||||
use smartstring::alias::String;
|
||||
use smartstring::{LazyCompact, SmartString};
|
||||
|
||||
/// Output formats that can be selected through the `output_format` parameter.
#[derive(Clap, PartialEq, Debug)]
|
||||
pub enum OutputFormat {
|
||||
/// Output produced by the `Script` formatter.
Script,
|
||||
/// Text output. NOTE(review): the visible part of `main` only emits a debug message for
/// this variant - presumably not implemented yet; confirm.
Text
|
||||
}
|
||||
|
||||
#[derive(Clap, Debug)]
|
||||
pub struct Parameters {
|
||||
/// Only examine files with these extensions (Comma-separated list)
|
||||
#[clap(short, long, use_delimiter = true)]
|
||||
pub extensions: Option<Vec<String>>,
|
||||
pub extensions: Option<Vec<SmartString<LazyCompact>>>,
|
||||
|
||||
/// Don't skip hidden files and directories
|
||||
#[clap(short, long)]
|
||||
pub scan_hidden: bool,
|
||||
|
||||
/// Output format to use. See "--help formats" for more information.
|
||||
#[clap(short, long, default_value="script", possible_values = &["script", "text"])]
|
||||
pub output_format: String,
|
||||
#[clap(short, long, default_value="script", arg_enum)]
|
||||
pub output_format: OutputFormat,
|
||||
|
||||
/// Directory to process
|
||||
// TODO: right now this can only take a single directory - should this be improved?
|
||||
|
|
4
src/scanerror.rs
Normal file
4
src/scanerror.rs
Normal file
|
@ -0,0 +1,4 @@
|
|||
/// The ways in which scanning a single file can fail.
pub enum ScanError {
|
||||
/// The file could not be read.
File,
|
||||
/// The file was read, but no mime type could be determined for it.
Mime
|
||||
}
|
Loading…
Reference in a new issue