handle invalid utf-8 filenames properly... i think

This commit is contained in:
Lynne Megido 2021-02-14 23:58:46 +10:00
parent 32ba0176b2
commit 00de841fda
Signed by: lynnesbian
GPG Key ID: F0A184B5213D9F90
2 changed files with 60 additions and 35 deletions

View File

@ -1,51 +1,65 @@
use std::io; use std::io::{self, Write};
use std::io::Write;
use crate::Findings; use crate::Findings;
use crate::scanerror::ScanError; use crate::scanerror::ScanError;
use std::path::PathBuf; use std::path::PathBuf;
use snailquote::escape; use snailquote::escape;
#[cfg(unix)]
use std::os::unix::ffi::OsStrExt;
#[cfg(windows)]
use std::os::windows::ffi::OsStrExt;
type Entries = [Result<Findings, (ScanError, PathBuf)>]; type Entries = [Result<Findings, (ScanError, PathBuf)>];
/// Writes `path` to `f` as a single quoted word for the generated shell script.
///
/// Paths that are valid UTF-8 are passed through `escape`. Paths that are not
/// valid UTF-8 are handed to `write_non_utf8_path`, whose behaviour depends on
/// the target platform.
///
/// # Errors
///
/// Returns any I/O error reported by the underlying writer.
fn write_pathbuf<W: Write>(f: &mut W, path: &PathBuf) -> io::Result<()> {
	match path.to_str() {
		Some(string) => write!(f, "{}", escape(string)),
		None => write_non_utf8_path(f, path),
	}
}

/// Unix: emits the raw bytes of `path` wrapped in single quotes.
///
/// Nothing is special inside a single-quoted shell word except the single quote
/// itself, so every `'` byte in the name is written as `'\''` (close the quote,
/// emit an escaped quote, reopen the quote). Without this, a file name
/// containing `'` would terminate the quoting early and the rest of the name
/// would be interpreted by the shell.
#[cfg(unix)]
fn write_non_utf8_path<W: Write>(f: &mut W, path: &PathBuf) -> io::Result<()> {
	f.write_all(b"'")?;
	let raw = path.as_os_str().as_bytes();
	for (index, chunk) in raw.split(|&byte| byte == b'\'').enumerate() {
		if index > 0 {
			f.write_all(b"'\\''")?;
		}
		f.write_all(chunk)?;
	}
	f.write_all(b"'")
}

/// Non-unix targets: there is no byte representation of the path to emit
/// (on Windows `encode_wide` yields `u16` units, which cannot be passed to
/// `write_all`), so the name is converted lossily and escaped like any other
/// string.
///
/// NOTE: the lossy conversion substitutes U+FFFD for invalid sequences, so the
/// emitted name may not match the file on disk exactly.
#[cfg(not(unix))]
fn write_non_utf8_path<W: Write>(f: &mut W, path: &PathBuf) -> io::Result<()> {
	write!(f, "{}", escape(&path.to_string_lossy()))
}
pub trait Format { pub trait Format {
fn new() -> Self; fn new() -> Self;
fn rename<T: Write>(&self, f: &mut T, from: &str, to: &str) -> io::Result<()>; fn rename<W: Write>(&self, f: &mut W, from: &PathBuf, to: &PathBuf) -> io::Result<()>;
fn no_known_extension<T: Write>(&self, f: &mut T, path: &str) -> io::Result<()>; fn no_known_extension<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()>;
fn unreadable<T: Write>(&self, f: &mut T, path: &str) -> io::Result<()>; fn unreadable<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()>;
fn unknown_type<T: Write>(&self, f: &mut T, path: &str) -> io::Result<()>; fn unknown_type<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()>;
fn write_all<T: Write>(&self, entries: &Entries, f: &mut T) -> io::Result<()> { fn write_all<W: Write>(&self, entries: &Entries, f: &mut W) -> io::Result<()> {
// TODO: clean this up - it's horrifying // TODO: clean this up - it's horrifying
for entry in entries { for entry in entries {
match entry { match entry {
Ok(finding) => { Ok(finding) => {
// the file was successfully scanned, and a mimetype was detected // the file was successfully scanned, and a mimetype was detected
if !finding.valid { if !finding.valid {
// the file's extension is wrong! // the file's extension is wrong - check for known extension
match finding.recommended_extension() { if let Some(ext) = finding.recommended_extension() {
Some(ext) => { self.rename(
// there's a known extension for this mimetype!! f,
self.rename( &finding.file,
f, &finding.file.with_extension(ext.as_str())
&finding.file.to_string_lossy(), )?
&finding.file.with_extension(ext.as_str()).to_string_lossy()
)?
}
None => {
// unfortunately, there's no known extension for this mimetype :(
self.no_known_extension(f, &finding.file.to_string_lossy())?
}
}
} else {
self.no_known_extension(f, &finding.file)?
}
} }
} }
Err(error) => { Err(error) => {
// something went wrong 0uo // something went wrong 0uo
match error.0 { match error.0 {
// failed to read the file // failed to read the file
ScanError::File => self.unreadable(f, &error.1.to_string_lossy())?, ScanError::File => self.unreadable(f, &error.1)?,
// file was read successfully, but we couldn't determine a mimetype // file was read successfully, but we couldn't determine a mimetype
ScanError::Mime => self.unknown_type(f, &error.1.to_string_lossy())? ScanError::Mime => self.unknown_type(f, &error.1)?
} }
} }
} }
@ -56,24 +70,35 @@ pub trait Format {
pub struct Script {} pub struct Script {}
impl Format for Script { impl Format for Script {
// TODO: begin write_all output with "#!/bin/sh" or w/e
fn new() -> Self { fn new() -> Self {
return Script {} return Script {}
} }
fn rename<T: Write>(&self, f: &mut T, from: &str, to: &str) -> io::Result<()> { fn rename<W: Write>(&self, f: &mut W, from: &PathBuf, to: &PathBuf) -> io::Result<()> {
// TODO: string escaping aaaaaaAAAAAAAAAA // TODO: surely there's a better way...
writeln!(f, "mv -v -i -- {} {}", escape(from), escape(to)) write!(f, "mv -v -i -- ")?;
write_pathbuf(f, from)?;
write!(f, " ")?;
write_pathbuf(f, to)?;
write!(f, "\n")
} }
fn no_known_extension<T: Write>(&self, f: &mut T, path: &str) -> io::Result<()> { fn no_known_extension<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()> {
writeln!(f, "echo No known extension for {}!", escape(path)) write!(f, "echo No known extension for ")?;
write_pathbuf(f, path)?;
write!(f, "\n")
} }
fn unreadable<T: Write>(&self, f: &mut T, path: &str) -> io::Result<()> { fn unreadable<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()> {
writeln!(f, "# Failed to read {}", escape(path)) write!(f, "# Failed to read ")?;
write_pathbuf(f, path)?;
write!(f, "\n")
} }
fn unknown_type<T: Write>(&self, f: &mut T, path: &str) -> io::Result<()> { fn unknown_type<W: Write>(&self, f: &mut W, path: &PathBuf) -> io::Result<()> {
writeln!(f, "# Failed to detect mime type for {}", escape(path)) write!(f, "# Failed to detect mime type for ")?;
write_pathbuf(f, path)?;
write!(f, "\n")
} }
} }

View File

@ -28,7 +28,7 @@ use log::{debug, trace, info, warn};
use rayon::prelude::*; use rayon::prelude::*;
use std::fmt::{self, Display}; use std::fmt::{self, Display};
use xdg_mime::SharedMimeInfo; use xdg_mime::SharedMimeInfo;
use std::io::stdout; use std::io::{stdout, BufWriter};
use crate::parameters::OutputFormat; use crate::parameters::OutputFormat;
use crate::scanerror::ScanError; use crate::scanerror::ScanError;
use crate::formats::{Script, Format}; use crate::formats::{Script, Format};
@ -204,7 +204,7 @@ fn main() {
match args.output_format { match args.output_format {
OutputFormat::Script => { OutputFormat::Script => {
let s = Script::new(); let s = Script::new();
s.write_all(&results, &mut stdout().lock()).expect("failed to ouptput"); s.write_all(&results, &mut BufWriter::new(stdout().lock())).expect("failed to output");
}, },
OutputFormat::Text => debug!("eewr") OutputFormat::Text => debug!("eewr")
} }