documentation!!! ≥50% coverage!!! wow!!!!!

This commit is contained in:
Lynne Megido 2021-03-01 00:06:05 +10:00
parent e294f56ecf
commit 31aaa80701
Signed by: lynnesbian
GPG key ID: F0A184B5213D9F90
10 changed files with 94 additions and 30 deletions

View file

@ -3,6 +3,13 @@
<component name="CsvFileAttributes"> <component name="CsvFileAttributes">
<option name="attributeMap"> <option name="attributeMap">
<map> <map>
<entry key="/src/inspectors.rs">
<value>
<Attribute>
<option name="separator" value="&#9;" />
</Attribute>
</value>
</entry>
<entry key="/src/main.rs"> <entry key="/src/main.rs">
<value> <value>
<Attribute> <Attribute>

View file

@ -1,32 +1,42 @@
//! Sets of extensions for use with [Parameters](crate::parameters::Parameters)'s `-E` flag.
use clap::Clap; use clap::Clap;
#[derive(Clap, PartialEq, Debug)] #[derive(Clap, PartialEq, Debug)]
pub enum ExtensionSet { pub enum ExtensionSet {
/// Extensions used for image file formats, such as `png`, `jpeg`, `webp`, etc.
Images, Images,
/// Extensions used for audio file formats, such as `mp3`, `ogg`, `flac`, etc.
Audio, Audio,
/// Extensions used for video file formats, such as `mkv`, `mp4`, `mov`, etc.
Videos, Videos,
/// Extensions used for media file formats. This acts as a combination of the [Images](ExtensionSet::Images),
/// [Audio](ExtensionSet::Audio) and [Videos](ExtensionSet::Videos) variants.
Media, Media,
/// Extensions used for document file formats, such as `pdf`, `odt`, `docx`, etc.
Documents, Documents,
/// Extensions used for archive file formats, such as `zip`, `zst`, `gz`, etc.
Archives, Archives,
} }
impl ExtensionSet { impl ExtensionSet {
/// The list of known extensions for this ExtensionSet.
pub fn extensions(&self) -> Vec<&str> { pub fn extensions(&self) -> Vec<&str> {
match self { match self {
Self::Images => mime_guess::get_mime_extensions_str("image/*"), Self::Images => mime_guess::get_mime_extensions_str("image/*").unwrap().to_vec(),
Self::Videos => mime_guess::get_mime_extensions_str("video/*"), Self::Audio => mime_guess::get_mime_extensions_str("audio/*").unwrap().to_vec(),
Self::Audio => mime_guess::get_mime_extensions_str("audio/*"), Self::Videos => mime_guess::get_mime_extensions_str("video/*").unwrap().to_vec(),
Self::Documents => Some( Self::Media => [
&[ Self::Images.extensions(),
Self::Audio.extensions(),
Self::Videos.extensions(),
]
.concat(),
Self::Documents => vec![
"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps", "pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
][..], ],
),
// many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used // many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used
// somehow to extract extensions for compressed files from mime_guess? // somehow to extract extensions for compressed files from mime_guess?
Self::Archives => Some(&["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2"][..]), Self::Archives => vec!["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2"],
_ => todo!(),
} }
.unwrap()
.to_vec()
} }
} }

View file

@ -5,9 +5,13 @@ use smartstring::alias::String;
use crate::inspectors::mime_extension_lookup; use crate::inspectors::mime_extension_lookup;
/// Information about a scanned file.
pub struct Findings { pub struct Findings {
/// The location of the scanned file.
pub file: PathBuf, // TODO: replace with Path???? <'a> and all that pub file: PathBuf, // TODO: replace with Path???? <'a> and all that
/// Whether or not the file's extension is valid for its mimetype.
pub valid: bool, pub valid: bool,
/// The file's mimetype.
pub mime: Mime, pub mime: Mime,
} }

View file

@ -1,3 +1,5 @@
//! The various formats that [fif](crate) can output to.
use std::io::{self, Write}; use std::io::{self, Write};
#[cfg(unix)] #[cfg(unix)]
use std::os::unix::ffi::OsStrExt; use std::os::unix::ffi::OsStrExt;
@ -8,8 +10,10 @@ use snailquote::escape;
use crate::scanerror::ScanError; use crate::scanerror::ScanError;
use crate::{Findings, BACKEND}; use crate::{Findings, BACKEND};
/// The current version of fif, as defined in Cargo.toml.
const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION"); const VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION");
#[doc(hidden)]
type Entries = [Result<Findings, (ScanError, PathBuf)>]; type Entries = [Result<Findings, (ScanError, PathBuf)>];
enum Writable<'a> { enum Writable<'a> {
@ -97,6 +101,7 @@ pub trait Format {
} }
// TODO: maybe make a batch script version for windows // TODO: maybe make a batch script version for windows
/// Bourne-Shell compatible script.
pub struct Script {} pub struct Script {}
impl Format for Script { impl Format for Script {

View file

@ -1,3 +1,5 @@
//! Functions for getting the mime type and extension of a file.
use std::fs::File; use std::fs::File;
use std::io; use std::io;
use std::io::{Read, Seek, SeekFrom}; use std::io::{Read, Seek, SeekFrom};
@ -10,13 +12,17 @@ use smartstring::alias::String;
use crate::mimedb::MimeDb; use crate::mimedb::MimeDb;
// rather than reading once into a large buffer, it tends to be faster to first try identifying the file from a small /// The number of bytes to read initially.
// chunk read from the top, and *then* proceeding with the large buffer. many file formats can be easily identified by ///
// the first 128 bytes. of course, not all formats can, and some (OOXML...) require reading a long ways in. /// Rather than reading the entire file all at once into a [BUF_SIZE] buffer, it tends to be faster to read a small
/// chunk of the file and try to identify that, proceeding with the larger buffer if that fails. Many file formats
/// can be identified with the first few dozen bytes, so the "happy path" will likely be taken in the majority of cases.
const INITIAL_BUF_SIZE: usize = 128; const INITIAL_BUF_SIZE: usize = 128;
/// The number of bytes to read if the file couldn't be identified from its first [INITIAL_BUF_SIZE] bytes.
const BUF_SIZE: usize = 4096; const BUF_SIZE: usize = 4096;
/// Tries to identify the mimetype of a file from a given path.
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> { pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
let mut buffer = [0; INITIAL_BUF_SIZE]; let mut buffer = [0; INITIAL_BUF_SIZE];
let mut file = File::open(path)?; let mut file = File::open(path)?;
@ -59,6 +65,10 @@ cached! {
MIMEXT; MIMEXT;
fn mime_extension_lookup(mime: Mime) -> Option<Vec<String>> = { fn mime_extension_lookup(mime: Mime) -> Option<Vec<String>> = {
// Returns a list of known extensions for this mime type, if any.
// ↑ this is supposed to be a doc comment, but the cached! macro doesn't support that... maybe i should switch to
// the derive macro
// match on the mime's `essence_str` rather than the mime itself - mime_guess::get_mime_extensions ignores the type // match on the mime's `essence_str` rather than the mime itself - mime_guess::get_mime_extensions ignores the type
// suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. passing the essence_str // suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. passing the essence_str
// (which includes the suffix) fixes this. // (which includes the suffix) fixes this.

View file

@ -31,8 +31,8 @@ use crate::formats::{Format, Script};
use crate::mimedb::MimeDb; use crate::mimedb::MimeDb;
use crate::parameters::OutputFormat; use crate::parameters::OutputFormat;
use crate::scanerror::ScanError; use crate::scanerror::ScanError;
use std::process::exit;
use env_logger::Env; use env_logger::Env;
use std::process::exit;
mod extensionset; mod extensionset;
mod findings; mod findings;
@ -44,23 +44,27 @@ mod scanerror;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
mod util;
cfg_if! { cfg_if! {
if #[cfg(any(all(not(unix), not(feature = "xdg-mime-backend")), all(unix, feature = "infer-backend")))] { if #[cfg(any(all(not(unix), not(feature = "xdg-mime-backend")), all(unix, feature = "infer-backend")))] {
/// A [OnceCell] holding an instance of [mimedb::MimeDb].
static MIMEDB: OnceCell<mimedb::InferDb> = OnceCell::new(); static MIMEDB: OnceCell<mimedb::InferDb> = OnceCell::new();
/// The backend being used; either "Infer" or "XDG-Mime".
const BACKEND: &str = "Infer"; const BACKEND: &str = "Infer";
} else { } else {
/// A [OnceCell] holding an instance of [mimedb::MimeDb].
static MIMEDB: OnceCell<mimedb::XdgDb> = OnceCell::new(); static MIMEDB: OnceCell<mimedb::XdgDb> = OnceCell::new();
/// The backend being used; either "Infer" or "XDG-Mime".
const BACKEND: &str = "XDG-Mime"; const BACKEND: &str = "XDG-Mime";
} }
} }
#[doc(hidden)]
fn main() { fn main() {
let args: parameters::Parameters = parameters::Parameters::parse(); let args: parameters::Parameters = parameters::Parameters::parse();
let mut builder = env_logger::Builder::from_env( let mut builder = env_logger::Builder::from_env(Env::new().filter_or("RUST_LOG", "INFO"));
Env::new().filter_or("RUST_LOG", "INFO")
);
builder builder
// .format(|buf, r| writeln!(buf, "{} - {}", r.level(), r.args())) // .format(|buf, r| writeln!(buf, "{} - {}", r.level(), r.args()))
@ -138,6 +142,7 @@ fn main() {
cfg_if! { cfg_if! {
if #[cfg(windows)] { if #[cfg(windows)] {
/// Determines whether or not a file is hidden by checking its win32 file attributes.
fn is_hidden(entry: &DirEntry) -> bool { fn is_hidden(entry: &DirEntry) -> bool {
use std::os::windows::prelude::*; use std::os::windows::prelude::*;
std::fs::metadata(entry.path()) // try to get metadata for file std::fs::metadata(entry.path()) // try to get metadata for file
@ -147,6 +152,7 @@ cfg_if! {
) )
} }
} else { } else {
/// Determines whether or not a file is hidden by checking for a leading full stop.
fn is_hidden(entry: &DirEntry) -> bool { fn is_hidden(entry: &DirEntry) -> bool {
entry entry
.file_name() .file_name()
@ -156,6 +162,7 @@ cfg_if! {
} }
} }
/// Returns `true` if a file matches the given criteria.
fn wanted_file(scan_hidden: bool, exts: &[&str], entry: &DirEntry) -> bool { fn wanted_file(scan_hidden: bool, exts: &[&str], entry: &DirEntry) -> bool {
if !scan_hidden && is_hidden(entry) { if !scan_hidden && is_hidden(entry) {
// skip hidden files and directories. this check is performed first because it's very lightweight. // skip hidden files and directories. this check is performed first because it's very lightweight.
@ -176,11 +183,20 @@ fn wanted_file(scan_hidden: bool, exts: &[&str], entry: &DirEntry) -> bool {
exts.contains(&ext.unwrap().to_lowercase().as_str()) exts.contains(&ext.unwrap().to_lowercase().as_str())
} }
/// Given a file path, returns its extension, using [std::path::Path::extension].
///
/// The extension is currently [converted to a lossy string](std::ffi::OsStr::to_string_lossy), although it will
/// eventually return an OsStr instead.
fn extension_from_path(path: &Path) -> Option<String> { fn extension_from_path(path: &Path) -> Option<String> {
path.extension(). // Get the path's extension path.extension(). // Get the path's extension
map(|e| String::from(e.to_string_lossy())) // Convert from OsStr to String map(|e| String::from(e.to_string_lossy())) // Convert from OsStr to String
} }
/// Inspects the given entry, returning a [Findings] on success and a tuple of [ScanError] and [PathBuf] on failure.
///
/// In the event of an IO error, the returned ScanError will be of type [ScanError::File]. Otherwise, a
/// [ScanError::Mime] will be returned, meaning that the file was scanned successfully, but a mimetype could not be
/// determined.
fn scan_file(entry: &DirEntry) -> Result<Findings, (ScanError, PathBuf)> { fn scan_file(entry: &DirEntry) -> Result<Findings, (ScanError, PathBuf)> {
// try to determine mimetype for this entry // try to determine mimetype for this entry
let result = inspectors::mime_type(MIMEDB.get().unwrap(), entry.path()); let result = inspectors::mime_type(MIMEDB.get().unwrap(), entry.path());
@ -219,6 +235,7 @@ fn scan_file(entry: &DirEntry) -> Result<Findings, (ScanError, PathBuf)> {
}) })
} }
/// Takes a slice of [DirEntry]s and calls [scan_file] on each one, returning the results in a vector.
fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, (ScanError, PathBuf)>> { fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, (ScanError, PathBuf)>> {
cfg_if! { cfg_if! {
if #[cfg(feature = "multi-threaded")] { if #[cfg(feature = "multi-threaded")] {
@ -240,6 +257,8 @@ fn scan_from_walkdir(entries: &[DirEntry]) -> Vec<Result<Findings, (ScanError, P
} }
} }
/// Scans a given directory with [WalkDir], filters with [wanted_file], checks for errors, and returns a vector of
/// [DirEntry]s.
fn scan_directory(dirs: &PathBuf, exts: &Vec<&str>, scan_hidden: bool) -> Option<Vec<DirEntry>> { fn scan_directory(dirs: &PathBuf, exts: &Vec<&str>, scan_hidden: bool) -> Option<Vec<DirEntry>> {
let stepper = WalkDir::new(dirs).into_iter(); let stepper = WalkDir::new(dirs).into_iter();
let mut probably_fatal_error = false; let mut probably_fatal_error = false;
@ -278,6 +297,7 @@ fn scan_directory(dirs: &PathBuf, exts: &Vec<&str>, scan_hidden: bool) -> Option
} }
} }
/// Initialises [MIMEDB] with a value dependent on the current backend.
fn init_db() { fn init_db() {
cfg_if! { cfg_if! {
if #[cfg(any(all(not(unix), not(feature = "xdg-mime-backend")), all(unix, feature = "infer-backend")))] { if #[cfg(any(all(not(unix), not(feature = "xdg-mime-backend")), all(unix, feature = "infer-backend")))] {

View file

@ -1,3 +1,5 @@
//! Backend-neutral Mime database implementation.
use cfg_if::cfg_if; use cfg_if::cfg_if;
use mime_guess::Mime; use mime_guess::Mime;

View file

@ -1,3 +1,5 @@
//! [Clap] struct used to parse command line arguments.
use std::path::PathBuf; use std::path::PathBuf;
use crate::extensionset::ExtensionSet; use crate::extensionset::ExtensionSet;
@ -10,6 +12,8 @@ pub enum OutputFormat {
Text, Text,
} }
// TODO: convert this to macro style: https://docs.rs/clap/3.0.0-beta.2/clap/index.html#using-macros
#[derive(Clap, Debug)] #[derive(Clap, Debug)]
#[clap(version = option_env!("CARGO_PKG_VERSION").unwrap_or("???"))] #[clap(version = option_env!("CARGO_PKG_VERSION").unwrap_or("???"))]
pub struct Parameters { pub struct Parameters {

View file

@ -2,7 +2,9 @@ use std::fmt::{Display, Formatter, Result};
#[derive(Debug)] #[derive(Debug)]
pub enum ScanError { pub enum ScanError {
/// Something went wrong while trying to read the given file.
File, File,
/// Failed to determine the mimetype of the given file.
Mime, Mime,
} }

View file

@ -2,13 +2,13 @@ use crate::inspectors::mime_extension_lookup;
use crate::mimedb::*; use crate::mimedb::*;
use crate::{extension_from_path, init_db, scan_directory, scan_from_walkdir}; use crate::{extension_from_path, init_db, scan_directory, scan_from_walkdir};
use crate::parameters::Parameters;
use cfg_if::cfg_if; use cfg_if::cfg_if;
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG}; use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use mime_guess::Mime; use mime_guess::Mime;
use smartstring::alias::String; use smartstring::alias::String;
use std::collections::HashMap; use std::collections::HashMap;
use std::path::Path; use std::path::Path;
use crate::parameters::Parameters;
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF"; const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
const PNG_BYTES: &[u8] = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A"; const PNG_BYTES: &[u8] = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
@ -118,16 +118,16 @@ fn simple_directory() {
// 2. ensure mime type detected is IMAGE_PNG // 2. ensure mime type detected is IMAGE_PNG
assert_eq!(result.mime, IMAGE_PNG); assert_eq!(result.mime, IMAGE_PNG);
// 3. ensure recommended extension is in the list of known extensions for PNG files // 3. ensure recommended extension is in the list of known extensions for PNG files
assert!(mime_extension_lookup(IMAGE_PNG).unwrap().contains(&result.recommended_extension().unwrap())); assert!(mime_extension_lookup(IMAGE_PNG)
.unwrap()
.contains(&result.recommended_extension().unwrap()));
continue; continue;
} }
// check if the recommended extension for this file is in the list of known extensions for its mimetype // check if the recommended extension for this file is in the list of known extensions for its mimetype
assert!( assert!(mime_extension_lookup(result.mime.clone())
mime_extension_lookup(result.mime.clone())
.unwrap() .unwrap()
.contains(&result.recommended_extension().unwrap()) .contains(&result.recommended_extension().unwrap()));
);
// make sure the guessed mimetype is correct based on the extension of the scanned file // make sure the guessed mimetype is correct based on the extension of the scanned file
assert_eq!( assert_eq!(