From b93e111357265e6cd3dc2f880b9f579862dce242 Mon Sep 17 00:00:00 2001 From: Lynne Date: Thu, 6 May 2021 08:57:42 +1000 Subject: [PATCH] added JSON output --- .gitlab-ci.yml | 4 ++-- CHANGELOG.md | 4 ++++ Cargo.lock | 51 ++++++++++++++++++++++++++++++++++++++------ Cargo.toml | 5 ++++- src/findings.rs | 17 +++++++++++++++ src/formats.rs | 54 +++++++++++++++++++++++++++++++++++++++-------- src/main.rs | 2 ++ src/parameters.rs | 3 +++ src/scan_error.rs | 6 ++++-- src/tests/mod.rs | 46 ++++++++++++++++++++++++++++++++-------- 10 files changed, 163 insertions(+), 29 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 55a5d2c..97cde77 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -39,13 +39,13 @@ stages: - .gitlab-ci.yml script: - cargo build --no-default-features --locked --features="$FEATURES" + cargo build --no-default-features --locked --features="json $FEATURES" .cargo-test: extends: .cargo-build stage: test script: - cargo test --no-default-features --locked --verbose --features="$FEATURES" + cargo test --no-default-features --locked --verbose --features="json $FEATURES" clippy: stage: lint diff --git a/CHANGELOG.md b/CHANGELOG.md index 17ebd2f..345df22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ Dates are given in YYYY-MM-DD format. 
## v0.3 +### v0.3.1 (2021-xx-yy) +#### Features +- Added JSON output support via `-o json` + ### v0.3.0 (2021-04-28) #### Features - Added `-x`/`--exclude` flag for excluding file extensions (overrides `-e` or `-E` - `-E images -x jpg` scans all image diff --git a/Cargo.lock b/Cargo.lock index 89bbc36..25f1811 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -198,6 +198,8 @@ dependencies = [ "once_cell", "rand", "rayon", + "serde", + "serde_json", "smartstring", "snailquote", "tempfile", @@ -274,6 +276,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" + [[package]] name = "lazy_static" version = "1.4.0" @@ -310,9 +318,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.3.4" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" +checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" [[package]] name = "memoffset" @@ -487,9 +495,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85dd92e586f7355c633911e11f77f3d12f04b1b1bd76a198bd34ae3af8341ef2" +checksum = "742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc" dependencies = [ "bitflags", ] @@ -534,6 +542,37 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "serde" +version = "1.0.125" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.125" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b093b7a2bb58203b5da3056c05b4ec1fed827dcfdb37347a8841695263b3d06d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79" +dependencies = [ + "itoa", + "ryu", + "serde", +] + [[package]] name = "smartstring" version = "0.2.6" @@ -561,9 +600,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" -version = "1.0.71" +version = "1.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad184cc9470f9117b2ac6817bfe297307418819ba40552f9b3846f05c33d5373" +checksum = "a1e8cdbefb79a9a5a65e0db8b47b723ee907b7c7f8496c76a1770b5c310bab82" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index be1d1d5..34da334 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,10 +18,11 @@ exclude = [".idea/", "*.toml", "!Cargo.toml", "*.sh", "*.py", "*.yml", "*.md", " maintenance = { status = "experimental" } [features] -default = ["multi-threaded"] +default = ["multi-threaded", "json"] multi-threaded = ["rayon"] infer-backend = [] xdg-mime-backend = [] +json = ["serde", "serde_json"] [dependencies] walkdir = "2.3.2" @@ -34,6 +35,8 @@ rayon = { version = "1.5.0", optional = true } exitcode = "1.1.2" cfg-if = "1.0.0" itertools = "0.10.0" +serde = { version = "1.0", features = ["derive"], optional = true } +serde_json = { version = "1.0", optional = true } [target.'cfg(unix)'.dependencies] xdg-mime = "0.3.3" diff --git a/src/findings.rs b/src/findings.rs index 63699ea..13843d0 100644 --- a/src/findings.rs +++ b/src/findings.rs @@ -5,6 +5,9 @@ use mime_guess::Mime; use crate::inspectors::mime_extension_lookup; use crate::string_type::String; +#[cfg(feature = "json")] +use serde::{Serializer, ser::SerializeStruct}; + /// 
Information about a scanned file. #[derive(Ord, PartialOrd, Eq, PartialEq)] pub struct Findings<'a> { @@ -16,6 +19,20 @@ pub struct Findings<'a> { pub mime: Mime, } +#[cfg(feature = "json")] +impl<'a> serde::Serialize for Findings<'a> { + fn serialize(&self, serializer: S) -> Result where + S: Serializer { + // the second parameter is the number of fields in the struct -- in this case, 3 + let mut state = serializer.serialize_struct("Findings", 3)?; + + state.serialize_field("file", &self.file)?; + state.serialize_field("valid", &self.valid)?; + state.serialize_field("mime", &self.mime.essence_str())?; + state.end() + } +} + impl<'a> Findings<'a> { pub fn recommended_extension(&self) -> Option { mime_extension_lookup(self.mime.clone()).map(|extensions| extensions[0].clone()) diff --git a/src/formats.rs b/src/formats.rs index 96002a1..eb7622f 100644 --- a/src/formats.rs +++ b/src/formats.rs @@ -13,6 +13,7 @@ use crate::scan_error::ScanError; use crate::{Findings, BACKEND}; use itertools::Itertools; + /// A macro for creating an array of `Writable`s without needing to pepper your code with `into()`s. /// # Usage /// ``` @@ -102,9 +103,9 @@ fn smart_write(f: &mut W, writeables: &[Writable]) -> io::Result<()> { } Writable::String(s) => write!(f, "{}", s)?, Writable::Path(path) => { - if let Some(string) = path.to_str() { - let escaped = escape(string); - if escaped == string { + if let Some(path_str) = path.to_str() { + let escaped = escape(path_str); + if escaped.as_ref() == path_str { // the escaped string is the same as the input - this will occur for inputs like "file.txt" which don't // need to be escaped. however, it's Best Practice™ to escape such strings anyway, so we prefix/suffix the // escaped string with single quotes. @@ -132,14 +133,21 @@ fn smart_write(f: &mut W, writeables: &[Writable]) -> io::Result<()> { Ok(()) } +// TODO: this might need a restructure. 
+// it would be nice if i didn't have to write a case for every OutputFormat variant that looked like +// OutputFormat::PowerShell => PowerShell::new().write_all(...) +// also, JSON's implementation differs vastly from PowerShell and Shell's implementations. Maybe they shouldn't be +// treated as implementing the same trait, since in that case, the format trait is more of a concept rather than an +// actual definition of behaviour. +// structuring code is *hard* pub trait Format { fn new() -> Self; - fn rename(&self, f: &mut W, from: &Path, to: &Path) -> io::Result<()>; - fn no_known_extension(&self, f: &mut W, path: &Path) -> io::Result<()>; - fn unreadable(&self, f: &mut W, path: &Path) -> io::Result<()>; - fn unknown_type(&self, f: &mut W, path: &Path) -> io::Result<()>; - fn header(&self, f: &mut W, entries: &Entries) -> io::Result<()>; - fn footer(&self, f: &mut W, entries: &Entries) -> io::Result<()>; + fn rename(&self, _f: &mut W, _from: &Path, _to: &Path) -> io::Result<()> { unreachable!() } + fn no_known_extension(&self, _f: &mut W, _path: &Path) -> io::Result<()> { unreachable!() } + fn unreadable(&self, _f: &mut W, _path: &Path) -> io::Result<()> { unreachable!() } + fn unknown_type(&self, _f: &mut W, _path: &Path) -> io::Result<()> { unreachable!() } + fn header(&self, _f: &mut W, _entries: &Entries) -> io::Result<()> { unreachable!() } + fn footer(&self, _f: &mut W, _entries: &Entries) -> io::Result<()> { unreachable!() } fn write_all(&self, f: &mut W, entries: &Entries) -> io::Result<()> { // TODO: clean this up - it's kinda messy @@ -286,3 +294,31 @@ impl Format for PowerShell { smart_write(f, writablesln![Newline, "Write-Output 'Done!'"]) } } + +#[cfg(feature = "json")] +pub struct Json; + +#[cfg(feature = "json")] +impl Format for Json { + fn new() -> Self { Self {} } + + fn write_all(&self, f: &mut W, entries: &Entries) -> io::Result<()> { + #[derive(serde::Serialize)] + struct SerdeEntries<'a> { + errors: &'a Vec<&'a ScanError<'a>>, + findings: 
&'a Vec<&'a Findings<'a>>, + } + + let result = serde_json::to_writer_pretty(f, &SerdeEntries { + errors: &entries.iter().filter_map(|e| e.as_ref().err()).sorted().collect(), + findings: &entries.iter().filter_map(|f| f.as_ref().ok()).sorted().collect() + }); + + if let Err(err) = result { + log::error!("Error while serialising: {}", err); + return Err(err.into()) + } + + Ok(()) + } +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 93832ed..684a366 100644 --- a/src/main.rs +++ b/src/main.rs @@ -148,6 +148,8 @@ fn main() { let result = match args.output_format { OutputFormat::Sh => Shell::new().write_all(&mut buffered_stdout, &results), OutputFormat::PowerShell => PowerShell::new().write_all(&mut buffered_stdout, &results), + #[cfg(feature = "json")] + OutputFormat::Json => formats::Json::new().write_all(&mut buffered_stdout, &results), OutputFormat::Text => todo!(), }; diff --git a/src/parameters.rs b/src/parameters.rs index 005bea5..7c7ad2b 100644 --- a/src/parameters.rs +++ b/src/parameters.rs @@ -24,6 +24,9 @@ pub enum OutputFormat { PowerShell, /// Plain text. Text, + /// JSON. + #[cfg(feature = "json")] + Json, } // TODO: convert this to macro style?: https://docs.rs/clap/3.0.0-beta.2/clap/index.html#using-macros diff --git a/src/scan_error.rs b/src/scan_error.rs index bf7ea8d..2865c15 100644 --- a/src/scan_error.rs +++ b/src/scan_error.rs @@ -1,7 +1,9 @@ -use std::fmt::{Display, Formatter, Result}; +use std::fmt::{Display, Formatter}; use std::path::Path; #[derive(Debug, PartialEq, PartialOrd, Ord, Eq)] +#[cfg_attr(feature = "json", derive(serde::Serialize))] +#[cfg_attr(feature = "json", serde(tag = "type", content = "path"))] pub enum ScanError<'a> { /// Something went wrong while trying to read the given file. 
File(&'a Path), @@ -10,7 +12,7 @@ pub enum ScanError<'a> { } impl<'a> Display for ScanError<'a> { - fn fmt(&self, f: &mut Formatter<'_>) -> Result { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!( f, "Couldn't {} file: {}", diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 48f193b..2a9bbf8 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -335,20 +335,18 @@ fn outputs_move_commands() { valid: false, mime: IMAGE_JPEG, })]; - - for format in ["Shell", "PowerShell"].iter() { + + for format in &["Shell", "PowerShell"] { let mut cursor = std::io::Cursor::new(Vec::new()); let mut contents = std::string::String::new(); match *format { "Shell" => Shell::new() - .write_all(&mut cursor, &entries) - .expect("Failed to write to cursor"), + .write_all(&mut cursor, &entries), "PowerShell" => PowerShell::new() - .write_all(&mut cursor, &entries) - .expect("Failed to write to cursor"), + .write_all(&mut cursor, &entries), _ => unreachable!() - } + }.expect("Failed to write to cursor"); cursor.set_position(0); cursor @@ -358,11 +356,41 @@ fn outputs_move_commands() { // the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg" assert!( contents.contains("misnamed_file.jpg"), - "{} output doesn't contain move command!", - format + "{} output doesn't contain move command!\n===\n{}", + format, + contents ) } +} +#[test] +/// Ensure JSON output is valid. 
+fn test_json() { + use std::io::Read; + use crate::formats::Json; + // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file + let entries = vec![Ok(Findings { + file: Path::new("misnamed_file.png"), + valid: false, + mime: IMAGE_JPEG, + })]; + + let mut cursor = std::io::Cursor::new(Vec::new()); + let mut contents = std::string::String::new(); + + Json::new().write_all(&mut cursor, &entries).expect("Failed to write to cursor"); + + cursor.set_position(0); + cursor + .read_to_string(&mut contents) + .expect("Failed to read from cursor to string"); + + // the output should contain the file's mime type + assert!( + contents.contains(IMAGE_JPEG.essence_str()), + "JSON output doesn't contain move command!\n===\n{}", + contents + ) } #[test]