added JSON output

This commit is contained in:
Lynne Megido 2021-05-06 08:57:42 +10:00
parent 774e72423f
commit b93e111357
Signed by: lynnesbian
GPG key ID: F0A184B5213D9F90
10 changed files with 163 additions and 29 deletions

View file

@ -39,13 +39,13 @@ stages:
- .gitlab-ci.yml
script:
cargo build --no-default-features --locked --features="$FEATURES"
cargo build --no-default-features --locked --features="json $FEATURES"
.cargo-test:
extends: .cargo-build
stage: test
script:
cargo test --no-default-features --locked --verbose --features="$FEATURES"
cargo test --no-default-features --locked --verbose --features="json $FEATURES"
clippy:
stage: lint

View file

@ -2,6 +2,10 @@
Dates are given in YYYY-MM-DD format.
## v0.3
### v0.3.1 (2021-xx-yy)
#### Features
- Added JSON output support via `-o json`
### v0.3.0 (2021-04-28)
#### Features
- Added `-x`/`--exclude` flag for excluding file extensions (overrides `-e` or `-E` - `-E images -x jpg` scans all image

51
Cargo.lock generated
View file

@ -198,6 +198,8 @@ dependencies = [
"once_cell",
"rand",
"rayon",
"serde",
"serde_json",
"smartstring",
"snailquote",
"tempfile",
@ -274,6 +276,12 @@ dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -310,9 +318,9 @@ dependencies = [
[[package]]
name = "memchr"
version = "2.3.4"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
[[package]]
name = "memoffset"
@ -487,9 +495,9 @@ dependencies = [
[[package]]
name = "redox_syscall"
version = "0.2.7"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85dd92e586f7355c633911e11f77f3d12f04b1b1bd76a198bd34ae3af8341ef2"
checksum = "742739e41cd49414de871ea5e549afb7e2a3ac77b589bcbebe8c82fab37147fc"
dependencies = [
"bitflags",
]
@ -534,6 +542,37 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "serde"
version = "1.0.125"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.125"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b093b7a2bb58203b5da3056c05b4ec1fed827dcfdb37347a8841695263b3d06d"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.64"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "smartstring"
version = "0.2.6"
@ -561,9 +600,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "syn"
version = "1.0.71"
version = "1.0.72"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad184cc9470f9117b2ac6817bfe297307418819ba40552f9b3846f05c33d5373"
checksum = "a1e8cdbefb79a9a5a65e0db8b47b723ee907b7c7f8496c76a1770b5c310bab82"
dependencies = [
"proc-macro2",
"quote",

View file

@ -18,10 +18,11 @@ exclude = [".idea/", "*.toml", "!Cargo.toml", "*.sh", "*.py", "*.yml", "*.md", "
maintenance = { status = "experimental" }
[features]
default = ["multi-threaded"]
default = ["multi-threaded", "json"]
multi-threaded = ["rayon"]
infer-backend = []
xdg-mime-backend = []
json = ["serde", "serde_json"]
[dependencies]
walkdir = "2.3.2"
@ -34,6 +35,8 @@ rayon = { version = "1.5.0", optional = true }
exitcode = "1.1.2"
cfg-if = "1.0.0"
itertools = "0.10.0"
serde = { version = "1.0", features = ["derive"], optional = true }
serde_json = { version = "1.0", optional = true }
[target.'cfg(unix)'.dependencies]
xdg-mime = "0.3.3"

View file

@ -5,6 +5,9 @@ use mime_guess::Mime;
use crate::inspectors::mime_extension_lookup;
use crate::string_type::String;
#[cfg(feature = "json")]
use serde::{Serializer, ser::SerializeStruct};
/// Information about a scanned file.
#[derive(Ord, PartialOrd, Eq, PartialEq)]
pub struct Findings<'a> {
@ -16,6 +19,20 @@ pub struct Findings<'a> {
pub mime: Mime,
}
#[cfg(feature = "json")]
impl<'a> serde::Serialize for Findings<'a> {
    /// Serialises this `Findings` as a struct named `"Findings"` with three fields, in this order:
    /// `file`, `valid`, and `mime`. The `mime` field is written as the string returned by
    /// `Mime::essence_str()` rather than as a `Mime` value.
    ///
    /// # Errors
    /// Propagates any error reported by the serializer while writing the struct or one of its fields.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // `serialize_struct` needs to be told up front how many fields will follow.
        const FIELD_COUNT: usize = 3;

        let mut findings = serializer.serialize_struct("Findings", FIELD_COUNT)?;
        findings.serialize_field("file", &self.file)?;
        findings.serialize_field("valid", &self.valid)?;
        findings.serialize_field("mime", self.mime.essence_str())?;
        findings.end()
    }
}
impl<'a> Findings<'a> {
pub fn recommended_extension(&self) -> Option<String> {
mime_extension_lookup(self.mime.clone()).map(|extensions| extensions[0].clone())

View file

@ -13,6 +13,7 @@ use crate::scan_error::ScanError;
use crate::{Findings, BACKEND};
use itertools::Itertools;
/// A macro for creating an array of `Writable`s without needing to pepper your code with `into()`s.
/// # Usage
/// ```
@ -102,9 +103,9 @@ fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
}
Writable::String(s) => write!(f, "{}", s)?,
Writable::Path(path) => {
if let Some(string) = path.to_str() {
let escaped = escape(string);
if escaped == string {
if let Some(path_str) = path.to_str() {
let escaped = escape(path_str);
if escaped.as_ref() == path_str {
// the escaped string is the same as the input - this will occur for inputs like "file.txt" which don't
// need to be escaped. however, it's Best Practice™ to escape such strings anyway, so we prefix/suffix the
// escaped string with single quotes.
@ -132,14 +133,21 @@ fn smart_write<W: Write>(f: &mut W, writeables: &[Writable]) -> io::Result<()> {
Ok(())
}
// TODO: this might need a restructure.
// it would be nice if i didn't have to write a case for every OutputFormat variant that looked like
// OutputFormat::PowerShell => PowerShell::new().write_all(...)
// also, JSON's implementation differs vastly from PowerShell and Shell's implementations. Maybe they shouldn't be
// treated as implementing the same trait, since in that case, the format trait is more of a concept rather than an
// actual definition of behaviour.
// structuring code is *hard*
pub trait Format {
fn new() -> Self;
fn rename<W: Write>(&self, f: &mut W, from: &Path, to: &Path) -> io::Result<()>;
fn no_known_extension<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()>;
fn unreadable<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()>;
fn unknown_type<W: Write>(&self, f: &mut W, path: &Path) -> io::Result<()>;
fn header<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()>;
fn footer<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()>;
fn rename<W: Write>(&self, _f: &mut W, _from: &Path, _to: &Path) -> io::Result<()> { unreachable!() }
fn no_known_extension<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()> { unreachable!() }
fn unreadable<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()> { unreachable!() }
fn unknown_type<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()> { unreachable!() }
fn header<W: Write>(&self, _f: &mut W, _entries: &Entries) -> io::Result<()> { unreachable!() }
fn footer<W: Write>(&self, _f: &mut W, _entries: &Entries) -> io::Result<()> { unreachable!() }
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
// TODO: clean this up - it's kinda messy
@ -286,3 +294,31 @@ impl Format for PowerShell {
smart_write(f, writablesln![Newline, "Write-Output 'Done!'"])
}
}
/// Output format that emits the scan results as a single pretty-printed JSON document.
#[cfg(feature = "json")]
pub struct Json;

#[cfg(feature = "json")]
impl Format for Json {
    fn new() -> Self {
        Self
    }

    /// Writes every entry to `f` as one JSON object with two keys: `errors`, holding the sorted
    /// `ScanError`s, and `findings`, holding the sorted `Findings`.
    ///
    /// # Errors
    /// If serialisation or writing fails, the error is logged and returned as an `io::Error`.
    fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
        /// Borrowed view of the entries, split by outcome, used only to shape the JSON output.
        #[derive(serde::Serialize)]
        struct SerdeEntries<'a> {
            errors: &'a [&'a ScanError<'a>],
            findings: &'a [&'a Findings<'a>],
        }

        // split the entries into failures and successes, each sorted
        let errors: Vec<&ScanError> = entries
            .iter()
            .filter_map(|entry| entry.as_ref().err())
            .sorted()
            .collect();
        let findings: Vec<&Findings> = entries
            .iter()
            .filter_map(|entry| entry.as_ref().ok())
            .sorted()
            .collect();

        let payload = SerdeEntries {
            errors: &errors,
            findings: &findings,
        };

        serde_json::to_writer_pretty(f, &payload).map_err(|err| {
            log::error!("Error while serialising: {}", err);
            io::Error::from(err)
        })
    }
}

View file

@ -148,6 +148,8 @@ fn main() {
let result = match args.output_format {
OutputFormat::Sh => Shell::new().write_all(&mut buffered_stdout, &results),
OutputFormat::PowerShell => PowerShell::new().write_all(&mut buffered_stdout, &results),
#[cfg(feature = "json")]
OutputFormat::Json => formats::Json::new().write_all(&mut buffered_stdout, &results),
OutputFormat::Text => todo!(),
};

View file

@ -24,6 +24,9 @@ pub enum OutputFormat {
PowerShell,
/// Plain text.
Text,
/// JSON.
#[cfg(feature = "json")]
Json,
}
// TODO: convert this to macro style?: https://docs.rs/clap/3.0.0-beta.2/clap/index.html#using-macros

View file

@ -1,7 +1,9 @@
use std::fmt::{Display, Formatter, Result};
use std::fmt::{Display, Formatter};
use std::path::Path;
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
#[cfg_attr(feature = "json", derive(serde::Serialize))]
#[cfg_attr(feature = "json", serde(tag = "type", content = "path"))]
pub enum ScanError<'a> {
/// Something went wrong while trying to read the given file.
File(&'a Path),
@ -10,7 +12,7 @@ pub enum ScanError<'a> {
}
impl<'a> Display for ScanError<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(
f,
"Couldn't {} file: {}",

View file

@ -335,20 +335,18 @@ fn outputs_move_commands() {
valid: false,
mime: IMAGE_JPEG,
})];
for format in ["Shell", "PowerShell"].iter() {
for format in &["Shell", "PowerShell"] {
let mut cursor = std::io::Cursor::new(Vec::new());
let mut contents = std::string::String::new();
match *format {
"Shell" => Shell::new()
.write_all(&mut cursor, &entries)
.expect("Failed to write to cursor"),
.write_all(&mut cursor, &entries),
"PowerShell" => PowerShell::new()
.write_all(&mut cursor, &entries)
.expect("Failed to write to cursor"),
.write_all(&mut cursor, &entries),
_ => unreachable!()
}
}.expect("Failed to write to cursor");
cursor.set_position(0);
cursor
@ -358,11 +356,41 @@ fn outputs_move_commands() {
// the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg"
assert!(
contents.contains("misnamed_file.jpg"),
"{} output doesn't contain move command!",
format
"{} output doesn't contain move command!\n===\n{}",
format,
contents
)
}
}
/// Ensure JSON output is valid JSON and reports the detected MIME type of the scanned file.
// `formats::Json` only exists when the `json` feature is enabled, so this test is gated the same way; otherwise the
// test suite would fail to compile when built without that feature.
#[cfg(feature = "json")]
#[test]
fn test_json() {
    use crate::formats::Json;

    // create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
    let entries = vec![Ok(Findings {
        file: Path::new("misnamed_file.png"),
        valid: false,
        mime: IMAGE_JPEG,
    })];

    // `Vec<u8>` implements `Write`, so the output can be captured directly without a cursor
    let mut buffer = Vec::new();
    Json::new()
        .write_all(&mut buffer, &entries)
        .expect("Failed to write to buffer");
    let contents = std::string::String::from_utf8(buffer).expect("JSON output is not valid UTF-8");

    // the output must actually parse as JSON
    let parsed: serde_json::Value = serde_json::from_str(&contents)
        .unwrap_or_else(|err| panic!("JSON output is not valid JSON: {}\n===\n{}", err, contents));

    // the single finding should carry the file's mime type
    assert_eq!(
        parsed["findings"][0]["mime"].as_str(),
        Some(IMAGE_JPEG.essence_str()),
        "JSON output doesn't contain the file's MIME type!\n===\n{}",
        contents
    );
}
#[test]