Compare commits

...

5 commits

Author SHA1 Message Date
d54cc8d6da
added text extension set, more test coverage, cargo update
All checks were successful
continuous-integration/drone/push Build is passing
2021-04-08 23:33:33 +10:00
0f2f408c09
added architectures test to test.py, PKGBUILD for arch distros 2021-04-07 05:20:49 +10:00
e83ac778d6
update changelog 2021-04-07 01:48:46 +10:00
be37b24705
better documentation for parameters, renamed Script to Sh 2021-04-07 01:47:40 +10:00
cb2a30f455
much better README 2021-04-07 01:47:12 +10:00
13 changed files with 234 additions and 47 deletions

1
.gitignore vendored
View file

@ -8,3 +8,4 @@ fif_*
!clippy.sh !clippy.sh
cargo-timing*.html cargo-timing*.html
todo.txt todo.txt
/pkg/fif.spec

View file

@ -10,6 +10,27 @@
</Attribute> </Attribute>
</value> </value>
</entry> </entry>
<entry key="/README.md">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
<entry key="/pkg/PKGBUILD">
<value>
<Attribute>
<option name="separator" value="&#9;" />
</Attribute>
</value>
</entry>
<entry key="/pkg/fif.spec">
<value>
<Attribute>
<option name="separator" value=":" />
</Attribute>
</value>
</entry>
<entry key="/src/formats.rs"> <entry key="/src/formats.rs">
<value> <value>
<Attribute> <Attribute>

View file

@ -13,6 +13,7 @@
<excludeFolder url="file://$MODULE_DIR$/old" /> <excludeFolder url="file://$MODULE_DIR$/old" />
<excludeFolder url="file://$MODULE_DIR$/awful" /> <excludeFolder url="file://$MODULE_DIR$/awful" />
<excludeFolder url="file://$MODULE_DIR$/.mypy_cache" /> <excludeFolder url="file://$MODULE_DIR$/.mypy_cache" />
<excludeFolder url="file://$MODULE_DIR$/pkg" />
</content> </content>
<orderEntry type="inheritedJdk" /> <orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />

View file

@ -2,7 +2,15 @@
Dates are given in YYYY-MM-DD format. Dates are given in YYYY-MM-DD format.
## v0.2 ## v0.2
### v0.2.11 (0201-) ### v0.2.12 (2021-???)
- Much better README.md
- Better documentation for command line arguments
- Added more stuff to test.py
- PKGBUILD for Arch-based distros
- Added Text extension set
- More test coverage
### v0.2.11 (2021-04-04)
#### Features #### Features
- fif can now traverse symlinks with the `-f`/`--follow-symlinks` flag - fif can now traverse symlinks with the `-f`/`--follow-symlinks` flag
- Extensions are no longer mandatory - running fif without `-e` or `-E` will scan all files, regardless of extension - Extensions are no longer mandatory - running fif without `-e` or `-E` will scan all files, regardless of extension

8
Cargo.lock generated
View file

@ -284,9 +284,9 @@ dependencies = [
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.92" version = "0.2.93"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56d855069fafbb9b344c0f962150cd2c1187975cb1c22c1522c240d8c4986714" checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
[[package]] [[package]]
name = "log" name = "log"
@ -549,9 +549,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.68" version = "1.0.69"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ce15dd3ed8aa2f8eeac4716d6ef5ab58b6b9256db41d7e1a0224c2788e8fd87" checksum = "48fe99c6bd8b1cc636890bcc071842de909d902c81ac7dab53ba33c421ab8ffb"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",

View file

@ -10,7 +10,7 @@ repository = "https://git.bune.city/lynnesbian/fif"
readme = "README.md" readme = "README.md"
keywords = ["mime", "mimetype", "utilities", "tools"] keywords = ["mime", "mimetype", "utilities", "tools"]
categories = ["command-line-utilities"] categories = ["command-line-utilities"]
exclude = [".idea/", "Cross.toml", "*.sh", "*.py", ".drone.yml"] exclude = [".idea/", "Cross.toml", "*.sh", "*.py", ".drone.yml", "pkg/"]
#resolver = "2" #resolver = "2"
#license-file = "LICENSE" #license-file = "LICENSE"
@ -67,6 +67,9 @@ fastrand = "1.4.0"
[profile.release] [profile.release]
lto = "thin" lto = "thin"
[profile.test]
opt-level = 0
# optimise dependencies, even when producing debug builds # optimise dependencies, even when producing debug builds
[profile.dev.package."*"] [profile.dev.package."*"]
opt-level = 3 opt-level = 3

View file

@ -1,45 +1,78 @@
fif fif
=== ===
[![Crates.io](https://img.shields.io/crates/v/fif.svg?style=flat-square)](https://crates.io/crates/fif) [![Version](https://img.shields.io/crates/v/fif.svg?style=flat-square)](https://crates.io/crates/fif)
[![Crates.io](https://img.shields.io/crates/l/fif.svg?style=flat-square)](https://git.bune.city/lynnesbian/fif/src/branch/master/LICENSE) [![License](https://img.shields.io/crates/l/fif.svg?style=flat-square)](https://git.bune.city/lynnesbian/fif/src/branch/master/LICENSE)
![Minimum Supported Rust Version](https://img.shields.io/badge/msrv-1.43.0-orange?style=flat-square) ![Minimum Supported Rust Version](https://img.shields.io/badge/msrv-1.43.0-orange?style=flat-square)
[![CI Status](https://drone.bune.city/api/badges/lynnesbian/fif/status.svg?style=flat-square)](https://drone.bune.city/lynnesbian/fif) [![CI Status](https://drone.bune.city/api/badges/lynnesbian/fif/status.svg?style=flat-square)](https://drone.bune.city/lynnesbian/fif)
[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
A command-line tool for detecting and optionally correcting files with incorrect extensions. A command-line tool for detecting and optionally correcting files with incorrect extensions.
fif recursively scans the given directory and outputs a shell script to fix the name of any files with incorrect
extensions. By default, fif will scan all non-hidden files in the given directory, and will ignore symlinks.
As fif prints a shell script to stdout rather than acting on the files directly, you may wish to redirect its output to
a file, e.g. `fif ~/Documents > output.sh`. You can also pipe the output directly into your shell, e.g.
`fif ~/Documents | bash`, although this is not recommended - you should look over fif's output and verify for yourself
that it's not doing anything that will give you a headache before running it.
## Installation ## Installation
### Default backend ### Cargo
```bash ```bash
cargo install --locked fif cargo install --locked fif
``` ```
### Other backends To update, simply re-run this command, or use a tool like
`fif` supports using [infer](https://crates.io/crates/infer) or [xdg-mime](https://crates.io/crates/xdg-mime) as its [cargo-update](https://github.com/nabijaczleweli/cargo-update).
backend for looking up file types. By default, xdg-mime will be used on Linux, and infer on all other systems.
xdg-mime should work on any Unixy system with [libmagic/file(1)](https://www.darwinsys.com/file/) installed, although #### Other backends
I've only tested it on Linux. infer should work on any system. `fif` supports using [`infer`](https://crates.io/crates/infer) or [`xdg-mime`](https://crates.io/crates/xdg-mime) as its
backend for looking up file types. By default, xdg-mime will be used on
[*nix systems](https://en.wikipedia.org/wiki/Unix-like) (Linux, macOS, *BSD, etc.), and infer on all other systems.
`xdg-mime` should work on any *nix system with [libmagic/file(1)](https://www.darwinsys.com/file/) installed, although
I've only tested it on Linux and FreeBSD. `infer` should work on any system.
You can override the default backend for your system at compile time like so: You can override the default backend for your system at compile time like so:
```bash ```bash
# xdg-mime # xdg-mime
cargo install --features=xdg-mime-backend cargo install fif --features=xdg-mime-backend
# infer # infer
cargo install --features=infer-backend cargo install fif --features=infer-backend
```
Of the supported backends, `xdg-mime` by far supports the most file types, as it uses the excellent [Shared MIME
Info](https://gitlab.freedesktop.org/xdg/shared-mime-info/) database, whereas `infer` uses its own baked-in database.
However, `infer` is also faster to load, if only by a few dozen milliseconds, and has no external dependencies.
#### Multithreading
It is also possible to disable multithreading by installing without default features:
```bash
cargo install fif --no-default-features
``` ```
## Usage ## Usage
See `fif --help` for more. See `fif --help` for more.
### The basics ### Logging
The simplest way to use fif looks like this: By default, fif will log any warnings and/or errors encountered during execution. The verbosity of the logging can be
modified by setting the `RUST_LOG` environment variable to one of: `trace`, `debug`, `info`, `warn`, `error`.
For example:
```bash ```bash
fif -E images ~/Pictures RUST_LOG=debug fif ~/Downloads
``` ```
This command will scan all of the files with extensions used by image files (.jpg, .png, etc) in your `~/Pictures` ### The basics
directory. The simplest way to use fif looks like this:
```bash
fif ~/Downloads
```
This command will scan all non-hidden files in your `~/Downloads` directory.
You can also manually specify a set of extensions to use: You can also manually specify a set of extensions to use:
@ -47,11 +80,26 @@ You can also manually specify a set of extensions to use:
fif -e jpeg,jpg,zip,docx ~/Documents fif -e jpeg,jpg,zip,docx ~/Documents
``` ```
By default, fif will output a bash script that can be used to fix all the files it found with incorrect file extensions. Or use a preset extension set - for example, to scan files with image extensions (jpg, png, gif, bmp...):
```bash
fif -E images ~/Pictures
```
For more information, see [the man page](https://git.bune.city/lynnesbian/fif/src/branch/master/doc/fif.1.txt).
### Output
By default, fif will output a bash script (or PowerShell script on Windows) that can be used to fix all the files it
found with incorrect file extensions.
You might find it useful to output this script to a file (rather than to stdout): You might find it useful to output this script to a file (rather than to stdout):
```bash ```bash
fif -E images ~/Pictures > output.sh fif -E images ~/Pictures > output.sh
``` ```
More coming soon! You can also manually specify an output format to use:
```bash
fif -o powershell ~/Documents > output.ps1
```

26
pkg/PKGBUILD Normal file
View file

@ -0,0 +1,26 @@
# Package metadata read by makepkg.
pkgname=fif
pkgver=0.2.11
pkgrel=1
pkgdesc="A command-line tool for detecting and optionally correcting files with incorrect extensions."
# tier 1 rust linux targets
arch=('x86_64' 'i686' 'aarch64')
url="https://git.bune.city/lynnesbian/fif"
license=('GPLv3+')
# Runtime dependency (presumably the MIME database used by the xdg-mime backend).
depends=('shared-mime-info')
# The build() and check() functions call cargo, so it must be present at build time.
makedepends=('cargo')
# The release tarball is renamed on download so that the local file name includes the package name.
source=("$pkgname-$pkgver.tar.gz::https://git.bune.city/lynnesbian/$pkgname/archive/v${pkgver}.tar.gz")
sha256sums=("fd2b3133fabf8ad1993c6d16a9bf1ad645b1eff8fd30a4a9227ef5a157f56183")
# makepkg build step: compile an optimised binary from the extracted sources.
build() {
# The source archive is expected to unpack into a directory named after the package.
cd "$pkgname"
# --locked makes cargo fail rather than update Cargo.lock, so the pinned dependency versions are used.
cargo build --release --locked
}
# makepkg check step: run the crate's test suite so that packaging fails on a
# broken release instead of silently shipping it.
check() {
	cd "$pkgname"
	# Same profile and lockfile policy as the build step, so already-compiled
	# dependencies can be reused and dependency versions stay pinned.
	cargo test --release --locked
}
# makepkg package step: stage the compiled binary into the package root.
package() {
cd "$pkgname"
# -D creates missing leading directories, -m 755 makes the binary executable,
# and -t names the destination directory inside ${pkgdir}.
install -Dm 755 target/release/${pkgname} -t "${pkgdir}/usr/bin"
}

View file

@ -14,6 +14,8 @@ pub enum ExtensionSet {
Media, Media,
/// Extensions used for document file formats, such as `pdf`, `odt`, `docx`, etc. /// Extensions used for document file formats, such as `pdf`, `odt`, `docx`, etc.
Documents, Documents,
/// Extensions used for text file formats, such as `txt`, `toml`, `html`, etc.
Text,
/// Extensions used for archive file formats, such as `zip`, `zst`, `gz`, etc. /// Extensions used for archive file formats, such as `zip`, `zst`, `gz`, etc.
Archives, Archives,
} }
@ -34,9 +36,10 @@ impl ExtensionSet {
Self::Documents => vec![ Self::Documents => vec![
"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps", "pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
], ],
Self::Text => mime_guess::get_mime_extensions_str("text/*").unwrap().to_vec(),
// many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used // many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used
// somehow to extract extensions for compressed files from mime_guess? // somehow to extract extensions for compressed files from mime_guess?
Self::Archives => vec!["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2"], Self::Archives => vec!["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2", "tgz"],
} }
} }
} }

View file

@ -14,6 +14,8 @@
// You should have received a copy of the GNU General Public License // You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>. // along with this program. If not, see <https://www.gnu.org/licenses/>.
#![forbid(unsafe_code)]
use std::io::{stdout, BufWriter}; use std::io::{stdout, BufWriter};
use std::path::Path; use std::path::Path;
@ -130,7 +132,7 @@ fn main() {
let mut buffered_stdout = BufWriter::new(stdout()); let mut buffered_stdout = BufWriter::new(stdout());
let result = match args.output_format { let result = match args.output_format {
OutputFormat::Script => Script::new().write_all(&results, &mut buffered_stdout), OutputFormat::Sh => Script::new().write_all(&results, &mut buffered_stdout),
OutputFormat::PowerShell | OutputFormat::Powershell => PowerShell::new().write_all(&results, &mut buffered_stdout), OutputFormat::PowerShell | OutputFormat::Powershell => PowerShell::new().write_all(&results, &mut buffered_stdout),
OutputFormat::Text => todo!(), OutputFormat::Text => todo!(),
}; };

View file

@ -10,14 +10,14 @@ cfg_if! {
if #[cfg(windows)] { if #[cfg(windows)] {
const DEFAULT_FORMAT: &str = "powershell"; const DEFAULT_FORMAT: &str = "powershell";
} else { } else {
const DEFAULT_FORMAT: &str = "script"; const DEFAULT_FORMAT: &str = "sh";
} }
} }
#[derive(Clap, PartialEq, Debug)] #[derive(Clap, PartialEq, Debug)]
pub enum OutputFormat { pub enum OutputFormat {
/// A Bourne shell compatible script. /// A Bourne shell compatible script.
Script, Sh,
/// A PowerShell script. /// A PowerShell script.
PowerShell, PowerShell,
/// Also a PowerShell script, with different casing to allow for `fif -o powershell`. /// Also a PowerShell script, with different casing to allow for `fif -o powershell`.
@ -41,31 +41,38 @@ pub enum OutputFormat {
setting(AppSettings::ColoredHelp) setting(AppSettings::ColoredHelp)
)] )]
pub struct Parameters { pub struct Parameters {
/// Only examine files with these extensions (Comma-separated list) /// Only examine files with these extensions (Comma-separated list).
/// This argument conflicts with `-E`.
#[clap(short, long, use_delimiter = true, require_delimiter = true, group = "extensions")] #[clap(short, long, use_delimiter = true, require_delimiter = true, group = "extensions")]
pub exts: Option<Vec<StringType>>, pub exts: Option<Vec<StringType>>,
/// Use a preset list of extensions as the search filter /// Use a preset list of extensions as the search filter.
/// `media` includes all extensions from the `audio`, `video`, and `images` sets. This argument conflicts with `-e`.
#[clap(short = 'E', long, arg_enum, group = "extensions")] #[clap(short = 'E', long, arg_enum, group = "extensions")]
pub ext_set: Option<ExtensionSet>, pub ext_set: Option<ExtensionSet>,
/// Don't skip hidden files and directories /// Don't skip hidden files and directories.
/// Even if this flag is not present, fif will still recurse into a hidden root directory - for example, `fif
/// ~/.hidden` will recurse into `~/.hidden` regardless of whether or not -s was passed as an argument.
#[clap(short, long)] #[clap(short, long)]
pub scan_hidden: bool, pub scan_hidden: bool,
/// Scan files without extensions /// Scan files without extensions.
/// By default, fif will ignore files without extensions - for example, a jpeg file named `photo` won't be considered
/// misnamed. Supplying the -S flag will cause fif to recommend renaming this file to `photo.jpg`.
#[clap(short = 'S', long)] #[clap(short = 'S', long)]
pub scan_extensionless: bool, pub scan_extensionless: bool,
/// Output format to use /// Output format to use.
/// By default, fif will output a PowerShell script on Windows, and a Bourne Shell script on other platforms.
#[clap(short, long, default_value = DEFAULT_FORMAT, arg_enum)] #[clap(short, long, default_value = DEFAULT_FORMAT, arg_enum)]
pub output_format: OutputFormat, pub output_format: OutputFormat,
/// Follow symlinks /// Follow symlinks.
#[clap(short, long)] #[clap(short, long)]
pub follow_symlinks: bool, pub follow_symlinks: bool,
/// Directory to process /// The directory to process.
// TODO: right now this can only take a single directory - should this be improved? // TODO: right now this can only take a single directory - should this be improved?
#[clap(name = "DIR", default_value = ".", parse(from_os_str))] #[clap(name = "DIR", default_value = ".", parse(from_os_str))]
pub dirs: PathBuf, pub dirs: PathBuf,

View file

@ -8,9 +8,12 @@ use cfg_if::cfg_if;
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG}; use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use mime_guess::Mime; use mime_guess::Mime;
use crate::findings::Findings;
use crate::formats::{Format, Script};
use std::borrow::Borrow; use std::borrow::Borrow;
use std::collections::HashMap; use std::collections::HashMap;
use std::ffi::OsStr; use std::ffi::OsStr;
use std::io::Read;
use std::path::Path; use std::path::Path;
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF"; const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
@ -149,7 +152,10 @@ fn simple_directory() {
"pdf" => APPLICATION_PDF, "pdf" => APPLICATION_PDF,
"zip" => application_zip(), "zip" => application_zip(),
_ => APPLICATION_OCTET_STREAM, // general "fallback" type _ => APPLICATION_OCTET_STREAM, // general "fallback" type
} },
"Incorrect MIME type detected - got {:?} for a {:?} file",
result.mime,
ext.unwrap()
); );
} }
} }
@ -180,7 +186,8 @@ fn argument_parsing() {
hidden: false, hidden: false,
extensionless: false, extensionless: false,
follow_symlinks: true follow_symlinks: true
} },
"ScanOpts are incorrect"
) )
} }
@ -224,3 +231,42 @@ fn identify_random_bytes() {
} }
println!("No type found:\t{} counts", 500 - results.values().sum::<i32>()) println!("No type found:\t{} counts", 500 - results.values().sum::<i32>())
} }
#[test]
fn outputs_move_commands() {
    // A single finding: "misnamed_file.png" has been identified as a JPEG,
    // so its current extension is considered invalid.
    let finding = Findings {
        file: Path::new("misnamed_file.png"),
        valid: false,
        mime: IMAGE_JPEG,
    };
    let entries = vec![Ok(finding)];

    // Render the shell script into an in-memory buffer instead of stdout.
    let mut cursor = std::io::Cursor::new(Vec::new());
    Script::new()
        .write_all(&entries, &mut cursor)
        .expect("Failed to write to cursor");

    // Rewind to the start so the bytes just written can be read back.
    cursor.set_position(0);
    let mut contents = std::string::String::new();
    cursor
        .read_to_string(&mut contents)
        .expect("Failed to read from cursor to string");

    // The script should contain a command like
    // "mv -i misnamed_file.png misnamed_file.jpg".
    let mentions_new_name = contents.contains("misnamed_file.jpg");
    assert!(mentions_new_name, "Output doesn't contain move command!")
}
#[test]
fn media_contains_audio_video_images() {
    use crate::extension_set::ExtensionSet::{Audio, Images, Media, Videos};

    let media_exts = Media.extensions();

    // Walk the audio, video and image sets in turn; every extension found in
    // any of them must also be present in the media set.
    for subset in vec![Audio.extensions(), Videos.extensions(), Images.extensions()] {
        for ext in subset {
            assert!(media_exts.contains(&ext));
        }
    }
}
}

43
test.py
View file

@ -2,8 +2,19 @@
import re import re
import subprocess import subprocess
import sys
def main(): def test_archs():
archs = ["aarch64", "powerpc"]
upto = 1
target = len(archs)
for arch in archs:
print(f"Testing {arch} ({upto} of {target})")
subprocess.run(f"cross test --features=infer-backend --target {arch}-unknown-linux-gnu".split(" "))
upto += 1
def test_versions():
match = re.search( match = re.search(
r'rust-version ?= ?"([\d.]+)"', r'rust-version ?= ?"([\d.]+)"',
open("Cargo.toml", "r").read(-1) open("Cargo.toml", "r").read(-1)
@ -13,26 +24,36 @@ def main():
print("Couldn't find rust-version") print("Couldn't find rust-version")
exit(1) exit(1)
versions = [match.group(1), "stable", "beta", "nightly"] versions = [match.group(1), "stable", "nightly"]
backends = ["xdg-mime", "infer"] backends = ["xdg-mime", "infer"]
done = 0 upto = 1
target = len(versions) * len(backends) * 2 target = len(versions) * len(backends) * 2
for version in versions: for version in versions:
for backend in backends: for backend in backends:
print(f"[{version}, {backend}] Tests") print(f"[{version}, {backend}] Tests ({upto} of {target})")
subprocess.run(f"cargo +{version} test --features={backend}-backend".split(" ")) subprocess.run(f"cargo +{version} test --features={backend}-backend".split(" "))
done += 1 upto += 1
print(f"Success - {done} of {target} complete")
print(f"[{version}, {backend}] Scanning imgs") print(f"[{version}, {backend}] Scanning imgs ({upto} of {target})")
subprocess.run(f"cargo +{version} run --release --features={backend}-backend -- -E images imgs".split(" ")) subprocess.run(f"cargo +{version} run --release --features={backend}-backend -- imgs".split(" "))
done += 1 upto += 1
print(f"Success - {done} of {target} complete")
def main():
done_something = False
if "versions" in sys.argv:
test_versions()
done_something = True
if "archs" in sys.argv:
test_archs()
done_something = True
if not done_something:
print("You must supply at least one of `versions` or `archs` as an argument! 0uo")
sys.exit(2)
print("Done! You might want to run cargo clean...") print("Done! You might want to run cargo clean...")
if __name__ == "__main__": if __name__ == "__main__":
main() main()