Compare commits

..

No commits in common. "d54cc8d6da1a8ffe4dd18e7fac56573a72281f3e" and "fb67c11eb6d211fc268c7fdee2f869e8e7de3aae" have entirely different histories.

13 changed files with 45 additions and 232 deletions

1
.gitignore vendored
View file

@ -8,4 +8,3 @@ fif_*
!clippy.sh
cargo-timing*.html
todo.txt
/pkg/fif.spec

View file

@ -10,27 +10,6 @@
</Attribute>
</value>
</entry>
<entry key="/README.md">
<value>
<Attribute>
<option name="separator" value="," />
</Attribute>
</value>
</entry>
<entry key="/pkg/PKGBUILD">
<value>
<Attribute>
<option name="separator" value="&#9;" />
</Attribute>
</value>
</entry>
<entry key="/pkg/fif.spec">
<value>
<Attribute>
<option name="separator" value=":" />
</Attribute>
</value>
</entry>
<entry key="/src/formats.rs">
<value>
<Attribute>

View file

@ -13,7 +13,6 @@
<excludeFolder url="file://$MODULE_DIR$/old" />
<excludeFolder url="file://$MODULE_DIR$/awful" />
<excludeFolder url="file://$MODULE_DIR$/.mypy_cache" />
<excludeFolder url="file://$MODULE_DIR$/pkg" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />

View file

@ -2,15 +2,7 @@
Dates are given in YYYY-MM-DD format.
## v0.2
### v0.2.12 (2021-???)
- Much better README.md
- Better documentation for command line arguments
- Added more stuff to test.py
- PKGBUILD for Arch-based distros
- Added Text extension set
- More test coverage
### v0.2.11 (2021-04-04)
### v0.2.11 (0201-)
#### Features
- fif can now traverse symlinks with the `-f`/`--follow-symlinks` flag
- Extensions are no longer mandatory - running fif without `-e` or `-E` will scan all files, regardless of extension

8
Cargo.lock generated
View file

@ -284,9 +284,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.93"
version = "0.2.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
checksum = "56d855069fafbb9b344c0f962150cd2c1187975cb1c22c1522c240d8c4986714"
[[package]]
name = "log"
@ -549,9 +549,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "syn"
version = "1.0.69"
version = "1.0.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48fe99c6bd8b1cc636890bcc071842de909d902c81ac7dab53ba33c421ab8ffb"
checksum = "3ce15dd3ed8aa2f8eeac4716d6ef5ab58b6b9256db41d7e1a0224c2788e8fd87"
dependencies = [
"proc-macro2",
"quote",

View file

@ -10,7 +10,7 @@ repository = "https://git.bune.city/lynnesbian/fif"
readme = "README.md"
keywords = ["mime", "mimetype", "utilities", "tools"]
categories = ["command-line-utilities"]
exclude = [".idea/", "Cross.toml", "*.sh", "*.py", ".drone.yml", "pkg/"]
exclude = [".idea/", "Cross.toml", "*.sh", "*.py", ".drone.yml"]
#resolver = "2"
#license-file = "LICENSE"
@ -67,9 +67,6 @@ fastrand = "1.4.0"
[profile.release]
lto = "thin"
[profile.test]
opt-level = 0
# optimise dependencies, even when producing debug builds
[profile.dev.package."*"]
opt-level = 3

View file

@ -1,78 +1,45 @@
fif
===
[![Version](https://img.shields.io/crates/v/fif.svg?style=flat-square)](https://crates.io/crates/fif)
[![License](https://img.shields.io/crates/l/fif.svg?style=flat-square)](https://git.bune.city/lynnesbian/fif/src/branch/master/LICENSE)
[![Crates.io](https://img.shields.io/crates/v/fif.svg?style=flat-square)](https://crates.io/crates/fif)
[![Crates.io](https://img.shields.io/crates/l/fif.svg?style=flat-square)](https://git.bune.city/lynnesbian/fif/src/branch/master/LICENSE)
![Minimum Supported Rust Version](https://img.shields.io/badge/msrv-1.43.0-orange?style=flat-square)
[![CI Status](https://drone.bune.city/api/badges/lynnesbian/fif/status.svg?style=flat-square)](https://drone.bune.city/lynnesbian/fif)
[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
A command-line tool for detecting and optionally correcting files with incorrect extensions.
fif recursively scans the given directory and outputs a shell script to fix the name of any files with incorrect
extensions. By default, fif will scan all non-hidden files in the given directory, and will ignore symlinks.
As fif prints a shell script to stdout rather than acting on the files directly, you may wish to redirect its output to
a file, e.g. `fif ~/Documents > output.sh`. You can also pipe the output directly into your shell, e.g.
`fif ~/Documents | bash`, although this is not recommended - you should look over fif's output and verify for yourself
that it's not doing anything that will give you a headache before running it.
## Installation
### Cargo
### Default backend
```bash
cargo install --locked fif
```
To update, simply re-run this command, or use a tool like
[cargo-update](https://github.com/nabijaczleweli/cargo-update).
### Other backends
`fif` supports using [infer](https://crates.io/crates/infer) or [xdg-mime](https://crates.io/crates/xdg-mime) as its
backend for looking up file types. By default, xdg-mime will be used on Linux, and infer on all other systems.
#### Other backends
`fif` supports using [`infer`](https://crates.io/crates/infer) or [`xdg-mime`](https://crates.io/crates/xdg-mime) as its
backend for looking up file types. By default, xdg-mime will be used on
[*nix systems](https://en.wikipedia.org/wiki/Unix-like) (Linux, macOS, *BSD, etc.), and infer on all other systems.
`xdg-mime` should work on any *nix system with [libmagic/file(1)](https://www.darwinsys.com/file/) installed, although
I've only tested it on Linux and FreeBSD. `infer` should work on any system.
xdg-mime should work on any Unixy system with [libmagic/file(1)](https://www.darwinsys.com/file/) installed, although
I've only tested it on Linux. infer should work on any system.
You can override the default backend for your system at compile time like so:
```bash
# xdg-mime
cargo install fif --features=xdg-mime-backend
cargo install --features=xdg-mime-backend
# infer
cargo install fif --features=infer-backend
```
Of the supported backends, `xdg-mime` by far supports the most file types, as it uses the excellent [Shared MIME
Info](https://gitlab.freedesktop.org/xdg/shared-mime-info/) database, whereas `infer` uses its own baked-in database.
However, `infer` is also faster to load, if only by a few dozen milliseconds, and has no external dependencies.
#### Multithreading
It is also possible to disable multithreading by installing without default features:
```bash
cargo install fif --no-default-features
cargo install --features=infer-backend
```
## Usage
See `fif --help` for more.
### Logging
By default, fif will log any warnings and/or errors encountered during execution. The verbosity of the logging can be
modified by setting the `RUST_LOG` environment variable to one of: `trace`, `debug`, `info`, `warn`, `error`.
For example:
```bash
RUST_LOG=debug fif ~/Downloads
```
### The basics
The simplest way to use fif looks like this:
```bash
fif ~/Downloads
fif -E images ~/Pictures
```
This command will scan all non-hidden files in your `~/Downloads` directory.
This command will scan all of the files with extensions used by image files (.jpg, .png, etc) in your `~/Pictures`
directory.
You can also manually specify a set of extensions to use:
@ -80,26 +47,11 @@ You can also manually specify a set of extensions to use:
fif -e jpeg,jpg,zip,docx ~/Documents
```
Or use a preset extension set - for example, to scan files with image extensions (jpg, png, gif, bmp...):
```bash
fif -E images ~/Pictures
```
For more information, see [the man page](https://git.bune.city/lynnesbian/fif/src/branch/master/doc/fif.1.txt)
### Output
By default, fif will output a bash script (or PowerShell script on Windows) that can be used to fix all the files it
found with incorrect file extensions.
By default, fif will output a bash script that can be used to fix all the files it found with incorrect file extensions.
You might find it useful to output this script to a file (rather than to stdout):
```bash
fif -E images ~/Pictures > output.sh
```
You can also manually specify an output format to use:
```bash
fif -O powershell ~/Documents > output.ps1
```
More coming soon!

View file

@ -1,26 +0,0 @@
# Arch Linux package recipe for fif, built from the tagged release tarball.
pkgname=fif
pkgver=0.2.11
pkgrel=1
pkgdesc="A command-line tool for detecting and optionally correcting files with incorrect extensions."
# tier 1 rust linux targets
arch=('x86_64' 'i686' 'aarch64')
url="https://git.bune.city/lynnesbian/fif"
license=('GPLv3+')
depends=('shared-mime-info')
# cargo is invoked by build() and check(), so it must be present at build time
makedepends=('cargo')
source=("$pkgname-$pkgver.tar.gz::https://git.bune.city/lynnesbian/$pkgname/archive/v${pkgver}.tar.gz")
sha256sums=("fd2b3133fabf8ad1993c6d16a9bf1ad645b1eff8fd30a4a9227ef5a157f56183")

# Compile the release binary using the exact dependency versions from Cargo.lock.
build() {
  cd "$pkgname"
  cargo build --release --locked
}

# Run the project's test suite (rather than merely rebuilding the binary).
check() {
  cd "$pkgname"
  cargo test --release --locked
}

# Install the compiled binary into the package's /usr/bin.
package() {
  cd "$pkgname"
  install -Dm 755 "target/release/${pkgname}" -t "${pkgdir}/usr/bin"
}

View file

@ -14,8 +14,6 @@ pub enum ExtensionSet {
Media,
/// Extensions used for document file formats, such as `pdf`, `odt`, `docx`, etc.
Documents,
/// Extensions used for text file formats, such as `txt`, `toml`, `html`, etc.
Text,
/// Extensions used for archive file formats, such as `zip`, `zst`, `gz`, etc.
Archives,
}
@ -36,10 +34,9 @@ impl ExtensionSet {
Self::Documents => vec![
"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
],
Self::Text => mime_guess::get_mime_extensions_str("text/*").unwrap().to_vec(),
// many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used
// somehow to extract extensions for compressed files from mime_guess?
Self::Archives => vec!["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2", "tgz"],
Self::Archives => vec!["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2"],
}
}
}

View file

@ -14,8 +14,6 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
#![forbid(unsafe_code)]
use std::io::{stdout, BufWriter};
use std::path::Path;
@ -132,7 +130,7 @@ fn main() {
let mut buffered_stdout = BufWriter::new(stdout());
let result = match args.output_format {
OutputFormat::Sh => Script::new().write_all(&results, &mut buffered_stdout),
OutputFormat::Script => Script::new().write_all(&results, &mut buffered_stdout),
OutputFormat::PowerShell | OutputFormat::Powershell => PowerShell::new().write_all(&results, &mut buffered_stdout),
OutputFormat::Text => todo!(),
};

View file

@ -10,14 +10,14 @@ cfg_if! {
if #[cfg(windows)] {
const DEFAULT_FORMAT: &str = "powershell";
} else {
const DEFAULT_FORMAT: &str = "sh";
const DEFAULT_FORMAT: &str = "script";
}
}
#[derive(Clap, PartialEq, Debug)]
pub enum OutputFormat {
/// A Bourne shell compatible script.
Sh,
Script,
/// A PowerShell script.
PowerShell,
/// Also a PowerShell script, with different casing to allow for `fif -o powershell`.
@ -41,38 +41,31 @@ pub enum OutputFormat {
setting(AppSettings::ColoredHelp)
)]
pub struct Parameters {
/// Only examine files with these extensions (Comma-separated list).
/// This argument conflicts with `-E`.
/// Only examine files with these extensions (Comma-separated list)
#[clap(short, long, use_delimiter = true, require_delimiter = true, group = "extensions")]
pub exts: Option<Vec<StringType>>,
/// Use a preset list of extensions as the search filter.
/// `media` includes all extensions from the `audio`, `video`, and `images` sets. This argument conflicts with `-e`.
/// Use a preset list of extensions as the search filter
#[clap(short = 'E', long, arg_enum, group = "extensions")]
pub ext_set: Option<ExtensionSet>,
/// Don't skip hidden files and directories.
/// Even if this flag is not present, fif will still recurse into a hidden root directory - for example, `fif
/// ~/.hidden` will recurse into `~/.hidden` regardless of whether or not -s was passed as an argument.
/// Don't skip hidden files and directories
#[clap(short, long)]
pub scan_hidden: bool,
/// Scan files without extensions.
/// By default, fif will ignore files without extensions - for example, a jpeg file named `photo` won't be considered
/// misnamed. Supplying the -S flag will cause fif to recommend renaming this file to `photo.jpg`.
/// Scan files without extensions
#[clap(short = 'S', long)]
pub scan_extensionless: bool,
/// Output format to use.
/// By default, fif will output a PowerShell script on Windows, and a Bourne Shell script on other platforms.
/// Output format to use
#[clap(short, long, default_value = DEFAULT_FORMAT, arg_enum)]
pub output_format: OutputFormat,
/// Follow symlinks.
/// Follow symlinks
#[clap(short, long)]
pub follow_symlinks: bool,
/// The directory to process.
/// Directory to process
// TODO: right now this can only take a single directory - should this be improved?
#[clap(name = "DIR", default_value = ".", parse(from_os_str))]
pub dirs: PathBuf,

View file

@ -8,12 +8,9 @@ use cfg_if::cfg_if;
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use mime_guess::Mime;
use crate::findings::Findings;
use crate::formats::{Format, Script};
use std::borrow::Borrow;
use std::collections::HashMap;
use std::ffi::OsStr;
use std::io::Read;
use std::path::Path;
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
@ -152,10 +149,7 @@ fn simple_directory() {
"pdf" => APPLICATION_PDF,
"zip" => application_zip(),
_ => APPLICATION_OCTET_STREAM, // general "fallback" type
},
"Incorrect MIME type detected - got {:?} for a {:?} file",
result.mime,
ext.unwrap()
}
);
}
}
@ -186,8 +180,7 @@ fn argument_parsing() {
hidden: false,
extensionless: false,
follow_symlinks: true
},
"ScanOpts are incorrect"
}
)
}
@ -231,42 +224,3 @@ fn identify_random_bytes() {
}
println!("No type found:\t{} counts", 500 - results.values().sum::<i32>())
}
#[test]
fn outputs_move_commands() {
	// a single finding: "misnamed_file.png" is marked invalid and carries the jpeg MIME type
	let findings = vec![Ok(Findings {
		file: Path::new("misnamed_file.png"),
		valid: false,
		mime: IMAGE_JPEG,
	})];

	// render the script into an in-memory buffer
	let mut buffer = std::io::Cursor::new(Vec::new());
	Script::new()
		.write_all(&findings, &mut buffer)
		.expect("Failed to write to cursor");

	// rewind to the start so everything that was just written can be read back
	buffer.set_position(0);
	let mut script = std::string::String::new();
	buffer
		.read_to_string(&mut script)
		.expect("Failed to read from cursor to string");

	// the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg"
	let mentions_fixed_name = script.contains("misnamed_file.jpg");
	assert!(mentions_fixed_name, "Output doesn't contain move command!")
}
#[test]
fn media_contains_audio_video_images() {
	use crate::extension_set::ExtensionSet::{Audio, Images, Media, Videos};
	let media_exts = Media.extensions();

	// every extension in the audio, video and image sets must also appear in the media set
	for subset in [Audio.extensions(), Videos.extensions(), Images.extensions()].iter() {
		for ext in subset {
			assert!(media_exts.contains(ext));
		}
	}
}

43
test.py
View file

@ -2,19 +2,8 @@
import re
import subprocess
import sys
def test_archs():
    """Run `cross test` with the infer backend for each listed target architecture."""
    archs = ["aarch64", "powerpc"]
    target = len(archs)
    # enumerate from 1 so the progress message reads "1 of N", "2 of N", ...
    for upto, arch in enumerate(archs, start=1):
        print(f"Testing {arch} ({upto} of {target})")
        command = f"cross test --features=infer-backend --target {arch}-unknown-linux-gnu"
        subprocess.run(command.split(" "))
def test_versions():
def main():
match = re.search(
r'rust-version ?= ?"([\d.]+)"',
open("Cargo.toml", "r").read(-1)
@ -24,36 +13,26 @@ def test_versions():
print("Couldn't find rust-version")
exit(1)
versions = [match.group(1), "stable", "nightly"]
versions = [match.group(1), "stable", "beta", "nightly"]
backends = ["xdg-mime", "infer"]
upto = 1
done = 0
target = len(versions) * len(backends) * 2
for version in versions:
for backend in backends:
print(f"[{version}, {backend}] Tests ({upto} of {target})")
print(f"[{version}, {backend}] Tests")
subprocess.run(f"cargo +{version} test --features={backend}-backend".split(" "))
upto += 1
done += 1
print(f"Success - {done} of {target} complete")
print(f"[{version}, {backend}] Scanning imgs ({upto} of {target})")
subprocess.run(f"cargo +{version} run --release --features={backend}-backend -- imgs".split(" "))
upto += 1
def main():
done_something = False
if "versions" in sys.argv:
test_versions()
done_something = True
if "archs" in sys.argv:
test_archs()
done_something = True
if not done_something:
print("You must supply at least one of `versions` or `archs` as an argument! 0uo")
sys.exit(2)
print(f"[{version}, {backend}] Scanning imgs")
subprocess.run(f"cargo +{version} run --release --features={backend}-backend -- -E images imgs".split(" "))
done += 1
print(f"Success - {done} of {target} complete")
print("Done! You might want to run cargo clean...")
if __name__ == "__main__":
main()