Compare commits
5 commits
fb67c11eb6
...
d54cc8d6da
Author | SHA1 | Date | |
---|---|---|---|
d54cc8d6da | |||
0f2f408c09 | |||
e83ac778d6 | |||
be37b24705 | |||
cb2a30f455 |
13 changed files with 234 additions and 47 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -8,3 +8,4 @@ fif_*
|
|||
!clippy.sh
|
||||
cargo-timing*.html
|
||||
todo.txt
|
||||
/pkg/fif.spec
|
|
@ -10,6 +10,27 @@
|
|||
</Attribute>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="/README.md">
|
||||
<value>
|
||||
<Attribute>
|
||||
<option name="separator" value="," />
|
||||
</Attribute>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="/pkg/PKGBUILD">
|
||||
<value>
|
||||
<Attribute>
|
||||
<option name="separator" value="	" />
|
||||
</Attribute>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="/pkg/fif.spec">
|
||||
<value>
|
||||
<Attribute>
|
||||
<option name="separator" value=":" />
|
||||
</Attribute>
|
||||
</value>
|
||||
</entry>
|
||||
<entry key="/src/formats.rs">
|
||||
<value>
|
||||
<Attribute>
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
<excludeFolder url="file://$MODULE_DIR$/old" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/awful" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/.mypy_cache" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/pkg" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
|
|
10
CHANGELOG.md
10
CHANGELOG.md
|
@ -2,7 +2,15 @@
|
|||
Dates are given in YYYY-MM-DD format.
|
||||
|
||||
## v0.2
|
||||
### v0.2.11 (0201-)
|
||||
### v0.2.12 (2021-???)
|
||||
- Much better README.md
|
||||
- Better documentation for command line arguments
|
||||
- Added more stuff to test.py
|
||||
- PKGBUILD for Arch-based distros
|
||||
- Added Text extension set
|
||||
- More test coverage
|
||||
|
||||
### v0.2.11 (2021-04-04)
|
||||
#### Features
|
||||
- fif can now traverse symlinks with the `-f`/`--follow-symlinks` flag
|
||||
- Extensions are no longer mandatory - running fif without `-e` or `-E` will scan all files, regardless of extension
|
||||
|
|
8
Cargo.lock
generated
8
Cargo.lock
generated
|
@ -284,9 +284,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.92"
|
||||
version = "0.2.93"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56d855069fafbb9b344c0f962150cd2c1187975cb1c22c1522c240d8c4986714"
|
||||
checksum = "9385f66bf6105b241aa65a61cb923ef20efc665cb9f9bb50ac2f0c4b7f378d41"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
|
@ -549,9 +549,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
|
|||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.68"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3ce15dd3ed8aa2f8eeac4716d6ef5ab58b6b9256db41d7e1a0224c2788e8fd87"
|
||||
checksum = "48fe99c6bd8b1cc636890bcc071842de909d902c81ac7dab53ba33c421ab8ffb"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
|
@ -10,7 +10,7 @@ repository = "https://git.bune.city/lynnesbian/fif"
|
|||
readme = "README.md"
|
||||
keywords = ["mime", "mimetype", "utilities", "tools"]
|
||||
categories = ["command-line-utilities"]
|
||||
exclude = [".idea/", "Cross.toml", "*.sh", "*.py", ".drone.yml"]
|
||||
exclude = [".idea/", "Cross.toml", "*.sh", "*.py", ".drone.yml", "pkg/"]
|
||||
#resolver = "2"
|
||||
#license-file = "LICENSE"
|
||||
|
||||
|
@ -67,6 +67,9 @@ fastrand = "1.4.0"
|
|||
[profile.release]
|
||||
lto = "thin"
|
||||
|
||||
[profile.test]
|
||||
opt-level = 0
|
||||
|
||||
# optimise dependencies, even when producing debug builds
|
||||
[profile.dev.package."*"]
|
||||
opt-level = 3
|
||||
|
|
82
README.md
82
README.md
|
@ -1,45 +1,78 @@
|
|||
fif
|
||||
===
|
||||
[![Crates.io](https://img.shields.io/crates/v/fif.svg?style=flat-square)](https://crates.io/crates/fif)
|
||||
[![Crates.io](https://img.shields.io/crates/l/fif.svg?style=flat-square)](https://git.bune.city/lynnesbian/fif/src/branch/master/LICENSE)
|
||||
[![Version](https://img.shields.io/crates/v/fif.svg?style=flat-square)](https://crates.io/crates/fif)
|
||||
[![License](https://img.shields.io/crates/l/fif.svg?style=flat-square)](https://git.bune.city/lynnesbian/fif/src/branch/master/LICENSE)
|
||||
![Minimum Supported Rust Version](https://img.shields.io/badge/msrv-1.43.0-orange?style=flat-square)
|
||||
[![CI Status](https://drone.bune.city/api/badges/lynnesbian/fif/status.svg?style=flat-square)](https://drone.bune.city/lynnesbian/fif)
|
||||
[![unsafe forbidden](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/)
|
||||
|
||||
A command-line tool for detecting and optionally correcting files with incorrect extensions.
|
||||
|
||||
fif recursively scans the given directory and outputs a shell script to fix the name of any files with incorrect
|
||||
extensions. By default, fif will scan all non-hidden files in the given directory, and will ignore symlinks.
|
||||
|
||||
As fif prints a shell script to stdout rather than acting on the files directly, you may wish to redirect its output to
|
||||
a file, e.g. `fif ~/Documents > output.sh`. You can also pipe the output directly into your shell, e.g.
|
||||
`fif ~/Documents | bash`, although this is not recommended - you should look over fif's output and verify for yourself
|
||||
that it's not doing anything that will give you a headache before running it.
|
||||
|
||||
## Installation
|
||||
### Default backend
|
||||
### Cargo
|
||||
```bash
|
||||
cargo install --locked fif
|
||||
```
|
||||
|
||||
### Other backends
|
||||
`fif` supports using [infer](https://crates.io/crates/infer) or [xdg-mime](https://crates.io/crates/xdg-mime) as its
|
||||
backend for looking up file types. By default, xdg-mime will be used on Linux, and infer on all other systems.
|
||||
To update, simply re-run this command, or use a tool like
|
||||
[cargo-update](https://github.com/nabijaczleweli/cargo-update).
|
||||
|
||||
xdg-mime should work on any Unixy system with [libmagic/file(1)](https://www.darwinsys.com/file/) installed, although
|
||||
I've only tested it on Linux. infer should work on any system.
|
||||
#### Other backends
|
||||
`fif` supports using [`infer`](https://crates.io/crates/infer) or [`xdg-mime`](https://crates.io/crates/xdg-mime) as its
|
||||
backend for looking up file types. By default, xdg-mime will be used on
|
||||
[*nix systems](https://en.wikipedia.org/wiki/Unix-like) (Linux, macOS, *BSD, etc.), and infer on all other systems.
|
||||
|
||||
`xdg-mime` should work on any *nix system with [libmagic/file(1)](https://www.darwinsys.com/file/) installed, although
|
||||
I've only tested it on Linux and FreeBSD. `infer` should work on any system.
|
||||
|
||||
You can override the default backend for your system at compile time like so:
|
||||
|
||||
```bash
|
||||
# xdg-mime
|
||||
cargo install --features=xdg-mime-backend
|
||||
cargo install fif --features=xdg-mime-backend
|
||||
# infer
|
||||
cargo install --features=infer-backend
|
||||
cargo install fif --features=infer-backend
|
||||
```
|
||||
|
||||
Of the supported backends, `xdg-mime` by far supports the most file types, as it uses the excellent [Shared MIME
|
||||
Info](https://gitlab.freedesktop.org/xdg/shared-mime-info/) database, whereas `infer` uses its own baked-in database.
|
||||
However, `infer` is also faster to load, if only by a few dozen milliseconds, and has no external dependencies.
|
||||
|
||||
#### Multithreading
|
||||
It is also possible to disable multithreading by installing without default features:
|
||||
```bash
|
||||
cargo install fif --no-default-features
|
||||
```
|
||||
|
||||
## Usage
|
||||
See `fif --help` for more.
|
||||
|
||||
### The basics
|
||||
The simplest way to use fif looks like this:
|
||||
### Logging
|
||||
By default, fif will log any warnings and/or errors encountered during execution. The verbosity of the logging can be
|
||||
modified by setting the `RUST_LOG` environment variable to one of: `trace`, `debug`, `info`, `warn`, `error`.
|
||||
|
||||
For example:
|
||||
|
||||
```bash
|
||||
fif -E images ~/Pictures
|
||||
RUST_LOG=debug fif ~/Downloads
|
||||
```
|
||||
|
||||
This command will scan all of the files with extensions used by image files (.jpg, .png, etc) in your `~/Pictures`
|
||||
directory.
|
||||
### The basics
|
||||
The simplest way to use fif looks like this:
|
||||
|
||||
```bash
|
||||
fif ~/Downloads
|
||||
```
|
||||
|
||||
This command will scan all non-hidden files in your `~/Downloads` directory.
|
||||
|
||||
You can also manually specify a set of extensions to use:
|
||||
|
||||
|
@ -47,11 +80,26 @@ You can also manually specify a set of extensions to use:
|
|||
fif -e jpeg,jpg,zip,docx ~/Documents
|
||||
```
|
||||
|
||||
By default, fif will output a bash script that can be used to fix all the files it found with incorrect file extensions.
|
||||
Or use a preset extension set - for example, to scan files with image extensions (jpg, png, gif, bmp...):
|
||||
|
||||
```bash
|
||||
fif -E images ~/Pictures
|
||||
```
|
||||
|
||||
For more information, see [the man page](https://git.bune.city/lynnesbian/fif/src/branch/master/doc/fif.1.txt).
|
||||
|
||||
### Output
|
||||
By default, fif will output a bash script (or PowerShell script on Windows) that can be used to fix all the files it
|
||||
found with incorrect file extensions.
|
||||
|
||||
You might find it useful to output this script to a file (rather than to stdout):
|
||||
|
||||
```bash
|
||||
fif -E images ~/Pictures > output.sh
|
||||
```
|
||||
|
||||
More coming soon!
|
||||
You can also manually specify an output format to use:
|
||||
|
||||
```bash
|
||||
fif -O powershell ~/Documents > output.ps1
|
||||
```
|
||||
|
|
26
pkg/PKGBUILD
Normal file
26
pkg/PKGBUILD
Normal file
|
@ -0,0 +1,26 @@
|
|||
pkgname=fif
|
||||
pkgver=0.2.11
|
||||
pkgrel=1
|
||||
pkgdesc="A command-line tool for detecting and optionally correcting files with incorrect extensions."
|
||||
# tier 1 rust linux targets
|
||||
arch=('x86_64' 'i686' 'aarch64')
|
||||
url="https://git.bune.city/lynnesbian/fif"
|
||||
license=('GPLv3+')
|
||||
depends=('shared-mime-info')
|
||||
source=("$pkgname-$pkgver.tar.gz::https://git.bune.city/lynnesbian/$pkgname/archive/v${pkgver}.tar.gz")
|
||||
sha256sums=("fd2b3133fabf8ad1993c6d16a9bf1ad645b1eff8fd30a4a9227ef5a157f56183")
|
||||
|
||||
build() {
|
||||
cd "$pkgname"
|
||||
cargo build --release --locked
|
||||
}
|
||||
|
||||
check() {
|
||||
cd "$pkgname"
|
||||
cargo build --release --locked
|
||||
}
|
||||
|
||||
package() {
|
||||
cd "$pkgname"
|
||||
install -Dm 755 target/release/${pkgname} -t "${pkgdir}/usr/bin"
|
||||
}
|
|
@ -14,6 +14,8 @@ pub enum ExtensionSet {
|
|||
Media,
|
||||
/// Extensions used for document file formats, such as `pdf`, `odt`, `docx`, etc.
|
||||
Documents,
|
||||
/// Extensions used for text file formats, such as `txt`, `toml`, `html`, etc.
|
||||
Text,
|
||||
/// Extensions used for archive file formats, such as `zip`, `zst`, `gz`, etc.
|
||||
Archives,
|
||||
}
|
||||
|
@ -34,9 +36,10 @@ impl ExtensionSet {
|
|||
Self::Documents => vec![
|
||||
"pdf", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "csv", "tsv", "odt", "ods", "odp", "oda", "rtf", "ps",
|
||||
],
|
||||
Self::Text => mime_guess::get_mime_extensions_str("text/*").unwrap().to_vec(),
|
||||
// many compressed file types follow the name scheme "application/x.+compressed.*" - maybe this can be used
|
||||
// somehow to extract extensions for compressed files from mime_guess?
|
||||
Self::Archives => vec!["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2"],
|
||||
Self::Archives => vec!["zip", "tar", "gz", "zst", "xz", "rar", "7z", "bz", "bz2", "tgz"],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
#![forbid(unsafe_code)]
|
||||
|
||||
use std::io::{stdout, BufWriter};
|
||||
use std::path::Path;
|
||||
|
||||
|
@ -130,7 +132,7 @@ fn main() {
|
|||
let mut buffered_stdout = BufWriter::new(stdout());
|
||||
|
||||
let result = match args.output_format {
|
||||
OutputFormat::Script => Script::new().write_all(&results, &mut buffered_stdout),
|
||||
OutputFormat::Sh => Script::new().write_all(&results, &mut buffered_stdout),
|
||||
OutputFormat::PowerShell | OutputFormat::Powershell => PowerShell::new().write_all(&results, &mut buffered_stdout),
|
||||
OutputFormat::Text => todo!(),
|
||||
};
|
||||
|
|
|
@ -10,14 +10,14 @@ cfg_if! {
|
|||
if #[cfg(windows)] {
|
||||
const DEFAULT_FORMAT: &str = "powershell";
|
||||
} else {
|
||||
const DEFAULT_FORMAT: &str = "script";
|
||||
const DEFAULT_FORMAT: &str = "sh";
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clap, PartialEq, Debug)]
|
||||
pub enum OutputFormat {
|
||||
/// A Bourne shell compatible script.
|
||||
Script,
|
||||
Sh,
|
||||
/// A PowerShell script.
|
||||
PowerShell,
|
||||
/// Also a PowerShell script, with different casing to allow for `fif -o powershell`.
|
||||
|
@ -41,31 +41,38 @@ pub enum OutputFormat {
|
|||
setting(AppSettings::ColoredHelp)
|
||||
)]
|
||||
pub struct Parameters {
|
||||
/// Only examine files with these extensions (Comma-separated list)
|
||||
/// Only examine files with these extensions (Comma-separated list).
|
||||
/// This argument conflicts with `-E`.
|
||||
#[clap(short, long, use_delimiter = true, require_delimiter = true, group = "extensions")]
|
||||
pub exts: Option<Vec<StringType>>,
|
||||
|
||||
/// Use a preset list of extensions as the search filter
|
||||
/// Use a preset list of extensions as the search filter.
|
||||
/// `media` includes all extensions from the `audio`, `video`, and `images` sets. This argument conflicts with `-e`.
|
||||
#[clap(short = 'E', long, arg_enum, group = "extensions")]
|
||||
pub ext_set: Option<ExtensionSet>,
|
||||
|
||||
/// Don't skip hidden files and directories
|
||||
/// Don't skip hidden files and directories.
|
||||
/// Even if this flag is not present, fif will still recurse into a hidden root directory - for example, `fif
|
||||
/// ~/.hidden` will recurse into `~/.hidden` regardless of whether or not -s was passed as an argument.
|
||||
#[clap(short, long)]
|
||||
pub scan_hidden: bool,
|
||||
|
||||
/// Scan files without extensions
|
||||
/// Scan files without extensions.
|
||||
/// By default, fif will ignore files without extensions - for example, a jpeg file named `photo` won't be considered
|
||||
/// misnamed. Supplying the -S flag will cause fif to recommend renaming this file to `photo.jpg`.
|
||||
#[clap(short = 'S', long)]
|
||||
pub scan_extensionless: bool,
|
||||
|
||||
/// Output format to use
|
||||
/// Output format to use.
|
||||
/// By default, fif will output a PowerShell script on Windows, and a Bourne Shell script on other platforms.
|
||||
#[clap(short, long, default_value = DEFAULT_FORMAT, arg_enum)]
|
||||
pub output_format: OutputFormat,
|
||||
|
||||
/// Follow symlinks
|
||||
/// Follow symlinks.
|
||||
#[clap(short, long)]
|
||||
pub follow_symlinks: bool,
|
||||
|
||||
/// Directory to process
|
||||
/// The directory to process.
|
||||
// TODO: right now this can only take a single directory - should this be improved?
|
||||
#[clap(name = "DIR", default_value = ".", parse(from_os_str))]
|
||||
pub dirs: PathBuf,
|
||||
|
|
|
@ -8,9 +8,12 @@ use cfg_if::cfg_if;
|
|||
use mime_guess::mime::{APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
|
||||
use mime_guess::Mime;
|
||||
|
||||
use crate::findings::Findings;
|
||||
use crate::formats::{Format, Script};
|
||||
use std::borrow::Borrow;
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsStr;
|
||||
use std::io::Read;
|
||||
use std::path::Path;
|
||||
|
||||
const JPEG_BYTES: &[u8] = b"\xFF\xD8\xFF";
|
||||
|
@ -149,7 +152,10 @@ fn simple_directory() {
|
|||
"pdf" => APPLICATION_PDF,
|
||||
"zip" => application_zip(),
|
||||
_ => APPLICATION_OCTET_STREAM, // general "fallback" type
|
||||
}
|
||||
},
|
||||
"Incorrect MIME type detected - got {:?} for a {:?} file",
|
||||
result.mime,
|
||||
ext.unwrap()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -180,7 +186,8 @@ fn argument_parsing() {
|
|||
hidden: false,
|
||||
extensionless: false,
|
||||
follow_symlinks: true
|
||||
}
|
||||
},
|
||||
"ScanOpts are incorrect"
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -224,3 +231,42 @@ fn identify_random_bytes() {
|
|||
}
|
||||
println!("No type found:\t{} counts", 500 - results.values().sum::<i32>())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn outputs_move_commands() {
|
||||
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
|
||||
let entries = vec![Ok(Findings {
|
||||
file: Path::new("misnamed_file.png"),
|
||||
valid: false,
|
||||
mime: IMAGE_JPEG,
|
||||
})];
|
||||
|
||||
let mut cursor = std::io::Cursor::new(Vec::new());
|
||||
let mut contents = std::string::String::new();
|
||||
|
||||
Script::new()
|
||||
.write_all(&entries, &mut cursor)
|
||||
.expect("Failed to write to cursor");
|
||||
cursor.set_position(0);
|
||||
cursor
|
||||
.read_to_string(&mut contents)
|
||||
.expect("Failed to read from cursor to string");
|
||||
|
||||
// the output should contain a command like "mv -i misnamed_file.png misnamed_file.jpg"
|
||||
assert!(
|
||||
contents.contains("misnamed_file.jpg"),
|
||||
"Output doesn't contain move command!"
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn media_contains_audio_video_images() {
|
||||
use crate::extension_set::ExtensionSet::{Audio, Images, Media, Videos};
|
||||
let media_exts = Media.extensions();
|
||||
|
||||
// assert every extension in the audio/video/image sets is contained in the media set
|
||||
[Audio.extensions(), Videos.extensions(), Images.extensions()]
|
||||
.concat()
|
||||
.into_iter()
|
||||
.for_each(|ext| assert!(media_exts.contains(&ext)));
|
||||
}
|
||||
|
|
43
test.py
43
test.py
|
@ -2,8 +2,19 @@
|
|||
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
def main():
|
||||
def test_archs():
|
||||
archs = ["aarch64", "powerpc"]
|
||||
upto = 1
|
||||
target = len(archs)
|
||||
|
||||
for arch in archs:
|
||||
print(f"Testing {arch} ({upto} of {target})")
|
||||
subprocess.run(f"cross test --features=infer-backend --target {arch}-unknown-linux-gnu".split(" "))
|
||||
upto += 1
|
||||
|
||||
def test_versions():
|
||||
match = re.search(
|
||||
r'rust-version ?= ?"([\d.]+)"',
|
||||
open("Cargo.toml", "r").read(-1)
|
||||
|
@ -13,26 +24,36 @@ def main():
|
|||
print("Couldn't find rust-version")
|
||||
exit(1)
|
||||
|
||||
versions = [match.group(1), "stable", "beta", "nightly"]
|
||||
versions = [match.group(1), "stable", "nightly"]
|
||||
backends = ["xdg-mime", "infer"]
|
||||
|
||||
done = 0
|
||||
upto = 1
|
||||
target = len(versions) * len(backends) * 2
|
||||
|
||||
for version in versions:
|
||||
for backend in backends:
|
||||
print(f"[{version}, {backend}] Tests")
|
||||
print(f"[{version}, {backend}] Tests ({upto} of {target})")
|
||||
subprocess.run(f"cargo +{version} test --features={backend}-backend".split(" "))
|
||||
done += 1
|
||||
print(f"Success - {done} of {target} complete")
|
||||
upto += 1
|
||||
|
||||
print(f"[{version}, {backend}] Scanning imgs")
|
||||
subprocess.run(f"cargo +{version} run --release --features={backend}-backend -- -E images imgs".split(" "))
|
||||
done += 1
|
||||
print(f"Success - {done} of {target} complete")
|
||||
print(f"[{version}, {backend}] Scanning imgs ({upto} of {target})")
|
||||
subprocess.run(f"cargo +{version} run --release --features={backend}-backend -- imgs".split(" "))
|
||||
upto += 1
|
||||
|
||||
def main():
|
||||
done_something = False
|
||||
if "versions" in sys.argv:
|
||||
test_versions()
|
||||
done_something = True
|
||||
if "archs" in sys.argv:
|
||||
test_archs()
|
||||
done_something = True
|
||||
|
||||
if not done_something:
|
||||
print("You must supply at least one of `versions` or `archs` as an argument! 0uo")
|
||||
sys.exit(2)
|
||||
|
||||
print("Done! You might want to run cargo clean...")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
|
Loading…
Reference in a new issue