Compare commits

...

20 Commits

Author SHA1 Message Date
Lynne Megido 480acf515f
cleanup 2021-10-06 02:37:30 +10:00
Lynne Megido 58d5ac7e75
mostly just formatting 2021-10-06 01:41:08 +10:00
Lynne Megido b5ef95b6ef
documentation 2021-10-06 01:33:15 +10:00
Lynne Megido 6d32beedcc
cleanup 2021-10-06 00:26:04 +10:00
Lynne Megido ce79cf14ba
licensing stuff via reuse tool 2021-10-06 00:24:08 +10:00
Lynne Megido 955f0e1510
litl bit more testing 2021-10-05 04:47:33 +10:00
Lynne Megido 38ca71cc14
test smartstring::validate(), hooray 2021-10-05 02:13:32 +10:00
Lynne Megido 47c55ae61e
bumped MSRV to 1.48.0, minor improvements
- this is the rust version that debian stable (buster) currently has, so that's nice
- now we can use `const fn`s in more places, hooray
- smartstring is no longer outdated, hooray
- bitflags is no longer outdated, hooray
- mime_type in files.rs is now a bit smarter about not reading more than it has to, hooray
- removed some redundant clippy lints, hooray
- added a teensy smidgen more documentation, hooray
2021-10-05 02:12:16 +10:00
Lynne Megido c30aba35fd
fix broken multi-threaded scan_from_walkdir
i don't fully understand *why* this was broken but... it was
2021-10-05 01:45:18 +10:00
Lynne Megido 17a784732b
beeg beeg cleanup, set default prompt to error 2021-10-05 00:18:42 +10:00
Lynne Megido 556ea82a06
added --fix - fif can now rename files itself!
this alone warrants a bump to 0.4.0 imo, and now that i think about it, there's not really much keeping me from calling it 1.0...
i think i'd want to get more tests, and maybe upgrade to clap 3 stable when that happens, before calling it 1.0, though. maybe even get some sort of configuration file...
2021-10-04 23:33:48 +10:00
Lynne Megido c4fabbc0f4
refactoring, initial work on --fix feature 2021-10-04 20:22:15 +10:00
Lynne Megido 451ea3d5d9
cargo fmt, cargo update 2021-10-04 01:00:49 +10:00
Lynne Megido 3f40c61d6d
better output 0u0 2021-10-04 00:59:20 +10:00
Lynne Megido 37b9cccc9c
update gitignore 2021-10-02 19:25:35 +10:00
Lynne Megido 25d8ed64ae
capped help output to 120 chars max width 2021-09-29 23:52:20 +10:00
Lynne Megido eb3e650361
removed broken MSI support 0uo 2021-09-29 23:40:57 +10:00
Lynne Megido f1982f670e
cargo update 2021-09-29 23:33:03 +10:00
Lynne Megido 0712bf0a6b
add support for MSI files to infer backend 2021-09-29 23:31:51 +10:00
Lynne Megido 47d94cf27b
simplify InferDb custom mimetypes
using `buf.starts_with(b"\xAB\xCD")` instead of `buf[0] == 0xAB && buf[1] == 0xCD` is shorter, easier to read, removes the need for the `buf.len() > xyz` check, and actually compiles to (much) less assembly:
https://godbolt.org/z/M7GePGn1T

isn't that just lovely
2021-09-29 23:29:21 +10:00
29 changed files with 930 additions and 284 deletions

View File

@ -1,3 +1,6 @@
# SPDX-FileCopyrightText: 2021 Lynnesbian
# SPDX-License-Identifier: CC0-1.0
---
kind: pipeline
type: docker

8
.gitignore vendored
View File

@ -1,5 +1,9 @@
# SPDX-FileCopyrightText: 2021 Lynnesbian
# SPDX-License-Identifier: CC0-1.0
/target
/imgs
/imgs.tar.zst
/fif_*
/old
/.mypy_cache
@ -7,9 +11,11 @@
*.sh
!clippy.sh
cargo-timing*.html
todo.txt
/*.txt
/pkg/*
/out
/packages
/fif
/fif.exe
.idea/workspace.xml
*.sync-conflict*

View File

@ -1,3 +1,6 @@
# SPDX-FileCopyrightText: 2021 Lynnesbian
# SPDX-License-Identifier: CC0-1.0
image: "rust:latest"
variables:
@ -81,15 +84,8 @@ clippy:
cleanup-stable:
extends: .cargo-cleanup
# commented out because cargo-sweep no longer works on our MSRV
#cleanup-msrv:
# extends: cleanup-stable
# image: "rust:1.43.0"
# cache:
# key: msrv
# paths:
# - target
# - .cargo
# there's no point running cargo sweep on MSRV, since it's always at the same version (and therefore cargo sweep will
# never delete anything)
cleanup-nightly:
extends: cleanup-stable
@ -107,8 +103,7 @@ build-base-stable:
build-base-msrv:
extends: build-base-stable
# needs: ["cleanup-msrv"]
image: "rust:1.43.0"
image: "rust:1.48.0"
cache:
key: msrv
paths:
@ -134,7 +129,7 @@ build-stable:
build-msrv:
extends: build-stable
needs: ["build-base-msrv"]
image: "rust:1.43.0"
image: "rust:1.48.0"
cache:
key: msrv
paths:
@ -159,7 +154,7 @@ test-stable:
test-msrv:
extends: test-stable
image: "rust:1.43.0"
image: "rust:1.48.0"
needs: ["build-msrv"]
cache:
key: msrv

View File

@ -3,6 +3,9 @@
<option name="myName" value="Project Default" />
<inspection_tool class="DuplicatedCode" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
<inspection_tool class="RegExpRepeatedSpace" enabled="true" level="INFORMATION" enabled_by_default="true" />
<inspection_tool class="RsExperimentalChecks" enabled="true" level="ERROR" enabled_by_default="true" />
<inspection_tool class="RsUnusedImport" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
<inspection_tool class="RsVariableMutable" enabled="true" level="WEAK WARNING" enabled_by_default="true" />
<inspection_tool class="ShellCheck" enabled="true" level="ERROR" enabled_by_default="true">
<shellcheck_settings value="SC2016" />
</inspection_tool>

3
.idea/scopes/Default.xml Normal file
View File

@ -0,0 +1,3 @@
<component name="DependencyValidationManager">
<scope name="Default" pattern="(file[fif]:src//*||file[fif]:doc//*||file:.gitignore||file:.*.yml||file:*.sh||file:*.toml||file:Cargo.lock||file:test.py||file:*.md)&amp;&amp;!file[fif]:doc/*||file:build.rs" />
</component>

View File

@ -2,6 +2,5 @@
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="$PROJECT_DIR$/src/walkdir" vcs="Git" />
</component>
</project>

12
.reuse/dep5 Normal file
View File

@ -0,0 +1,12 @@
Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: fif
Upstream-Contact: Lynnesbian <lynne@bune.city>
Source: https://gitlab.com/Lynnesbian/fif/
Files: .idea/**/* .idea/* Cargo.lock .mailmap .kateproject
Copyright: Lynnesbian
License: CC0-1.0
Files: doc/* README.md CHANGELOG.md
Copyright: Lynnesbian
License: CC-BY-SA-4.0

View File

@ -5,6 +5,25 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## Unreleased
### Added
- `--fix` mode - instead of outputting a shell script or text file, fif will rename the misnamed files for you!
- By default, the user will be prompted only if fif encounters an error while renaming the file, or if renaming
the file would cause another file to be overwritten. This behaviour can be changed with the new `-p`/`--prompt`
flag: `-p always` to be prompted each time, `-p error` to be prompted on errors and when a file would be
overwritten by renaming, and `-p never` to disable prompting altogether - this behaves the same as
answering "yes" to every prompt.
- The `--overwrite` flag must be specified along with `--fix` in order for fif to process renames that would cause an
existing file to be overwritten. Without it, fif will never overwrite existing files, even with `-p always`.
**Caution**: If this flag is set in combination with `--prompt never`, fif will overwrite files **without asking**!
- For a more thorough breakdown of how these flags work, see [the corresponding wiki
page](https://gitlab.com/Lynnesbian/fif/-/wikis/Fix).
### Changed
- The Minimum Supported Rust Version (MSRV) is now **1.48.0**.
- Capped help output (`-h`/`--help`) width at 120 characters max
- Output is now sorted by filename - specifically, errors will appear first, followed by files that fif is unable to
recommend an extension for, in order of filename, followed by files that fif knows how to rename, again in order
of filename.
---
## v0.3.7 - 2021-09-25
### Added

24
Cargo.lock generated
View File

@ -177,7 +177,6 @@ checksum = "de853764b47027c2e862a995c34978ffa63c1501f2e15f987ba11bd4f9bba193"
name = "fif"
version = "0.3.7"
dependencies = [
"bitflags",
"cfg-if",
"clap",
"clap_derive",
@ -186,6 +185,7 @@ dependencies = [
"infer",
"itertools",
"log",
"maplit",
"mime",
"new_mime_guess",
"num_cpus",
@ -297,9 +297,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.102"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2a5ac8f984bfcf3a823267e5fde638acc3325f6496633a5da6bb6eb2171e103"
checksum = "dd8f7255a17a627354f321ef0055d63b898c6fb27eff628af4d1b66b7331edf6"
[[package]]
name = "log"
@ -310,6 +310,12 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "maplit"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "memchr"
version = "2.4.1"
@ -457,9 +463,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.9"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05"
dependencies = [
"proc-macro2",
]
@ -617,9 +623,9 @@ checksum = "533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b"
[[package]]
name = "smartstring"
version = "0.2.7"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29620fe111ceaba7a50fd806b5f44c1ef44a697a739f6677a4464c7ea8685997"
checksum = "31aa6a31c0c2b21327ce875f7e8952322acfcfd0c27569a6e18a647281352c9b"
dependencies = [
"static_assertions",
]
@ -642,9 +648,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "syn"
version = "1.0.77"
version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5239bc68e0fef57495900cfea4e8dc75596d9a319d7e16b1e0a440d24e6fe0a0"
checksum = "a4eac2e6c19f5c3abc0c229bea31ff0b9b091c7b14990e8924b92902a303a0c0"
dependencies = [
"proc-macro2",
"quote",

View File

@ -1,3 +1,6 @@
# SPDX-FileCopyrightText: 2021 Lynnesbian
# SPDX-License-Identifier: CC0-1.0
[package]
name = "fif"
description = "A command-line tool for detecting and optionally correcting files with incorrect extensions."
@ -5,7 +8,7 @@ version = "0.3.7"
authors = ["Lynnesbian <lynne@bune.city>"]
edition = "2018"
license = "GPL-3.0-or-later"
rust-version = "1.43.0" # this can actually go as low as 1.41.0 after removing cached, but i'll leave it 1.43.0
rust-version = "1.48.0" # keep in sync with `msrv` under [package.metadata] and the MSRV images used in CI
repository = "https://gitlab.com/Lynnesbian/fif"
readme = "README.md"
keywords = ["mime", "mimetype", "utilities", "tools"]
@ -35,8 +38,8 @@ cfg-if = "1.0.0"
itertools = "0.10.0"
serde = { version = "1.0", features = ["derive"], optional = true }
serde_json = { version = "1.0", optional = true }
bitflags = "~1.2.1" # 1.3+ requires Rust >= 1.46
num_cpus = { version = "1.13.0", optional = true }
maplit = "1.0.2"
[target.'cfg(not(unix))'.dependencies]
xdg-mime = { version = "0.3.3", optional = true }
@ -48,16 +51,13 @@ infer = { version = "0.5.0", optional = true }
[target.'cfg(not(all(target_endian = "big", target_pointer_width = "32")))'.dependencies]
# the seemingly weird target constraint here is due to this:
# https://github.com/bodil/smartstring/blob/v0.2.7/src/config.rs#L102-L104
# https://github.com/bodil/smartstring/blob/v0.2.9/src/config.rs#L91-L93
# essentially, smartstring is intentionally blocked from compiling on 32-bit big endian archs, so our dependency on it
# needs to be too. otherwise, fif won't work on platforms like powerpc, even though this dependency is the only
# blocker -- fif runs just fine on powerpc without smartstring. or at least, just fine under qemu user-mode powerpc ~u0
# additionally, smartstring 0.2.8 requires rust >=1.46 (due to https://github.com/rust-lang/rust/issues/49146), and
# 0.2.3 doesn't impl Display on its SmartString type.
# so, we need at least 0.2.4 so we can println! strings, and at most 0.2.7 until we bump the MSRV to at least 1.46.
# see https://github.com/bodil/smartstring/blob/master/CHANGELOG.md
smartstring = "<= 0.2.7, >= 0.2.4"
# additionally, versions before 0.2.4 didn't impl Display, so we need at least that version for displaying Strings.
smartstring = ">= 0.2.4"
[dependencies.clap]
# beta.4 requires rust >= 1.54.0 (and beta.3 was yanked)
@ -89,4 +89,4 @@ opt-level = 3
opt-level = 3
[package.metadata]
msrv = "1.43.0"
msrv = "1.48.0"

View File

@ -1,2 +1,5 @@
# SPDX-FileCopyrightText: 2021 Lynnesbian
# SPDX-License-Identifier: CC0-1.0
[build.env]
passthrough = ["RUST_BACKTRACE", "RUST_LOG", "FIF_LOG"]

170
LICENSES/CC-BY-SA-4.0.txt Normal file
View File

@ -0,0 +1,170 @@
Creative Commons Attribution-ShareAlike 4.0 International
Creative Commons Corporation (“Creative Commons”) is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an “as-is” basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible.
Using Creative Commons Public Licenses
Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses.
Considerations for licensors: Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC-licensed material, or material used under an exception or limitation to copyright. More considerations for licensors.
Considerations for the public: By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor's permission is not necessary for any reason–for example, because of any applicable exception or limitation to copyright–then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described.
Although not required by our licenses, you are encouraged to respect those requests where reasonable. More considerations for the public.
Creative Commons Attribution-ShareAlike 4.0 International Public License
By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions.
Section 1 – Definitions.
a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image.
b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License.
c. BY-SA Compatible License means a license listed at creativecommons.org/compatiblelicenses, approved by Creative Commons as essentially the equivalent of this Public License.
d. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights.
e. Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements.
f. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material.
g. License Elements means the license attributes listed in the name of a Creative Commons Public License. The License Elements of this Public License are Attribution and ShareAlike.
h. Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License.
i. Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license.
j. Licensor means the individual(s) or entity(ies) granting rights under this Public License.
k. Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them.
l. Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world.
m. You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning.
Section 2 – Scope.
a. License grant.
1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to:
A. reproduce and Share the Licensed Material, in whole or in part; and
B. produce, reproduce, and Share Adapted Material.
2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions.
3. Term. The term of this Public License is specified in Section 6(a).
4. Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material.
5. Downstream recipients.
A. Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License.
B. Additional offer from the Licensor – Adapted Material. Every recipient of Adapted Material from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Adapted Material under the conditions of the Adapter's License You apply.
C. No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material.
6. No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i).
b. Other rights.
1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise.
2. Patent and trademark rights are not licensed under this Public License.
3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties.
Section 3 – License Conditions.
Your exercise of the Licensed Rights is expressly made subject to the following conditions.
a. Attribution.
1. If You Share the Licensed Material (including in modified form), You must:
A. retain the following if it is supplied by the Licensor with the Licensed Material:
i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated);
ii. a copyright notice;
iii. a notice that refers to this Public License;
iv. a notice that refers to the disclaimer of warranties;
v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;
B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and
C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License.
2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information.
3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable.
b. ShareAlike. In addition to the conditions in Section 3(a), if You Share Adapted Material You produce, the following conditions also apply.
1. The Adapter's License You apply must be a Creative Commons license with the same License Elements, this version or later, or a BY-SA Compatible License.
2. You must include the text of, or the URI or hyperlink to, the Adapter's License You apply. You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Adapted Material.
3. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Adapted Material that restrict exercise of the rights granted under the Adapter's License You apply.
Section 4 – Sui Generis Database Rights.
Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material:
a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database;
b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material, including for purposes of Section 3(b); and
c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database.
For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights.
Section 5 – Disclaimer of Warranties and Limitation of Liability.
a. Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You.
b. To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You.
c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability.
Section 6 – Term and Termination.
a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically.
b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates:
1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or
2. upon express reinstatement by the Licensor.
c. For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License.
d. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License.
e. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.
Section 7 – Other Terms and Conditions.
a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed.
b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License.
Section 8 – Interpretation.
a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License.
b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions.
c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor.
d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority.
Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at creativecommons.org/policies, Creative Commons does not authorize the use of the trademark “Creative Commons” or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses.
Creative Commons may be contacted at creativecommons.org.

121
LICENSES/CC0-1.0.txt Normal file
View File

@ -0,0 +1,121 @@
Creative Commons Legal Code
CC0 1.0 Universal
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
HEREUNDER.
Statement of Purpose
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator
and subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for
the purpose of contributing to a commons of creative, cultural and
scientific works ("Commons") that the public can reliably and without fear
of later claims of infringement build upon, modify, incorporate in other
works, reuse and redistribute as freely as possible in any form whatsoever
and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free
culture and the further production of creative, cultural and scientific
works, or to gain reputation or greater distribution for their Work in
part through the use and efforts of others.
For these and/or other purposes and motivations, and without any
expectation of additional consideration or compensation, the person
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
is an owner of Copyright and Related Rights in the Work, voluntarily
elects to apply CC0 to the Work and publicly distribute the Work under its
terms, with knowledge of his or her Copyright and Related Rights in the
Work and the meaning and intended legal effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not
limited to, the following:
i. the right to reproduce, adapt, distribute, perform, display,
communicate, and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or
likeness depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data
in a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation
thereof, including any amended or successor version of such
directive); and
vii. other similar, equivalent or corresponding rights throughout the
world based on applicable law or treaty, and any national
implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention
of, applicable law, Affirmer hereby overtly, fully, permanently,
irrevocably and unconditionally waives, abandons, and surrenders all of
Affirmer's Copyright and Related Rights and associated claims and causes
of action, whether now known or unknown (including existing as well as
future claims and causes of action), in the Work (i) in all territories
worldwide, (ii) for the maximum duration provided by applicable law or
treaty (including future time extensions), (iii) in any current or future
medium and for any number of copies, and (iv) for any purpose whatsoever,
including without limitation commercial, advertising or promotional
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
member of the public at large and to the detriment of Affirmer's heirs and
successors, fully intending that such Waiver shall not be subject to
revocation, rescission, cancellation, termination, or any other legal or
equitable action to disrupt the quiet enjoyment of the Work by the public
as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason
be judged legally invalid or ineffective under applicable law, then the
Waiver shall be preserved to the maximum extent permitted taking into
account Affirmer's express Statement of Purpose. In addition, to the
extent the Waiver is so judged Affirmer hereby grants to each affected
person a royalty-free, non transferable, non sublicensable, non exclusive,
irrevocable and unconditional license to exercise Affirmer's Copyright and
Related Rights in the Work (i) in all territories worldwide, (ii) for the
maximum duration provided by applicable law or treaty (including future
time extensions), (iii) in any current or future medium and for any number
of copies, and (iv) for any purpose whatsoever, including without
limitation commercial, advertising or promotional purposes (the
"License"). The License shall be deemed effective as of the date CC0 was
applied by Affirmer to the Work. Should any part of the License for any
reason be judged legally invalid or ineffective under applicable law, such
partial invalidity or ineffectiveness shall not invalidate the remainder
of the License, and in such case Affirmer hereby affirms that he or she
will not (i) exercise any of his or her remaining Copyright and Related
Rights in the Work or (ii) assert any associated claims and causes of
action with respect to the Work, in either case contrary to Affirmer's
express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the present or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the
Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.

View File

@ -0,0 +1,71 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.
This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below.
0. Additional Definitions.
As used herein, "this License" refers to version 3 of the GNU Lesser General Public License, and the "GNU GPL" refers to version 3 of the GNU General Public License.
"The Library" refers to a covered work governed by this License, other than an Application or a Combined Work as defined below.
An "Application" is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library.
A "Combined Work" is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the "Linked Version".
The "Minimal Corresponding Source" for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version.
The "Corresponding Application Code" for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work.
1. Exception to Section 3 of the GNU GPL.
You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL.
2. Conveying Modified Versions.
If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version:
a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or
b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy.
3. Object Code Incorporating Material from Library Header Files.
The object code form of an Application may incorporate material from a header file that is part of the Library. You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following:
a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License.
b) Accompany the object code with a copy of the GNU GPL and this license document.
4. Combined Works.
You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following:
a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License.
b) Accompany the Combined Work with a copy of the GNU GPL and this license document.
c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document.
d) Do one of the following:
0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.
1) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version.
e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. (If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.)
5. Combined Libraries.
You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following:
a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License.
b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work.
6. Revised Versions of the GNU Lesser General Public License.
The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation.
If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library.

View File

@ -7,7 +7,7 @@
[![Version](https://img.shields.io/crates/v/fif.svg?logo=rust&style=flat-square)
](https://crates.io/crates/fif)
[![Minimum Supported Rust Version](https://img.shields.io/badge/msrv-1.43.0-orange?logo=rust&style=flat-square)
[![Minimum Supported Rust Version](https://img.shields.io/badge/msrv-1.48.0-orange?logo=rust&style=flat-square)
](https://gitlab.com/Lynnesbian/fif/-/blob/master/README.md#version-policy)
[![License](https://img.shields.io/crates/l/fif.svg?style=flat-square)
](https://gitlab.com/Lynnesbian/fif/-/blob/master/LICENSE)
@ -215,4 +215,4 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
along with this program. If not, see <https://www.gnu.org/licenses/>.

View File

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: 2021 Lynnesbian
// SPDX-License-Identifier: LGPL-3.0-or-later
use std::process::Command;
#[allow(unreachable_code, clippy::pedantic)]

View File

@ -1,4 +1,8 @@
#!/bin/bash
# SPDX-FileCopyrightText: 2021 Lynnesbian
# SPDX-License-Identifier: CC0-1.0
set -e
source "$HOME"/.cargo/env || true
@ -23,6 +27,7 @@ for backend in "${_backends[@]}"; do
-W clippy::pedantic \
-W clippy::complexity \
-W clippy::cargo \
-W clippy::style \
-W clippy::float_cmp_const \
-W clippy::lossy_float_literal \
-W clippy::multiple_inherent_impl \
@ -32,8 +37,6 @@ for backend in "${_backends[@]}"; do
-A clippy::shadow_unrelated \
-A clippy::option_if_let_else \
-A clippy::multiple-crate-versions \
-A clippy::cast-possible-truncation \
-A clippy::cast-possible-wrap \
-A clippy::must_use_candidate \
-A clippy::missing_panics_doc \
-A clippy::missing_errors_doc \
@ -45,9 +48,7 @@ done
# redundant_closure...: the alternative is often much more verbose
# shadow_unrelated: sometimes things that seem unrelated are actually related ;)
# option_if_let_else: the suggested code is usually harder to read than the original
# multiple_crate_versions: cached uses an old version of hashbrown :c
# cast_possible_truncation: only ever used where it would be totally fine
# cast_possible_wrap: ditto
# multiple_crate_versions: this doesn't actually trip right now, but it's not something i would want CI to fail over
# must_use_candidate: useless
# missing_panics_doc: the docs are just for me, fif isn't really intended to be used as a library, so this is unneeded
# missing_errors_doc: ditto

View File

@ -1,2 +1,6 @@
# SPDX-FileCopyrightText: 2021 Lynnesbian
# SPDX-License-Identifier: CC0-1.0
# avoid-breaking-exported-api = false # only available on nightly for now
cognitive-complexity-threshold = 15
cognitive-complexity-threshold = 15
msrv = "1.48.0"

View File

@ -1,6 +1,9 @@
# SPDX-FileCopyrightText: 2021 Lynnesbian
# SPDX-License-Identifier: CC0-1.0
max_width = 120
fn_single_line = true
hard_tabs = true
tab_spaces = 2
newline_style = "Unix"
group_imports = "StdExternalCrate"
group_imports = "StdExternalCrate"

View File

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: 2021 Lynnesbian
// SPDX-License-Identifier: LGPL-3.0-or-later
//! File handling - scanning, detecting MIME types, and so on.
use std::collections::{BTreeSet, HashMap};
@ -8,6 +11,7 @@ use std::str::FromStr;
use std::sync::RwLock;
use cfg_if::cfg_if;
use itertools::{Either, Itertools};
use log::{debug, error};
use mime::Mime;
use mime_guess::from_ext;
@ -140,7 +144,7 @@ pub fn scan_from_walkdir(
entries: &[DirEntry],
canonical_paths: bool,
use_threads: bool,
) -> Vec<Result<Findings, ScanError>> {
) -> (Vec<Findings>, Vec<ScanError>) {
cfg_if! {
if #[cfg(feature = "multi-threaded")] {
use rayon::prelude::*;
@ -155,8 +159,10 @@ pub fn scan_from_walkdir(
.iter() // iter over the chunk, which is a slice of DirEntry structs
.map(|entry| scan_file(entry, canonical_paths))
.collect::<Vec<_>>() // TODO: is there a way to avoid having to collect here?
})
.collect()
}).partition_map(|result| match result {
Ok(f) => Either::Left(f),
Err(e) => Either::Right(e),
});
}
} else {
// should always be false when multi-threading is disabled at compile time
@ -170,8 +176,10 @@ pub fn scan_from_walkdir(
// - fif was compiled without the `multi-threading` feature
entries
.iter()
.map(|entry: &DirEntry| scan_file(entry, canonical_paths))
.collect()
.partition_map(|entry: &DirEntry| match scan_file(entry, canonical_paths) {
Ok(f) => Either::Left(f),
Err(e) => Either::Right(e),
})
}
/// Scans a given directory with [`WalkDir`], filters with [`wanted_file`], checks for errors, and returns a vector of
@ -244,9 +252,29 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
let mut file = File::open(path)?;
// read a small amount to start with
file.read(&mut buffer)?;
let r = db.get_type(&buffer).filter(|mime|
let mut read = io::Result::Ok(0);
for _ in 0..3 {
// try to read the file up to 3 times, retrying if interrupted, bailing otherwise
file.seek(SeekFrom::Start(0))?;
read = file.read(&mut buffer);
match read {
Ok(_) => break,
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(_) => break,
}
}
let read = read?;
let r = db.get_type(&buffer);
if read < INITIAL_BUF_SIZE {
// the file is smaller than INITIAL_BUF_SIZE - there's no point reading it again
return Ok(r);
}
let r = r.filter(|mime|
// some mime types should be investigated further, reading up to BUF_SIZE even if they've been determined already
// one such type is XML - there are many more specific types that can be determined by reading further (such as SVG)
mime != &mime::TEXT_XML
@ -263,24 +291,17 @@ pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
return Ok(r);
}
// attempt to read up to the BUF_SIZE bytes of the file.
// we've already read the first 128 bytes into a buffer, but i can't see an obvious way to reuse them in a way that's
// faster than simply moving the seek position back to the start of the file and re-reading the whole BUF_SIZE bytes.
// for example, reading (BUF_SIZE - INITIAL_BUF_SIZE) bytes, then concatenating the original INITIAL_BUF_SIZE buffer
// with this new one would presumably be faster - but it's not. i think it's more expensive to create the iterator,
// collect the contents, etc. i'll have to look into this more. i don't at all doubt that there IS a way to do this
// efficiently, and i can think of a way in principle, but i'm not sure how to express it in a way that is both
// idiomatic/safe and fast.
// attempt to read up to BUF_SIZE bytes of the file.
let mut buffer = [0; BUF_SIZE];
file.seek(SeekFrom::Start(0))?;
file.read(&mut buffer)?;
Ok(db.get_type(&buffer))
}
// Returns a list of known extensions for this mime type, if any.
// This function uses the [Mime]'s "essence" rather than the [Mime] itself - mime_guess::get_mime_extensions ignores
// the type suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. Passing the
// essence_str (which includes the suffix) fixes this.
/// Returns a list of known extensions for this mime type, if any.
/// This function uses the [`Mime`]'s "essence" rather than the [`Mime`] itself - [`mime_guess::get_mime_extensions`]
/// ignores the type suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. Passing the
/// `essence_str` (which includes the suffix) fixes this.
pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
if let Ok(cache) = MIMEXT.read() {
if let Some(exts) = cache.get(&essence) {
@ -288,7 +309,6 @@ pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
}
}
let essence = essence;
let mut exts = mime_guess::get_mime_extensions_str(essence.as_str());
if exts.is_none() {
// no matches :c
@ -342,9 +362,13 @@ pub fn mime_extension_lookup(essence: String) -> Option<Vec<String>> {
]
.concat()
} else if essence == "application/x-ms-dos-executable" {
// both .dll and .exe files are given the same mime type... but you definitely don't want to rename one to the
// .dll, .exe, and .scr files are given the same mime type... but you definitely don't want to rename one to the
// other!
[vec![String::from("dll"), String::from("exe")], possible_exts].concat()
[
vec![String::from("dll"), String::from("exe"), String::from("scr")],
possible_exts,
]
.concat()
} else {
possible_exts
})

View File

@ -1,6 +1,10 @@
//! The [`Findings`] and [`ScanError`] structs, used for conveying whether a given file was able to be scanned and
//! whether its MIME type could be inferred.
// SPDX-FileCopyrightText: 2021 Lynnesbian
// SPDX-License-Identifier: LGPL-3.0-or-later
//! The [`Findings`] and [`ScanError`] structs, used for conveying whether a given file was able to be scanned, whether
//! its MIME type could be inferred, and whether the file should be renamed.
use std::cmp::Ordering;
use std::fmt::{Display, Formatter};
use std::path::{Path, PathBuf};
@ -11,8 +15,8 @@ use serde::{ser::SerializeStruct, Serializer};
use crate::files::mime_extension_lookup;
use crate::String;
/// Information about a scanned file.
#[derive(Ord, PartialOrd, Eq, PartialEq)]
/// Information about a successfully scanned file.
#[derive(Eq, PartialEq, Debug)]
pub struct Findings {
/// The location of the scanned file.
pub file: PathBuf,
@ -22,6 +26,36 @@ pub struct Findings {
pub mime: Mime,
}
impl Findings {
	/// Returns the recommended extension for this file, if known.
	///
	/// The recommendation is the first entry of the list returned by [`mime_extension_lookup`] for this file's MIME
	/// essence string. Returns `None` if the lookup yields nothing, or if it yields an empty list.
	pub fn recommended_extension(&self) -> Option<String> {
		// take the first extension by value instead of indexing with `[0]` and cloning - this has no panic path if the
		// list happens to be empty, and doesn't need a clone
		mime_extension_lookup(self.mime.essence_str().into()).and_then(|extensions| extensions.into_iter().next())
	}

	/// Returns the recommended path for this file - i.e. what it should be renamed to - if known.
	///
	/// This is the file's current path with its extension replaced by the result of
	/// [`Findings::recommended_extension`]. Returns `None` when no extension can be recommended.
	pub fn recommended_path(&self) -> Option<PathBuf> {
		self
			.recommended_extension()
			.map(|ext| self.file.with_extension(ext.as_str()))
	}
}
impl PartialOrd<Self> for Findings {
	/// Delegates to the [`Ord`] implementation, so the result is always `Some`.
	fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
		let ordering = Ord::cmp(self, other);
		Some(ordering)
	}
}
impl Ord for Findings {
	/// Orders findings by whether an extension can be recommended, then by path.
	///
	/// A finding with no recommended extension compares as [`Ordering::Greater`] than one that has a recommendation.
	/// When both or neither have one, the `file` paths are compared instead.
	fn cmp(&self, other: &Self) -> Ordering {
		// the intent is to keep the "no known extension for x" comments grouped apart from the "mv x y" instructions,
		// with the former appearing first.
		// NOTE(review): `Greater` places extension-less findings *last* in an ascending sort - confirm that callers sort
		// in a way that actually outputs them first.
		let has_ext = self.recommended_extension().is_some();
		let other_has_ext = other.recommended_extension().is_some();

		match (has_ext, other_has_ext) {
			(false, true) => Ordering::Greater,
			(true, false) => Ordering::Less,
			// same "known-ness" on both sides - fall back to comparing paths
			_ => self.file.cmp(&other.file),
		}
	}
}
#[cfg(feature = "json")]
impl serde::Serialize for Findings {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
@ -38,12 +72,7 @@ impl serde::Serialize for Findings {
}
}
impl Findings {
pub fn recommended_extension(&self) -> Option<String> {
mime_extension_lookup(self.mime.essence_str().into()).map(|extensions| extensions[0].clone())
}
}
/// Errors that can occur while scanning a file with [`scan_file`](crate::files::scan_file).
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
#[cfg_attr(feature = "json", derive(serde::Serialize))]
#[cfg_attr(feature = "json", serde(tag = "type", content = "path"))]

View File

@ -1,4 +1,9 @@
//! The various formats that [fif](crate) can output to.
// SPDX-FileCopyrightText: 2021 Lynnesbian
// SPDX-License-Identifier: LGPL-3.0-or-later
//! Logic for handling the various formats that fif can output to.
#![allow(missing_copy_implementations)]
use std::ffi::OsStr;
use std::io::{self, Write};
@ -7,7 +12,6 @@ use std::os::unix::ffi::OsStrExt;
use std::path::Path;
use cfg_if::cfg_if;
use itertools::Itertools;
use snailquote::escape;
use crate::findings::ScanError;
@ -15,7 +19,7 @@ use crate::utils::CLAP_LONG_VERSION;
use crate::Findings;
use crate::String;
/// A macro for creating an array of `Writable`s without needing to pepper your code with `into()`s.
/// A macro for creating an array of [`Writable`]s without needing to pepper your code with `into()`s.
/// # Usage
/// ```
/// use crate::fif::writables;
@ -44,16 +48,13 @@ macro_rules! writables {
}
#[macro_export]
/// Does the same thing as [writables], but adds a Newline to the end.
/// Does the same thing as [`writables`], but adds a Newline to the end.
macro_rules! writablesln {
// accepts one or more comma-separated token trees
[$($args:tt),+] => {
// expands to a reference to an array: each argument is passed through `writables!(@do ...)`, and a final
// `writables!(@do Newline)` entry is appended after them
&[$(writables!(@do $args),)* writables!(@do Newline)]
};
}
#[doc(hidden)]
type Entries<'a> = [Result<Findings, ScanError<'a>>];
#[derive(Debug, PartialEq)]
pub enum Writable<'a> {
String(&'a str),
@ -126,26 +127,10 @@ pub trait FormatSteps {
fn no_known_extension<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
fn unreadable<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
fn unknown_type<W: Write>(&self, _f: &mut W, _path: &Path) -> io::Result<()>;
fn header<W: Write>(&self, _f: &mut W, _entries: &Entries) -> io::Result<()>;
fn footer<W: Write>(&self, _f: &mut W, _entries: &Entries) -> io::Result<()>;
fn write_steps<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
self.header(f, entries)?;
// output will be generated in the order:
// - files that couldn't be read
// - files with no known mime type
// - files with no known extension
// - files with a known extension
// files that already have a correct extension won't be represented in the output.
// sort errors so unreadable files appear before files with unknown mimetypes - ScanError impls Ord such that
// ScanError::File > ScanError::Mime
let errors = entries.iter().filter_map(|e| e.as_ref().err()).sorted_unstable();
// sort files so that files with no known extension come before those with known extensions - None > Some("jpg")
let findings = entries
.iter()
.filter_map(|e| e.as_ref().ok())
.sorted_unstable_by(|a, b| b.recommended_extension().cmp(&a.recommended_extension()).reverse());
fn header<W: Write>(&self, _f: &mut W) -> io::Result<()>;
fn footer<W: Write>(&self, _f: &mut W) -> io::Result<()>;
fn write_steps<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
self.header(f)?;
for error in errors {
match error {
@ -156,33 +141,34 @@ pub trait FormatSteps {
}
}
if findings.len() != entries.len() {
// if these lengths aren't the same, there was at least one error
if !errors.is_empty() {
// add a blank line between the errors and commands
smart_write(f, writables![Newline])?;
}
for finding in findings {
if let Some(ext) = finding.recommended_extension() {
self.rename(f, finding.file.as_path(), &finding.file.with_extension(ext.as_str()))?;
if let Some(name) = finding.recommended_path() {
self.rename(f, finding.file.as_path(), &name)?;
} else {
self.no_known_extension(f, finding.file.as_path())?;
}
}
self.footer(f, entries)
self.footer(f)
}
}
pub trait Format {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()>;
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()>;
}
/// Bourne-Shell compatible script.
pub struct Shell;
impl Format for Shell {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) }
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
self.write_steps(f, findings, errors)
}
}
impl FormatSteps for Shell {
@ -212,7 +198,7 @@ impl FormatSteps for Shell {
smart_write(f, writablesln!["# Failed to detect mime type for ", path])
}
fn header<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(
f,
writablesln!["#!/usr/bin/env sh", Newline, "# ", (generated_by().as_str())],
@ -225,9 +211,7 @@ impl FormatSteps for Shell {
smart_write(f, writablesln![Newline, "set -e", Newline])
}
fn footer<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
smart_write(f, writablesln![Newline, "echo 'Done.'"])
}
fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> { smart_write(f, writablesln![Newline, "echo 'Done.'"]) }
}
// PowerShell is a noun, not a type
@ -236,7 +220,9 @@ impl FormatSteps for Shell {
pub struct PowerShell;
impl Format for PowerShell {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) }
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
self.write_steps(f, findings, errors)
}
}
impl FormatSteps for PowerShell {
@ -280,7 +266,7 @@ impl FormatSteps for PowerShell {
smart_write(f, writablesln!["<# Failed to detect mime type for ", path, " #>"])
}
fn header<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(
f,
writablesln!["#!/usr/bin/env pwsh", Newline, "<# ", (generated_by().as_str()), " #>"],
@ -293,14 +279,16 @@ impl FormatSteps for PowerShell {
smart_write(f, writables![Newline])
}
fn footer<W: Write>(&self, f: &mut W, _: &Entries) -> io::Result<()> {
fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(f, writablesln![Newline, "Write-Output 'Done!'"])
}
}
pub struct Text;
impl Format for Text {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> { self.write_steps(f, entries) }
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
self.write_steps(f, findings, errors)
}
}
impl FormatSteps for Text {
@ -320,14 +308,15 @@ impl FormatSteps for Text {
smart_write(f, writablesln!["Couldn't determine type for ", path])
}
fn header<W: Write>(&self, f: &mut W, _entries: &Entries) -> io::Result<()> {
fn header<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(f, writablesln![(generated_by().as_str()), Newline])
}
fn footer<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
fn footer<W: Write>(&self, f: &mut W) -> io::Result<()> {
smart_write(
f,
writablesln![Newline, "Processed ", (entries.len().to_string().as_str()), " files"],
// writablesln![Newline, "Processed ", (entries.len().to_string().as_str()), " files"],
writablesln![Newline, "Done."],
)
}
}
@ -337,20 +326,13 @@ pub struct Json;
#[cfg(feature = "json")]
impl Format for Json {
fn write_all<W: Write>(&self, f: &mut W, entries: &Entries) -> io::Result<()> {
use itertools::Either;
fn write_all<W: Write>(&self, f: &mut W, findings: &[Findings], errors: &[ScanError]) -> io::Result<()> {
#[derive(serde::Serialize)]
struct SerdeEntries<'a> {
errors: &'a Vec<&'a ScanError<'a>>,
findings: &'a Vec<&'a Findings>,
errors: &'a [ScanError<'a>],
findings: &'a [Findings],
}
let (errors, findings) = &entries.iter().partition_map(|entry| match entry {
Err(e) => Either::Left(e),
Ok(f) => Either::Right(f),
});
let result = serde_json::to_writer_pretty(f, &SerdeEntries { errors, findings });
if let Err(err) = result {

View File

@ -1,8 +1,21 @@
#![forbid(unsafe_code)]
#![warn(trivial_casts, unused_lifetimes, unused_qualifications)]
// SPDX-FileCopyrightText: 2021 Lynnesbian
// SPDX-License-Identifier: LGPL-3.0-or-later
//! This library consists of all of the things fif needs to run. It only exists as a library to separate code, and to
//! make testing a bit easier. I don't recommend using this as a library for your crate, as it may have breaking
//! changes without incrementing the major version, as it's really only meant to be a place for fif's internals to live.
//! make documentation and testing a bit easier. I don't recommend using this as a library for your crate, as it may
//! have breaking changes without incrementing the major version - it's really only meant to be a place for fif's
//! internals to live.
//!
//! You can view [fif's README](https://gitlab.com/Lynnesbian/fif/-/blob/master/README.md#fif) to learn more.
#![forbid(unsafe_code)]
#![warn(
trivial_casts,
unused_lifetimes,
unused_qualifications,
missing_copy_implementations,
unused_allocation
)]
pub mod files;
pub mod findings;
@ -19,20 +32,25 @@ use crate::mime_db::MimeDb;
cfg_if! {
if #[cfg(not(all(target_endian = "big", target_pointer_width = "32")))] {
// most architectures
/// On most architectures, this is a type alias for [`SmartString`](crate). However, on [architectures
/// unsupported by `smartstring`](https://github.com/bodil/smartstring/blob/v0.2.9/src/config.rs#L91-L93), this
/// is simply an alias to [`std::string::String`].
pub use smartstring::alias::String;
} else {
// powerpc and other big endian 32-bit archs
/// On most architectures, this is a type alias for [`SmartString`](crate). However, on [architectures
/// unsupported by `smartstring`](https://github.com/bodil/smartstring/blob/v0.2.9/src/config.rs#L91-L93), this
/// is simply an alias to [`std::string::String`].
// one particular arch that this needs to be turned off for is powerpc (the 32 bit variant that the pre-G5
// powerpc macs used)
pub use std::string::String;
}
}
cfg_if! {
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
/// A [Lazy] holding an instance of [mime_db::MimeDb].
/// A [`Lazy`] holding an instance of [`mime_db::MimeDb`]. Initialised at program startup.
pub static MIMEDB: Lazy<mime_db::InferDb> = Lazy::new(crate::mime_db::InferDb::init);
} else {
/// A [Lazy] holding an instance of [mime_db::MimeDb].
/// A [`Lazy`] holding an instance of [`mime_db::MimeDb`]. Initialised at program startup.
pub static MIMEDB: Lazy<mime_db::XdgDb> = Lazy::new(crate::mime_db::XdgDb::init);
}
}

View File

@ -1,45 +1,40 @@
// SPDX-FileCopyrightText: 2021 Lynnesbian
// SPDX-License-Identifier: LGPL-3.0-or-later
// fif - a command-line tool for detecting and optionally correcting files with incorrect extensions.
// Copyright (C) 2021 Lynnesbian
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
#![forbid(unsafe_code)]
#![warn(trivial_casts, unused_lifetimes, unused_qualifications)]
#![warn(
trivial_casts,
unused_lifetimes,
unused_qualifications,
missing_copy_implementations,
unused_allocation
)]
use std::io::{stdout, BufWriter, Write};
use std::io::{stdin, stdout, BufWriter, Write};
use std::process::exit;
use cfg_if::cfg_if;
use clap::Clap;
use fif::files::{scan_directory, scan_from_walkdir};
use fif::formats::Format;
use fif::parameters::OutputFormat;
use fif::formats::{self, Format};
use fif::parameters::{self, OutputFormat, Prompt};
use fif::utils::{os_name, CLAP_LONG_VERSION};
use fif::{formats, parameters};
use itertools::Itertools;
use log::{debug, error, info, trace, warn, Level};
#[cfg(test)]
mod tests;
#[doc(hidden)]
#[allow(clippy::cognitive_complexity)]
#[allow(clippy::cognitive_complexity, clippy::too_many_lines)]
fn main() {
let args: parameters::Parameters = parameters::Parameters::parse();
let mut builder = env_logger::Builder::new();
builder
.filter_level(args.default_verbosity()) // set default log level
.filter_level(args.get_verbosity()) // set default log level
.parse_default_env() // set log level from RUST_LOG
.parse_env("FIF_LOG") // set log level from FIF_LOG
.format(|buf, r| {
@ -109,58 +104,118 @@ fn main() {
}
}
let results: Vec<_> = scan_from_walkdir(&entries, args.canonical_paths, use_threads)
.into_iter()
.filter(
|result| result.is_err() || !result.as_ref().unwrap().valid,
// TODO: find a way to trace! the valid files without doing ↓
// || if result.as_ref().unwrap().valid { trace!("{:?} ok", result.as_ref().unwrap().file); false } else { true }
)
.collect();
let (findings, errors) = scan_from_walkdir(&entries, args.canonical_paths, use_threads);
trace!("Scanning complete");
for result in &results {
match result {
Ok(r) => {
// check to see if debug logging is enabled before invoking debug! macro
// https://github.com/rust-lang/log/pull/394#issuecomment-630490343
if log::max_level() >= log::Level::Debug {
debug!(
"{:?} is of type {}, should have extension \"{}\"",
r.file,
r.mime,
r.recommended_extension().unwrap_or_else(|| "???".into())
);
}
}
Err(f) => warn!("{}", f),
}
}
if results.is_empty() {
if findings.is_empty() && errors.is_empty() {
info!("All files have valid extensions!");
exit(exitcode::OK);
}
let mut buffered_stdout = BufWriter::new(stdout());
// remove files that already have the correct extension, then sort - first by whether or not they have a
// recommended_extension() (with None before Some(ext)), then by filename
let findings = findings
.into_iter()
.filter(|f| !f.valid)
.sorted_unstable()
.collect_vec();
// sort errors (File errors before Mime errors), then log a warning for each error
let errors = errors
.into_iter()
.sorted_unstable()
.map(|e| {
warn!("{}", &e);
e
})
.collect_vec();
let result = match args.output_format {
OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &results),
OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &results),
#[cfg(feature = "json")]
OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &results),
OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &results),
};
if args.fix {
fn ask(message: &str) -> bool {
let mut buf = String::with_capacity(1);
print!("{} [y/N] ", message);
if result.is_err() {
error!("Failed to write to stdout.");
exit(exitcode::IOERR);
}
// flush stdout to ensure message is displayed
stdout().flush().expect("Failed to flush stdout");
if buffered_stdout.flush().is_err() {
error!("Failed to flush stdout.");
exit(exitcode::IOERR);
if let Err(e) = stdin().read_line(&mut buf) {
// something went wrong while reading input - just exit
error!("{}", e);
exit(exitcode::IOERR)
}
buf.starts_with('y') || buf.starts_with('Y')
}
let prompt = args.prompt.unwrap_or(Prompt::Error);
for f in findings {
if let Some(rename_to) = f.recommended_path() {
let will_rename = {
if !args.overwrite && rename_to.exists() {
// handles: --prompt never, --prompt error, --prompt always
// user didn't specify --overwrite, and the destination exists
info!("Not renaming {:#?}: Target {:#?} exists", f.file, rename_to);
false
} else if prompt == Prompt::Never {
// handles: --prompt never --overwrite
// user specified --prompt never in conjunction with --overwrite, so always rename
true
} else if prompt == Prompt::Error || ask(&*format!("Rename {:#?} to {:#?}?", &f.file, &rename_to)) {
// handles: --prompt error --overwrite, --prompt always --overwrite [y]
// if the target exists, prompt before renaming; otherwise, just rename
!rename_to.exists() || ask(&*format!("Destination {:#?} already exists, overwrite?", rename_to))
} else {
// handles: --prompt always --overwrite [n]
// user was prompted and replied "no"
false
}
};
if !will_rename {
continue;
}
loop {
match std::fs::rename(&f.file, &rename_to) {
Ok(_) => {
info!("Renamed {:#?} -> {:#?}", f.file, rename_to);
break;
}
Err(e) => {
warn!("Couldn't rename {:#?} to {:#?}: {:#?}", f.file, rename_to, e);
// if the user passed --prompt never, continue to the next file
// otherwise, prompt user to retry move, retrying until the rename succeeds or they respond "N"
if prompt == Prompt::Never || !ask(&*format!("Error while renaming file: {:#?}. Try again?", e)) {
break;
}
}
}
}
} else {
// no recommended name :c
info!("No known extension for file {:#?} of type {}", f.file, f.mime);
}
}
} else {
let mut buffered_stdout = BufWriter::new(stdout());
let result = match args.output_format {
// TODO: simplify this to something like formats::write_all(args.output_format, ...)
OutputFormat::Sh => formats::Shell.write_all(&mut buffered_stdout, &findings, &errors),
OutputFormat::PowerShell => formats::PowerShell.write_all(&mut buffered_stdout, &findings, &errors),
#[cfg(feature = "json")]
OutputFormat::Json => formats::Json.write_all(&mut buffered_stdout, &findings, &errors),
OutputFormat::Text => formats::Text.write_all(&mut buffered_stdout, &findings, &errors),
};
if result.is_err() {
error!("Failed to write to stdout.");
exit(exitcode::IOERR);
}
if buffered_stdout.flush().is_err() {
error!("Failed to flush stdout.");
exit(exitcode::IOERR);
}
}
debug!("Done");

View File

@ -1,8 +1,20 @@
//! Backend-neutral Mime database implementation.
// SPDX-FileCopyrightText: 2021 Lynnesbian
// SPDX-License-Identifier: LGPL-3.0-or-later
//! Backend-neutral Mime database abstraction.
use cfg_if::cfg_if;
use mime::Mime;
/// A thin wrapper around either [`Infer`] or [`xdg-mime::SharedMimeInfo`], depending on which [cargo features]
/// fif was compiled with. By default, fif uses an [`Infer`]-based implementation on Windows, and an
/// [`xdg-mime`]-based one everywhere else. This behaviour can be changed at compile time by using the aforementioned
/// [cargo features].
///
/// [cargo features]: https://gitlab.com/Lynnesbian/fif/-/wikis/Cargo-Features
/// [`Infer`]: https://docs.rs/infer/
/// [`xdg-mime::SharedMimeInfo`]: https://docs.rs/xdg-mime/0/xdg_mime/struct.SharedMimeInfo.html
/// [`xdg-mime`]: https://docs.rs/xdg-mime/
pub trait MimeDb {
/// Initialise the database.
fn init() -> Self;
@ -14,6 +26,7 @@ cfg_if! {
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
use std::str::FromStr;
/// The [`Infer`](https://docs.rs/infer/)-based implementation of [`MimeDb`].
pub struct InferDb {
db: infer::Infer,
}
@ -48,17 +61,15 @@ cfg_if! {
info.add("audio/x-aiff", "aiff", |buf| {
// as added by https://github.com/bojand/infer/pull/48/files
// this should be removed when (if) that PR is accepted
buf.len() > 11
&& buf[0] == 0x46
&& buf[1] == 0x4F
&& buf[2] == 0x52
&& buf[3] == 0x4D
&& buf[8] == 0x41
&& buf[9] == 0x49
&& buf[10] == 0x46
&& buf[11] == 0x46
buf.starts_with(b"\x46\x4f\x52\x4d\x41\x49\x46\x46")
});
// info.add("application/x-msi", "msi", |buf| {
// TODO: find a way to detect MSI files properly - this just detects those weird windows OLE files and therefore
// also picks up on .doc files
// buf.starts_with(b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1")
// });
info.add("image/svg+xml", "svg", |buf| {
// before doing the moderately expensive SVG check, we should make sure that the input is actually SGML-ish,
// by which i mean, starts with anywhere from zero to ∞-1 whitespace characters, and then a less than sign,
@ -92,10 +103,16 @@ cfg_if! {
}
/// Attempts to identify the MIME type of `data` using the wrapped `infer` database.
///
/// Returns `None` if `infer` doesn't recognise the data, or if the type string it reports can't be parsed as a
/// [`Mime`] - an unparseable type string is treated the same as an unknown file rather than causing a panic.
fn get_type(&self, data: &[u8]) -> Option<Mime> {
    self.db
        .get(data)
        // `ok()` discards the parse error: callers only care whether a usable `Mime` was found
        .and_then(|kind| Mime::from_str(kind.mime_type()).ok())
}
}
} else {
/// The [`xdg-mime`](https://docs.rs/xdg-mime/)-based implementation of [`MimeDb`].
pub struct XdgDb {
db: xdg_mime::SharedMimeInfo,
}

View File

@ -1,4 +1,7 @@
//! [Clap] struct used to parse command line arguments.
// SPDX-FileCopyrightText: 2021 Lynnesbian
// SPDX-License-Identifier: LGPL-3.0-or-later
//! Command line argument parsing logic and associated functions.
use std::collections::BTreeSet;
use std::path::PathBuf;
@ -11,13 +14,17 @@ use crate::String as StringType;
cfg_if! {
if #[cfg(windows)] {
/// The default [`OutputFormat`] to use.
const DEFAULT_FORMAT: &str = "powershell";
} else {
/// The default [`OutputFormat`] to use.
const DEFAULT_FORMAT: &str = "sh";
}
}
#[derive(Clap, PartialEq, Debug)]
#[derive(Clap, PartialEq, Debug, Copy, Clone)]
/// The format to use when running fif without the `--fix` flag. Specified at runtime with the `-o`/`--output-format`
/// flag.
pub enum OutputFormat {
/// A Bourne shell compatible script.
#[clap(alias = "shell", alias = "bash")]
@ -32,6 +39,18 @@ pub enum OutputFormat {
Json,
}
#[derive(Clap, PartialEq, Debug, Copy, Clone)]
/// Specifies under what conditions the user should be prompted when running fif in `--fix` mode. Defaults to `Error`.
/// Specified at runtime with the `-p`/`--prompt` flag.
// NOTE: the `Error` default is not encoded in this type. `Parameters::prompt` is an `Option<Prompt>`, and `main`
// falls back to it with `args.prompt.unwrap_or(Prompt::Error)`.
pub enum Prompt {
/// Never prompt.
// with this setting, `main` also moves on to the next file when a rename fails, rather than offering a retry.
Never,
/// Prompt only on errors, and on overwrites, if `--overwrite` is set.
// i.e. `main` renames without asking first, and only asks when the destination already exists or a rename fails.
Error,
/// Prompt for every rename.
// `main` asks before each rename and, if the answer is yes and the destination already exists, asks again before
// overwriting it.
Always,
}
#[derive(Clap, Debug)]
#[allow(clippy::struct_excessive_bools)]
#[clap(
@ -44,9 +63,24 @@ pub enum OutputFormat {
This program is free software: you can redistribute it and/or modify \
it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 \
of the License, or (at your option) any later version.",
setting(AppSettings::ColoredHelp)
setting(AppSettings::ColoredHelp),
max_term_width = 120
)]
/// [`Clap`]-derived struct used to parse command line arguments.
pub struct Parameters {
/// Automatically rename files to use the correct extension. By default, fif will only prompt you on errors and
/// overwrites - use `--prompt` to change this.
#[clap(long)]
pub fix: bool,
/// Requires --fix. Should fif prompt you `Never`, only on `Error`s and overwrites, or `Always`?
#[clap(short = 'p', long, arg_enum, requires = "fix")]
pub prompt: Option<Prompt>,
/// Requires --fix. Allow overwriting files. Warning: When used in combination with `--prompt never`, fif will
/// overwrite files without prompting!
#[clap(long, requires = "fix")]
pub overwrite: bool,
// NOTE: clap's comma-separated argument parser makes it impossible to specify extensions with commas in their name -
// `-e sil\,ly` is treated as ["sil", "ly"] rather than as ["silly"], no matter how i escape the comma (in bash,
// anyway). is this really an issue? it does technically exclude some perfectly valid extensions, but i've never seen
@ -143,6 +177,7 @@ pub struct Parameters {
pub jobs: usize,
}
/// Validation function for argument parsing that ensures passed-in extensions are lowercase.
fn lowercase_exts(exts: &str) -> Result<(), String> {
// TODO: i would much rather accept uppercase exts and convert them to lowercase than just rejecting uppercase exts...
if exts.to_lowercase() != exts {
@ -152,7 +187,7 @@ fn lowercase_exts(exts: &str) -> Result<(), String> {
}
/// Further options relating to scanning.
#[derive(PartialEq, Debug)]
#[derive(PartialEq, Debug, Copy, Clone)]
#[allow(clippy::struct_excessive_bools)]
pub struct ScanOpts {
/// Whether hidden files and directories should be scanned.
@ -225,6 +260,7 @@ impl Parameters {
}
}
/// Populates a [`ScanOpts`] struct with values from [`self`].
pub const fn get_scan_opts(&self) -> ScanOpts {
ScanOpts {
hidden: self.scan_hidden,
@ -234,10 +270,8 @@ impl Parameters {
}
}
pub fn default_verbosity(&self) -> log::LevelFilter {
#![allow(clippy::missing_const_for_fn)]
// match was not permitted inside const functions until 1.46
/// Gets the verbosity set by the user via the `-v` and `-q` flags.
pub const fn get_verbosity(&self) -> log::LevelFilter {
use log::LevelFilter;
match self.quiet {
@ -256,7 +290,7 @@ impl Parameters {
}
/// Sets of extensions for use with [Parameter](crate::parameters::Parameters)'s `-E` flag.
#[derive(Clap, PartialEq, Debug)]
#[derive(Clap, PartialEq, Debug, Copy, Clone)]
pub enum ExtensionSet {
/// Extensions used for image file formats, such as `png`, `jpeg`, `webp`, etc.
Images,

View File

@ -1,4 +1,8 @@
use std::collections::HashMap;
// SPDX-FileCopyrightText: 2021 2021 Lynnesbian
// SPDX-FileCopyrightText: 2021 Lynnesbian
// SPDX-License-Identifier: LGPL-3.0-or-later
use std::collections::{BTreeMap, HashMap};
use std::ffi::OsStr;
use std::path::{Path, PathBuf};
@ -7,7 +11,10 @@ use fif::files::{mime_extension_lookup, scan_directory, scan_from_walkdir, BUF_S
use fif::findings::Findings;
use fif::formats::{Format, PowerShell, Shell};
use fif::mime_db::MimeDb;
use fif::utils::APPLICATION_ZIP;
use fif::{String, MIMEDB};
use itertools::Itertools;
use maplit::{btreeset, hashmap};
use mime::{Mime, APPLICATION_OCTET_STREAM, APPLICATION_PDF, IMAGE_JPEG, IMAGE_PNG};
use crate::parameters::ExtensionSet;
@ -18,21 +25,17 @@ const PNG_BYTES: &[u8] = b"\x89\x50\x4E\x47\x0D\x0A\x1A\x0A";
const PDF_BYTES: &[u8] = b"%PDF-";
const ZIP_BYTES: &[u8] = b"PK\x03\x04";
fn application_zip() -> Mime {
use std::str::FromStr;
Mime::from_str("application/zip").unwrap()
}
#[test]
/// Ensure that `extension_from_path` successfully returns the extension from a set of paths.
fn get_ext() {
let mut ext_checks: HashMap<_, Option<&OsStr>> = HashMap::new();
ext_checks.insert(Path::new("test.txt"), Some(OsStr::new("txt")));
ext_checks.insert(Path::new("test.zip"), Some(OsStr::new("zip")));
ext_checks.insert(Path::new("test.tar.gz"), Some(OsStr::new("gz")));
ext_checks.insert(Path::new("test."), Some(OsStr::new("")));
ext_checks.insert(Path::new("test"), None);
ext_checks.insert(Path::new(".hidden"), None);
let ext_checks: HashMap<_, Option<&OsStr>> = hashmap![
Path::new("test.txt") => Some(OsStr::new("txt")),
Path::new("test.zip") => Some(OsStr::new("zip")),
Path::new("test.tar.gz") => Some(OsStr::new("gz")),
Path::new("test.") => Some(OsStr::new("")),
Path::new("test") => None,
Path::new(".hidden") => None,
];
for (path, ext) in ext_checks {
assert_eq!(path.extension(), ext);
@ -45,7 +48,7 @@ fn detect_type() {
assert_eq!(MIMEDB.get_type(JPEG_BYTES), Some(IMAGE_JPEG));
assert_eq!(MIMEDB.get_type(PNG_BYTES), Some(IMAGE_PNG));
assert_eq!(MIMEDB.get_type(PDF_BYTES), Some(APPLICATION_PDF));
assert_eq!(MIMEDB.get_type(ZIP_BYTES), Some(application_zip()));
assert_eq!(MIMEDB.get_type(ZIP_BYTES), Some(APPLICATION_ZIP.clone()));
}
#[test]
@ -61,7 +64,7 @@ fn recommend_ext() {
assert!(mime_extension_lookup(APPLICATION_PDF.essence_str().into())
.unwrap()
.contains(&String::from("pdf")));
assert!(mime_extension_lookup(application_zip().essence_str().into())
assert!(mime_extension_lookup(APPLICATION_ZIP.essence_str().into())
.unwrap()
.contains(&String::from("zip")));
}
@ -81,14 +84,15 @@ fn simple_directory() {
// set of files to scan. every file except the last two has a magic number corresponding to its extension -
// "wrong.jpg" is actually a png, and "ignore.fake_ext" is a zip with a made-up extension.
let mut files = HashMap::new();
files.insert("test.jpg", JPEG_BYTES);
files.insert("test.jpeg", JPEG_BYTES);
files.insert("test.png", PNG_BYTES);
files.insert("test.pdf", PDF_BYTES);
files.insert("test.zip", ZIP_BYTES);
files.insert("wrong.jpg", PNG_BYTES);
files.insert("ignore.fake_ext", ZIP_BYTES);
let files = hashmap![
"test.jpg" => JPEG_BYTES,
"test.jpeg" => JPEG_BYTES,
"test.png" => PNG_BYTES,
"test.pdf" => PDF_BYTES,
"test.zip" => ZIP_BYTES,
"wrong.jpg" => PNG_BYTES,
"ignore.fake_ext" => ZIP_BYTES,
];
let dir = tempdir().expect("Failed to create temporary directory.");
set_current_dir(dir.path()).expect("Failed to change directory.");
@ -116,14 +120,14 @@ fn simple_directory() {
let use_threads = cfg!(feature = "multi-threaded");
let results = scan_from_walkdir(&entries, false, use_threads);
let canonical_results = scan_from_walkdir(&entries, true, use_threads);
let results = scan_from_walkdir(&entries, false, use_threads).0;
let canonical_results = scan_from_walkdir(&entries, true, use_threads).0;
assert_eq!(results.len(), canonical_results.len());
for (result, canonical_result) in results.iter().zip(canonical_results.iter()) {
// there should be no IO errors during this test. any IO errors encountered are outside the scope of this test.
let result = result.as_ref().expect("Error while scanning file");
let canonical_result = canonical_result.as_ref().expect("Error while scanning file");
// let result = result.as_ref().expect("Error while scanning file");
// let canonical_result = canonical_result.as_ref().expect("Error while scanning file");
// paths should be canonical
assert_eq!(canonicalize(&result.file).unwrap(), canonical_result.file);
@ -136,6 +140,11 @@ fn simple_directory() {
assert_eq!(result.mime, IMAGE_PNG);
// 3. ensure the recommended extension for "wrong.jpg" is "png"
assert_eq!(&result.recommended_extension().unwrap(), &String::from("png"));
// 4. ensure the recommended filename for "wrong.jpg" is "wrong.png"
assert_eq!(
result.recommended_path().unwrap().file_name(),
Some(OsStr::new("wrong.png"))
);
continue;
}
@ -146,6 +155,15 @@ fn simple_directory() {
.unwrap()
.contains(&result.recommended_extension().unwrap()));
// ensure that the recommended_path function outputs something beginning with "test"
assert!(result
.recommended_path()
.unwrap()
.file_name()
.unwrap()
.to_string_lossy()
.starts_with("test"));
// make sure the guessed mimetype is correct based on the extension of the scanned file
// because we already know that the extensions match the mimetype (as we created these files ourselves earlier in
// the test), all files with the "jpg" extension should be IMAGE_JPEGs, etc.
@ -156,7 +174,7 @@ fn simple_directory() {
"jpg" | "jpeg" => IMAGE_JPEG,
"png" => IMAGE_PNG,
"pdf" => APPLICATION_PDF,
"zip" => application_zip(),
"zip" => APPLICATION_ZIP.clone(),
_ => APPLICATION_OCTET_STREAM, // general "fallback" type
},
"Incorrect MIME type detected - got {:?} for a {:?} file",
@ -234,12 +252,7 @@ fn exclude_overrides() {
let args: Parameters = Parameters::parse_from(vec!["fif", "-e", "abc,def,ghi,jkl", "-x", "abc,def"]);
let extensions = args.extensions();
assert!(extensions.is_some(), "Extensions should be set!");
let extensions = extensions.unwrap();
assert!(!extensions.contains(&"abc"));
assert!(!extensions.contains(&"def"));
assert!(extensions.contains(&"ghi"));
assert!(extensions.contains(&"jkl"));
assert_eq!(extensions, Some(btreeset!["ghi", "jkl"]));
}
#[test]
@ -249,10 +262,7 @@ fn exclude_set_overrides_includes() {
let args: Parameters = Parameters::parse_from(vec!["fif", "-e", "jpg,flac", "-X", "images"]);
let extensions = args.extensions();
assert!(extensions.is_some(), "Extensions should be set!");
let mut extensions = extensions.unwrap().into_iter();
assert_eq!(extensions.next(), Some("flac"), "Extensions should contain flac!");
assert_eq!(extensions.next(), None, "Too many extensions!");
assert_eq!(extensions, Some(btreeset!["flac"]));
}
#[test]
@ -295,6 +305,10 @@ fn rejects_bad_args() {
vec!["fif", "-e", ",,,,,"],
// `-j` with a negative value:
vec!["fif", "-j", "-1"],
// `--prompt` without `--fix`:
vec!["fif", "--prompt", "always"],
// `--overwrite` without `--fix`:
vec!["fif", "--overwrite"],
];
for test in &tests {
@ -309,7 +323,7 @@ fn identify_random_bytes() {
use rand::RngCore;
let mut rng = rand::thread_rng();
let mut bytes: [u8; BUF_SIZE * 2] = [0; BUF_SIZE * 2];
let mut results: HashMap<Mime, i32> = HashMap::new();
let mut results: BTreeMap<Mime, i32> = BTreeMap::new();
for _ in 1..1000 {
rng.fill_bytes(&mut bytes);
@ -331,19 +345,19 @@ fn outputs_move_commands() {
use std::io::Read;
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
let entries = vec![Ok(Findings {
let findings = vec![Findings {
file: Path::new("misnamed_file.png").to_path_buf(),
valid: false,
mime: IMAGE_JPEG,
})];
}];
for format in &["Shell", "PowerShell"] {
let mut cursor = std::io::Cursor::new(Vec::new());
let mut contents = std::string::String::new();
match *format {
"Shell" => Shell.write_all(&mut cursor, &entries),
"PowerShell" => PowerShell.write_all(&mut cursor, &entries),
"Shell" => Shell.write_all(&mut cursor, &findings, &[]),
"PowerShell" => PowerShell.write_all(&mut cursor, &findings, &[]),
_ => unreachable!(),
}
.expect("Failed to write to cursor");
@ -371,17 +385,17 @@ fn test_json() {
use crate::formats::Json;
// create an example finding stating that "misnamed_file.png" has been identified as a jpeg file
let entries = vec![Ok(Findings {
let findings = vec![Findings {
file: Path::new("misnamed_file.png").to_path_buf(),
valid: false,
mime: IMAGE_JPEG,
})];
}];
let mut cursor = std::io::Cursor::new(Vec::new());
let mut contents = std::string::String::new();
Json
.write_all(&mut cursor, &entries)
.write_all(&mut cursor, &findings, &[])
.expect("Failed to write to cursor");
cursor.set_position(0);
@ -436,24 +450,62 @@ fn verbosity() {
"Failed to reject usage of both -q and -v!"
);
let mut expected_results = HashMap::new();
expected_results.insert("-qqqqqqqq", LevelFilter::Off);
expected_results.insert("-qqq", LevelFilter::Off);
expected_results.insert("-qq", LevelFilter::Error);
expected_results.insert("-q", LevelFilter::Warn);
expected_results.insert("-s", LevelFilter::Info);
expected_results.insert("-v", LevelFilter::Debug);
expected_results.insert("-vv", LevelFilter::Trace);
expected_results.insert("-vvv", LevelFilter::Trace);
expected_results.insert("-vvvvvvvv", LevelFilter::Trace);
let expected_results = hashmap![
"-qqqqqqqq" => LevelFilter::Off,
"-qqq" => LevelFilter::Off,
"-qq" => LevelFilter::Error,
"-q" => LevelFilter::Warn,
"-s" => LevelFilter::Info,
"-v" => LevelFilter::Debug,
"-vv" => LevelFilter::Trace,
"-vvv" => LevelFilter::Trace,
"-vvvvvvvv" => LevelFilter::Trace,
];
for (flags, level) in expected_results {
assert_eq!(Parameters::parse_from(&["fif", flags]).default_verbosity(), level);
assert_eq!(Parameters::parse_from(&["fif", flags]).get_verbosity(), level);
}
}
#[test]
/// Ensures that smart strings don't deviate from std's Strings
/// Ensures `os_name()`'s output is the same as [`std::env::consts::OS`], capitalisation notwithstanding
fn validate_os_name() {
    // lowercase both sides first: only the spelling has to agree, not the capitalisation
    let reported = fif::utils::os_name().to_lowercase();
    let expected = std::env::consts::OS.to_lowercase();
    assert_eq!(reported, expected);
}
#[test]
/// Ensures that [`Findings`] are sorted properly.
fn sort_findings() {
let findings = vec![
Findings {
file: Path::new("ccc").to_path_buf(),
valid: false,
mime: IMAGE_JPEG,
},
Findings {
file: Path::new("bbb.xyz").to_path_buf(),
valid: true,
mime: IMAGE_PNG,
},
Findings {
file: Path::new("aaa").to_path_buf(),
valid: true,
mime: APPLICATION_PDF,
},
];
let mut findings = findings.iter().sorted_unstable();
assert_eq!(findings.next().unwrap().file, Path::new("aaa"));
assert_eq!(findings.next().unwrap().file, Path::new("bbb.xyz"));
assert_eq!(findings.next().unwrap().file, Path::new("ccc"));
assert_eq!(findings.next(), None);
}
#[test]
/// Ensures that [`SmartString`]s don't deviate from std's Strings
fn validate_string_type() {
use std::string::String as StdString;
@ -465,5 +517,5 @@ fn validate_string_type() {
StdString::from("A long and therefore heap-allocated string")
);
// uncomment if i ever update to smartstring >= 0.2.9
// smartstring::validate();
smartstring::validate();
}

View File

@ -1,6 +1,12 @@
// SPDX-FileCopyrightText: 2021 Lynnesbian
// SPDX-License-Identifier: LGPL-3.0-or-later
//! Various minor utilities.
use std::str::FromStr;
use cfg_if::cfg_if;
use mime::Mime;
use once_cell::sync::Lazy;
use crate::String;
@ -18,11 +24,11 @@ cfg_if! {
}
}
/// The version defined in Cargo.toml, prefixed with a v (e.g. "v0.3.1")
/// The version defined in Cargo.toml, prefixed with a v (e.g. "v0.4.0")
pub(crate) static CLAP_VERSION: Lazy<String> = Lazy::new(|| String::from("v") + VERSION.unwrap_or("???"));
/// Similar to [`CLAP_VERSION`], followed by the chosen backend and abbreviated git commit hash in parentheses - For
/// example, "v0.3.6 (XDG-Mime backend, commit #043e097)"
/// The version defined in Cargo.toml, prefixed with a v (e.g. "v0.4.0"), followed by the chosen backend and
/// abbreviated git commit hash in parentheses - For example, "v0.4.0 (XDG-Mime backend, commit #043e097)"
pub static CLAP_LONG_VERSION: Lazy<String> = Lazy::new(|| {
format!(
"v{} ({} backend, commit #{})",
@ -33,6 +39,9 @@ pub static CLAP_LONG_VERSION: Lazy<String> = Lazy::new(|| {
.into()
});
/// A [`Mime`] representing the "application/zip" mimetype.
///
/// The value is parsed from a string literal the first time the static is dereferenced.
pub static APPLICATION_ZIP: Lazy<Mime> = Lazy::new(|| {
    // the input is a hard-coded, well-formed type string, so a parse failure here would be a programming error
    Mime::from_str("application/zip").expect("\"application/zip\" should always parse as a valid MIME type")
});
/// Returns the name of the target operating system with proper casing, like "Windows" or "macOS".
#[allow(clippy::option_map_unit_fn)]
pub fn os_name() -> String {

View File

@ -1,5 +1,9 @@
#!/usr/bin/env python
# SPDX-FileCopyrightText: 2021 Lynnesbian
#
# SPDX-License-Identifier: LGPL-3.0-or-later
import re
import subprocess
import sys