2022-01-22 16:41:24 +00:00
|
|
|
// SPDX-FileCopyrightText: 2021-2022 Lynnesbian
|
|
|
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
2021-10-05 14:24:08 +00:00
|
|
|
|
2021-10-05 15:30:13 +00:00
|
|
|
//! Backend-neutral Mime database abstraction.
|
2021-02-28 14:06:05 +00:00
|
|
|
|
2021-02-27 02:13:57 +00:00
|
|
|
use cfg_if::cfg_if;
|
2021-08-06 13:33:42 +00:00
|
|
|
use mime::Mime;
|
2021-02-14 18:58:57 +00:00
|
|
|
|
2021-10-05 15:30:13 +00:00
|
|
|
/// A thin wrapper around either [`Infer`] or [`xdg-mime::SharedMimeInfo`], depending on which [cargo features]
/// fif was compiled with. By default, fif uses an [`Infer`]-based implementation on non-Unix platforms (such as
/// Windows), and an [`xdg-mime`]-based one everywhere else. This behaviour can be changed at compile time by using the
/// aforementioned [cargo features].
///
/// Exactly one implementation of this trait is compiled in; which one is decided by the `cfg_if!` block further down
/// in this file.
///
/// [cargo features]: https://gitlab.com/Lynnesbian/fif/-/wikis/Cargo-Features
/// [`Infer`]: https://docs.rs/infer/
/// [`xdg-mime::SharedMimeInfo`]: https://docs.rs/xdg-mime/0/xdg_mime/struct.SharedMimeInfo.html
/// [`xdg-mime`]: https://docs.rs/xdg-mime/
pub trait MimeDb {
    /// Initialise the database.
    ///
    /// Takes no arguments and returns a ready-to-query instance of the implementing type.
    fn init() -> Self;
    /// Given a slice of bytes, returns the inferred MIME type, if any.
    ///
    /// `None` means the backend produced no usable MIME type for `data`.
    fn get_type(&self, data: &[u8]) -> Option<Mime>;
}
|
|
|
|
|
2021-02-27 02:13:57 +00:00
|
|
|
cfg_if! {
|
2021-03-02 15:12:29 +00:00
|
|
|
if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] {
|
2021-02-27 02:13:57 +00:00
|
|
|
use std::str::FromStr;
|
2021-02-21 22:46:17 +00:00
|
|
|
|
2021-10-05 15:30:13 +00:00
|
|
|
/// The [`Infer`](https://docs.rs/infer/)-based implementation of [`MimeDb`].
pub struct InferDb {
    // The `infer` matcher set: filled in by `init` (including fif's custom matchers) and queried by `get_type`.
    db: infer::Infer,
}
|
2021-02-21 11:30:58 +00:00
|
|
|
|
2021-02-27 02:13:57 +00:00
|
|
|
/// Returns `true` if `buf` looks like the start of an OpenDocument package whose MIME type is exactly
/// `application/vnd.oasis.opendocument.<kind>`.
///
/// The buffer must begin with the ZIP local file header magic (`PK\x03\x04`) and carry the MIME string at byte
/// offset 38. That offset corresponds to a 30-byte ZIP local file header followed by the 8-byte entry name
/// `mimetype`, i.e. the layout of a package whose first entry is an uncompressed `mimetype` file without an extra
/// field.
///
/// At least one byte must follow the MIME string, and that byte must not be `-`. Without this, `kind = "text"`
/// would also accept `text-template`, `text-master` and `text-web` packages (and likewise for the `-template`
/// variants of the other kinds), because the expected string is a prefix of theirs.
fn open_document_check(buf: &[u8], kind: &str) -> bool {
    const ZIP_MAGIC: &[u8] = b"PK\x03\x04";
    const MIME_PREFIX: &[u8] = b"application/vnd.oasis.opendocument.";
    const MIME_OFFSET: usize = 38;

    // Comparing the constant prefix and `kind` separately avoids building a `String` on every call.
    let kind = kind.as_bytes();
    let kind_start = MIME_OFFSET + MIME_PREFIX.len();
    let mime_end = kind_start + kind.len();

    // The length check comes first so that none of the slicing/indexing below can panic.
    buf.len() > mime_end
        && buf.starts_with(ZIP_MAGIC)
        && buf[MIME_OFFSET..kind_start] == *MIME_PREFIX
        && buf[kind_start..mime_end] == *kind
        && buf[mime_end] != b'-'
}
|
2021-02-14 18:58:57 +00:00
|
|
|
|
2021-02-27 02:13:57 +00:00
|
|
|
impl MimeDb for InferDb {
|
|
|
|
fn init() -> Self {
|
|
|
|
let mut info = infer::Infer::new();
|
|
|
|
|
2022-01-18 08:44:46 +00:00
|
|
|
// In addition to the file inferences provided by Infer, I've also added a few of my own below. Some of them
|
|
|
|
// replace Infer's existing ones, some of them are less than perfect, and still others are for relatively
|
|
|
|
// obscure formats, so I'm not really sure whether or not they should be contributed upstream.
|
|
|
|
|
|
|
|
// OpenDocument Text (used by e.g. LibreOffice Writer)
|
2021-02-27 02:13:57 +00:00
|
|
|
info.add("application/vnd.oasis.opendocument.text", "odt", |buf| {
|
|
|
|
open_document_check(buf, "text")
|
|
|
|
});
|
|
|
|
|
2022-01-18 08:44:46 +00:00
|
|
|
// OpenDocument Spreadsheet (LibreOffice Calc)
|
2021-02-27 02:13:57 +00:00
|
|
|
info.add("application/vnd.oasis.opendocument.spreadsheet", "ods", |buf| {
|
|
|
|
open_document_check(buf, "spreadsheet")
|
|
|
|
});
|
|
|
|
|
2022-01-18 08:44:46 +00:00
|
|
|
// OpenOffice Presentation (LibreOffice Impress)
|
2021-02-27 02:13:57 +00:00
|
|
|
info.add("application/vnd.oasis.opendocument.presentation", "odp", |buf| {
|
|
|
|
open_document_check(buf, "presentation")
|
|
|
|
});
|
|
|
|
|
2022-01-18 08:44:46 +00:00
|
|
|
// Ren'Py Archive (Ren'Py: https://www.renpy.org/)
|
2021-04-14 09:08:18 +00:00
|
|
|
info.add("application/x-rpa", "rpa", |buf| {
|
|
|
|
buf.len() >= 34 && buf.starts_with(b"RPA-") && buf[7] == b' ' && buf[24] ==b' '
|
|
|
|
});
|
|
|
|
|
2022-01-18 08:44:46 +00:00
|
|
|
// Mach-O Binaries (The executable format used by macOS)
|
|
|
|
// my source for most of this info is this article: https://h3adsh0tzz.com/2020/01/macho-file-format/
|
|
|
|
info.add("application/x-mach-binary", "macho", |buf| {
|
|
|
|
// a 32-bit mach-o header occupies 28 bits of space, so any input smaller than that cannot be a mach-o
|
|
|
|
// binary, even if it starts with the magic numbers.
|
|
|
|
|
|
|
|
// java class files also start with 0xCAFEBABE. since infer doesn't support detecting these files,
|
|
|
|
// collisions are not an issue. if, however, infer does gain support for identifying java class files, the
|
|
|
|
// 0xCAFEBABE check should be removed, as java bytecode files are far more prevalent than 32-bit universal
|
|
|
|
// mach-o binaries [citation needed].
|
2022-01-22 17:18:25 +00:00
|
|
|
|
|
|
|
// check for magic numbers (0xFEEDCACF, 0xFEEDFACE, 0xCAFEBABE) in both big and little endian forms
|
2022-01-18 08:44:46 +00:00
|
|
|
buf.len() >= 28 && [b"\xFE\xED\xFA\xCF", b"\xFE\xED\xFA\xCE", b"\xCA\xFE\xBA\xBE", b"\xCF\xFA\xED\xFE",
|
|
|
|
b"\xCE\xFA\xED\xFE", b"\xBE\xBA\xFE\xCA"].iter().any(|magic_numbers| buf.starts_with(&magic_numbers[..]))
|
|
|
|
});
|
|
|
|
|
2021-09-29 13:40:57 +00:00
|
|
|
// info.add("application/x-msi", "msi", |buf| {
|
|
|
|
// TODO: find a way to detect MSI files properly - this just detects those weird windows OLE files and therefore
|
|
|
|
// also picks up on .doc files
|
|
|
|
// buf.starts_with(b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1")
|
|
|
|
// });
|
2021-09-29 13:31:51 +00:00
|
|
|
|
2022-01-18 08:44:46 +00:00
|
|
|
// Scalable Vector Graphics
|
2021-02-27 02:13:57 +00:00
|
|
|
info.add("image/svg+xml", "svg", |buf| {
|
2021-09-22 15:19:15 +00:00
|
|
|
// before doing the moderately expensive SVG check, we should make sure that the input is actually SGML-ish,
|
2022-01-22 17:18:25 +00:00
|
|
|
// by which i mean, starts with the pattern "\s*<".
|
2021-02-27 02:13:57 +00:00
|
|
|
|
|
|
|
// so, here comes our fancy pants """""SGML-ish validator"""""
|
|
|
|
for c in buf {
|
|
|
|
match c {
|
|
|
|
// whitespace (according to https://www.w3.org/TR/xml/#NT-S)
|
|
|
|
b'\t' | b'\r' | b'\n' | b'\x20' => continue,
|
|
|
|
b'<' => break,
|
|
|
|
_ => return false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// finally, to check whether or not the file is an SVG:
|
|
|
|
// - split the buffer up into chunks separated by the less than sign
|
|
|
|
// - check to see if this chunk starts with any of these identifiers:
|
|
|
|
let identifiers: Vec<&[u8]> = vec![b"svg", b"SVG", b"!DOCTYPE svg", b"!DOCTYPE SVG"];
|
2022-01-18 08:44:46 +00:00
|
|
|
// - if it does, the nested `any` will short circuit and immediately return true, causing the parent `any` to
|
|
|
|
// do the same
|
2021-02-27 02:13:57 +00:00
|
|
|
// - and finally, if none of the chunks match, we'll return false
|
|
|
|
|
|
|
|
// TODO: this is kind of messy, i'd like to clean it up somehow :(
|
|
|
|
buf
|
|
|
|
.split(|c| *c == b'<')
|
|
|
|
.any(|buf| identifiers.iter().any(|id| buf.starts_with(id)))
|
|
|
|
});
|
|
|
|
|
|
|
|
Self { db: info }
|
|
|
|
}
|
2021-02-14 18:58:57 +00:00
|
|
|
|
2021-02-27 02:13:57 +00:00
|
|
|
fn get_type(&self, data: &[u8]) -> Option<Mime> {
|
2021-10-05 15:41:08 +00:00
|
|
|
if let Some(mime) = self.db.get(data) {
|
|
|
|
match Mime::from_str(mime.mime_type()) {
|
|
|
|
Err(_) => None,
|
|
|
|
Ok(m) => Some(m),
|
|
|
|
}
|
|
|
|
} else { None }
|
2021-02-27 02:13:57 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
2021-10-05 15:30:13 +00:00
|
|
|
/// The [`xdg-mime`](https://docs.rs/xdg-mime/)-based implementation of [`MimeDb`].
pub struct XdgDb {
    // The shared MIME info database: created by `init` and queried by `get_type`.
    db: xdg_mime::SharedMimeInfo,
}
|
2021-02-14 18:58:57 +00:00
|
|
|
|
2021-02-27 02:13:57 +00:00
|
|
|
impl MimeDb for XdgDb {
|
|
|
|
fn init() -> Self {
|
|
|
|
Self { db: xdg_mime::SharedMimeInfo::new() }
|
|
|
|
}
|
2021-02-14 18:58:57 +00:00
|
|
|
|
2021-02-27 02:13:57 +00:00
|
|
|
fn get_type(&self, data: &[u8]) -> Option<Mime> {
|
2021-02-27 02:24:51 +00:00
|
|
|
self.db.get_mime_type_for_data(data).map(|m| m.0)
|
2021-02-27 02:13:57 +00:00
|
|
|
}
|
2021-02-14 18:58:57 +00:00
|
|
|
}
|
|
|
|
}
|
2021-02-18 09:48:38 +00:00
|
|
|
}
|