// SPDX-FileCopyrightText: 2021-2022 Lynnesbian // SPDX-License-Identifier: GPL-3.0-or-later //! Backend-neutral Mime database abstraction. use cfg_if::cfg_if; use mime::Mime; /// A thin wrapper around either [`Infer`] or [`xdg-mime::SharedMimeInfo`], depending on which [cargo features] /// fif was compiled with. By default, fif uses an [`Infer`]-based implementation on Windows, and an /// [`xdg-mime`]-based one everywhere else. This behaviour can be changed at compile time by using the aforementioned /// [cargo features]. /// /// [cargo features]: https://gitlab.com/Lynnesbian/fif/-/wikis/Cargo-Features /// [`Infer`]: https://docs.rs/infer/ /// [`xdg-mime::SharedMimeInfo`]: https://docs.rs/xdg-mime/0/xdg_mime/struct.SharedMimeInfo.html /// [`xdg-mime`]: https://docs.rs/xdg-mime/ pub trait MimeDb { /// Initialise the database. fn init() -> Self; /// Given a slice of bytes, returns the inferred MIME type, if any. fn get_type(&self, data: &[u8]) -> Option; } cfg_if! { if #[cfg(any(all(unix, feature = "infer-backend"), all(not(unix), not(feature = "xdg-mime-backend"))))] { use std::str::FromStr; /// The [`Infer`](https://docs.rs/infer/)-based implementation of [`MimeDb`]. pub struct InferDb { db: infer::Infer, } fn open_document_check(buf: &[u8], kind: &str) -> bool { let mime = format!("application/vnd.oasis.opendocument.{}", kind); let mime = mime.as_bytes(); buf.len() > 38 + mime.len() && buf.starts_with(b"PK\x03\x04") && buf[38..mime.len() + 38] == mime[..] } impl MimeDb for InferDb { fn init() -> Self { let mut info = infer::Infer::new(); // In addition to the file inferences provided by Infer, I've also added a few of my own below. Some of them // replace Infer's existing ones, some of them are less than perfect, and still others are for relatively // obscure formats, so I'm not really sure whether or not they should be contributed upstream. // OpenDocument Text (used by e.g. LibreOffice Writer) info.add("application/vnd.oasis.opendocument.text", "odt", |buf| { open_document_check(buf, "text") }); // OpenDocument Spreadsheet (LibreOffice Calc) info.add("application/vnd.oasis.opendocument.spreadsheet", "ods", |buf| { open_document_check(buf, "spreadsheet") }); // OpenOffice Presentation (LibreOffice Impress) info.add("application/vnd.oasis.opendocument.presentation", "odp", |buf| { open_document_check(buf, "presentation") }); // Ren'Py Archive (Ren'Py: https://www.renpy.org/) info.add("application/x-rpa", "rpa", |buf| { buf.len() >= 34 && buf.starts_with(b"RPA-") && buf[7] == b' ' && buf[24] ==b' ' }); // Mach-O Binaries (The executable format used by macOS) // my source for most of this info is this article: https://h3adsh0tzz.com/2020/01/macho-file-format/ // like linux's ELF binaries, mach-o binaries do not typically have an extension, but if they did, it'd // probably be something like ".macho", so, that'll do i guess. fif doesn't actually use the extensions // specified here anyway. info.add("application/x-mach-binary", "macho", |buf| { // a 32-bit mach-o header occupies 28 bits of space, so any input smaller than that cannot be a mach-o // binary, even if it starts with the magic numbers. // the three magic numbers that can appear are 0xFEEDFACF, 0xFEEDFACE, and 0xCAFEBABE. the code below // checks for all three of these, in both big and little endian order. // java class files also start with 0xCAFEBABE. since infer doesn't support detecting these files, // collisions are not an issue. if, however, infer does gain support for identifying java class files, the // 0xCAFEBABE check should be removed, as java bytecode files are far more prevalent than 32-bit universal // mach-o binaries [citation needed]. buf.len() >= 28 && [b"\xFE\xED\xFA\xCF", b"\xFE\xED\xFA\xCE", b"\xCA\xFE\xBA\xBE", b"\xCF\xFA\xED\xFE", b"\xCE\xFA\xED\xFE", b"\xBE\xBA\xFE\xCA"].iter().any(|magic_numbers| buf.starts_with(&magic_numbers[..])) }); // info.add("application/x-msi", "msi", |buf| { // TODO: find a way to detect MSI files properly - this just detects those weird windows OLE files and therefore // also picks up on .doc files // buf.starts_with(b"\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1") // }); // Scalable Vector Graphics info.add("image/svg+xml", "svg", |buf| { // before doing the moderately expensive SVG check, we should make sure that the input is actually SGML-ish, // by which i mean, starts with anywhere from zero to ∞-1 whitespace characters, and then a less than sign, // and then there's some other stuff we don't care about right now // so, here comes our fancy pants """""SGML-ish validator""""" for c in buf { match c { // whitespace (according to https://www.w3.org/TR/xml/#NT-S) b'\t' | b'\r' | b'\n' | b'\x20' => continue, b'<' => break, _ => return false, } } // finally, to check whether or not the file is an SVG: // - split the buffer up into chunks separated by the less than sign // - check to see if this chunk starts with any of these identifiers: let identifiers: Vec<&[u8]> = vec![b"svg", b"SVG", b"!DOCTYPE svg", b"!DOCTYPE SVG"]; // - if it does, the nested `any` will short circuit and immediately return true, causing the parent `any` to // do the same // - and finally, if none of the chunks match, we'll return false // TODO: this is kind of messy, i'd like to clean it up somehow :( buf .split(|c| *c == b'<') .any(|buf| identifiers.iter().any(|id| buf.starts_with(id))) }); Self { db: info } } fn get_type(&self, data: &[u8]) -> Option { if let Some(mime) = self.db.get(data) { match Mime::from_str(mime.mime_type()) { Err(_) => None, Ok(m) => Some(m), } } else { None } } } } else { /// The [`xdg-mime`](https://docs.rs/xdg-mime/)-based implementation of [`MimeDb`]. pub struct XdgDb { db: xdg_mime::SharedMimeInfo, } impl MimeDb for XdgDb { fn init() -> Self { Self { db: xdg_mime::SharedMimeInfo::new() } } fn get_type(&self, data: &[u8]) -> Option { self.db.get_mime_type_for_data(data).map(|m| m.0) } } } }