lynnesbian/fif - fif

fif

101 lines

4.9 KiB

Rust

Raw Normal View History

implemented parallel functionality! 0u0 2021-02-05 12:45:51 +00:00			`use std::fs::File;`
begone, proc_macro! removes a bunch of dependencies that came through darling. 2021-02-14 14:30:12 +00:00			`use std::io;`
first read a smol chunk, if we can't ID the file, read BUF_SIZE 2021-02-06 11:48:31 +00:00			`use std::io::{Read, Seek, SeekFrom};`
begone, proc_macro! removes a bunch of dependencies that came through darling. 2021-02-14 14:30:12 +00:00			`use std::path::Path;`
quick hack to work around non-existent document support ;3 2021-02-21 16:32:38 +00:00			`use std::str::FromStr;`
begone, proc_macro! removes a bunch of dependencies that came through darling. 2021-02-14 14:30:12 +00:00
			`use cached::cached;`
			`use mime_guess::Mime;`
implemented parallel functionality! 0u0 2021-02-05 12:45:51 +00:00			`use smartstring::alias::String;`
support using either infer or xdg_mime for mime detection i guess we can build for windows now 2021-02-14 18:58:57 +00:00
			`use crate::mimedb::MimeDb;`
begone, proc_macro! removes a bunch of dependencies that came through darling. 2021-02-14 14:30:12 +00:00
// rather than reading once into a large buffer, it tends to be faster to first try identifying the file from a small
// chunk read from the top, and then proceeding with the large buffer. many file formats can be easily identified by
// the first 128 bytes. of course, not all formats can, and some (OOXML...) require reading a long ways in.

/// Number of bytes read from the start of a file for the first, cheap identification attempt.
const INITIAL_BUF_SIZE: usize = 128;
/// Number of bytes read from the start of a file when the first attempt fails or is deemed too vague.
const BUF_SIZE: usize = 4096;
work toward parallelisation 2021-02-05 09:15:12 +00:00
added rustfmt.toml, ran rustfmt 2021-02-18 09:48:38 +00:00			`pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {`
SVG support, better and more comments, minor code cleanup 2021-02-21 11:30:58 +00:00			`let mut buffer = [0; INITIAL_BUF_SIZE];`
implemented parallel functionality! 0u0 2021-02-05 12:45:51 +00:00			`let mut file = File::open(path)?;`
work toward parallelisation 2021-02-05 09:15:12 +00:00
first read a smol chunk, if we can't ID the file, read BUF_SIZE 2021-02-06 11:48:31 +00:00			`// this lint can be ignored: it's okay if the file isn't long enough to fill the buffer, as we only care about the`
implemented parallel functionality! 0u0 2021-02-05 12:45:51 +00:00			`// first few bytes for the purpose of mime sniffing`
			`#[allow(clippy::unused_io_amount)]`
thanks rustfmt 2021-02-21 14:15:09 +00:00			`file.read(&mut buffer)?;`
first read a smol chunk, if we can't ID the file, read BUF_SIZE 2021-02-06 11:48:31 +00:00
better document support, print version properly, display version 2021-02-21 22:46:17 +00:00			`let r = db.get_type(&buffer).filter(\|mime\|`
			`// some mime types should be investigated further, reading up to BUF_SIZE even if they've been determined already`
			`// one such type is XML - there's many more specific types that can be determined by reading further (such as SVG)`
			`mime != &mime_guess::mime::TEXT_XML`
			`// another is ZIP - many file formats (DOCX, ODT, JAR...) are just ZIP files with particular data structures.`
			`// determining that a file is in one of the MS office formats in particular requires looking quite far into the`
			`// file.`
			`&& mime != &Mime::from_str("application/zip").unwrap());`
SVG support, better and more comments, minor code cleanup 2021-02-21 11:30:58 +00:00
first read a smol chunk, if we can't ID the file, read BUF_SIZE 2021-02-06 11:48:31 +00:00			`if r.is_some() {`
			`return Ok(r);`
			`}`

SVG support, better and more comments, minor code cleanup 2021-02-21 11:30:58 +00:00			`// attempt to read up to the BUF_SIZE bytes of the file.`
better document support, print version properly, display version 2021-02-21 22:46:17 +00:00			`// we've already read the first 128 bytes into a buffer, but i can't see an obvious way to reuse them in a way that's`
			`// faster than simply moving the seek position back to the start of the file and re-reading the whole BUF_SIZE bytes.`
			`// for example, reading (BUF_SIZE - INITIAL_BUF_SIZE) bytes, then concatenating the original INITIAL_BUF_SIZE buffer`
			`// with this new one would presumably be faster - but it's not. i think it's more expensive to create the iterator,`
			`// collect the contents, etc. i'll have to look into this more. i don't at all doubt that there IS a way to do this`
			`// efficiently, and i can think of a way in principle, but i'm not sure how to express it in a way that is both`
			`// idiomatic/safe and fast.`
first read a smol chunk, if we can't ID the file, read BUF_SIZE 2021-02-06 11:48:31 +00:00			`let mut buffer = [0; BUF_SIZE];`
			`file.seek(SeekFrom::Start(0))?;`
			`file.read(&mut buffer)?;`
support using either infer or xdg_mime for mime detection i guess we can build for windows now 2021-02-14 18:58:57 +00:00			`Ok(db.get_type(&buffer))`
implemented parallel functionality! 0u0 2021-02-05 12:45:51 +00:00			`}`

			`// TODO: avoid cloning mime if possible, although i don't really see how it would be - maybe instead of passing the mime`
			`// object, pass a hash of it?`
begone, proc_macro! removes a bunch of dependencies that came through darling. 2021-02-14 14:30:12 +00:00			`cached! {`
			`MIMEXT;`
			`fn mime_extension_lookup(mime: Mime) -> Option<Vec<String>> = {`
SVG support, better and more comments, minor code cleanup 2021-02-21 11:30:58 +00:00
			// match on the mime's `essence_str` rather than the mime itself - mime_guess::get_mime_extensions ignores the type
			`// suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. passing the essence_str`
			`// (which includes the suffix) fixes this.`
			`match mime_guess::get_mime_extensions_str(mime.essence_str()) {`
			`Some(exts) => {`
			`let possible_exts: Vec<String> = exts.iter().map(\|e\| String::from(*e)).collect();`

			`Some(if mime == mime_guess::mime::IMAGE_JPEG {`
			`// possible_exts starts with "jpe", because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg are`
			`// far more common than jpe, so it makes sense to suggest one of those rather than jpe. to do this, we can`
			`// add "jpg" to the start of the possible_exts list, ensuring that it will be the extension suggested by fif.`
			`[vec![String::from("jpg")], possible_exts].concat()`

			`} else if mime == mime_guess::mime::TEXT_XML {`
			`// a somewhat similar case arises with XML files - the first suggested extension is "addin", when it should`
			`// (in my opinion) be "xml".`
			`// there's also another problem: SVG files can easily be misidentified as XML files, because they usually`
			`// are valid XML - the more whitespace and comments an SVG file begins with, the more bytes must be read`
			`// before it's possible to determine that it's an SVG rather than an XML file. to "fix" this, we can add "svg"`
			`// as a valid extension for XML files, ensuring that SVG files misidentified as XML will still be considered`
			`// to have valid extensions.`
			`[vec![String::from("xml"), String::from("svg")], possible_exts].concat()`

better document support, print version properly, display version 2021-02-21 22:46:17 +00:00			`} else if mime == Mime::from_str("application/msword").unwrap() {`
			`// classic office files considered harmful`
			`vec![String::from("doc"), String::from("xls"), String::from("ppt")]`
use xdg-mime by default on linux, infer elsewhere 2021-02-27 02:02:49 +00:00
			`} else if mime == Mime::from_str("application/zip").unwrap() {`
			`// neither xdg-mime nor infer seem to be able to detect office XML files properly...`
			`[vec![String::from("zip"), String::from("docx"), String::from("xlsx"), String::from("pptx")], possible_exts].concat()`

SVG support, better and more comments, minor code cleanup 2021-02-21 11:30:58 +00:00			`} else {`
			`possible_exts`
			`})`
			`},`
begone, proc_macro! removes a bunch of dependencies that came through darling. 2021-02-14 14:30:12 +00:00			`None => None`
			`}`
implemented parallel functionality! 0u0 2021-02-05 12:45:51 +00:00			`}`
			`}`

101 lines 4.9 KiB Rust Raw Normal View History Unescape Escape

101 lines

4.9 KiB

Rust

Raw Normal View History