use std::fs::File;
use std::io;
use std::io::{Read, Seek, SeekFrom};
use std::path::Path;

use cached::cached;
use mime_guess::Mime;
use smartstring::alias::String;

use crate::mimedb::MimeDb;

// use log::{debug, warn};

// from looking at the files in https://github.com/bojand/infer/tree/master/src/matchers, the format with the largest
// buffer size requirement for identification requires 262 bytes, and the next largest buffer necessary is only 131
// bytes. as only two formats need more than 128 bytes, it would be fairly reasonable to only read 128 bytes.
// unfortunately, the format that requires 262 bytes for identification is tar, an extremely popular format (in the *nix
// world, at least). however, tar files almost always appear wrapped in other formats (.tar.gz, .tar.zst, etc) anyway,
// so maybe it's fine...? maybe this should be configurable by the user? i don't know.
// empirical testing (or rather, starting from 256 and incrementing until it worked) reveals that xdg-mime requires
// at least 265 bytes to identify a tar file.

// additionally, since many formats can be identified with ≤64 bytes, it's worth reading 64 bytes, checking for the mime
// type, and then reading the full 512 bytes if necessary. in most cases, this will end up being faster on the whole,
// even though two reads are needed for certain formats, unless the directory being scanned is predominantly made up of
// such formats.

/// size (in bytes) of the buffer used for the first, cheap sniffing attempt in `mime_type`.
const INITIAL_BUF_SIZE: usize = 64;
/// size (in bytes) of the buffer used for the second attempt in `mime_type`, made only when the first attempt fails
/// to identify the file.
const BUF_SIZE: usize = 512;

/// Fills `buf` from `reader`, stopping early only when the end of the input is reached.
///
/// A single call to `Read::read` is allowed to return fewer bytes than are available, and may fail with
/// `ErrorKind::Interrupted`, which is meant to be retried. This helper keeps reading until `buf` is full or `read`
/// reports end-of-input, so the sniffer always gets as much of the file's prefix as actually exists. Any part of `buf`
/// that isn't filled keeps its previous contents.
fn fill_buffer<R: Read>(reader: &mut R, buf: &mut [u8]) -> io::Result<()> {
	let mut filled = 0;

	while filled < buf.len() {
		match reader.read(&mut buf[filled..]) {
			// end of input: it's okay if the file isn't long enough to fill the buffer, as we only care about the
			// first few bytes for the purpose of mime sniffing
			Ok(0) => break,
			Ok(n) => filled += n,
			// an interrupted read hasn't consumed anything, so it's safe to simply try again
			Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
			Err(e) => return Err(e),
		}
	}

	Ok(())
}

/// Sniffs the mime type of the file at `path` from its leading bytes, using `db` to perform the identification.
///
/// The first `INITIAL_BUF_SIZE` bytes are tried first; only if `db` can't identify those is the file re-read from the
/// start into a `BUF_SIZE` byte buffer and tried again. Both buffers are zero-initialised, so for a file shorter than
/// the buffer, `db` sees the file's contents followed by zero bytes.
///
/// Returns `Ok(None)` if `db` can't identify the file from either buffer.
///
/// # Errors
///
/// Returns any I/O error raised while opening, reading, or seeking the file. Interrupted reads are retried rather
/// than reported.
pub fn mime_type<T: MimeDb>(db: &T, path: &Path) -> io::Result<Option<Mime>> {
	let mut file = File::open(path)?;

	// first pass: many formats can be identified from a small prefix, so try that before reading any more
	let mut buffer = [0; INITIAL_BUF_SIZE];
	fill_buffer(&mut file, &mut buffer)?;

	let r = db.get_type(&buffer);

	if r.is_some() {
		return Ok(r);
	}

	// second pass: attempt to read up to BUF_SIZE bytes of the file.
	// we've already read the first INITIAL_BUF_SIZE bytes, but rather than trying to reuse them, simply move the seek
	// position back to the start of the file and re-read the whole prefix into a fresh, larger buffer.
	let mut buffer = [0; BUF_SIZE];
	file.seek(SeekFrom::Start(0))?;
	fill_buffer(&mut file, &mut buffer)?;
	Ok(db.get_type(&buffer))
}

// TODO: avoid cloning mime if possible, although i don't really see how it would be - maybe instead of passing the mime
// object, pass a hash of it?
cached! {
	MIMEXT;
	fn mime_extension_lookup(mime: Mime) -> Option<Vec<String>> = {
		// look up by the mime's `essence_str` rather than the mime itself - mime_guess::get_mime_extensions ignores the
		// type suffix, treating "image/svg+xml" as "image/svg", and thus fails to find any extensions. passing the
		// essence_str (which includes the suffix) fixes this.
		mime_guess::get_mime_extensions_str(mime.essence_str()).map(|known_exts| {
			// extensions that should be listed ahead of whatever mime_guess returns for this mime type
			let preferred_exts: &[&str] = if mime == mime_guess::mime::IMAGE_JPEG {
				// mime_guess lists "jpe" first, because it's alphabetically before "jpeg" and "jpg". however, jpg/jpeg
				// are far more common than jpe, so "jpg" goes at the front of the list, ensuring that it will be the
				// extension suggested by fif.
				&["jpg"]
			} else if mime == mime_guess::mime::TEXT_XML {
				// similarly, the first extension mime_guess lists for XML is "addin", when it should (in my opinion)
				// be "xml".
				// on top of that, SVG files can easily be misidentified as XML files, because they usually *are* valid
				// XML - the more whitespace and comments an SVG file begins with, the more bytes must be read before
				// it's possible to determine that it's an SVG rather than an XML file. to "fix" this, "svg" is also
				// treated as a valid extension for XML files, ensuring that SVG files misidentified as XML will still
				// be considered to have valid extensions.
				&["xml", "svg"]
			} else {
				&[]
			};

			preferred_exts
				.iter()
				.chain(known_exts.iter())
				.map(|ext| String::from(*ext))
				.collect::<Vec<String>>()
		})
	}
}