put everything in main(), zero mypy issues

This commit is contained in:
Lynne Megido 2020-10-26 22:41:01 +10:00
parent a6409c9c35
commit 7334f67e45
Signed by: lynnesbian
GPG key ID: F0A184B5213D9F90

348
bcao.py
View file

@ -34,6 +34,7 @@ from PIL import Image
fully_supported: List[str] = ["ogg", "flac", "mp3", "m4a", "wav"] fully_supported: List[str] = ["ogg", "flac", "mp3", "m4a", "wav"]
MutagenFile = Union[MP3, FLAC, OggVorbis, mutagen.FileType] MutagenFile = Union[MP3, FLAC, OggVorbis, mutagen.FileType]
MutagenTags = Union[mutagen.id3.ID3Tags, mutagen.mp4.Tags, mutagen.oggvorbis.OggVCommentDict] MutagenTags = Union[mutagen.id3.ID3Tags, mutagen.mp4.Tags, mutagen.oggvorbis.OggVCommentDict]
args: argparse.Namespace
class SongInfo: class SongInfo:
tag_lookup: Dict[str, Dict[str, str]] = { tag_lookup: Dict[str, Dict[str, str]] = {
@ -61,6 +62,7 @@ class SongInfo:
if fallbacks is None: if fallbacks is None:
die("Couldn't determine fallback tags!") die("Couldn't determine fallback tags!")
return # needed for mypy return # needed for mypy
# set default values for the tags, in case the file is missing any (or all!) of them # set default values for the tags, in case the file is missing any (or all!) of them
self.tags: Dict[str, str] = { self.tags: Dict[str, str] = {
"track": fallbacks.group("track"), "track": fallbacks.group("track"),
@ -210,203 +212,209 @@ def sanitise(in_str: str) -> str:
return re.sub(r"[?\\/:|*\"<>]", "_", in_str) return re.sub(r"[?\\/:|*\"<>]", "_", in_str)
return in_str return in_str
# noinspection PyTypeChecker
parser = argparse.ArgumentParser(usage='%(prog)s zip [options]',
formatter_class=argparse.RawTextHelpFormatter,
description="Extracts the given zip file downloaded from Bandcamp and organises it.",
epilog=f"Cover art can only be embedded in files of the following types: {', '.join(fully_supported).upper()}.\n"
"If the song is in any other format, %(prog)s will behave as though you passed '-c n', "
"but will otherwise work normally.\nIf the song files contain no metadata, %(prog)s will attempt "
"to parse the song's filenames to retrieve the artist, album, title, and track number.")
parser.add_argument('zip', help='The zip file to use.')
parser.add_argument('-c', '--add-cover-images', dest='process_cover', default='w', choices=['n', 'a', 'w'],
help="When to embed cover art into songs.\nOptions: [n]ever, [a]lways, [w]hen necessary.\nDefault: %(default)s")
parser.add_argument('-d', '--destination', dest='destination', default='/home/lynne/Music/Music/',
help="The directory to organise the music into.\nDefault: %(default)s")
parser.add_argument('-q', '--quiet', dest='quiet', action='store_true',
help='Disable non-error output and assume default artist name.')
parser.add_argument('-u', '--unsanitised', dest='sanitise', action='store_false',
help="Don't replace NTFS-unsafe characters with underscores. Not recommended.")
parser.add_argument('-t', '--threshold', dest='threshold', nargs=1, default=300,
help="Maximum acceptable file size for cover art, in kilobytes.\nDefault: %(default)s")
args = parser.parse_args() def main():
# convert args.threshold to bytes # noinspection PyTypeChecker
args.threshold *= 1024 parser = argparse.ArgumentParser(usage='%(prog)s zip [options]',
formatter_class=argparse.RawTextHelpFormatter,
description="Extracts the given zip file downloaded from Bandcamp and organises it.",
epilog=f"Cover art can only be embedded in files of the following types: {', '.join(fully_supported).upper()}.\n"
"If the song is in any other format, %(prog)s will behave as though you passed '-c n', "
"but will otherwise work normally.\nIf the song files contain no metadata, %(prog)s will attempt "
"to parse the song's filenames to retrieve the artist, album, title, and track number.")
parser.add_argument('zip', help='The zip file to use.')
parser.add_argument('-c', '--add-cover-images', dest='process_cover', default='w', choices=['n', 'a', 'w'],
help="When to embed cover art into songs.\nOptions: [n]ever, [a]lways, [w]hen necessary.\nDefault: %(default)s")
parser.add_argument('-d', '--destination', dest='destination', default='/home/lynne/Music/Music/',
help="The directory to organise the music into.\nDefault: %(default)s")
parser.add_argument('-q', '--quiet', dest='quiet', action='store_true',
help='Disable non-error output and assume default artist name.')
parser.add_argument('-u', '--unsanitised', dest='sanitise', action='store_false',
help="Don't replace NTFS-unsafe characters with underscores. Not recommended.")
parser.add_argument('-t', '--threshold', dest='threshold', nargs=1, default=300,
help="Maximum acceptable file size for cover art, in kilobytes.\nDefault: %(default)s")
if not path.exists(args.zip): global args
die(f"Couldn't find {args.zip}.", 2) args = parser.parse_args()
# convert args.threshold to bytes
args.threshold *= 1024
log("Extracting...") if not path.exists(args.zip):
tmp_dir: tempfile.TemporaryDirectory = tempfile.TemporaryDirectory() die(f"Couldn't find {args.zip}.", 2)
tmp: str = tmp_dir.name
cover: Optional[str] = None
song_names: List[str] = []
with ZipFile(args.zip, 'r') as zip_file: log("Extracting...")
for file in zip_file.namelist(): tmp_dir: tempfile.TemporaryDirectory = tempfile.TemporaryDirectory()
if re.match(r"^(.+ - ){2}\d{2,} .+\.(ogg|flac|alac|aiff|wav|mp3|m4a)$", file): tmp: str = tmp_dir.name
# bandcamp zips contains songs with names formatted like "Album - Artist - 01 Song.mp3" cover: Optional[str] = None
# for example, "King Crimson - In the Wake of Poseidon - 02 Pictures of a City.ogg" song_names: List[str] = []
# this regex should match only on those, and cut out (hopefully) all of the bonus material stuff, which shouldn't
# be added to the music folder (since i sync that to my phone, and "making of" videos are cool, but i don't
# have the space for it).
song_names.append(file)
zip_file.extract(file, tmp)
elif cover is None and re.match(r"cover\.(jpe?g|png)", file):
cover = file
zip_file.extract(file, tmp)
# save the format of the songs (ogg, mp3, etc) with ZipFile(args.zip, 'r') as zip_file:
# we'll need this to know what metadata format we should write for file in zip_file.namelist():
song_format: str = path.splitext(song_names[0])[1][1:] if re.match(r"^(.+ - ){2}\d{2,} .+\.(ogg|flac|alac|aiff|wav|mp3|m4a)$", file):
if song_format not in fully_supported: # bandcamp zips contains songs with names formatted like "Album - Artist - 01 Song.mp3"
log(f"Format {song_format} is not fully supported - cover images will not be modified", 1) # for example, "King Crimson - In the Wake of Poseidon - 02 Pictures of a City.ogg"
args.process_cover = 'n' # this regex should match only on those, and cut out (hopefully) all of the bonus material stuff, which shouldn't
# be added to the music folder (since i sync that to my phone, and "making of" videos are cool, but i don't
# have the space for it).
song_names.append(file)
zip_file.extract(file, tmp)
elif cover is None and re.match(r"cover\.(jpe?g|png)", file):
cover = file
zip_file.extract(file, tmp)
if cover is None: # save the format of the songs (ogg, mp3, etc)
die("Unable to find cover image!") # we'll need this to know what metadata format we should write
# return # needed for mypy song_format: str = path.splitext(song_names[0])[1][1:]
if song_format not in fully_supported:
log(f"Format {song_format} is not fully supported - cover images will not be modified", 1)
args.process_cover = 'n'
if args.process_cover != 'n': if cover is None:
log("Resizing album art to embed in songs...") die("Unable to find cover image!")
with Image.open(str(Path(tmp, cover))) as image: return # needed for mypy
temp_cover: Path = Path(tmp, "cover-lq.jpg")
if image.mode in ["RGBA", "P"]: if args.process_cover != 'n':
# remove alpha channel log("Resizing album art to embed in songs...")
image = image.convert("RGB") with Image.open(str(Path(tmp, cover))) as image:
temp_cover: Path = Path(tmp, "cover-lq.jpg")
image.save(temp_cover, quality=85, optimize=True) if image.mode in ["RGBA", "P"]:
image_smol = image # remove alpha channel
image = image.convert("RGB")
while path.getsize(temp_cover) > args.threshold: image.save(temp_cover, quality=85, optimize=True)
# keep shrinking the image by 90% until it's less than {args.threshold} kilobytes image_smol = image
ratio = 0.9
if path.getsize(temp_cover) > args.threshold * 2: while path.getsize(temp_cover) > args.threshold:
# if the file size of the cover is more than double the threshold, resize the cover image size by 80% instead # keep shrinking the image by 90% until it's less than {args.threshold} kilobytes
ratio = 0.8 ratio = 0.9
image_smol = image_smol.resize([round(n * ratio) for n in image_smol.size]) if path.getsize(temp_cover) > args.threshold * 2:
image_smol.save(temp_cover, quality=85, optimize=True) # if the file size of the cover is more than double the threshold, resize the cover image size by 80% instead
if image_smol.size[0] == 10: ratio = 0.8
# something very bad has happened here
die("Failed to resize image")
# read the image file to get its raw data image_smol = image_smol.resize([round(n * ratio) for n in image_smol.size])
with open(temp_cover, 'r+b') as cover_file: image_smol.save(temp_cover, quality=85, optimize=True)
data = cover_file.read() if image_smol.size[0] == 10:
# something very bad has happened here
die("Failed to resize image")
# it's really strange that the more annoying the file's metadata is, the *less* annoying it is to create cover art # read the image file to get its raw data
# for it in mutagen. with open(temp_cover, 'r+b') as cover_file:
# vorbis: open standard, so easy to use that mutagen supplies a bunch of "easy" wrappers around other formats to data = cover_file.read()
# make them work more like mutagen.
# cover-annoy-o-meter: high. mutagen requires you to specify the width, height, colour depth, etc etc
# id3: well documented, but rather cryptic (which is more understandable, "album_artist" or "TPE2").
# cover-annoy-o-meter: not bad at all - at least you get a constructor this time - although it is kinda annoying
# that you have to specify the file encoding, and how you need both a type and a desc.
# m4a: scarce documentation, closed format, half reverse engineered from whatever itunes is doing, exists pretty
# much exclusively in the realm of apple stuff.
# cover-annoy-o-meter: all you need is the file data and the format type.
if song_format in ["ogg", "flac"]: # it's really strange that the more annoying the file's metadata is, the *less* annoying it is to create cover art
# i hate this # for it in mutagen.
with Image.open(io.BytesIO(data)) as image: # vorbis: open standard, so easy to use that mutagen supplies a bunch of "easy" wrappers around other formats to
embed_cover = Picture() # make them work more like mutagen.
embed_cover.data = data # cover-annoy-o-meter: high. mutagen requires you to specify the width, height, colour depth, etc etc
embed_cover.type = PictureType.COVER_FRONT # id3: well documented, but rather cryptic (which is more understandable, "album_artist" or "TPE2").
embed_cover.mime = "image/jpeg" # cover-annoy-o-meter: not bad at all - at least you get a constructor this time - although it is kinda annoying
embed_cover.width = image.size[0] # that you have to specify the file encoding, and how you need both a type and a desc.
embed_cover.height = image.size[1] # m4a: scarce documentation, closed format, half reverse engineered from whatever itunes is doing, exists pretty
embed_cover.depth = image.bits # much exclusively in the realm of apple stuff.
# cover-annoy-o-meter: all you need is the file data and the format type.
elif song_format == "mp3": if song_format in ["ogg", "flac"]:
# apparently APIC files get compressed on save if they are "large": # i hate this
# https://mutagen.readthedocs.io/en/latest/api/id3_frames.html#mutagen.id3.APIC with Image.open(io.BytesIO(data)) as image:
# i don't know what that means (lossless text compression? automatic JPEG conversion?) and i don't know if or how embed_cover = Picture()
# i can disable it, which kinda sucks... embed_cover.data = data
# if, for example, mutagen's threshold for "large" is 200KiB, then any file over that size would be reduced to embed_cover.type = PictureType.COVER_FRONT
# below it, either by resizing or JPEG quality reduction or whatever, making the -t flag useless for values above embed_cover.mime = "image/jpeg"
# 200 when saving MP3 files. embed_cover.width = image.size[0]
# the most i can tell is that mutagen uses zlib compression in some way or another for reading ID3 tags: embed_cover.height = image.size[1]
# https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_frames.py#L265 embed_cover.depth = image.bits
# however, it seems not to use zlib when *writing* tags, citing itunes incompatibility, in particular with APIC:
# https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_tags.py#L510
# given that this is the only reference to compression that i could find in the source code, and it says that
# ID3v2 compression was disabled for itunes compatibility, i'm going to assume/hope it doesn't do anything weird.
# it's worth noting that mutagen has no dependencies outside of python's stdlib, which (currently) doesn't contain
# any method for JPEG compression, so i'm 99% sure the files won't be mangled.
embed_cover = APIC( elif song_format == "mp3":
encoding=3, # utf-8 # apparently APIC files get compressed on save if they are "large":
mime="image/jpeg", # https://mutagen.readthedocs.io/en/latest/api/id3_frames.html#mutagen.id3.APIC
type=PictureType.COVER_FRONT, # i don't know what that means (lossless text compression? automatic JPEG conversion?) and i don't know if or how
desc='cover', # i can disable it, which kinda sucks...
data=data # if, for example, mutagen's threshold for "large" is 200KiB, then any file over that size would be reduced to
) # below it, either by resizing or JPEG quality reduction or whatever, making the -t flag useless for values above
# 200 when saving MP3 files.
# the most i can tell is that mutagen uses zlib compression in some way or another for reading ID3 tags:
# https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_frames.py#L265
# however, it seems not to use zlib when *writing* tags, citing itunes incompatibility, in particular with APIC:
# https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_tags.py#L510
# given that this is the only reference to compression that i could find in the source code, and it says that
# ID3v2 compression was disabled for itunes compatibility, i'm going to assume/hope it doesn't do anything weird.
# it's worth noting that mutagen has no dependencies outside of python's stdlib, which (currently) doesn't contain
# any method for JPEG compression, so i'm 99% sure the files won't be mangled.
elif song_format == "m4a": embed_cover = APIC(
embed_cover = MP4Cover( encoding=3, # utf-8
data=data, mime="image/jpeg",
imageformat=MP4Cover.FORMAT_JPEG type=PictureType.COVER_FRONT,
) desc='cover',
data=data
)
artists: List[str] = [] elif song_format == "m4a":
album: Optional[str] = None embed_cover = MP4Cover(
songs: Dict[str, str] = {} data=data,
zeroes = min(len(song_names), 2) imageformat=MP4Cover.FORMAT_JPEG
first_loop: bool = True )
for song_name in song_names: artists: List[str] = []
song = SongInfo(Path(tmp, song_name)) album: Optional[str] = None
if first_loop: songs: Dict[str, str] = {}
# the first item in the artists list should be the album artist zeroes = min(len(song_names), 2)
artists.append(song["album_artist"]) first_loop: bool = True
album = song["album"]
first_loop = False
# add the song's artist(s) to the list for song_name in song_names:
map(artists.append, song.list_tags["artist"]) song = SongInfo(Path(tmp, song_name))
songs[song_name] = song.get_target_name(zeroes) if first_loop:
# the first item in the artists list should be the album artist
artists.append(song["album_artist"])
album = song["album"]
first_loop = False
if args.process_cover == 'a' or (args.process_cover == 'w' and song.has_cover() is False): # add the song's artist(s) to the list
song.set_cover(embed_cover) map(artists.append, song.list_tags["artist"])
songs[song_name] = song.get_target_name(zeroes)
# remove duplicate artists if args.process_cover == 'a' or (args.process_cover == 'w' and song.has_cover() is False):
artists = list(dict.fromkeys(artists)) song.set_cover(embed_cover)
if len(artists) > 1 and "Various Artists" not in artists: # remove duplicate artists
artists.append("Various Artists") artists = list(dict.fromkeys(artists))
artist: Optional[str] = None if len(artists) > 1 and "Various Artists" not in artists:
while artist is None: artists.append("Various Artists")
log("Artist directory:")
for i, artist_name in enumerate(artists):
log(f"{i+1}) {artist_name}")
log(f"{len(artists) + 1}) Custom...")
user_choice: str = "1" if args.quiet else input("> ") artist: Optional[str] = None
if user_choice.isdecimal(): while artist is None:
choice: int = int(user_choice) log("Artist directory:")
if choice == len(artists) + 1: for i, artist_name in enumerate(artists):
log("Enter the name to use:") log(f"{i+1}) {artist_name}")
artist = input("> ") log(f"{len(artists) + 1}) Custom...")
user_choice: str = "1" if args.quiet else input("> ")
if user_choice.isdecimal():
choice: int = int(user_choice)
if choice == len(artists) + 1:
log("Enter the name to use:")
artist = input("> ")
else:
try:
artist = artists[choice - 1]
except KeyError:
log(f"Please choose a number between 1 and {len(artists) + 1}.")
else: else:
try: log(f"Please choose a number between 1 and {len(artists) + 1}")
artist = artists[choice - 1]
except KeyError:
log(f"Please choose a number between 1 and {len(artists) + 1}.")
else:
log(f"Please choose a number between 1 and {len(artists) + 1}")
destination: Path = Path(args.destination, artist, album) destination: Path = Path(args.destination, artist, album)
log(f"Moving files to \"{destination}\"...") log(f"Moving files to \"{destination}\"...")
os.makedirs(destination, exist_ok=True) os.makedirs(destination, exist_ok=True)
for source_name, dest_name in songs.items(): for source_name, dest_name in songs.items():
shutil.move(str(Path(tmp, source_name)), str(Path(destination, dest_name))) shutil.move(str(Path(tmp, source_name)), str(Path(destination, dest_name)))
shutil.move(str(Path(tmp, cover)), str(Path(destination, cover))) shutil.move(str(Path(tmp, cover)), str(Path(destination, cover)))
tmp_dir.cleanup() tmp_dir.cleanup()
log("Done!") log("Done!")
if __name__ == "__main__":
main()