put everything in main(), zero mypy issues

This commit is contained in:
Lynne Megido 2020-10-26 22:41:01 +10:00
parent a6409c9c35
commit 7334f67e45
Signed by: lynnesbian
GPG key ID: F0A184B5213D9F90

348
bcao.py
View file

@ -34,6 +34,7 @@ from PIL import Image
fully_supported: List[str] = ["ogg", "flac", "mp3", "m4a", "wav"]
MutagenFile = Union[MP3, FLAC, OggVorbis, mutagen.FileType]
MutagenTags = Union[mutagen.id3.ID3Tags, mutagen.mp4.Tags, mutagen.oggvorbis.OggVCommentDict]
args: argparse.Namespace
class SongInfo:
tag_lookup: Dict[str, Dict[str, str]] = {
@ -61,6 +62,7 @@ class SongInfo:
if fallbacks is None:
die("Couldn't determine fallback tags!")
return # needed for mypy
# set default values for the tags, in case the file is missing any (or all!) of them
self.tags: Dict[str, str] = {
"track": fallbacks.group("track"),
@ -210,203 +212,209 @@ def sanitise(in_str: str) -> str:
return re.sub(r"[?\\/:|*\"<>]", "_", in_str)
return in_str
# noinspection PyTypeChecker
parser = argparse.ArgumentParser(usage='%(prog)s zip [options]',
formatter_class=argparse.RawTextHelpFormatter,
description="Extracts the given zip file downloaded from Bandcamp and organises it.",
epilog=f"Cover art can only be embedded in files of the following types: {', '.join(fully_supported).upper()}.\n"
"If the song is in any other format, %(prog)s will behave as though you passed '-c n', "
"but will otherwise work normally.\nIf the song files contain no metadata, %(prog)s will attempt "
"to parse the song's filenames to retrieve the artist, album, title, and track number.")
parser.add_argument('zip', help='The zip file to use.')
parser.add_argument('-c', '--add-cover-images', dest='process_cover', default='w', choices=['n', 'a', 'w'],
help="When to embed cover art into songs.\nOptions: [n]ever, [a]lways, [w]hen necessary.\nDefault: %(default)s")
parser.add_argument('-d', '--destination', dest='destination', default='/home/lynne/Music/Music/',
help="The directory to organise the music into.\nDefault: %(default)s")
parser.add_argument('-q', '--quiet', dest='quiet', action='store_true',
help='Disable non-error output and assume default artist name.')
parser.add_argument('-u', '--unsanitised', dest='sanitise', action='store_false',
help="Don't replace NTFS-unsafe characters with underscores. Not recommended.")
parser.add_argument('-t', '--threshold', dest='threshold', nargs=1, default=300,
help="Maximum acceptable file size for cover art, in kilobytes.\nDefault: %(default)s")
args = parser.parse_args()
# convert args.threshold to bytes
args.threshold *= 1024
def main():
# noinspection PyTypeChecker
parser = argparse.ArgumentParser(usage='%(prog)s zip [options]',
formatter_class=argparse.RawTextHelpFormatter,
description="Extracts the given zip file downloaded from Bandcamp and organises it.",
epilog=f"Cover art can only be embedded in files of the following types: {', '.join(fully_supported).upper()}.\n"
"If the song is in any other format, %(prog)s will behave as though you passed '-c n', "
"but will otherwise work normally.\nIf the song files contain no metadata, %(prog)s will attempt "
"to parse the song's filenames to retrieve the artist, album, title, and track number.")
parser.add_argument('zip', help='The zip file to use.')
parser.add_argument('-c', '--add-cover-images', dest='process_cover', default='w', choices=['n', 'a', 'w'],
help="When to embed cover art into songs.\nOptions: [n]ever, [a]lways, [w]hen necessary.\nDefault: %(default)s")
parser.add_argument('-d', '--destination', dest='destination', default='/home/lynne/Music/Music/',
help="The directory to organise the music into.\nDefault: %(default)s")
parser.add_argument('-q', '--quiet', dest='quiet', action='store_true',
help='Disable non-error output and assume default artist name.')
parser.add_argument('-u', '--unsanitised', dest='sanitise', action='store_false',
help="Don't replace NTFS-unsafe characters with underscores. Not recommended.")
parser.add_argument('-t', '--threshold', dest='threshold', nargs=1, default=300,
help="Maximum acceptable file size for cover art, in kilobytes.\nDefault: %(default)s")
if not path.exists(args.zip):
die(f"Couldn't find {args.zip}.", 2)
global args
args = parser.parse_args()
# convert args.threshold to bytes
args.threshold *= 1024
log("Extracting...")
tmp_dir: tempfile.TemporaryDirectory = tempfile.TemporaryDirectory()
tmp: str = tmp_dir.name
cover: Optional[str] = None
song_names: List[str] = []
if not path.exists(args.zip):
die(f"Couldn't find {args.zip}.", 2)
with ZipFile(args.zip, 'r') as zip_file:
for file in zip_file.namelist():
if re.match(r"^(.+ - ){2}\d{2,} .+\.(ogg|flac|alac|aiff|wav|mp3|m4a)$", file):
# bandcamp zips contain songs with names formatted like "Artist - Album - 01 Song.mp3"
# for example, "King Crimson - In the Wake of Poseidon - 02 Pictures of a City.ogg"
# this regex should match only on those, and cut out (hopefully) all of the bonus material stuff, which shouldn't
# be added to the music folder (since i sync that to my phone, and "making of" videos are cool, but i don't
# have the space for it).
song_names.append(file)
zip_file.extract(file, tmp)
elif cover is None and re.match(r"cover\.(jpe?g|png)", file):
cover = file
zip_file.extract(file, tmp)
log("Extracting...")
tmp_dir: tempfile.TemporaryDirectory = tempfile.TemporaryDirectory()
tmp: str = tmp_dir.name
cover: Optional[str] = None
song_names: List[str] = []
# save the format of the songs (ogg, mp3, etc)
# we'll need this to know what metadata format we should write
song_format: str = path.splitext(song_names[0])[1][1:]
if song_format not in fully_supported:
log(f"Format {song_format} is not fully supported - cover images will not be modified", 1)
args.process_cover = 'n'
with ZipFile(args.zip, 'r') as zip_file:
for file in zip_file.namelist():
if re.match(r"^(.+ - ){2}\d{2,} .+\.(ogg|flac|alac|aiff|wav|mp3|m4a)$", file):
# bandcamp zips contain songs with names formatted like "Artist - Album - 01 Song.mp3"
# for example, "King Crimson - In the Wake of Poseidon - 02 Pictures of a City.ogg"
# this regex should match only on those, and cut out (hopefully) all of the bonus material stuff, which shouldn't
# be added to the music folder (since i sync that to my phone, and "making of" videos are cool, but i don't
# have the space for it).
song_names.append(file)
zip_file.extract(file, tmp)
elif cover is None and re.match(r"cover\.(jpe?g|png)", file):
cover = file
zip_file.extract(file, tmp)
if cover is None:
die("Unable to find cover image!")
# return # needed for mypy
# save the format of the songs (ogg, mp3, etc)
# we'll need this to know what metadata format we should write
song_format: str = path.splitext(song_names[0])[1][1:]
if song_format not in fully_supported:
log(f"Format {song_format} is not fully supported - cover images will not be modified", 1)
args.process_cover = 'n'
if args.process_cover != 'n':
log("Resizing album art to embed in songs...")
with Image.open(str(Path(tmp, cover))) as image:
temp_cover: Path = Path(tmp, "cover-lq.jpg")
if cover is None:
die("Unable to find cover image!")
return # needed for mypy
if image.mode in ["RGBA", "P"]:
# remove alpha channel
image = image.convert("RGB")
if args.process_cover != 'n':
log("Resizing album art to embed in songs...")
with Image.open(str(Path(tmp, cover))) as image:
temp_cover: Path = Path(tmp, "cover-lq.jpg")
image.save(temp_cover, quality=85, optimize=True)
image_smol = image
if image.mode in ["RGBA", "P"]:
# remove alpha channel
image = image.convert("RGB")
while path.getsize(temp_cover) > args.threshold:
# keep scaling the image down to 90% of its current size until the file is no larger than args.threshold (already converted to bytes)
ratio = 0.9
image.save(temp_cover, quality=85, optimize=True)
image_smol = image
if path.getsize(temp_cover) > args.threshold * 2:
# if the file size of the cover is more than double the threshold, scale the image down to 80% of its current size instead
ratio = 0.8
while path.getsize(temp_cover) > args.threshold:
# keep scaling the image down to 90% of its current size until the file is no larger than args.threshold (already converted to bytes)
ratio = 0.9
image_smol = image_smol.resize([round(n * ratio) for n in image_smol.size])
image_smol.save(temp_cover, quality=85, optimize=True)
if image_smol.size[0] == 10:
# something very bad has happened here
die("Failed to resize image")
if path.getsize(temp_cover) > args.threshold * 2:
# if the file size of the cover is more than double the threshold, scale the image down to 80% of its current size instead
ratio = 0.8
# read the image file to get its raw data
with open(temp_cover, 'r+b') as cover_file:
data = cover_file.read()
image_smol = image_smol.resize([round(n * ratio) for n in image_smol.size])
image_smol.save(temp_cover, quality=85, optimize=True)
if image_smol.size[0] == 10:
# something very bad has happened here
die("Failed to resize image")
# it's really strange that the more annoying the file's metadata is, the *less* annoying it is to create cover art
# for it in mutagen.
# vorbis: open standard, so easy to use that mutagen supplies a bunch of "easy" wrappers around other formats to
# make them work more like vorbis.
# cover-annoy-o-meter: high. mutagen requires you to specify the width, height, colour depth, etc etc
# id3: well documented, but rather cryptic (which is more understandable, "album_artist" or "TPE2").
# cover-annoy-o-meter: not bad at all - at least you get a constructor this time - although it is kinda annoying
# that you have to specify the file encoding, and how you need both a type and a desc.
# m4a: scarce documentation, closed format, half reverse engineered from whatever itunes is doing, exists pretty
# much exclusively in the realm of apple stuff.
# cover-annoy-o-meter: all you need is the file data and the format type.
# read the image file to get its raw data
with open(temp_cover, 'r+b') as cover_file:
data = cover_file.read()
if song_format in ["ogg", "flac"]:
# i hate this
with Image.open(io.BytesIO(data)) as image:
embed_cover = Picture()
embed_cover.data = data
embed_cover.type = PictureType.COVER_FRONT
embed_cover.mime = "image/jpeg"
embed_cover.width = image.size[0]
embed_cover.height = image.size[1]
embed_cover.depth = image.bits
# it's really strange that the more annoying the file's metadata is, the *less* annoying it is to create cover art
# for it in mutagen.
# vorbis: open standard, so easy to use that mutagen supplies a bunch of "easy" wrappers around other formats to
# make them work more like vorbis.
# cover-annoy-o-meter: high. mutagen requires you to specify the width, height, colour depth, etc etc
# id3: well documented, but rather cryptic (which is more understandable, "album_artist" or "TPE2").
# cover-annoy-o-meter: not bad at all - at least you get a constructor this time - although it is kinda annoying
# that you have to specify the file encoding, and how you need both a type and a desc.
# m4a: scarce documentation, closed format, half reverse engineered from whatever itunes is doing, exists pretty
# much exclusively in the realm of apple stuff.
# cover-annoy-o-meter: all you need is the file data and the format type.
elif song_format == "mp3":
# apparently APIC files get compressed on save if they are "large":
# https://mutagen.readthedocs.io/en/latest/api/id3_frames.html#mutagen.id3.APIC
# i don't know what that means (lossless text compression? automatic JPEG conversion?) and i don't know if or how
# i can disable it, which kinda sucks...
# if, for example, mutagen's threshold for "large" is 200KiB, then any file over that size would be reduced to
# below it, either by resizing or JPEG quality reduction or whatever, making the -t flag useless for values above
# 200 when saving MP3 files.
# the most i can tell is that mutagen uses zlib compression in some way or another for reading ID3 tags:
# https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_frames.py#L265
# however, it seems not to use zlib when *writing* tags, citing itunes incompatibility, in particular with APIC:
# https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_tags.py#L510
# given that this is the only reference to compression that i could find in the source code, and it says that
# ID3v2 compression was disabled for itunes compatibility, i'm going to assume/hope it doesn't do anything weird.
# it's worth noting that mutagen has no dependencies outside of python's stdlib, which (currently) doesn't contain
# any method for JPEG compression, so i'm 99% sure the files won't be mangled.
if song_format in ["ogg", "flac"]:
# i hate this
with Image.open(io.BytesIO(data)) as image:
embed_cover = Picture()
embed_cover.data = data
embed_cover.type = PictureType.COVER_FRONT
embed_cover.mime = "image/jpeg"
embed_cover.width = image.size[0]
embed_cover.height = image.size[1]
embed_cover.depth = image.bits
embed_cover = APIC(
encoding=3, # utf-8
mime="image/jpeg",
type=PictureType.COVER_FRONT,
desc='cover',
data=data
)
elif song_format == "mp3":
# apparently APIC files get compressed on save if they are "large":
# https://mutagen.readthedocs.io/en/latest/api/id3_frames.html#mutagen.id3.APIC
# i don't know what that means (lossless text compression? automatic JPEG conversion?) and i don't know if or how
# i can disable it, which kinda sucks...
# if, for example, mutagen's threshold for "large" is 200KiB, then any file over that size would be reduced to
# below it, either by resizing or JPEG quality reduction or whatever, making the -t flag useless for values above
# 200 when saving MP3 files.
# the most i can tell is that mutagen uses zlib compression in some way or another for reading ID3 tags:
# https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_frames.py#L265
# however, it seems not to use zlib when *writing* tags, citing itunes incompatibility, in particular with APIC:
# https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_tags.py#L510
# given that this is the only reference to compression that i could find in the source code, and it says that
# ID3v2 compression was disabled for itunes compatibility, i'm going to assume/hope it doesn't do anything weird.
# it's worth noting that mutagen has no dependencies outside of python's stdlib, which (currently) doesn't contain
# any method for JPEG compression, so i'm 99% sure the files won't be mangled.
elif song_format == "m4a":
embed_cover = MP4Cover(
data=data,
imageformat=MP4Cover.FORMAT_JPEG
)
embed_cover = APIC(
encoding=3, # utf-8
mime="image/jpeg",
type=PictureType.COVER_FRONT,
desc='cover',
data=data
)
artists: List[str] = []
album: Optional[str] = None
songs: Dict[str, str] = {}
zeroes = min(len(song_names), 2)
first_loop: bool = True
elif song_format == "m4a":
embed_cover = MP4Cover(
data=data,
imageformat=MP4Cover.FORMAT_JPEG
)
for song_name in song_names:
song = SongInfo(Path(tmp, song_name))
if first_loop:
# the first item in the artists list should be the album artist
artists.append(song["album_artist"])
album = song["album"]
first_loop = False
artists: List[str] = []
album: Optional[str] = None
songs: Dict[str, str] = {}
zeroes = min(len(song_names), 2)
first_loop: bool = True
# add the song's artist(s) to the list
map(artists.append, song.list_tags["artist"])
songs[song_name] = song.get_target_name(zeroes)
for song_name in song_names:
song = SongInfo(Path(tmp, song_name))
if first_loop:
# the first item in the artists list should be the album artist
artists.append(song["album_artist"])
album = song["album"]
first_loop = False
if args.process_cover == 'a' or (args.process_cover == 'w' and song.has_cover() is False):
song.set_cover(embed_cover)
# add the song's artist(s) to the list
map(artists.append, song.list_tags["artist"])
songs[song_name] = song.get_target_name(zeroes)
# remove duplicate artists
artists = list(dict.fromkeys(artists))
if args.process_cover == 'a' or (args.process_cover == 'w' and song.has_cover() is False):
song.set_cover(embed_cover)
if len(artists) > 1 and "Various Artists" not in artists:
artists.append("Various Artists")
# remove duplicate artists
artists = list(dict.fromkeys(artists))
artist: Optional[str] = None
while artist is None:
log("Artist directory:")
for i, artist_name in enumerate(artists):
log(f"{i+1}) {artist_name}")
log(f"{len(artists) + 1}) Custom...")
if len(artists) > 1 and "Various Artists" not in artists:
artists.append("Various Artists")
user_choice: str = "1" if args.quiet else input("> ")
if user_choice.isdecimal():
choice: int = int(user_choice)
if choice == len(artists) + 1:
log("Enter the name to use:")
artist = input("> ")
artist: Optional[str] = None
while artist is None:
log("Artist directory:")
for i, artist_name in enumerate(artists):
log(f"{i+1}) {artist_name}")
log(f"{len(artists) + 1}) Custom...")
user_choice: str = "1" if args.quiet else input("> ")
if user_choice.isdecimal():
choice: int = int(user_choice)
if choice == len(artists) + 1:
log("Enter the name to use:")
artist = input("> ")
else:
try:
artist = artists[choice - 1]
except KeyError:
log(f"Please choose a number between 1 and {len(artists) + 1}.")
else:
try:
artist = artists[choice - 1]
except KeyError:
log(f"Please choose a number between 1 and {len(artists) + 1}.")
else:
log(f"Please choose a number between 1 and {len(artists) + 1}")
log(f"Please choose a number between 1 and {len(artists) + 1}")
destination: Path = Path(args.destination, artist, album)
log(f"Moving files to \"{destination}\"...")
os.makedirs(destination, exist_ok=True)
destination: Path = Path(args.destination, artist, album)
log(f"Moving files to \"{destination}\"...")
os.makedirs(destination, exist_ok=True)
for source_name, dest_name in songs.items():
shutil.move(str(Path(tmp, source_name)), str(Path(destination, dest_name)))
shutil.move(str(Path(tmp, cover)), str(Path(destination, cover)))
for source_name, dest_name in songs.items():
shutil.move(str(Path(tmp, source_name)), str(Path(destination, dest_name)))
shutil.move(str(Path(tmp, cover)), str(Path(destination, cover)))
tmp_dir.cleanup()
log("Done!")
tmp_dir.cleanup()
log("Done!")
if __name__ == "__main__":
main()