From 5b95996d8c70fe44529c097ce39efa5b5327297d Mon Sep 17 00:00:00 2001 From: Lynnesbian Date: Sat, 17 Oct 2020 23:54:50 +1000 Subject: [PATCH] fairly major restructuring that should make future format support a lot easier, support for songs with partially or fully incomplete metadata --- bcao.py | 315 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 199 insertions(+), 116 deletions(-) diff --git a/bcao.py b/bcao.py index 0f00fed..156cdd5 100755 --- a/bcao.py +++ b/bcao.py @@ -6,6 +6,7 @@ # output: it organises it, adds cover art, puts it in the right place... import argparse +import io import os import re import sys @@ -30,22 +31,112 @@ from mutagen.id3 import APIC, PictureType from PIL import Image -vorbis_to_id3: Dict[str, str] = { - "track": "TRCK", - "artist": "TPE1", - "title": "TIT2", - "album": "TALB", - "album_artist": "TPE2" -} -vorbis_to_itunes: Dict[str, str] = { - "track": 'trkn', - "artist": '\xa9ART', - "title": '\xa9nam', - "album": '\xa9alb', - "album_artist": 'aART' -} -fully_supported: List[str] = ["ogg", "flac", "mp3", "m4a"] +fully_supported: List[str] = ["ogg", "flac", "mp3", "m4a", "wav"] MutagenFile = Union[MP3, FLAC, OggVorbis, mutagen.FileType] +MutagenTags = Union[mutagen.id3.ID3Tags, mutagen.mp4.Tags, mutagen.oggvorbis.OggVCommentDict] + +class SongInfo: + tag_lookup: Dict[str, Dict[str, str]] = { + "track": {"id3": "TRCK", "m4a": "trkn", "vorbis": "tracknumber"}, + "artist": {"id3": "TPE1", "m4a": "©ART", "vorbis": "artist"}, + "title": {"id3": "TIT2", "m4a": "©nam", "vorbis": "title"}, + "album": {"id3": "TALB", "m4a": "©alb", "vorbis": "album"}, + "album_artist": {"id3": "TPE2", "m4a": "aART", "vorbis": "albumartist"} + } + format_lookup: Dict[str, str] = { + "mp3": "id3", + "m4a": "m4a", + "ogg": "vorbis", + "flac": "vorbis" + } + + def __init__(self, file_name: Path): + self.m_file: MutagenFile = mutagen.File(file_name) + self.m_tags: MutagenTags = self.m_file.tags + self.file_name = str(file_name.name) + self.format = path.splitext(file_name)[1][1:] + self.fallback = False + + fallbacks = re.match(r"^(.+) - (.+) - (\d{2,}) (.+)\.(?:ogg|flac|alac|aiff|wav|mp3|m4a)$", self.file_name) + # set default values for the tags, in case the file is missing any (or all!) of them + self.tags: Dict[str, str] = { + "track": fallbacks.group(3), + "artist": fallbacks.group(1), + "title": fallbacks.group(4), + "album": fallbacks.group(2), + "album_artist": fallbacks.group(1) + } + # set list_tags to the default tags in list form + # i.e. for every tag, set list_tags[x] = [tags[x]] + self.list_tags: Dict[str, List[str]] = dict((x[0], [x[1]]) for x in self.tags.items()) + + if self.m_tags is None: + self.fallback = True + + else: + for standard_name, tag_set in self.tag_lookup.items(): + tag = tag_set[self.format_lookup[self.format]] + + if tag not in self.m_tags: + print(f"{tag} not in self.m_tags") + self.fallback = True + continue + + value_list = self.m_tags[tag] + if self.format == "m4a" and tag == "track": + # every tag in the MP4 file (from what i can tell) is a list + # this includes the track number tag, which is a tuple of ints in a list. + # because every other format is either a non-list, or a list of non-lists, we need to account for this case + # (a list of lists of non-lists) specially, by turning it into a list of non-lists. + value_list = value_list[0] + + if not isinstance(value_list, (list, tuple)): + value_list = [value_list] + + # convert the list of strings/ID3 frames/ints/whatevers to sanitised strings + value_list = [sanitise(str(val)) for val in value_list] + + self.tags[standard_name] = value_list[0] + self.list_tags[standard_name] = value_list + + def get_target_name(self, zeroes: int): + return f"{self.tags['track'].zfill(zeroes)} {self.tags['title']}.{self.format}" + + def has_cover(self): + if self.format == "ogg": + return "metadata_block_picture" in self.m_tags and len(self.m_tags["metadata_block_picture"]) != 0 + + if self.format == "flac": + return len(self.m_file.pictures) != 0 + + if self.format == "mp3": + apics: List[APIC] = self.m_tags.getall("APIC") + for apic in apics: + if apic.type == PictureType.COVER_FRONT: + return True + return False + + if self.format == "m4a": + return 'covr' in self.m_tags and len(self.m_tags['covr']) != 0 + + raise NotImplementedError("Song format not yet implemented.") + + def set_cover(self, embed_cover: Union[Picture, APIC, MP4Cover]): + # embed cover art + if self.format == "ogg": + self.m_tags["metadata_block_picture"] = [b64encode(embed_cover.write()).decode("ascii")] + elif self.format == "flac": + self.m_file.clear_pictures() + self.m_file.add_picture(embed_cover) + elif self.format == "mp3": + self.m_tags.add(embed_cover) + elif self.format == "m4a": + self.m_tags['covr'] = [embed_cover] + + self.m_file.save() + + def __getitem__(self, item): + return self.tags[item] def log(message: str, importance: int = 0): if not args.quiet or importance > 0: @@ -55,42 +146,42 @@ def die(message: str, code: int = 1): print(message) sys.exit(code) -def get_tag(mut_song: MutagenFile, tag: str, allow_list: bool = False, allow_sanitising: bool = True)\ - -> Union[str, List[str]]: - if isinstance(mut_song, MP3): - tag = vorbis_to_id3[tag] - tag_list = mut_song.tags.getall(tag) - - elif isinstance(mut_song, MP4): - # every tag in the MP4 file (from what i can tell) is a list - # this includes the track number tag, which is a list, containing a single tuple, containing two ints (track, total) - # unless we account for this, tag_list will be set to [(1, 5)], and then converted to a string, resulting in - # ['(1, 5)'], which (if not allow_list) will be returned as '(1, 5)', which is not exactly helpful. - tag = vorbis_to_itunes[tag] - if tag == 'trkn': - # mut_song[tag] == [(1, 5)] - # mut_song[tag][0] == (1, 5) - tag_list = mut_song[tag][0] - else: - tag_list = mut_song[tag] - - else: - if tag == "track": - tag = "tracknumber" - tag = tag.replace("_", "") - tag_list = mut_song[tag] if isinstance(mut_song[tag], list) else [mut_song[tag]] - - # convert the list of strings/ID3 frames/ints/whatevers to strings - tag_list = list(map(str, tag_list)) - - # sanitise everything - if allow_sanitising: - tag_list = [sanitise(tag) for tag in tag_list] - - if allow_list: - return tag_list - - return tag_list[0] +# def get_tag(mut_song: MutagenFile, tag: str, allow_list: bool = False, allow_sanitising: bool = True)\ +# -> Union[str, List[str]]: +# if isinstance(mut_song, MP3): +# tag = vorbis_to_id3[tag] +# tag_list = mut_song.tags.getall(tag) +# +# elif isinstance(mut_song, MP4): +# # every tag in the MP4 file (from what i can tell) is a list +# # this includes the track number tag, which is a list, containing a single tuple, containing two ints (track, total) +# # unless we account for this, tag_list will be set to [(1, 5)], and then converted to a string, resulting in +# # ['(1, 5)'], which (if not allow_list) will be returned as '(1, 5)', which is not exactly helpful. +# tag = vorbis_to_itunes[tag] +# if tag == 'trkn': +# # mut_song[tag] == [(1, 5)] +# # mut_song[tag][0] == (1, 5) +# tag_list = mut_song[tag][0] +# else: +# tag_list = mut_song[tag] +# +# else: +# if tag == "track": +# tag = "tracknumber" +# tag = tag.replace("_", "") +# tag_list = mut_song[tag] if isinstance(mut_song[tag], list) else [mut_song[tag]] +# +# # convert the list of strings/ID3 frames/ints/whatevers to strings +# tag_list = list(map(str, tag_list)) +# +# # sanitise everything +# if allow_sanitising: +# tag_list = [sanitise(tag) for tag in tag_list] +# +# if allow_list: +# return tag_list +# +# return tag_list[0] def has_cover(mut_song: MutagenFile): if isinstance(mut_song, OggVorbis): @@ -120,8 +211,10 @@ def sanitise(in_str: str) -> str: parser = argparse.ArgumentParser(usage='%(prog)s zip [options]', formatter_class=argparse.RawTextHelpFormatter, description="Extracts the given zip file downloaded from Bandcamp and organises it.", - epilog=f"Cover art can only be embedded in files of the following types: {', '.join(fully_supported).upper()}.\nIf " - "the song is in any other format, %(prog)s will behave as though you passed '-c n', but will otherwise work normally.") + epilog=f"Cover art can only be embedded in files of the following types: {', '.join(fully_supported).upper()}.\n" +"If the song is in any other format, %(prog)s will behave as though you passed '-c n', " +"but will otherwise work normally.\nIf the song files contain no metadata, %(prog)s will attempt " +"to parse the song's filenames to retrieve the artist, album, title, and track number.") parser.add_argument('zip', help='The zip file to use.') parser.add_argument('-c', '--add-cover-images', dest='process_cover', default='w', choices=['n', 'a', 'w'], help="When to embed cover art into songs.\nOptions: [n]ever, [a]lways, [w]hen necessary.\nDefault: %(default)s") @@ -149,7 +242,7 @@ song_names: List[str] = [] with ZipFile(args.zip, 'r') as zip_file: for file in zip_file.namelist(): - if re.match(r"^(.+ - ){2}\d{2,} .+\.(ogg|flac|alac|aiff|wav|mp3|opus|m4a|aac)$", file): + if re.match(r"^(.+ - ){2}\d{2,} .+\.(ogg|flac|alac|aiff|wav|mp3|m4a)$", file): # bandcamp zips contains songs with names formatted like "Album - Artist - 01 Song.mp3" # for example, "King Crimson - In the Wake of Poseidon - 02 Pictures of a City.ogg" # this regex should match only on those, and cut out (hopefully) all of the bonus material stuff, which shouldn't @@ -198,21 +291,21 @@ if args.process_cover != 'n': with open(temp_cover, 'r+b') as cover_file: data = cover_file.read() - with Image.open(temp_cover) as image: - # it's really strange that the more annoying the file's metadata is, the *less* annoying it is to create cover art - # for it in mutagen. - # vorbis: open standard, so easy to use that mutagen supplies a bunch of "easy" wrappers around other formats to - # make them work more like mutagen. - # cover-annoy-o-meter: high. mutagen requires you to specify the width, height, colour depth, etc etc - # id3: well documented, but rather cryptic (which is more understandable, "album_artist" or "TPE2"). - # cover-annoy-o-meter: not bad at all - at least you get a constructor this time - although it is kinda annoying - # that you have to specify the file encoding, and how you need both a type and a desc. - # m4a: scarce documentation, closed format, half reverse engineered from whatever itunes is doing, exists pretty - # much exclusively in the realm of apple stuff. - # cover-annoy-o-meter: all you need is the file data and the format type. + # it's really strange that the more annoying the file's metadata is, the *less* annoying it is to create cover art + # for it in mutagen. + # vorbis: open standard, so easy to use that mutagen supplies a bunch of "easy" wrappers around other formats to + # make them work more like mutagen. + # cover-annoy-o-meter: high. mutagen requires you to specify the width, height, colour depth, etc etc + # id3: well documented, but rather cryptic (which is more understandable, "album_artist" or "TPE2"). + # cover-annoy-o-meter: not bad at all - at least you get a constructor this time - although it is kinda annoying + # that you have to specify the file encoding, and how you need both a type and a desc. + # m4a: scarce documentation, closed format, half reverse engineered from whatever itunes is doing, exists pretty + # much exclusively in the realm of apple stuff. + # cover-annoy-o-meter: all you need is the file data and the format type. - if song_format in ["ogg", "flac"]: - # i hate this + if song_format in ["ogg", "flac"]: + # i hate this + with Image.open(io.BytesIO(data)) as image: embed_cover = Picture() embed_cover.data = data embed_cover.type = PictureType.COVER_FRONT @@ -220,35 +313,37 @@ if args.process_cover != 'n': embed_cover.width = image.size[0] embed_cover.height = image.size[1] embed_cover.depth = image.bits - elif song_format == "mp3": - # apparently APIC files get compressed on save if they are "large": - # https://mutagen.readthedocs.io/en/latest/api/id3_frames.html#mutagen.id3.APIC - # i don't know what that means (lossless text compression? automatic JPEG conversion?) and i don't know if or how - # i can disable it, which kinda sucks... - # if, for example, mutagen's threshold for "large" is 200KiB, then any file over that size would be reduced to - # below it, either by resizing or JPEG quality reduction or whatever, making the -t flag useless for values above - # 200 when saving MP3 files. - # the most i can tell is that mutagen uses zlib compression in some way or another for reading ID3 tags: - # https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_frames.py#L265 - # however, it seems not to use zlib when *writing* tags, citing itunes incompatibility, in particular with APIC: - # https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_tags.py#L510 - # given that this is the only reference to compression that i could find in the source code, and it says that - # ID3v2 compression was disabled for itunes compatibility, i'm going to assume/hope it doesn't do anything weird. - # it's worth noting that mutagen has no dependencies outside of python's stdlib, which (currently) doesn't contain - # any method for JPEG compression, so i'm 99% sure the files won't be mangled. - embed_cover = APIC( - encoding=3, # utf-8 - mime="image/jpeg", - type=PictureType.COVER_FRONT, - desc='cover', - data=data - ) - elif song_format == "m4a": - embed_cover = MP4Cover( - data=data, - imageformat=MP4Cover.FORMAT_JPEG - ) + elif song_format == "mp3": + # apparently APIC files get compressed on save if they are "large": + # https://mutagen.readthedocs.io/en/latest/api/id3_frames.html#mutagen.id3.APIC + # i don't know what that means (lossless text compression? automatic JPEG conversion?) and i don't know if or how + # i can disable it, which kinda sucks... + # if, for example, mutagen's threshold for "large" is 200KiB, then any file over that size would be reduced to + # below it, either by resizing or JPEG quality reduction or whatever, making the -t flag useless for values above + # 200 when saving MP3 files. + # the most i can tell is that mutagen uses zlib compression in some way or another for reading ID3 tags: + # https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_frames.py#L265 + # however, it seems not to use zlib when *writing* tags, citing itunes incompatibility, in particular with APIC: + # https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_tags.py#L510 + # given that this is the only reference to compression that i could find in the source code, and it says that + # ID3v2 compression was disabled for itunes compatibility, i'm going to assume/hope it doesn't do anything weird. + # it's worth noting that mutagen has no dependencies outside of python's stdlib, which (currently) doesn't contain + # any method for JPEG compression, so i'm 99% sure the files won't be mangled. + + embed_cover = APIC( + encoding=3, # utf-8 + mime="image/jpeg", + type=PictureType.COVER_FRONT, + desc='cover', + data=data + ) + + elif song_format == "m4a": + embed_cover = MP4Cover( + data=data, + imageformat=MP4Cover.FORMAT_JPEG + ) artists: List[str] = [] album: Optional[str] = None @@ -256,32 +351,20 @@ songs: Dict[str, str] = {} zeroes = min(len(song_names), 2) first_loop: bool = True -for song in song_names: - m: MutagenFile = mutagen.File(Path(tmp, song)) +for song_name in song_names: + song = SongInfo(Path(tmp, song_name)) if first_loop: # the first item in the artists list should be the album artist - artists.append(get_tag(m, "album_artist")) - album = get_tag(m, "album") + artists.append(song["album_artist"]) + album = song["album"] first_loop = False # add the song's artist(s) to the list - map(artists.append, get_tag(m, "artist", allow_list=True)) - songs[song] = f"{str(get_tag(m, 'track')).zfill(zeroes)} {get_tag(m, 'title')}.{song_format}" + map(artists.append, song.list_tags["artist"]) + songs[song_name] = song.get_target_name(zeroes) - if args.process_cover == 'a' or (args.process_cover == 'w' and has_cover(m) is False): - log("Embedding cover art...") - # embed cover art - if song_format == "ogg": - m["metadata_block_picture"] = [b64encode(embed_cover.write()).decode("ascii")] - elif song_format == "flac": - m.clear_pictures() - m.add_picture(embed_cover) - elif song_format == "mp3": - m.tags.add(embed_cover) - elif song_format == "m4a": - m['covr'] = [embed_cover] - - m.save() + if args.process_cover == 'a' or (args.process_cover == 'w' and song.has_cover() is False): + song.set_cover(embed_cover) # remove duplicate artists artists = list(dict.fromkeys(artists))