Fairly major restructuring that should make future format support a lot easier; adds support for songs with partially or fully missing metadata

This commit is contained in:
Lynne Megido 2020-10-17 23:54:50 +10:00
parent f78c8d7c78
commit 5b95996d8c
Signed by: lynnesbian
GPG key ID: F0A184B5213D9F90

233
bcao.py
View file

@ -6,6 +6,7 @@
# output: it organises it, adds cover art, puts it in the right place... # output: it organises it, adds cover art, puts it in the right place...
import argparse import argparse
import io
import os import os
import re import re
import sys import sys
@ -30,22 +31,112 @@ from mutagen.id3 import APIC, PictureType
from PIL import Image from PIL import Image
vorbis_to_id3: Dict[str, str] = { fully_supported: List[str] = ["ogg", "flac", "mp3", "m4a", "wav"]
"track": "TRCK",
"artist": "TPE1",
"title": "TIT2",
"album": "TALB",
"album_artist": "TPE2"
}
vorbis_to_itunes: Dict[str, str] = {
"track": 'trkn',
"artist": '\xa9ART',
"title": '\xa9nam',
"album": '\xa9alb',
"album_artist": 'aART'
}
fully_supported: List[str] = ["ogg", "flac", "mp3", "m4a"]
MutagenFile = Union[MP3, FLAC, OggVorbis, mutagen.FileType] MutagenFile = Union[MP3, FLAC, OggVorbis, mutagen.FileType]
MutagenTags = Union[mutagen.id3.ID3Tags, mutagen.mp4.Tags, mutagen.oggvorbis.OggVCommentDict]
class SongInfo:
    """Tag and cover-art access for a single song file.

    Tags are exposed under five standard names ("track", "artist", "title",
    "album", "album_artist") regardless of the underlying container.
    Values missing from the file's metadata are filled in from a file name
    of the form "Artist - Album - 01 Title.ext".

    Attributes:
        m_file: the object returned by ``mutagen.File`` for the song.
        m_tags: ``m_file.tags``; ``None`` when the file has no tag container.
        file_name: the final path component of the song, as a string.
        format: the file extension without the leading dot.
        fallback: True when at least one tag had to come from the file name.
        tags: standard tag name -> first (sanitised) value.
        list_tags: standard tag name -> every (sanitised) value.
    """

    # standard tag name -> the key that tag is stored under in each tag family
    tag_lookup: Dict[str, Dict[str, str]] = {
        "track": {"id3": "TRCK", "m4a": "trkn", "vorbis": "tracknumber"},
        "artist": {"id3": "TPE1", "m4a": "©ART", "vorbis": "artist"},
        "title": {"id3": "TIT2", "m4a": "©nam", "vorbis": "title"},
        "album": {"id3": "TALB", "m4a": "©alb", "vorbis": "album"},
        "album_artist": {"id3": "TPE2", "m4a": "aART", "vorbis": "albumartist"}
    }

    # file extension -> tag family used as the inner key of tag_lookup
    format_lookup: Dict[str, str] = {
        "mp3": "id3",
        "m4a": "m4a",
        "ogg": "vorbis",
        "flac": "vorbis"
    }

    # "Artist - Album - 01 Title.ext"
    _file_name_re = re.compile(r"^(.+) - (.+) - (\d{2,}) (.+)\.(?:ogg|flac|alac|aiff|wav|mp3|m4a)$")

    @staticmethod
    def _tags_from_file_name(file_name: str) -> Dict[str, str]:
        """Return the tags encoded in file_name, or {} if it doesn't follow the expected pattern."""
        parsed = SongInfo._file_name_re.match(file_name)
        if parsed is None:
            return {}
        artist, album, track, title = parsed.groups()
        return {
            "track": track,
            "artist": artist,
            "title": title,
            "album": album,
            # the file name carries only one artist, so it doubles as the album artist
            "album_artist": artist
        }

    @staticmethod
    def _as_value_list(raw, unwrap_first: bool = False) -> list:
        """Normalise a raw tag value to a flat list.

        With unwrap_first, raw is treated as a list wrapping the real value
        (e.g. the MP4 track number, [(1, 5)]) and its first item is used.
        An empty input yields an empty list.
        """
        if unwrap_first:
            if not raw:
                return []
            raw = raw[0]
        if not isinstance(raw, (list, tuple)):
            raw = [raw]
        return list(raw)

    def __init__(self, file_name: Path):
        """Load file_name with mutagen and collect its tags.

        Raises:
            ValueError: if a standard tag can be found neither in the file's
                metadata nor in its file name.
        """
        self.m_file: MutagenFile = mutagen.File(file_name)
        self.m_tags: MutagenTags = self.m_file.tags
        self.file_name = str(file_name.name)
        self.format = file_name.suffix[1:]
        self.fallback = False

        # set default values for the tags, in case the file is missing any (or all!) of them
        self.tags: Dict[str, str] = self._tags_from_file_name(self.file_name)
        # set list_tags to the default tags in list form
        # i.e. for every tag, set list_tags[x] = [tags[x]]
        self.list_tags: Dict[str, List[str]] = {name: [value] for name, value in self.tags.items()}

        # formats without an entry in format_lookup (e.g. wav) can't be read here, so they rely on the file name
        tag_family = self.format_lookup.get(self.format)
        if self.m_tags is None or tag_family is None:
            self.fallback = True
        else:
            for standard_name, tag_set in self.tag_lookup.items():
                tag = tag_set[tag_family]
                if tag not in self.m_tags:
                    print(f"{tag} not in self.m_tags")
                    self.fallback = True
                    continue

                # every tag in the MP4 file (from what i can tell) is a list
                # this includes the track number tag, which is a tuple of ints in a list.
                # because every other format is either a non-list, or a list of non-lists, we need to account for this
                # case (a list of lists of non-lists) specially, by turning it into a list of non-lists.
                # this must be keyed on the standard name: by now `tag` holds the container key ("trkn")
                is_mp4_track = self.format == "m4a" and standard_name == "track"
                raw_values = self._as_value_list(self.m_tags[tag], unwrap_first=is_mp4_track)
                if not raw_values:
                    # the tag exists but holds nothing, which is no better than it being absent
                    self.fallback = True
                    continue

                # convert the list of strings/ID3 frames/ints/whatevers to sanitised strings
                value_list = [sanitise(str(val)) for val in raw_values]
                self.tags[standard_name] = value_list[0]
                self.list_tags[standard_name] = value_list

        missing = [name for name in self.tag_lookup if name not in self.tags]
        if missing:
            raise ValueError(
                f"Could not determine {', '.join(missing)} for '{self.file_name}': "
                "not present in the file's metadata, and the file name is not of the form "
                "'Artist - Album - 01 Title.ext'."
            )

    def get_target_name(self, zeroes: int) -> str:
        """Return "<track> <title>.<format>", with the track number zero-padded to at least `zeroes` digits."""
        return f"{self.tags['track'].zfill(zeroes)} {self.tags['title']}.{self.format}"

    def has_cover(self) -> bool:
        """Report whether the song already has embedded cover art.

        Raises:
            NotImplementedError: for formats other than ogg, flac, mp3 and m4a.
        """
        if self.format == "flac":
            return len(self.m_file.pictures) != 0
        if self.format not in ("ogg", "mp3", "m4a"):
            raise NotImplementedError("Song format not yet implemented.")
        if self.m_tags is None:
            # no tag container at all, so there is nowhere a cover could be stored
            return False
        if self.format == "ogg":
            return "metadata_block_picture" in self.m_tags and len(self.m_tags["metadata_block_picture"]) != 0
        if self.format == "mp3":
            # only a front cover counts; other APIC picture types are ignored
            return any(apic.type == PictureType.COVER_FRONT for apic in self.m_tags.getall("APIC"))
        return 'covr' in self.m_tags and len(self.m_tags['covr']) != 0

    def set_cover(self, embed_cover: "Union[Picture, APIC, MP4Cover]"):
        """Embed embed_cover into the song and save the file.

        embed_cover must be the cover object matching the song's format
        (Picture for ogg/flac, APIC for mp3, MP4Cover for m4a). Other formats
        are saved without any cover being added.
        """
        if self.format in ("ogg", "mp3", "m4a") and self.m_tags is None:
            # the cover lives inside the tag container for these formats, so create one if the file has none
            self.m_file.add_tags()
            self.m_tags = self.m_file.tags

        # embed cover art
        if self.format == "ogg":
            self.m_tags["metadata_block_picture"] = [b64encode(embed_cover.write()).decode("ascii")]
        elif self.format == "flac":
            self.m_file.clear_pictures()
            self.m_file.add_picture(embed_cover)
        elif self.format == "mp3":
            self.m_tags.add(embed_cover)
        elif self.format == "m4a":
            self.m_tags['covr'] = [embed_cover]

        self.m_file.save()

    def __getitem__(self, item):
        """Return the first value of the standard tag `item` (shorthand for self.tags[item])."""
        return self.tags[item]
def log(message: str, importance: int = 0): def log(message: str, importance: int = 0):
if not args.quiet or importance > 0: if not args.quiet or importance > 0:
@ -55,42 +146,42 @@ def die(message: str, code: int = 1):
print(message) print(message)
sys.exit(code) sys.exit(code)
def get_tag(mut_song: MutagenFile, tag: str, allow_list: bool = False, allow_sanitising: bool = True)\ # def get_tag(mut_song: MutagenFile, tag: str, allow_list: bool = False, allow_sanitising: bool = True)\
-> Union[str, List[str]]: # -> Union[str, List[str]]:
if isinstance(mut_song, MP3): # if isinstance(mut_song, MP3):
tag = vorbis_to_id3[tag] # tag = vorbis_to_id3[tag]
tag_list = mut_song.tags.getall(tag) # tag_list = mut_song.tags.getall(tag)
#
elif isinstance(mut_song, MP4): # elif isinstance(mut_song, MP4):
# every tag in the MP4 file (from what i can tell) is a list # # every tag in the MP4 file (from what i can tell) is a list
# this includes the track number tag, which is a list, containing a single tuple, containing two ints (track, total) # # this includes the track number tag, which is a list, containing a single tuple, containing two ints (track, total)
# unless we account for this, tag_list will be set to [(1, 5)], and then converted to a string, resulting in # # unless we account for this, tag_list will be set to [(1, 5)], and then converted to a string, resulting in
# ['(1, 5)'], which (if not allow_list) will be returned as '(1, 5)', which is not exactly helpful. # # ['(1, 5)'], which (if not allow_list) will be returned as '(1, 5)', which is not exactly helpful.
tag = vorbis_to_itunes[tag] # tag = vorbis_to_itunes[tag]
if tag == 'trkn': # if tag == 'trkn':
# mut_song[tag] == [(1, 5)] # # mut_song[tag] == [(1, 5)]
# mut_song[tag][0] == (1, 5) # # mut_song[tag][0] == (1, 5)
tag_list = mut_song[tag][0] # tag_list = mut_song[tag][0]
else: # else:
tag_list = mut_song[tag] # tag_list = mut_song[tag]
#
else: # else:
if tag == "track": # if tag == "track":
tag = "tracknumber" # tag = "tracknumber"
tag = tag.replace("_", "") # tag = tag.replace("_", "")
tag_list = mut_song[tag] if isinstance(mut_song[tag], list) else [mut_song[tag]] # tag_list = mut_song[tag] if isinstance(mut_song[tag], list) else [mut_song[tag]]
#
# convert the list of strings/ID3 frames/ints/whatevers to strings # # convert the list of strings/ID3 frames/ints/whatevers to strings
tag_list = list(map(str, tag_list)) # tag_list = list(map(str, tag_list))
#
# sanitise everything # # sanitise everything
if allow_sanitising: # if allow_sanitising:
tag_list = [sanitise(tag) for tag in tag_list] # tag_list = [sanitise(tag) for tag in tag_list]
#
if allow_list: # if allow_list:
return tag_list # return tag_list
#
return tag_list[0] # return tag_list[0]
def has_cover(mut_song: MutagenFile): def has_cover(mut_song: MutagenFile):
if isinstance(mut_song, OggVorbis): if isinstance(mut_song, OggVorbis):
@ -120,8 +211,10 @@ def sanitise(in_str: str) -> str:
parser = argparse.ArgumentParser(usage='%(prog)s zip [options]', parser = argparse.ArgumentParser(usage='%(prog)s zip [options]',
formatter_class=argparse.RawTextHelpFormatter, formatter_class=argparse.RawTextHelpFormatter,
description="Extracts the given zip file downloaded from Bandcamp and organises it.", description="Extracts the given zip file downloaded from Bandcamp and organises it.",
epilog=f"Cover art can only be embedded in files of the following types: {', '.join(fully_supported).upper()}.\nIf " epilog=f"Cover art can only be embedded in files of the following types: {', '.join(fully_supported).upper()}.\n"
"the song is in any other format, %(prog)s will behave as though you passed '-c n', but will otherwise work normally.") "If the song is in any other format, %(prog)s will behave as though you passed '-c n', "
"but will otherwise work normally.\nIf the song files contain no metadata, %(prog)s will attempt "
"to parse the song's filenames to retrieve the artist, album, title, and track number.")
parser.add_argument('zip', help='The zip file to use.') parser.add_argument('zip', help='The zip file to use.')
parser.add_argument('-c', '--add-cover-images', dest='process_cover', default='w', choices=['n', 'a', 'w'], parser.add_argument('-c', '--add-cover-images', dest='process_cover', default='w', choices=['n', 'a', 'w'],
help="When to embed cover art into songs.\nOptions: [n]ever, [a]lways, [w]hen necessary.\nDefault: %(default)s") help="When to embed cover art into songs.\nOptions: [n]ever, [a]lways, [w]hen necessary.\nDefault: %(default)s")
@ -149,7 +242,7 @@ song_names: List[str] = []
with ZipFile(args.zip, 'r') as zip_file: with ZipFile(args.zip, 'r') as zip_file:
for file in zip_file.namelist(): for file in zip_file.namelist():
if re.match(r"^(.+ - ){2}\d{2,} .+\.(ogg|flac|alac|aiff|wav|mp3|opus|m4a|aac)$", file): if re.match(r"^(.+ - ){2}\d{2,} .+\.(ogg|flac|alac|aiff|wav|mp3|m4a)$", file):
# bandcamp zips contain songs with names formatted like "Artist - Album - 01 Song.mp3" # bandcamp zips contain songs with names formatted like "Artist - Album - 01 Song.mp3"
# for example, "King Crimson - In the Wake of Poseidon - 02 Pictures of a City.ogg" # for example, "King Crimson - In the Wake of Poseidon - 02 Pictures of a City.ogg"
# this regex should match only on those, and cut out (hopefully) all of the bonus material stuff, which shouldn't # this regex should match only on those, and cut out (hopefully) all of the bonus material stuff, which shouldn't
@ -198,7 +291,6 @@ if args.process_cover != 'n':
with open(temp_cover, 'r+b') as cover_file: with open(temp_cover, 'r+b') as cover_file:
data = cover_file.read() data = cover_file.read()
with Image.open(temp_cover) as image:
# it's really strange that the more annoying the file's metadata is, the *less* annoying it is to create cover art # it's really strange that the more annoying the file's metadata is, the *less* annoying it is to create cover art
# for it in mutagen. # for it in mutagen.
# vorbis: open standard, so easy to use that mutagen supplies a bunch of "easy" wrappers around other formats to # vorbis: open standard, so easy to use that mutagen supplies a bunch of "easy" wrappers around other formats to
@ -213,6 +305,7 @@ if args.process_cover != 'n':
if song_format in ["ogg", "flac"]: if song_format in ["ogg", "flac"]:
# i hate this # i hate this
with Image.open(io.BytesIO(data)) as image:
embed_cover = Picture() embed_cover = Picture()
embed_cover.data = data embed_cover.data = data
embed_cover.type = PictureType.COVER_FRONT embed_cover.type = PictureType.COVER_FRONT
@ -220,6 +313,7 @@ if args.process_cover != 'n':
embed_cover.width = image.size[0] embed_cover.width = image.size[0]
embed_cover.height = image.size[1] embed_cover.height = image.size[1]
embed_cover.depth = image.bits embed_cover.depth = image.bits
elif song_format == "mp3": elif song_format == "mp3":
# apparently APIC files get compressed on save if they are "large": # apparently APIC files get compressed on save if they are "large":
# https://mutagen.readthedocs.io/en/latest/api/id3_frames.html#mutagen.id3.APIC # https://mutagen.readthedocs.io/en/latest/api/id3_frames.html#mutagen.id3.APIC
@ -244,6 +338,7 @@ if args.process_cover != 'n':
desc='cover', desc='cover',
data=data data=data
) )
elif song_format == "m4a": elif song_format == "m4a":
embed_cover = MP4Cover( embed_cover = MP4Cover(
data=data, data=data,
@ -256,32 +351,20 @@ songs: Dict[str, str] = {}
zeroes = min(len(song_names), 2) zeroes = min(len(song_names), 2)
first_loop: bool = True first_loop: bool = True
for song in song_names: for song_name in song_names:
m: MutagenFile = mutagen.File(Path(tmp, song)) song = SongInfo(Path(tmp, song_name))
if first_loop: if first_loop:
# the first item in the artists list should be the album artist # the first item in the artists list should be the album artist
artists.append(get_tag(m, "album_artist")) artists.append(song["album_artist"])
album = get_tag(m, "album") album = song["album"]
first_loop = False first_loop = False
# add the song's artist(s) to the list # add the song's artist(s) to the list
map(artists.append, get_tag(m, "artist", allow_list=True)) map(artists.append, song.list_tags["artist"])
songs[song] = f"{str(get_tag(m, 'track')).zfill(zeroes)} {get_tag(m, 'title')}.{song_format}" songs[song_name] = song.get_target_name(zeroes)
if args.process_cover == 'a' or (args.process_cover == 'w' and has_cover(m) is False): if args.process_cover == 'a' or (args.process_cover == 'w' and song.has_cover() is False):
log("Embedding cover art...") song.set_cover(embed_cover)
# embed cover art
if song_format == "ogg":
m["metadata_block_picture"] = [b64encode(embed_cover.write()).decode("ascii")]
elif song_format == "flac":
m.clear_pictures()
m.add_picture(embed_cover)
elif song_format == "mp3":
m.tags.add(embed_cover)
elif song_format == "m4a":
m['covr'] = [embed_cover]
m.save()
# remove duplicate artists # remove duplicate artists
artists = list(dict.fromkeys(artists)) artists = list(dict.fromkeys(artists))