Normal file
Normal file
@ -0,0 +1,26 @@
#!/usr/bin/env python3
import re
import mutagen
from mutagen.flac import FLAC
from mutagen.mp3 import MP3
# noinspection PyProtectedMember
from mutagen.id3 import ID3Tags
# noinspection PyProtectedMember
from mutagen.mp4 import Tags
from mutagen.oggvorbis import OggVorbis
from typing import Dict, List, Union
# Maps a file extension (without the leading dot) to the name of the tag family
# used for that format. The family name is the inner key of SongInfo.tag_lookup.
format_lookup: Dict[str, str] = {
    "mp3": "id3",
    "m4a": "m4a",
    "ogg": "vorbis",
    "flac": "vorbis",
    "wav": "id3",
    "aiff": "id3",
}

# File extensions listed as fully supported; currently the same set as the
# keys of format_lookup.
fully_supported: List[str] = ["ogg", "flac", "mp3", "m4a", "wav", "aiff"]
# Type alias: any of the mutagen file classes this package annotates with.
MutagenFile = Union[MP3, FLAC, OggVorbis, mutagen.FileType]
# Type alias: any of the mutagen tag containers (ID3, MP4 or Vorbis comments).
MutagenTags = Union[ID3Tags, Tags, mutagen.oggvorbis.OggVCommentDict]
# Matches a single one of the characters ? \ / : | * " < >
# Callers replace each match with "_" (see sanitise() and SongInfo.__init__).
# NOTE(review): presumably these are the characters not allowed in file names - confirm.
sanitisation_regex = re.compile(r"[?\\/:|*\"<>]")
@ -5,6 +5,14 @@
# input: a .zip from bandcamp
# output: it organises it, adds cover art, puts it in the right place...
# pycharm tells me some of these classes shouldn't be imported because they're not declared in __all__.
# however, the mutagen docs show example code where someone creates a mutagen.flac.Picture by referring to it as
# Picture(), implying that they had imported mutagen.flac.Picture, and therefore i'm right and the computer is WRONG
from . import *
from bcao.song_info import SongInfo
import argparse
import io
import os
@ -13,182 +21,21 @@ import sys
import tempfile
import shutil
from os import path
from base64 import b64encode
from zipfile import ZipFile
from typing import Optional, List, Dict
from pathlib import Path
from typing import Optional, Union, List, Dict
# pycharm tells me some of these classes shouldn't be imported because they're not declared in __all__.
# however, the mutagen docs show example code where someone creates a mutagen.flac.Picture by referring to it as
# Picture(), implying that they had imported mutagen.flac.Picture, and therefore i'm right and the computer is WRONG
import mutagen
# noinspection PyProtectedMember
from mutagen.flac import Picture, FLAC
from mutagen.oggvorbis import OggVorbis
from mutagen.mp3 import MP3
from mutagen.mp4 import MP4, MP4Cover
from mutagen.flac import Picture
from mutagen.mp4 import MP4Cover
# noinspection PyProtectedMember
from mutagen.id3 import APIC, PictureType, Frame, TRCK, TPE1, TIT2, TALB, TPE2
from mutagen.id3 import APIC, PictureType
from PIL import Image
# File extensions (without the leading dot) listed as fully supported.
fully_supported: List[str] = ["ogg", "flac", "mp3", "m4a", "wav", "aiff"]
# Type alias: any of the mutagen file classes this module annotates with.
MutagenFile = Union[MP3, FLAC, OggVorbis, mutagen.FileType]
# Type alias: any of the mutagen tag containers (ID3, MP4 or Vorbis comments).
MutagenTags = Union[mutagen.id3.ID3Tags, mutagen.mp4.Tags, mutagen.oggvorbis.OggVCommentDict]
# Module-level state, declared here and rebound through `global` in main().
# args is read by log() (args.quiet) and sanitise() (args.sanitise).
args: argparse.Namespace
tmp_dir: tempfile.TemporaryDirectory # type: ignore
# Maps a file extension (without the leading dot) to the name of the tag family
# used for that format. The family name is the inner key of SongInfo.tag_lookup.
format_lookup: Dict[str, str] = {
    "mp3": "id3",
    "m4a": "m4a",
    "ogg": "vorbis",
    "flac": "vorbis",
    "wav": "id3",
    "aiff": "id3",
}
class SongInfo:
# Standard tag name -> tag family -> key under which that tag is stored in the
# file's tag container. The tag family is looked up via format_lookup.
tag_lookup: Dict[str, Dict[str, str]] = {
    "track": {"id3": "TRCK", "m4a": "trkn", "vorbis": "tracknumber"},
    "artist": {"id3": "TPE1", "m4a": "©ART", "vorbis": "artist"},
    "title": {"id3": "TIT2", "m4a": "©nam", "vorbis": "title"},
    "album": {"id3": "TALB", "m4a": "©alb", "vorbis": "album"},
    "album_artist": {"id3": "TPE2", "m4a": "aART", "vorbis": "albumartist"},
}
def __init__(self, file_name: Path):
self.m_file: MutagenFile = mutagen.File(file_name)
self.m_tags: MutagenTags = self.m_file.tags
self.file_name = str(
self.format = path.splitext(file_name)[1][1:]
self.fallback = False
if self.format not in format_lookup:
raise ValueError(f"Unsupported file type: {self.format}")
fallbacks = re.match(
r"^(?P<artist>.+) - (?P<album>.+) - (?P<track>\d{2,}) (?P<title>.+)\.(?:ogg|flac|aiff|wav|mp3|m4a)$",
if fallbacks is None:
die("Couldn't determine fallback tags!")
return # needed for mypy
# set default values for the tags, in case the file is missing any (or all!) of them
self.tags: Dict[str, str] = {
"track": str(int("track"))), # convert to int and str again to turn e.g. "01" into "1"
# set list_tags to the default tags in list form
# i.e. for every tag, set list_tags[x] = [tags[x]]
self.list_tags: Dict[str, List[str]] = dict((x[0], [x[1]]) for x in self.tags.items())
if self.m_tags is None:
# file has no tags
# generate empty tags
self.m_tags = self.m_file.tags
self.fallback = True
# write fallback tags to file
for standard_name, tag_set in self.tag_lookup.items():
tag = tag_set[format_lookup[self.format]]
self.m_tags[tag] = self.new_id3_tag(standard_name, self.tags[standard_name])
for standard_name, tag_set in self.tag_lookup.items():
tag = tag_set[format_lookup[self.format]]
if tag not in self.m_tags:
print(f"{tag} not in self.m_tags")
self.fallback = True
value_list = self.m_tags[tag]
if self.format == "m4a" and standard_name == "track":
# every tag in the MP4 file (from what i can tell) is a list
# this includes the track number tag, which is a tuple of ints in a list.
# because every other format is either a non-list, or a list of non-lists, we need to account for this case
# (a list of lists of non-lists) specially, by turning it into a list of non-lists.
value_list = value_list[0]
if not isinstance(value_list, (list, tuple)):
value_list = [value_list]
# convert the list of strings/ID3 frames/ints/whatevers to sanitised strings
value_list = [sanitise(str(val)) for val in value_list]
self.tags[standard_name] = value_list[0]
self.list_tags[standard_name] = value_list
@staticmethod
def new_id3_tag(tag: str, value: str) -> Frame:
    """Build the ID3 text frame that stores `value` for a standard tag name.

    This is a static method: it takes no `self`, and __init__ calls it as
    `self.new_id3_tag(standard_name, value)` with exactly two arguments.

    Args:
        tag: One of "track", "artist", "title", "album" or "album_artist".
        value: The text to store in the frame.

    Returns:
        A new TRCK, TPE1, TIT2, TALB or TPE2 frame created with encoding=3.

    Raises:
        ValueError: If `tag` is not one of the names listed above.
    """
    frame_types = {
        "track": TRCK,
        "artist": TPE1,
        "title": TIT2,
        "album": TALB,
        "album_artist": TPE2,
    }
    if tag not in frame_types:
        raise ValueError(f"Unknown tag type {tag}!")
    # encoding=3 is passed to every frame; NOTE(review): this should be
    # mutagen's UTF-8 encoding value - confirm against the mutagen docs.
    return frame_types[tag](encoding=3, text=value)
def get_target_name(self, zeroes: int) -> str:
    """Return the file name to give this song.

    The name is the track number left-padded with zeroes to a width of
    `zeroes`, a space, the title, a dot and the file format.

    Args:
        zeroes: Minimum width of the track number part.

    Returns:
        A name of the form "<track> <title>.<format>".
    """
    padded_track = self.tags['track'].zfill(zeroes)
    song_title = self.tags['title']
    return f"{padded_track} {song_title}.{self.format}"
def has_cover(self) -> bool:
if self.format == "flac":
# needs to be handled separately from ogg, as it doesn't use the vorbis tags for cover art for whatever reason
return len( != 0
if format_lookup[self.format] == "vorbis":
return "metadata_block_picture" in self.m_tags and len(self.m_tags["metadata_block_picture"]) != 0
if format_lookup[self.format] == "id3":
apics: List[APIC] = self.m_tags.getall("APIC")
for apic in apics:
if apic.type == PictureType.COVER_FRONT:
return True
return False
if format_lookup[self.format] == "m4a":
return 'covr' in self.m_tags and len(self.m_tags['covr']) != 0
raise NotImplementedError("Song format not yet implemented.")
def set_cover(self, to_embed: Union[Picture, APIC, MP4Cover]) -> None:
# embed cover art
if self.format == "flac":
elif format_lookup[self.format] == "vorbis":
self.m_tags["metadata_block_picture"] = [b64encode(to_embed.write()).decode("ascii")]
elif format_lookup[self.format] == "id3":
elif format_lookup[self.format] == "m4a":
self.m_tags['covr'] = [to_embed]
def __getitem__(self, item: str) -> str:
    """Return the value stored in self.tags under `item`.

    This makes `song["title"]` equivalent to `song.tags["title"]`.

    Raises:
        KeyError: If `item` is not a key of self.tags.
    """
    tag_value = self.tags[item]
    return tag_value
def log(message: str, importance: int = 0) -> None:
if not args.quiet or importance > 0:
@ -202,10 +49,9 @@ def die(message: str, code: int = 1) -> None:
def sanitise(in_str: str) -> str:
    """Return `in_str` with characters matched by sanitisation_regex replaced.

    The replacement only happens when the `sanitise` command-line option
    (args.sanitise) is set; otherwise the string is returned unchanged.

    Args:
        in_str: The text to sanitise.

    Returns:
        `in_str` with every match of sanitisation_regex replaced by "_", or
        `in_str` itself when args.sanitise is falsy.
    """
    if args.sanitise:
        # Use the shared compiled pattern rather than a second inline copy
        # of the same character class.
        return sanitisation_regex.sub("_", in_str)
    return in_str
def main() -> None:
global args, tmp_dir
Normal file
Normal file
@ -0,0 +1,157 @@
from . import *
import re
from os import path
from typing import Union, List, Dict
from pathlib import Path
from base64 import b64encode
import mutagen
# noinspection PyProtectedMember
from mutagen.flac import Picture
from mutagen.mp4 import MP4Cover
# noinspection PyProtectedMember
from mutagen.id3 import APIC, PictureType, Frame, TRCK, TPE1, TIT2, TALB, TPE2
class FallbackError(Exception):
    """Raised by SongInfo.__init__ when fallback tags cannot be determined.

    This happens when the fallback-tag regular expression match returns None.
    """
class SongInfo:
# Standard tag name -> tag family -> key under which that tag is stored in the
# file's tag container. The tag family is looked up via format_lookup.
tag_lookup: Dict[str, Dict[str, str]] = {
    "track": {"id3": "TRCK", "m4a": "trkn", "vorbis": "tracknumber"},
    "artist": {"id3": "TPE1", "m4a": "©ART", "vorbis": "artist"},
    "title": {"id3": "TIT2", "m4a": "©nam", "vorbis": "title"},
    "album": {"id3": "TALB", "m4a": "©alb", "vorbis": "album"},
    "album_artist": {"id3": "TPE2", "m4a": "aART", "vorbis": "albumartist"},
}
def __init__(self, file_name: Path):
self.m_file: MutagenFile = mutagen.File(file_name)
self.m_tags: MutagenTags = self.m_file.tags
self.file_name = str(
self.format = path.splitext(file_name)[1][1:]
self.fallback = False
if self.format not in format_lookup:
raise ValueError(f"Unsupported file type: {self.format}")
fallbacks = re.match(
r"^(?P<artist>.+) - (?P<album>.+) - (?P<track>\d{2,}) (?P<title>.+)\.(?:ogg|flac|aiff|wav|mp3|m4a)$",
if fallbacks is None:
raise FallbackError("Couldn't determine fallback tags!")
# set default values for the tags, in case the file is missing any (or all!) of them
self.tags: Dict[str, str] = {
"track": str(int("track"))), # convert to int and str again to turn e.g. "01" into "1"
# set list_tags to the default tags in list form
# i.e. for every tag, set list_tags[x] = [tags[x]]
self.list_tags: Dict[str, List[str]] = dict((x[0], [x[1]]) for x in self.tags.items())
if self.m_tags is None:
# file has no tags
# generate empty tags
self.m_tags = self.m_file.tags
self.fallback = True
# write fallback tags to file
for standard_name, tag_set in self.tag_lookup.items():
tag = tag_set[format_lookup[self.format]]
self.m_tags[tag] = self.new_id3_tag(standard_name, self.tags[standard_name])
for standard_name, tag_set in self.tag_lookup.items():
tag = tag_set[format_lookup[self.format]]
if tag not in self.m_tags:
print(f"{tag} not in self.m_tags")
self.fallback = True
value_list = self.m_tags[tag]
if self.format == "m4a" and standard_name == "track":
# every tag in the MP4 file (from what i can tell) is a list
# this includes the track number tag, which is a tuple of ints in a list.
# because every other format is either a non-list, or a list of non-lists, we need to account for this case
# (a list of lists of non-lists) specially, by turning it into a list of non-lists.
value_list = value_list[0]
if not isinstance(value_list, (list, tuple)):
value_list = [value_list]
# convert the list of strings/ID3 frames/ints/whatevers to sanitised strings
value_list = [re.sub(sanitisation_regex, "_", str(val)) for val in value_list]
self.tags[standard_name] = value_list[0]
self.list_tags[standard_name] = value_list
@staticmethod
def new_id3_tag(tag: str, value: str) -> Frame:
    """Build the ID3 text frame that stores `value` for a standard tag name.

    This is a static method: it takes no `self`, and __init__ calls it as
    `self.new_id3_tag(standard_name, value)` with exactly two arguments.

    Args:
        tag: One of "track", "artist", "title", "album" or "album_artist".
        value: The text to store in the frame.

    Returns:
        A new TRCK, TPE1, TIT2, TALB or TPE2 frame created with encoding=3.

    Raises:
        ValueError: If `tag` is not one of the names listed above.
    """
    frame_types = {
        "track": TRCK,
        "artist": TPE1,
        "title": TIT2,
        "album": TALB,
        "album_artist": TPE2,
    }
    if tag not in frame_types:
        raise ValueError(f"Unknown tag type {tag}!")
    # encoding=3 is passed to every frame; NOTE(review): this should be
    # mutagen's UTF-8 encoding value - confirm against the mutagen docs.
    return frame_types[tag](encoding=3, text=value)
def get_target_name(self, zeroes: int) -> str:
    """Return the file name to give this song.

    The name is the track number left-padded with zeroes to a width of
    `zeroes`, a space, the title, a dot and the file format.

    Args:
        zeroes: Minimum width of the track number part.

    Returns:
        A name of the form "<track> <title>.<format>".
    """
    padded_track = self.tags['track'].zfill(zeroes)
    song_title = self.tags['title']
    return f"{padded_track} {song_title}.{self.format}"
def has_cover(self) -> bool:
if self.format == "flac":
# needs to be handled separately from ogg, as it doesn't use the vorbis tags for cover art for whatever reason
return len( != 0
if format_lookup[self.format] == "vorbis":
return "metadata_block_picture" in self.m_tags and len(self.m_tags["metadata_block_picture"]) != 0
if format_lookup[self.format] == "id3":
apics: List[APIC] = self.m_tags.getall("APIC")
for apic in apics:
if apic.type == PictureType.COVER_FRONT:
return True
return False
if format_lookup[self.format] == "m4a":
return 'covr' in self.m_tags and len(self.m_tags['covr']) != 0
raise NotImplementedError("Song format not yet implemented.")
def set_cover(self, to_embed: Union[Picture, APIC, MP4Cover]) -> None:
# embed cover art
if self.format == "flac":
elif format_lookup[self.format] == "vorbis":
self.m_tags["metadata_block_picture"] = [b64encode(to_embed.write()).decode("ascii")]
elif format_lookup[self.format] == "id3":
elif format_lookup[self.format] == "m4a":
self.m_tags['covr'] = [to_embed]
def __getitem__(self, item: str) -> str:
    """Return the value stored in self.tags under `item`.

    This makes `song["title"]` equivalent to `song.tags["title"]`.

    Raises:
        KeyError: If `item` is not a key of self.tags.
    """
    tag_value = self.tags[item]
    return tag_value
Reference in a new issue