check against tag format type instead of file extension
sorta like, "if tag_format == 'id3'" rather than "if song_format == ['mp3', 'wav', 'aiff']"
2020-10-28 22:32:20 +10:00
remove unneeded file extension 2020-10-28 01:57:49 +10:00
turns out i didn't need to do anything to add alac support - they work the same as aac m4a files do. although i did find and fix a bug in the m4a handling so that's good at least 0uo
added project files, aiff support 2020-10-26 23:55:06 +10:00
my py dot ini 2020-10-26 23:39:36 +10:00
wav support! 2020-10-26 23:34:04 +10:00
mypy strict support! 2020-10-26 23:05:18 +10:00
code cleanup 2020-10-26 22:44:33 +10:00
put everything in main(), zero mypy issues 2020-10-26 22:41:01 +10:00
mypy integration 2020-10-26 22:35:27 +10:00
fairly major restructuring that should make future format support a lot easier, support for songs with partially or fully incomplete metadata 2020-10-17 23:54:50 +10:00
m4a support 2020-10-17 21:02:52 +10:00
mp3 support! more helpful interface! better code! yahoo!! 2020-10-17 19:42:38 +10:00
flac support and an argument for choosing when to embed cover art 2020-10-17 17:26:25 +10:00
cleaner code, type annotations, and it even runs (slightly) faster! =u= 2020-10-17 16:23:55 +10:00
support for png covers with transparency, other fixes 2020-10-17 13:34:56 +10:00
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="bcao (ceres)" type="PythonConfigurationType" factoryName="Python">
<module name="bcao" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<env name="PYTHONUNBUFFERED" value="1" />
<option name="SDK_HOME" value="$PROJECT_DIR$/venv/bin/python" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/" />
<option name="PARAMETERS" value="&quot;A Cerulean State - As if I remembered; -d &quot;/tmp/out/&quot;" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />

<component name="ProjectRunConfigurationManager">
<configuration default="false" name="bcao (io)" type="PythonConfigurationType" factoryName="Python">
<module name="bcao" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<env name="PYTHONUNBUFFERED" value="1" />
<option name="SDK_HOME" value="$PROJECT_DIR$/venv/bin/python" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/" />
<option name="PARAMETERS" value="&quot;Braxton Burks - Time &amp;; -d &quot;$USER_HOME$/Documents&quot;" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />

<component name="ProjectRunConfigurationManager">
<configuration default="false" name="mypy" type="PythonConfigurationType" factoryName="Python">
<module name="bcao" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<env name="PYTHONUNBUFFERED" value="1" />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/venv/bin/mypy" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />

# output: it organises it, adds cover art, puts it in the right place...
import argparse
import base64
import io
import os
import re
import subprocess
import sys
import tempfile
import shutil
from zipfile import ZipFile, BadZipFile
from os import path
from base64 import b64encode
from zipfile import ZipFile
from pathlib import Path
from typing import Optional, Union, List, Dict
# pycharm tells me some of these classes shouldn't be imported because they're not declared in __all__.
# however, the mutagen docs show example code where someone creates a mutagen.flac.Picture by referring to it as
# Picture(), implying that they had imported mutagen.flac.Picture, and therefore i'm right and the computer is WRONG
import mutagen
from mutagen.flac import Picture
from mutagen import id3
# noinspection PyProtectedMember
from mutagen.flac import Picture, FLAC
from mutagen.oggvorbis import OggVorbis
from mutagen.mp3 import MP3
from mutagen.mp4 import MP4, MP4Cover
# noinspection PyProtectedMember
from mutagen.id3 import APIC, PictureType, Frame, TRCK, TPE1, TIT2, TALB, TPE2
from PIL import Image
def log(message: str, importance: int = 0):
fully_supported: List[str] = ["ogg", "flac", "mp3", "m4a", "wav", "aiff"]
MutagenFile = Union[MP3, FLAC, OggVorbis, mutagen.FileType]
MutagenTags = Union[mutagen.id3.ID3Tags, mutagen.mp4.Tags, mutagen.oggvorbis.OggVCommentDict]
args: argparse.Namespace
tmp_dir: tempfile.TemporaryDirectory # type: ignore
format_lookup: Dict[str, str] = {
"mp3": "id3",
"m4a": "m4a",
"ogg": "vorbis",
"flac": "vorbis",
"wav": "id3",
"aiff": "id3"
class SongInfo:
tag_lookup: Dict[str, Dict[str, str]] = {
"track": {"id3": "TRCK", "m4a": "trkn", "vorbis": "tracknumber"},
"artist": {"id3": "TPE1", "m4a": "©ART", "vorbis": "artist"},
"title": {"id3": "TIT2", "m4a": "©nam", "vorbis": "title"},
"album": {"id3": "TALB", "m4a": "©alb", "vorbis": "album"},
"album_artist": {"id3": "TPE2", "m4a": "aART", "vorbis": "albumartist"}
def __init__(self, file_name: Path):
self.m_file: MutagenFile = mutagen.File(file_name)
self.m_tags: MutagenTags = self.m_file.tags
self.file_name = str(
self.format = path.splitext(file_name)[1][1:]
self.fallback = False
if self.format not in format_lookup:
raise ValueError(f"Unsupported file type: {self.format}")
fallbacks = re.match(
r"^(?P<artist>.+) - (?P<album>.+) - (?P<track>\d{2,}) (?P<title>.+)\.(?:ogg|flac|aiff|wav|mp3|m4a)$",
if fallbacks is None:
die("Couldn't determine fallback tags!")
return # needed for mypy
# set default values for the tags, in case the file is missing any (or all!) of them
self.tags: Dict[str, str] = {
"track": str(int("track"))), # convert to int and str again to turn e.g. "01" into "1"
# set list_tags to the default tags in list form
# i.e. for every tag, set list_tags[x] = [tags[x]]
self.list_tags: Dict[str, List[str]] = dict((x[0], [x[1]]) for x in self.tags.items())
if self.m_tags is None:
# file has no tags
# generate empty tags
self.m_tags = self.m_file.tags
self.fallback = True
# write fallback tags to file
for standard_name, tag_set in self.tag_lookup.items():
tag = tag_set[format_lookup[self.format]]
self.m_tags[tag] = self.new_id3_tag(standard_name, self.tags[standard_name])
for standard_name, tag_set in self.tag_lookup.items():
tag = tag_set[format_lookup[self.format]]
if tag not in self.m_tags:
print(f"{tag} not in self.m_tags")
self.fallback = True
value_list = self.m_tags[tag]
if self.format == "m4a" and standard_name == "track":
# every tag in the MP4 file (from what i can tell) is a list
# this includes the track number tag, which is a tuple of ints in a list.
# because every other format is either a non-list, or a list of non-lists, we need to account for this case
# (a list of lists of non-lists) specially, by turning it into a list of non-lists.
value_list = value_list[0]
if not isinstance(value_list, (list, tuple)):
value_list = [value_list]
# convert the list of strings/ID3 frames/ints/whatevers to sanitised strings
value_list = [sanitise(str(val)) for val in value_list]
self.tags[standard_name] = value_list[0]
self.list_tags[standard_name] = value_list
def new_id3_tag(tag: str, value: str) -> Frame:
    """Build the ID3 text frame that stores `value` for a standard tag name.

    `tag` is one of "track", "artist", "title", "album" or "album_artist";
    the matching frame class (TRCK, TPE1, TIT2, TALB or TPE2) is instantiated
    with `encoding=3` and `text=value`.

    Raises ValueError for any other tag name.
    """
    # NOTE(review): __init__ calls this as self.new_id3_tag(name, value) even
    # though there is no `self` parameter - presumably this is meant to be a
    # @staticmethod; confirm the decorator is present in the real file.
    frame_classes = (
        ("track", TRCK),
        ("artist", TPE1),
        ("title", TIT2),
        ("album", TALB),
        ("album_artist", TPE2),
    )
    for standard_name, frame_class in frame_classes:
        if tag == standard_name:
            return frame_class(encoding=3, text=value)
    raise ValueError(f"Unknown tag type {tag}!")
def get_target_name(self, zeroes: int) -> str:
    """Return the file name this song should have in the destination folder.

    The name is "<track> <title>.<format>", where the track number is
    left-padded with zeroes to at least `zeroes` characters.
    """
    padded_track = self.tags['track'].zfill(zeroes)
    song_title = self.tags['title']
    return f"{padded_track} {song_title}.{self.format}"
def has_cover(self) -> bool:
if self.format == "flac":
# needs to be handled separately from ogg, as it doesn't use the vorbis tags for cover art for whatever reason
return len( != 0
if format_lookup[self.format] == "vorbis":
return "metadata_block_picture" in self.m_tags and len(self.m_tags["metadata_block_picture"]) != 0
if format_lookup[self.format] == "id3":
apics: List[APIC] = self.m_tags.getall("APIC")
for apic in apics:
if apic.type == PictureType.COVER_FRONT:
return True
return False
if format_lookup[self.format] == "m4a":
return 'covr' in self.m_tags and len(self.m_tags['covr']) != 0
raise NotImplementedError("Song format not yet implemented.")
def set_cover(self, to_embed: Union[Picture, APIC, MP4Cover]) -> None:
# embed cover art
if self.format == "flac":
elif format_lookup[self.format] == "vorbis":
self.m_tags["metadata_block_picture"] = [b64encode(to_embed.write()).decode("ascii")]
elif format_lookup[self.format] == "id3":
elif format_lookup[self.format] == "m4a":
self.m_tags['covr'] = [to_embed]
# dictionary-style access to the song's resolved tags: song["album"] returns self.tags["album"].
# raises KeyError when `item` is not one of the keys of self.tags.
def __getitem__(self, item: str) -> str:
return self.tags[item]
def log(message: str, importance: int = 0) -> None:
if not args.quiet or importance > 0:
def die(message: str, code: int = 1):
def die(message: str, code: int = 1) -> None:
if tmp_dir is not None:
def get_tag(m: mutagen.FileType, tag: str):
    """Return a single value for `tag` read from the mutagen file object `m`.

    "title" and "album" return the first value stored under that key, passed
    through sanitise(). "track" returns the first "tracknumber" value as an
    int. Any other name is used as the lookup key unchanged.

    Raises whatever `m[...]` raises when the key is missing, IndexError when
    the key holds an empty list, and ValueError when the track number is not
    numeric.
    """
    if tag == "title":
        return sanitise(m['title'][0])
    if tag == "track":
        # some taggers write the track as "number/total" (e.g. "3/12"); int() would
        # reject that, so only the part before the slash is converted.
        return int(str(m['tracknumber'][0]).split("/")[0])
    if tag == "album":
        return sanitise(m['album'][0])
    # may as well try
    value = m[tag]
    if isinstance(value, (list, tuple)):
        # multi-valued tags come back as a list, but sanitise() works on one string
        value = value[0]
    return sanitise(value)
def sanitise(input: str):
def sanitise(in_str: str) -> str:
if args.sanitise:
return re.sub(r"[?\\/:|*\"<>]", "_", input)
return input
parser = argparse.ArgumentParser(description="Extracts the given zip file downloaded from Bandcamp and organises it.")
parser.add_argument('zip', help='The zip file to use')
parser.add_argument('-d', '--destination', dest='destination', default='/home/lynne/Music/Music/',
help="The directory to organise the music into. Default: /home/lynne/Music/Music/")
parser.add_argument('-q', '--quiet', dest='quiet', action='store_true',
help='Disable non-error output and assume default artist name.')
parser.add_argument('-u', '--unsanitised', dest='sanitise', action='store_false',
help="Don't replace NTFS-unsafe characters with underscores. Not recommended.")
parser.add_argument('-t', '--threshold', dest='threshold', nargs=1, default=300,
help="Maximum acceptable cover art file size in kilobytes. Default: 300")
args = parser.parse_args()
if not os.path.exists(
die(f"Couldn't find {}.", 2)
tmp = tempfile.TemporaryDirectory()
cover = None
song_names = []
with ZipFile(, 'r') as zip_file:
for file in zip_file.namelist():
if re.match(r"^(.+ - ){2}\d{2,} .+\.(ogg|flac|alac|aiff|wav|mp3|opus|m4a|aac|oga)$", file):
# bandcamp zips contain songs with names formatted like "Artist - Album - 01 Song.mp3"
# for example, "King Crimson - In the Wake of Poseidon - 02 Pictures of a City.ogg"
# this regex should match only on those, and cut out (hopefully) all of the bonus material stuff, which shouldn't
# be added to the music folder (since i sync that to my phone, and "making of" videos are cool, but i don't
# have the space for it).
zip_file.extract(file, tmp)
elif cover is None and re.match(r"cover\.(jpe?g|png)", file):
cover = file
zip_file.extract(file, tmp)
# save the format of the songs (ogg, mp3, etc)
# we'll need this to know what metadata format we should write
song_format = os.path.splitext(song_names[0])[1][1:]
log("Resizing album art to embed in songs...")
with, cover)) as image:
temp_cover = os.path.join(tmp, "cover-lq.jpg"), quality=85, optimize=True)
image_smol = image
# keep scaling the image down to 90% of its previous dimensions until the file is no larger than {args.threshold} kilobytes
while os.path.getsize(temp_cover) / 1024 > args.threshold:
image_smol = image_smol.resize((round(image_smol.size[0] * 0.9), round(image_smol.size[1] * 0.9))), quality=85, optimize=True)
if image_smol.size[0] == 10:
die("Failed to resize image")
return re.sub(r"[?\\/:|*\"<>]", "_", in_str)
return in_str
# read the image file to get the file's raw data
with open(temp_cover, 'r+b') as cover_file:
data =
def main() -> None:
global args, tmp_dir
with as image:
if song_format == "ogg":
# i hate this
cover = Picture() = data
cover.type = mutagen.id3.PictureType.COVER_FRONT
cover.mime = "image/jpeg"
cover.width = image.size[0]
cover.height = image.size[1]
cover.depth = image.bits
cover = base64.b64encode(cover.write()).decode("ascii")
# noinspection PyTypeChecker
parser = argparse.ArgumentParser(usage='%(prog)s zip [options]',
description="Extracts the given zip file downloaded from Bandcamp and organises it.",
epilog=f"Cover art can only be embedded in files of the following types: {', '.join(fully_supported).upper()}.\n"
"If the song is in any other format, %(prog)s will behave as though you passed '-c n', "
"but will otherwise work normally.\nIf the song files contain no metadata, %(prog)s will attempt "
"to parse the song's filenames to retrieve the artist, album, title, and track number.")
parser.add_argument('zip', help='The zip file to use.')
parser.add_argument('-c', '--add-cover-images', dest='process_cover', default='w', choices=['n', 'a', 'w'],
help="When to embed cover art into songs.\nOptions: [n]ever, [a]lways, [w]hen necessary.\nDefault: %(default)s")
parser.add_argument('-d', '--destination', dest='destination', default='/home/lynne/Music/Music/',
help="The directory to organise the music into.\nDefault: %(default)s")
parser.add_argument('-q', '--quiet', dest='quiet', action='store_true',
help='Disable non-error output and assume default artist name.')
parser.add_argument('-u', '--unsanitised', dest='sanitise', action='store_false',
help="Don't replace NTFS-unsafe characters with underscores. Not recommended.")
parser.add_argument('-t', '--threshold', dest='threshold', nargs=1, default=300,
help="Maximum acceptable file size for cover art, in kilobytes.\nDefault: %(default)s")
args = parser.parse_args()
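# convert args.threshold from kilobytes to bytes
# NOTE(review): -t is declared with nargs=1 and no type=int, so a user-supplied value arrives as a
# one-element list of str; the multiplication below would then repeat that list 1024 times instead of
# scaling a number. only the default (300) is an int - confirm and fix the argument definition.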
args.threshold *= 1024
if not path.exists(
die(f"Couldn't find {}.", 2)
tmp_dir = tempfile.TemporaryDirectory()
tmp: str =
cover: Optional[str] = None
song_names: List[str] = []
with ZipFile(, 'r') as zip_file:
for file in zip_file.namelist():
if re.match(r"^(.+ - ){2}\d{2,} .+\.(ogg|flac|aiff|wav|mp3|m4a)$", file):
# bandcamp zips contain songs with names formatted like "Artist - Album - 01 Song.mp3"
# for example, "King Crimson - In the Wake of Poseidon - 02 Pictures of a City.ogg"
# this regex should match only on those, and cut out (hopefully) all of the bonus material stuff, which shouldn't
# be added to the music folder (since i sync that to my phone, and "making of" videos are cool, but i don't
# have the space for it).
zip_file.extract(file, tmp)
elif cover is None and re.match(r"cover\.(jpe?g|png)", file):
cover = file
zip_file.extract(file, tmp)
# save the format of the songs (ogg, mp3, etc)
# we'll need this to know what metadata format we should write
song_format: str = path.splitext(song_names[0])[1][1:]
if song_format not in fully_supported:
log(f"Format {song_format} is not fully supported - cover images will not be modified", 1)
args.process_cover = 'n'
artists = []
album = None
songs = {}
zeroes = min(len(song_names), 2)
for song in song_names:
ext = os.path.splitext(song)[1:]
m = mutagen.File(os.path.join(tmp, song))
# add the song's artist to the list, if it hasn't been seen yet
[artists.append(sanitise(artist)) for artist in m['artist'] if artist not in artists]
songs[song] = f"{str(get_tag(m, 'track')).zfill(zeroes)} {get_tag(m, 'title')}.{song_format}"
album = get_tag(m, "album")
# embed cover art
if song_format == "ogg":
m["metadata_block_picture"] = [cover]
if cover is None:
die("Unable to find cover image!")
return # needed for mypy
if len(artists) > 1 and "Various Artists" not in artists:
artists.append("Various Artists")
if args.process_cover != 'n':
log("Resizing album art to embed in songs...")
with, cover))) as image:
temp_cover: Path = Path(tmp, "cover-lq.jpg")
artist = None
while artist is None:
log("Artist directory:")
for i in range(len(artists)):
log(f"{i+1}) {artists[i]}")
log(f"{len(artists) + 1}) Custom...")
choice = "1" if args.quiet else input("> ")
if choice.isdecimal():
if int(choice) == len(artists) + 1:
log("Enter the name to use.")
if image.mode in ["RGBA", "P"]:
# remove alpha channel
image = image.convert("RGB"), quality=85, optimize=True)
image_smol = image
while path.getsize(temp_cover) > args.threshold:
# keep scaling the image down to 90% of its previous dimensions until the file size is within args.threshold (which is in bytes by this point)
ratio = 0.9
if path.getsize(temp_cover) > args.threshold * 2:
# if the file size of the cover is more than double the threshold, scale the image to 80% of its previous dimensions instead
ratio = 0.8
image_smol = image_smol.resize([round(n * ratio) for n in image_smol.size]), quality=85, optimize=True)
if image_smol.size[0] == 10:
# something very bad has happened here
die("Failed to resize image")
# read the image file to get its raw data
with open(temp_cover, 'r+b') as cover_file:
data =
# it's really strange that the more annoying the file's metadata is, the *less* annoying it is to create cover art
# for it in mutagen.
# vorbis: open standard, so easy to use that mutagen supplies a bunch of "easy" wrappers around other tag formats to
# make them work more like vorbis comments.
# cover-annoy-o-meter: high. mutagen requires you to specify the width, height, colour depth, etc etc
# id3: well documented, but rather cryptic (which is more understandable, "album_artist" or "TPE2"?).
# cover-annoy-o-meter: not bad at all - at least you get a constructor this time - although it is kinda annoying
# that you have to specify the file encoding, and how you need both a type and a desc.
# m4a: scarce documentation, closed format, half reverse engineered from whatever itunes is doing, exists pretty
# much exclusively in the realm of apple stuff.
# cover-annoy-o-meter: all you need is the file data and the format type.
if format_lookup[song_format] == "vorbis":
# i hate this
with as image:
embed_cover = Picture() = data
embed_cover.type = PictureType.COVER_FRONT
embed_cover.mime = "image/jpeg"
embed_cover.width = image.size[0]
embed_cover.height = image.size[1]
embed_cover.depth = image.bits
elif format_lookup[song_format] == "id3":
# apparently APIC files get compressed on save if they are "large":
# i don't know what that means (lossless text compression? automatic JPEG conversion?) and i don't know if or how
# i can disable it, which kinda sucks...
# if, for example, mutagen's threshold for "large" is 200KiB, then any file over that size would be reduced to
# below it, either by resizing or JPEG quality reduction or whatever, making the -t flag useless for values above
# 200 when saving MP3 files.
# the most i can tell is that mutagen uses zlib compression in some way or another for reading ID3 tags:
# however, it seems not to use zlib when *writing* tags, citing itunes incompatibility, in particular with APIC:
# given that this is the only reference to compression that i could find in the source code, and it says that
# ID3v2 compression was disabled for itunes compatibility, i'm going to assume/hope it doesn't do anything weird.
# it's worth noting that mutagen has no dependencies outside of python's stdlib, which (currently) doesn't contain
# any method for JPEG compression, so i'm 99% sure the files won't be mangled.
embed_cover = APIC(
encoding=3, # utf-8
elif format_lookup[song_format] == "m4a":
embed_cover = MP4Cover(
artists: List[str] = []
album: str = "Unknown album" # it SHOULD be impossible for this value to ever appear
songs: Dict[str, str] = {}
zeroes = min(len(song_names), 2)
first_loop: bool = True
for song_name in song_names:
song = SongInfo(Path(tmp, song_name))
if first_loop:
# the first item in the artists list should be the album artist
album = song["album"]
first_loop = False
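# add the song's artist(s) to the list
# NOTE(review): map() is lazy in python 3, so the line below builds an iterator that is never consumed and
# artists.append is never actually called - artists.extend(song.list_tags["artist"]) is probably what's intended.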
map(artists.append, song.list_tags["artist"])
songs[song_name] = song.get_target_name(zeroes)
if args.process_cover == 'a' or (args.process_cover == 'w' and song.has_cover() is False):
# remove duplicate artists
artists = list(dict.fromkeys(artists))
if len(artists) > 1 and "Various Artists" not in artists:
artists.append("Various Artists")
artist: Optional[str] = None
while artist is None:
log("Artist directory:")
for i, artist_name in enumerate(artists):
log(f"{i+1}) {artist_name}")
log(f"{len(artists) + 1}) Custom...")
user_choice: str = "1" if args.quiet else input("> ")
if user_choice.isdecimal():
choice: int = int(user_choice)
if choice == len(artists) + 1:
log("Enter the name to use:")
artist = input("> ")
artist = artists[choice - 1]
except KeyError:
log(f"Please choose a number between 1 and {len(artists) + 1}.")
artist = artists[int(choice) - 1]
except KeyError:
log(f"Please choose a number between 1 and {len(artists) + 1}.")
log(f"Please choose a number between 1 and {len(artists) + 1}")
log(f"Please choose a number between 1 and {len(artists) + 1}")
destination = os.path.join(args.destination, artist, album)
log(f"Moving files to {destination}...")
os.makedirs(destination, exist_ok = True)
for source_name, dest_name in songs.items():
shutil.move(os.path.join(tmp, source_name), os.path.join(destination, dest_name))
shutil.move(os.path.join(tmp, "cover.jpg"), os.path.join(destination, "cover.jpg"))
destination: Path = Path(args.destination, artist, album)
log(f"Moving files to \"{destination}\"...")
os.makedirs(destination, exist_ok=True)
for source_name, dest_name in songs.items():
shutil.move(str(Path(tmp, source_name)), str(Path(destination, dest_name)))
shutil.move(str(Path(tmp, cover)), str(Path(destination, cover)))
if __name__ == "__main__":

strict = True
ignore_missing_imports = True
ignore_missing_imports = True