#!/usr/bin/env python3

# BCAO - BandCamp Automatic Organiser
# copyright 2018-2019 @LynnearSoftware@fedi.lynnesbian.space
# Licensed under the GPLv3: https://www.gnu.org/licenses/gpl-3.0.html#content

# input: a .zip from bandcamp
# output: it organises it, adds cover art, puts it in the right place...

import argparse
import os
import re
import sys
import tempfile
import shutil
from os import path
from base64 import b64encode
from zipfile import ZipFile
from pathlib import Path
from typing import Optional, Union, List, Dict

import mutagen
from mutagen.flac import Picture, FLAC
from mutagen.oggvorbis import OggVorbis
from mutagen.mp3 import MP3
from mutagen.id3 import APIC, PictureType
from PIL import Image

# maps the generic tag names used by get_tag() to their ID3 frame names
vorbis_to_id3: Dict[str, str] = {
    "track": "TRCK",
    "artist": "TPE1",
    "title": "TIT2",
    "album": "TALB",
    "album_artist": "TPE2",
}
# formats that cover art can be embedded into
fully_supported: List[str] = ["ogg", "flac", "mp3"]
MutagenFile = Union[MP3, FLAC, OggVorbis, mutagen.FileType]


def log(message: str, importance: int = 0):
    """Print `message`, unless --quiet was given and `importance` is 0 or lower."""
    if not args.quiet or importance > 0:
        print(message)


def die(message: str, code: int = 1):
    """Print `message` and exit the program with status `code`."""
    print(message)
    sys.exit(code)


def get_tag(mut_song: MutagenFile, tag: str, allow_list: bool = False,
            allow_sanitising: bool = True) -> Union[str, List[str]]:
    """
    Read a tag from a song, using the same tag names regardless of file format.

    :param mut_song: the song to read from
    :param tag: one of the keys of `vorbis_to_id3` ("track", "artist", "title", "album", "album_artist")
    :param allow_list: return every value of the tag as a list, rather than just the first value
    :param allow_sanitising: pass every value through sanitise()
    :return: the first value of the tag, or all of its values if `allow_list` is set
    """
    if isinstance(mut_song, MP3):
        tag = vorbis_to_id3[tag]
        tag_list = [str(x) for x in mut_song.tags.getall(tag)]
    else:
        # non-MP3 files are looked up by name: "track" is stored as "tracknumber",
        # and names like "album_artist" are stored without the underscore
        if tag == "track":
            tag = "tracknumber"
        tag = tag.replace("_", "")
        tag_list = mut_song[tag] if isinstance(mut_song[tag], list) else [mut_song[tag]]

    # sanitise everything
    if allow_sanitising:
        tag_list = [sanitise(value) for value in tag_list]

    if allow_list:
        return tag_list
    return tag_list[0]


def has_cover(mut_song: MutagenFile):
    """Return True if the song already has cover art embedded (for MP3s, a front cover specifically)."""
    if isinstance(mut_song, OggVorbis):
        return "metadata_block_picture" in mut_song and len(mut_song["metadata_block_picture"]) != 0
    if isinstance(mut_song, FLAC):
        return len(mut_song.pictures) != 0
    if isinstance(mut_song, MP3):
        apics: List[APIC] = mut_song.tags.getall("APIC")
        return any(apic.type == PictureType.COVER_FRONT for apic in apics)
    return False


def sanitise(in_str: str) -> str:
    """Replace NTFS-unsafe characters in `in_str` with underscores, unless --unsanitised was given."""
    if args.sanitise:
        return re.sub(r"[?\\/:|*\"<>]", "_", in_str)
    return in_str


# noinspection PyTypeChecker
parser = argparse.ArgumentParser(
    usage='%(prog)s zip [options]',
    formatter_class=argparse.RawTextHelpFormatter,
    description="Extracts the given zip file downloaded from Bandcamp and organises it.",
    epilog=f"Cover art can only be embedded in files of the following types: {', '.join(fully_supported).upper()}.\nIf "
           "the music is in any other format, %(prog)s will behave as though you passed the flag '-c n'.")
parser.add_argument('zip', help='The zip file to use.')
parser.add_argument('-c', '--add-cover-images', dest='process_cover', default='w', choices=['n', 'a', 'w'],
                    help="When to embed cover art into songs.\nOptions: [n]ever, [a]lways, [w]hen necessary.\nDefault: %(default)s")
parser.add_argument('-d', '--destination', dest='destination', default='/home/lynne/Music/Music/',
                    help="The directory to organise the music into.\nDefault: %(default)s")
parser.add_argument('-q', '--quiet', dest='quiet', action='store_true',
                    help='Disable non-error output and assume default artist name.')
parser.add_argument('-u', '--unsanitised', dest='sanitise', action='store_false',
                    help="Don't replace NTFS-unsafe characters with underscores. Not recommended.")
# type=int (and no nargs=1) so that a user-supplied value is an integer, just like the default is.
# without it, "-t 500" would produce the list ["500"], and multiplying that by 1024 repeats the list.
parser.add_argument('-t', '--threshold', dest='threshold', type=int, default=300,
                    help="Maximum acceptable file size for cover art, in kilobytes.\nDefault: %(default)s")
args = parser.parse_args()

if args.threshold <= 0:
    parser.error("The threshold must be a positive number of kilobytes.")
# convert args.threshold to bytes
args.threshold *= 1024

if not path.exists(args.zip):
    die(f"Couldn't find {args.zip}.", 2)

log("Extracting...")
tmp_dir: tempfile.TemporaryDirectory = tempfile.TemporaryDirectory()
tmp: str = tmp_dir.name
cover: Optional[str] = None
song_names: List[str] = []

with ZipFile(args.zip, 'r') as zip_file:
    for file in zip_file.namelist():
        if re.match(r"^(.+ - ){2}\d{2,} .+\.(ogg|flac|alac|aiff|wav|mp3|opus|m4a|aac)$", file):
            # bandcamp zips contain songs with names formatted like "Artist - Album - 01 Song.mp3"
            # for example, "King Crimson - In the Wake of Poseidon - 02 Pictures of a City.ogg"
            # this regex should match only on those, and cut out (hopefully) all of the bonus material stuff, which
            # shouldn't be added to the music folder (since i sync that to my phone, and "making of" videos are cool,
            # but i don't have the space for it).
            song_names.append(file)
            zip_file.extract(file, tmp)
        elif cover is None and re.match(r"cover\.(jpe?g|png)", file):
            cover = file
            zip_file.extract(file, tmp)

if not song_names:
    # nothing in the zip matched the song regex, so there's nothing to organise
    die(f"Couldn't find any songs in {args.zip}.")

# save the format of the songs (ogg, mp3, etc)
# we'll need this to know what metadata format we should write
song_format: str = path.splitext(song_names[0])[1][1:]
if song_format not in fully_supported:
    log(f"Format {song_format} is not fully supported - cover images will not be modified", 1)
    args.process_cover = 'n'

if cover is None and args.process_cover != 'n':
    # without a cover image in the zip, there's nothing to embed
    log("No cover image found - cover images will not be modified", 1)
    args.process_cover = 'n'

if args.process_cover != 'n':
    log("Resizing album art to embed in songs...")
    with Image.open(str(Path(tmp, cover))) as image:
        temp_cover: Path = Path(tmp, "cover-lq.jpg")
        if image.mode in ["RGBA", "LA", "PA", "P"]:
            # remove alpha channel / palette, since JPEG can't store either
            image = image.convert("RGB")

        image.save(temp_cover, quality=85, optimize=True)
        image_smol = image

        while path.getsize(temp_cover) > args.threshold:
            # keep shrinking the image to 90% of its size until the file is no larger than the threshold
            ratio = 0.9
            if path.getsize(temp_cover) > args.threshold * 2:
                # if the file size of the cover is more than double the threshold, shrink it to 80% instead
                ratio = 0.8

            image_smol = image_smol.resize(tuple(max(1, round(n * ratio)) for n in image_smol.size))
            image_smol.save(temp_cover, quality=85, optimize=True)
            if min(image_smol.size) <= 10:
                # something very bad has happened here
                # (checking for "<= 10" rather than "== 10", as a resize step can skip right past 10, e.g. 11 -> 9)
                die("Failed to resize image")

    # read the image file to get its raw data
    with open(temp_cover, 'rb') as cover_file:
        data = cover_file.read()

    with Image.open(temp_cover) as image:
        if song_format in ["ogg", "flac"]:
            # i hate this
            embed_cover = Picture()
            embed_cover.data = data
            embed_cover.type = PictureType.COVER_FRONT
            embed_cover.mime = "image/jpeg"
            embed_cover.width = image.size[0]
            embed_cover.height = image.size[1]
            # the picture block's depth field is in bits per *pixel*, whereas image.bits is the number of bits
            # per channel, so multiply it by the number of channels
            embed_cover.depth = image.bits * len(image.getbands())

        elif song_format == "mp3":
            # apparently APIC files get compressed on save if they are "large":
            # https://mutagen.readthedocs.io/en/latest/api/id3_frames.html#mutagen.id3.APIC
            # i don't know what that means (lossless text compression? automatic JPEG conversion?) and i don't know
            # if or how i can disable it, which kinda sucks...
            # if, for example, mutagen's threshold for "large" is 200KiB, then any file over that size would be
            # reduced to below it, either by resizing or JPEG quality reduction or whatever, making the -t flag
            # useless for values above 200 when saving MP3 files.
            # the most i can tell is that mutagen uses zlib compression in some way or another for reading ID3 tags:
            # https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_frames.py#L265
            # however, it seems not to use zlib when *writing* tags, citing itunes incompatibility, in particular
            # with APIC:
            # https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_tags.py#L510
            # given that this is the only reference to compression that i could find in the source code, and it says
            # that ID3v2 compression was disabled for itunes compatibility, i'm going to assume/hope it doesn't do
            # anything weird.
            # it's worth noting that mutagen has no dependencies outside of python's stdlib, which (currently)
            # doesn't contain any method for JPEG compression, so i'm 99% sure the files won't be mangled.
            embed_cover = APIC(
                encoding=3,  # utf-8
                mime="image/jpeg",
                type=PictureType.COVER_FRONT,
                desc='cover',
                data=data
            )

artists: List[str] = []
album: Optional[str] = None
# maps each song's name in the zip to the name it'll have in the destination directory
songs: Dict[str, str] = {}
# the number of digits to pad track numbers to: 2, or 1 if there's only a single song
zeroes = min(len(song_names), 2)
first_loop: bool = True

for song in song_names:
    m: MutagenFile = mutagen.File(Path(tmp, song))
    if m is None:
        # mutagen couldn't work out what kind of file this is
        die(f"Couldn't read the metadata of {song}.")

    if first_loop:
        # the first item in the artists list should be the album artist
        artists.append(get_tag(m, "album_artist"))
        album = get_tag(m, "album")
        first_loop = False

    # add the song's artist(s) to the list
    # (this needs to be extend() and not map(artists.append, ...), since map() is lazy and never actually runs
    # unless something iterates over its result)
    artists.extend(get_tag(m, "artist", allow_list=True))
    songs[song] = f"{str(get_tag(m, 'track')).zfill(zeroes)} {get_tag(m, 'title')}.{song_format}"

    if args.process_cover == 'a' or (args.process_cover == 'w' and has_cover(m) is False):
        log("Embedding cover art...")
        # embed cover art
        if song_format == "ogg":
            m["metadata_block_picture"] = [b64encode(embed_cover.write()).decode("ascii")]
        elif song_format == "flac":
            m.clear_pictures()
            m.add_picture(embed_cover)
        elif song_format == "mp3":
            m.tags.add(embed_cover)

        m.save()

# remove duplicate artists (while keeping them in order)
artists = list(dict.fromkeys(artists))

if len(artists) > 1 and "Various Artists" not in artists:
    artists.append("Various Artists")

artist: Optional[str] = None
while artist is None:
    log("Artist directory:")
    for i, artist_name in enumerate(artists):
        log(f"{i+1}) {artist_name}")
    log(f"{len(artists) + 1}) Custom...")

    # in quiet mode, always go with the first option (the album artist)
    choice = "1" if args.quiet else input("> ")
    if choice.isdecimal():
        choice = int(choice)
        if choice == len(artists) + 1:
            log("Enter the name to use:")
            artist = input("> ")
        elif 1 <= choice <= len(artists):
            artist = artists[choice - 1]
        else:
            # checked explicitly rather than by catching an exception, since indexing a list with 0 - 1 doesn't
            # fail, it just quietly picks the last artist
            log(f"Please choose a number between 1 and {len(artists) + 1}.")
    else:
        log(f"Please choose a number between 1 and {len(artists) + 1}")

destination: Path = Path(args.destination, artist, album)
log(f"Moving files to \"{destination}\"...")
os.makedirs(destination, exist_ok=True)

for source_name, dest_name in songs.items():
    shutil.move(str(Path(tmp, source_name)), str(Path(destination, dest_name)))

if cover is not None:
    shutil.move(str(Path(tmp, cover)), str(Path(destination, cover)))

tmp_dir.cleanup()
log("Done!")