# Parsed command-line options. Assigned by main() and read by the helpers below.
args: argparse.Namespace


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the organiser."""
    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        usage='%(prog)s zip [options]',
        formatter_class=argparse.RawTextHelpFormatter,
        description="Extracts the given zip file downloaded from Bandcamp and organises it.",
        epilog=f"Cover art can only be embedded in files of the following types: {', '.join(fully_supported).upper()}.\n"
               "If the song is in any other format, %(prog)s will behave as though you passed '-c n', "
               "but will otherwise work normally.\nIf the song files contain no metadata, %(prog)s will attempt "
               "to parse the song's filenames to retrieve the artist, album, title, and track number.")
    parser.add_argument('zip', help='The zip file to use.')
    parser.add_argument('-c', '--add-cover-images', dest='process_cover', default='w', choices=['n', 'a', 'w'],
                        help="When to embed cover art into songs.\nOptions: [n]ever, [a]lways, [w]hen necessary.\nDefault: %(default)s")
    parser.add_argument('-d', '--destination', dest='destination', default='/home/lynne/Music/Music/',
                        help="The directory to organise the music into.\nDefault: %(default)s")
    parser.add_argument('-q', '--quiet', dest='quiet', action='store_true',
                        help='Disable non-error output and assume default artist name.')
    parser.add_argument('-u', '--unsanitised', dest='sanitise', action='store_false',
                        help="Don't replace NTFS-unsafe characters with underscores. Not recommended.")
    # type=int (instead of nargs=1 with no type) so that a user-supplied value is a
    # single integer, exactly like the default. With nargs=1 the value arrived as a
    # one-element list of strings and the kilobyte-to-byte conversion repeated the list.
    parser.add_argument('-t', '--threshold', dest='threshold', type=int, default=300,
                        help="Maximum acceptable file size for cover art, in kilobytes.\nDefault: %(default)s")
    return parser


def _shrink_cover(source: Path, target: Path, max_bytes: int) -> bytes:
    """Write `source` as a JPEG to `target`, shrinking it until it fits in `max_bytes`.

    Returns the raw bytes of the JPEG that was written to `target`.
    Calls die() if the image gets down to 10 pixels on a side.
    """
    with Image.open(str(source)) as image:
        if image.mode in ["RGBA", "P"]:
            # remove alpha channel
            image = image.convert("RGB")

        image.save(target, quality=85, optimize=True)
        image_smol = image

        while path.getsize(target) > max_bytes:
            # scale each side to 90% per pass, or to 80% while the file is still
            # more than double the allowed size
            ratio = 0.8 if path.getsize(target) > max_bytes * 2 else 0.9

            image_smol = image_smol.resize([round(n * ratio) for n in image_smol.size])
            image_smol.save(target, quality=85, optimize=True)
            # "<=" on the smaller side rather than "== 10" on the width: a shrink
            # step can jump straight past 10 (e.g. 11 -> 9), and a very wide image
            # runs out of height first
            if min(image_smol.size) <= 10:
                # something very bad has happened here
                die("Failed to resize image")
                break

    # read the image file to get its raw data
    with open(target, 'rb') as cover_file:
        return cover_file.read()


def _make_embed_cover(song_format: str, data: bytes) -> "Optional[Union[Picture, APIC, MP4Cover]]":
    """Wrap JPEG `data` in the mutagen cover object used for `song_format`.

    Returns None for formats that have no cover type here (e.g. "wav").
    """
    # vorbis comments (ogg/flac): mutagen wants the width, height and colour depth spelled out
    # id3 (mp3): a single constructor, but it needs an encoding plus both a type and a desc
    # mp4 (m4a): only the file data and the image format are needed
    if song_format in ["ogg", "flac"]:
        with Image.open(io.BytesIO(data)) as image:
            picture = Picture()
            picture.data = data
            picture.type = PictureType.COVER_FRONT
            picture.mime = "image/jpeg"
            picture.width = image.size[0]
            picture.height = image.size[1]
            # Pillow's JPEG `bits` is the precision of one channel, while the picture
            # block's depth is bits per pixel, so multiply by the number of channels
            picture.depth = image.bits * len(image.getbands())
        return picture

    if song_format == "mp3":
        # mutagen's docs say APIC frames get compressed on save if they are "large":
        # https://mutagen.readthedocs.io/en/latest/api/id3_frames.html#mutagen.id3.APIC
        # From the source, zlib seems to be used when *reading* ID3 tags:
        # https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_frames.py#L265
        # but not when *writing* them, citing itunes incompatibility (in particular with APIC):
        # https://github.com/quodlibet/mutagen/blob/release-1.45.1/mutagen/id3/_tags.py#L510
        # so the JPEG data is presumably stored untouched and -t keeps its meaning for MP3s.
        return APIC(
            encoding=3,  # utf-8
            mime="image/jpeg",
            type=PictureType.COVER_FRONT,
            desc='cover',
            data=data
        )

    if song_format == "m4a":
        return MP4Cover(
            data=data,
            imageformat=MP4Cover.FORMAT_JPEG
        )

    return None


def _choose_artist(artists: List[str], quiet: bool) -> str:
    """Ask which name to use for the artist directory and return it.

    In quiet mode the first entry is chosen without prompting.
    """
    custom_option = len(artists) + 1
    while True:
        log("Artist directory:")
        for i, artist_name in enumerate(artists):
            log(f"{i+1}) {artist_name}")
        log(f"{custom_option}) Custom...")

        user_choice: str = "1" if quiet else input("> ")
        if not user_choice.isdecimal():
            log(f"Please choose a number between 1 and {len(artists) + 1}")
            continue

        choice: int = int(user_choice)
        if choice == custom_option:
            log("Enter the name to use:")
            return input("> ")
        # explicit range check: an out-of-range list index raises IndexError (not
        # KeyError), and 0 would otherwise silently select the last entry via index -1
        if 1 <= choice <= len(artists):
            return artists[choice - 1]
        log(f"Please choose a number between 1 and {len(artists) + 1}.")


def _organise(tmp: str) -> None:
    """Extract the songs and cover from args.zip into `tmp`, tag them, and move them into place."""
    cover: Optional[str] = None
    song_names: List[str] = []

    with ZipFile(args.zip, 'r') as zip_file:
        for file in zip_file.namelist():
            if re.match(r"^(.+ - ){2}\d{2,} .+\.(ogg|flac|alac|aiff|wav|mp3|m4a)$", file):
                # bandcamp zips contain songs with names formatted like "Artist - Album - 01 Song.mp3"
                # for example, "King Crimson - In the Wake of Poseidon - 02 Pictures of a City.ogg"
                # this regex should match only on those, and leave out (hopefully) all of the bonus
                # material, which shouldn't be added to the music folder
                song_names.append(file)
                zip_file.extract(file, tmp)
            elif cover is None and re.match(r"cover\.(jpe?g|png)", file):
                cover = file
                zip_file.extract(file, tmp)

    if not song_names:
        # without this, indexing song_names[0] below raises IndexError
        die(f"Couldn't find any songs in {args.zip}.")
        return  # needed for mypy

    # save the format of the songs (ogg, mp3, etc)
    # we'll need this to know what metadata format we should write
    song_format: str = path.splitext(song_names[0])[1][1:]
    if song_format not in fully_supported:
        log(f"Format {song_format} is not fully supported - cover images will not be modified", 1)
        args.process_cover = 'n'

    if cover is None:
        die("Unable to find cover image!")
        return  # needed for mypy

    embed_cover = None
    if args.process_cover != 'n':
        log("Resizing album art to embed in songs...")
        data = _shrink_cover(Path(tmp, cover), Path(tmp, "cover-lq.jpg"), args.threshold)
        embed_cover = _make_embed_cover(song_format, data)
        if embed_cover is None:
            # "wav" is listed as fully supported but no cover object is built for it;
            # previously this surfaced as an unbound `embed_cover` in the loop below.
            # NOTE(review): confirm what SongInfo.set_cover expects for wav files.
            log(f"Don't know how to embed cover art in {song_format} files - cover images will not be modified", 1)
            args.process_cover = 'n'

    artists: List[str] = []
    album: Optional[str] = None
    songs: Dict[str, str] = {}
    zeroes = min(len(song_names), 2)

    for index, song_name in enumerate(song_names):
        song = SongInfo(Path(tmp, song_name))
        if index == 0:
            # the first item in the artists list should be the album artist
            artists.append(song["album_artist"])
            album = song["album"]

        # add the song's artist(s) to the list
        # (extend, not map(): map() is lazy and was never consumed, so nothing was added)
        artists.extend(song.list_tags["artist"])
        songs[song_name] = song.get_target_name(zeroes)

        if args.process_cover == 'a' or (args.process_cover == 'w' and song.has_cover() is False):
            song.set_cover(embed_cover)

    # remove duplicate artists, keeping first-seen order
    artists = list(dict.fromkeys(artists))

    if len(artists) > 1 and "Various Artists" not in artists:
        artists.append("Various Artists")

    artist: str = _choose_artist(artists, args.quiet)

    destination: Path = Path(args.destination, artist, album)
    log(f"Moving files to \"{destination}\"...")
    os.makedirs(destination, exist_ok=True)

    for source_name, dest_name in songs.items():
        shutil.move(str(Path(tmp, source_name)), str(Path(destination, dest_name)))
    shutil.move(str(Path(tmp, cover)), str(Path(destination, cover)))


def main() -> None:
    """Parse the command line, then extract, tag and file the Bandcamp zip.

    Stores the parsed options in the module-level `args`, with `args.threshold`
    converted from kilobytes to bytes.
    """
    global args
    parser = _build_parser()
    args = parser.parse_args()
    if args.threshold <= 0:
        parser.error("threshold must be a positive number of kilobytes")
    # convert args.threshold to bytes
    args.threshold *= 1024

    if not path.exists(args.zip):
        die(f"Couldn't find {args.zip}.", 2)
        return  # needed for mypy

    log("Extracting...")
    # the context manager removes the temporary directory on every exit path,
    # including an exception or an early exit from within _organise()
    with tempfile.TemporaryDirectory() as tmp:
        _organise(tmp)
    log("Done!")


if __name__ == "__main__":
    main()