diff --git a/README.md b/README.md index dc49b62..05bc606 100644 --- a/README.md +++ b/README.md @@ -11,15 +11,22 @@ and then re-encode the content to a compressed format. At the time I'm writing this README, it will: -* ~~Rip audio CDs, look them up in cddb, encode them to VBR MP3, then tag them.~~ A rewrite broke this; I plan to fix it soon. +* Rip audio CDs, look them up in cddb, encode them to VBR MP3, then tag them. + * It also writes a shell script you can modify to quickly change the tags, since this is a pretty common thing to want to do. * Rip video DVDs, transcode them to mkv ## Requirements -* HandBrakeCLI -* cdparanoia -* cd-discid -* +The requirements are fairly light: a few CD tools, cdparanoia, HandBrakeCLI, and some +DVD libraries. + +Most notably, you do *not* need a relational database (SQLite, Postgres, MySQL). +You just need a file system. + +For a complete list of requirements, +look at the [Dockerfile](Dockerfile) +to see what Debian packages it installs. + ## How To Run This @@ -34,9 +41,8 @@ Mine is `/srv/ext/incoming`. -v /srv/ext/incoming:/incoming \ registry.gitlab.com/dartcatcher/media-sucker/media-sucker -I can't get it to work with docker swarm. -Presumably some magic is happening with `--device`. -It probably has something to do with selinux. +I can't get it to work with docker swarm, +which doesn't support `--device`. Stick a video DVD or audio CD in, and the drive should spin up for a while, @@ -46,9 +52,14 @@ or a new directory of `.mp3` files (for audio). You can watch what it's doing at http://localhost:8080/ + ## A note on filenames and tags This program does the absolute minimum to try and tag your media properly. +Partly because I'm a lazy programmer, +but mostly because the computer can only guess at things that you, +the operator, +can just read off the box. For DVDs, that means reading the "title" stored on the DVD, which I've seen vary from very helpful (eg. "Barbie A Fashion Fairytale") @@ -62,13 +73,10 @@ so CDDB takes the length of every track in seconds and tries to match that against something a user has uploaded in the past. This is wrong a whole lot of the time. -If CDDB can't find a match for an audio CD, -this program will append the datestamp of the rip to the album name, -in the hopes that you can remember about what time you put each CD in the drive. -So for stuff like multi-CD audiobooks, that's pretty helpful. - But the end result in almost every case is that you're going to have to -manually edit the metadata. +rename the movie file, or re-tag the audio files. +This is why you get a `tag.sh` file with every audio CD rip. + ## Answers @@ -76,35 +84,23 @@ I'm skipping the part where I make up questions I think people might have. ### Why I Wrote This -The `automatic-ripping-machine` looks really badass. +The automatic-ripping-machine looks really badass. But after multiple attempts across multiple months to get it running, I decided it would probably be faster just to write my own. -This isn't as cool as the aumomatic-ripping-machine. +media-sucker isn't as cool as the automatic-ripping-machine. But, at least for me, -it's a lot more functional, -in that it actually does something. +it's more useful, +in that I can get it to actually do something. ### Why You Should Run This The only reason I can think of that anybody would want to use this is if they, like me, -are too dumb to get the `automatic-ripping-machine` to work. +are too dumb to get the automatic-ripping-machine to work. ### What Kind Of Hardware I Use I run it on a Raspberry Pi 4, with a Samsung DVD drive from the stone age. - - -## Parting note - -As of 2022-08-22, large sections of this code were written under COVID brain-fog. -This means it's going to look a lot like a 13-year-old wrote it. - -I hope one day to clean it up a bit, -but it's working fairly well, -despite the mess. -Please don't judge me for the organization of things. -Judge bizarro universe Neale instead. diff --git a/src/cd.py b/src/cd.py index d47c32c..5a91429 100644 --- a/src/cd.py +++ b/src/cd.py @@ -25,6 +25,7 @@ def scan(state, device): ) discid = p.stdout.strip() state["discid"] = discid + cddb_id = discid.split()[0] # Look it up in cddb email = os.environ.get("EMAIL") # You should really set this variable, tho @@ -44,10 +45,9 @@ def scan(state, device): for k in ("title", "artist", "genre", "year", "tracks"): state[k] = disc[k] else: - now = time.strftime("%Y-%m-%dT%H%M%S") num_tracks = int(discid.split()[1]) - state["title"] = "Unknown CD - %s" % now - state["tracks"] = ["Track %02d" % i for i in range(num_tracks)] + state["title"] = "Unknown CD - %s" % cddb_id + state["tracks"] = ["Track %02d" % (i+1) for i in range(num_tracks)] def copy(state, device, directory): @@ -84,9 +84,20 @@ def copy(state, device, directory): def encode(state, directory): track_num = 1 + total_tracks = len(state["tracks"]) durations = [int(d) for d in state["discid"].split()[2:-1]] total_duration = sum(durations) encoded_duration = 0 + + tag_script = io.StringIO() + tag_script.write("#! /bin/sh\n") + tag_script.write("\n") + tag_script.write("ALBUM=%s\n" % state["title"]) + tag_script.write("ARTIST=%s\n" % state.get("artist", "")) + tag_script.write("GENRE=%s\n" % state.get("genre", "")) + tag_script.write("YEAR=%s\n" % state.get("year", "")) + tag_script.write("\n") + for track_name in state["tracks"]: logging.debug("Encoding track %d (%s)" % (track_num, track_name)) duration = durations[track_num-1] @@ -97,8 +108,13 @@ def encode(state, directory): "--disptime", "1", "--preset", "standard", "--tl", state["title"], - "--tn", "%d/%d" % (track_num, len(state["tracks"])), + "--tn", "%d/%d" % (track_num, total_tracks), ] + tag_script.write("id3v2") + tag_script.write(" --album \"$ALBUM\"") + tag_script.write(" --artist \"$ARTIST\"") + tag_script.write(" --genre \"$GENRE\"") + tag_script.write(" --year \"$YEAR\"") if state.get("artist"): argv.extend(["--ta", state["artist"]]) if state.get("genre"): @@ -107,11 +123,16 @@ def encode(state, directory): argv.extend(["--ty", state["year"]]) if track_name: argv.extend(["--tt", track_name]) + tag_script.write(" --song \"%s\"" % track_name) outfn = "%02d - %s.mp3" % (track_num, track_name) else: outfn = "%02d.mp3" % track_num argv.append("track%02d.cdda.wav" % track_num) argv.append(outfn) + tag_script.write("\\\n ") + tag_script.write(" --track %d/%d" % (track_num, total_tracks)) + tag_script.write(" \"%s\"\n" % outfn) + p = subprocess.Popen( argv, cwd = directory, @@ -125,14 +146,18 @@ def encode(state, directory): p = p.split("%")[0] pct = int(p) / 100 yield (encoded_duration + (duration * pct)) / total_duration - + encoded_duration += duration track_num += 1 + with open(os.path.join(directory, "tag.sh"), "w") as f: + f.write(tag_script.getvalue()) + def clean(state, directory): - pass - + for fn in os.listdir(directory): + if fn.endswith(".wav"): + os.remove(os.path.join(directory, fn)) if __name__ == "__main__": import pprint diff --git a/src/dvd.py b/src/dvd.py index 73d026c..12c82c4 100644 --- a/src/dvd.py +++ b/src/dvd.py @@ -43,7 +43,7 @@ def scan(state, device): title = lsdvd["provider_id"] if title == "$PACKAGE_STRING": title = "DVD" - now = time.strftime(r"%Y-%m-%dT%H%M%S") + now = time.strftime(r"%Y-%m-%dT%H:%M:%S") title = "%s %s" % (title, now) # Go through all the tracks, looking for the largest referenced sector. diff --git a/src/encoder.py b/src/encoder.py index dcfdfb5..5b6f7ab 100644 --- a/src/encoder.py +++ b/src/encoder.py @@ -12,6 +12,7 @@ import re import logging import dvd import cd +import traceback import worker class Encoder(worker.Worker): @@ -24,16 +25,20 @@ class Encoder(worker.Worker): while True: wait = True self.status = {"type": "encoder", "state": "idle"} - for fn in glob.glob(self.workdir("*", "state.json")): - self.encode(os.path.dirname(fn), obj) + for fn in glob.glob(self.workdir("*", "sucker.json")): + directory = os.path.dirname(fn) + state = self.read_state(directory) + try: + self.encode(directory, state) + except Exception as e: + logging.error("Error encoding %s: %s" % (directory, e)) + logging.error(traceback.format_exc()) wait = False if wait: time.sleep(12) - def encode(self, directory, obj): + def encode(self, directory, state): self.status["state"] = "encoding" - - state = self.read_state(directory) self.status["title"] = state["title"] if state["video"]: @@ -46,6 +51,7 @@ class Encoder(worker.Worker): self.status["complete"] = pct media.clean(state, directory) + self.clear_state(directory) logging.info("Finished encoding") diff --git a/src/mediahandler.py b/src/mediahandler.py deleted file mode 100644 index eba4a60..0000000 --- a/src/mediahandler.py +++ /dev/null @@ -1,6 +0,0 @@ -class MediaHandler: - def __init__(self, basedir, state): - self.basedir = basedir - self.state = state - - def \ No newline at end of file diff --git a/src/reader.py b/src/reader.py index 1aa7782..28cdedf 100644 --- a/src/reader.py +++ b/src/reader.py @@ -31,7 +31,7 @@ CDROM_EJECT = 0x5309 class Reader(worker.Worker): def __init__(self, device, directory): - super().__init__(device) + super().__init__(directory) self.device = device self.status["type"] = "reader" self.status["device"] = device @@ -66,24 +66,22 @@ class Reader(worker.Worker): rv = fcntl.ioctl(self.drive, CDROM_DISC_STATUS) try: if rv == CDS_AUDIO: - self.handle(false) + self.handle(False) elif rv in [CDS_DATA_1, CDS_DATA_2]: - self.handle(true) + self.handle(True) else: logging.info("Can't handle disc type %d" % rv) except Exception as e: logging.error("Error in disc handler: %s" % e) logging.error(traceback.format_exc()) self.eject() - elif rv in (CDS_TRAY_OPEN, CDS_NO_DISC): + elif rv in (CDS_TRAY_OPEN, CDS_NO_DISC, CDS_DRIVE_NOT_READ): time.sleep(3) else: logging.info("CDROM_DRIVE_STATUS: %d (%s)" % (rv, CDS_STR[rv])) time.sleep(3) def eject(self): - self.status["state"] = "ejecting" - for i in range(20): try: fcntl.ioctl(self.drive, CDROM_LOCKDOOR, 0) @@ -100,16 +98,18 @@ class Reader(worker.Worker): state = {} state["video"] = video if video: - media = cd - else: media = dvd + else: + media = cd media.scan(state, self.device) self.status["title"] = state["title"] subdir = slugify.slugify(state["title"]) + workdir = self.workdir(subdir) + os.makedirs(workdir, exist_ok=True) self.status["state"] = "copying" - for pct in media.copy(device, self.workdir(subdir)): + for pct in media.copy(state, self.device, workdir): self.status["complete"] = pct self.write_state(subdir, state) diff --git a/src/worker.py b/src/worker.py index 26df748..e6c8853 100644 --- a/src/worker.py +++ b/src/worker.py @@ -1,13 +1,13 @@ import threading import os import json +import logging class Worker(threading.Thread): def __init__(self, directory, **kwargs): self.directory = directory self.status = { "state": "idle", - "directory": directory, } kwargs["daemon"] = True @@ -17,9 +17,16 @@ class Worker(threading.Thread): return os.path.join(self.directory, *path) def write_state(self, subdir, state): - with open(self.workdir(subdir, "state.json"), "w") as f: - json.dump(f, state) + logging.debug("Writing state: %s" % repr(state)) + statefn = self.workdir(subdir, "sucker.json") + newstatefn = statefn + ".new" + with open(newstatefn, "w") as f: + json.dump(state, f) + os.rename(newstatefn, statefn) def read_state(self, subdir): - with open(self.workdir(subdir, "state.json")) as f: + with open(self.workdir(subdir, "sucker.json")) as f: return json.load(f) + + def clear_state(self, subdir): + os.unlink(self.workdir(subdir, "sucker.json"))