# media-sucker/src/dvd.py

#! /usr/bin/python3

import ast
import logging
import os
import re
import subprocess
import time

# Durations, expressed in seconds.
SECOND = 1
MINUTE = SECOND * 60
HOUR = MINUTE * 60

class Copier:
    """Scan a DVD with ``lsdvd`` and mirror it to disk with ``dvdbackup``.

    Results and progress are published through the caller-supplied ``status``
    mapping, which is updated in place:

    * ``state``    -- ``"scanning"`` or ``"copying"``
    * ``title``    -- disc title with a timestamp appended
    * ``size``     -- largest referenced sector times the DVD sector size
    * ``tracks``   -- list of ``(track index, length)`` tuples selected by scan
    * ``complete`` -- fraction of ``size`` copied so far (set by ``copy``)

    The disc is scanned as soon as the object is constructed.
    """

    # Matches the "<done>/<total> MiB|KiB" part of a dvdbackup progress line.
    # The lazy ``.*?`` matters: a greedy ``.*`` swallows every digit of
    # <done> except the last one, so progress would be badly under-reported.
    _PROGRESS_RE = re.compile(r"^Copying.*?([0-9.]+)/[0-9.]+ (MiB|KiB)")
    _UNIT_BYTES = {"MiB": 1024 * 1024, "KiB": 1024}

    _SECTOR_BYTES = 2048  # DVD sector size
    _LONG_TRACK_RATIO = 0.80  # "about as long as the longest track"
    _SHORT_DISC_SECONDS = 20 * 60  # longest track under 20 minutes
    _MIN_COLLECTED_SHARE = 0.6

    def __init__(self, device, status):
        """Remember the device and status mapping, then scan the disc.

        Args:
            device: path handed to ``lsdvd`` and ``dvdbackup``.
            status: mutable mapping that receives scan results and progress.
        """
        self.device = device
        self.status = status
        self.scan()

    def collect(self, track):
        """Add ``track`` to the collection, de-duplicating by length.

        Tracks of exactly the same length are assumed to be the same
        content. Only one of them is kept: the existing one if ``track`` has
        fewer audio streams or fewer subpictures, otherwise ``track``.
        """
        kept = []
        for t in self.collection:
            if t["length"] == track["length"]:
                if len(track["audio"]) < len(t["audio"]):
                    return
                elif len(track["subp"]) < len(t["subp"]):
                    return
                # The new track is at least as rich, so it supersedes t.
                continue
            kept.append(t)
        kept.append(track)
        self.collection = kept

    def scan(self):
        """Run ``lsdvd`` on the device and record what is worth ripping."""
        self.status["state"] = "scanning"

        self.collection = []
        p = subprocess.run(
            [
                "lsdvd",
                "-Oy",
                "-x",
                self.device,
            ],
            encoding="utf-8",
            capture_output=True,
        )
        # The output has the form "lsdvd = {...}". Everything after the first
        # "=" is parsed as a literal; unlike eval(), literal_eval cannot
        # execute code smuggled in through disc metadata.
        lsdvd = ast.literal_eval(p.stdout.partition("=")[2].strip())
        self._analyze(lsdvd)

    def _analyze(self, lsdvd):
        """Fill ``status`` and ``collection`` from a parsed lsdvd structure.

        Leaves ``status`` untouched (apart from logging) when no track
        references any sector.
        """
        title = lsdvd["title"]
        if title in ("No", "unknown"):
            title = lsdvd["provider_id"]
            if title == "$PACKAGE_STRING":
                title = "DVD"
        now = time.strftime("%Y-%m-%dT%H%M%S")
        title = "%s %s" % (title, now)

        # Go through all the tracks, looking for the largest referenced sector.
        tracks = lsdvd["track"]
        max_sector = 0
        max_length = 0
        for track in tracks:
            max_length = max(track["length"], max_length)
            for cell in track["cell"]:
                max_sector = max(cell["last_sector"], max_sector)
        if max_sector == 0:
            logging.info("Media size = 0; aborting")
            return

        # Make a guess about what's on this DVD.
        # We will categorize into three types:
        # * A feature, which has one track much longer than any other
        # * A collection of shows, which has several long tracks, more or less the same lengths
        # * Something else
        if max_length > 0:  # avoid dividing by zero when every track is empty
            for track in tracks:
                if track["length"] / max_length > self._LONG_TRACK_RATIO:
                    self.collect(track)
        # Short longest track and few tracks selected: take every track.
        if (max_length < self._SHORT_DISC_SECONDS) and (
            len(self.collection) < len(tracks) * self._MIN_COLLECTED_SHARE
        ):
            self.collection = tracks

        self.status["title"] = title
        self.status["size"] = max_sector * self._SECTOR_BYTES
        self.status["tracks"] = [(t["ix"], t["length"]) for t in self.collection]

    def copy(self, directory):
        """Mirror the disc into ``directory`` and track progress.

        ``status["complete"]`` is refreshed after every line of dvdbackup
        output. Returns once dvdbackup has closed its output and exited.
        """
        self.status["state"] = "copying"

        # The context manager closes the pipe and waits for the child, so no
        # zombie process is left behind.
        with subprocess.Popen(
            [
                "dvdbackup",
                "--input=" + self.device,
                "--name=" + self.status["title"],
                "--mirror",
                "--progress",
            ],
            encoding="utf-8",
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=directory,
        ) as p:
            total_bytes = title_size = last_title_size = 0
            for line in p.stdout:
                m = self._PROGRESS_RE.search(line.strip())
                if m:
                    title_size = float(m[1]) * self._UNIT_BYTES[m[2]]
                # The per-title counter dropped, so a new title has started:
                # bank what the previous title had reached.
                if title_size < last_title_size:
                    total_bytes += last_title_size
                last_title_size = title_size
                self.status["complete"] = (total_bytes + title_size) / self.status["size"]


class Encoder:
    """Transcode tracks of a mirrored DVD to MKV files with HandBrakeCLI.

    ``basedir`` is the directory that contains ``VIDEO_TS``. Finished files
    are moved to its parent directory. Overall progress is published as a
    fraction in ``status["complete"]``.
    """

    # HandBrakeCLI spits out sort of JSON.
    # But Python has no built-in way to stream JSON objects.
    # Hence this kludge: pick the progress lines out with a regex.
    _PROGRESS_RE = re.compile(r'^"Progress": ([0-9.]+),')

    def __init__(self, basedir, status):
        """Remember the working directory and the shared status mapping."""
        self.basedir = basedir
        self.status = status

    def encode(self, obj):
        """Encode every track listed in ``obj``.

        Args:
            obj: mapping with a ``"title"`` string and a ``"tracks"`` list of
                ``(track number, length)`` pairs.

        Each track is written to ``<basedir>/<title>-<track>.mkv`` and then
        renamed into the parent of ``basedir``.
        """
        title = obj["title"]
        logging.info("encoding: %s (%s)", title, self.basedir)

        total_length = sum(t[1] for t in obj["tracks"])
        finished_length = 0
        for track, length in obj["tracks"]:
            outfn = "%s-%d.mkv" % (title, track)
            tmppath = os.path.join(self.basedir, outfn)
            outpath = os.path.join(self.basedir, "..", outfn)
            # The context manager closes the pipe and reaps the child.
            with subprocess.Popen(
                [
                    "nice",
                    "HandBrakeCLI",
                    "--json",
                    "--input", "%s/VIDEO_TS" % self.basedir,
                    "--output", tmppath,
                    "--title", str(track),
                    "--native-language", "eng",
                    "--markers",
                    "--loose-anamorphic",
                    "--all-subtitles",
                    "--all-audio",
                    "--aencoder", "copy",
                    "--audio-copy-mask", "aac,ac3,mp3",
                    "--audio-fallback", "aac",
                ],
                encoding="utf-8",
                stdout=subprocess.PIPE,
                stderr=None,
            ) as p:
                for line in p.stdout:
                    m = self._PROGRESS_RE.search(line.strip())
                    # Skip the update when the total is zero: no meaningful
                    # fraction exists and the division would raise.
                    if m and total_length:
                        progress = float(m[1])
                        complete = (finished_length + progress * length) / total_length
                        self.status["complete"] = complete

            if p.returncode != 0:
                logging.warning(
                    "HandBrakeCLI exited with status %d for track %d",
                    p.returncode,
                    track,
                )

            finished_length += length
            os.rename(tmppath, outpath)
            logging.info("Finished track %d; length %d", track, length)


if __name__ == "__main__":
    import pprint
    import sys

    # Stand-alone smoke test: scan a drive and dump what the scan found.
    # The previous body referenced a `Video` class that is not defined in
    # this module, so running the script raised NameError.
    # NOTE(review): the /dev/dvd default is an assumption -- confirm.
    device = sys.argv[1] if len(sys.argv) > 1 else "/dev/dvd"
    status = {}
    Copier(device, status)  # the constructor performs the scan
    pprint.pprint(status)


# vi: sw=4 ts=4 et ai