# media-sucker/src/dvd.py

#! /usr/bin/python3

import subprocess
import time
import logging
import re
import os

SECOND = 1
MINUTE = 60 * SECOND
HOUR = 60 * MINUTE

def collect(collection, track):
    """Return *collection* with *track* merged in, de-duplicating by length.

    Two tracks whose ``"length"`` is exactly equal are assumed to be the
    same track, and only the one with the most audio / subpicture entries
    is kept.

    Args:
        collection: List of track dicts selected so far. It is not modified.
        track: Candidate track dict; must have ``"length"``, ``"audio"``
            and ``"subp"`` entries.

    Returns:
        The original ``collection`` object, unchanged, if it already holds
        a same-length track with more audio or more subpicture entries than
        ``track``. Otherwise a new list containing every track of a
        different length followed by ``track``.
    """
    new_collection = []
    for existing in collection:
        if existing["length"] == track["length"]:
            # Same length: treat as the same track and keep the richer one.
            if len(track["audio"]) < len(existing["audio"]):
                return collection
            if len(track["subp"]) < len(existing["subp"]):
                return collection
            # The new track is at least as rich, so it supersedes the old
            # one; drop the old entry instead of keeping both.
            continue
        new_collection.append(existing)
    new_collection.append(track)
    return new_collection


def scan(state, device):
    """Inspect the DVD at *device* with ``lsdvd`` and record what to rip.

    On success the following keys are written to *state*:

    * ``"title"``: the disc title (falling back to the provider id, then to
      ``"DVD"``) followed by a timestamp.
    * ``"size"``: the largest ``last_sector`` of any cell, times 2048 bytes.
    * ``"tracks"``: a list of ``(ix, length)`` tuples for the selected tracks.

    Args:
        state: Mutable dict that receives the keys above.
        device: Path handed to ``lsdvd`` as the thing to scan.

    Returns:
        None. If no cell references a sector above 0 the function logs a
        message and returns without touching *state*.
    """
    p = subprocess.run(
        [
            "lsdvd",
            "-Oy",
            "-x",
            device,
        ],
        encoding="utf-8",
        capture_output=True,
    )
    # NOTE(review): eval() executes whatever lsdvd prints, and part of that
    # text (e.g. the disc title) comes from the disc itself. Consider
    # ast.literal_eval() once it is confirmed that lsdvd's -Oy output is
    # always a plain literal.
    lsdvd = eval(p.stdout[8:])  # s/^lsdvd = //
    title = lsdvd["title"]
    if title in ('No', 'unknown'):
        title = lsdvd["provider_id"]
        if title == "$PACKAGE_STRING":
            title = "DVD"
    now = time.strftime(r"%Y-%m-%dT%H:%M:%S")
    title = "%s %s" % (title, now)

    # Go through all the tracks, looking for the largest referenced sector.
    max_sector = 0
    max_length = 0
    tracks = lsdvd["track"]
    for track in tracks:
        max_length = max(track["length"], max_length)
        for cell in track["cell"]:
            max_sector = max(cell["last_sector"], max_sector)
    if max_sector == 0:
        logging.info("Media size = 0; aborting")
        return

    # Make a guess about what's on this DVD.
    # We will categorize into three types:
    # * A feature, which has one track much longer than any other
    # * A collection of shows, which has several long tracks, more or less the same lengths
    # * Something else
    collection = []
    for track in tracks:
        # max_length can be 0 when every track reports a zero length;
        # skip the ratio test then instead of dividing by zero.
        if max_length > 0 and track["length"] / max_length > 0.80:
            collection = collect(collection, track)
    # Short longest track and only a minority of tracks selected:
    # this is "something else", so take every track.
    if (max_length < 20 * MINUTE) and (len(collection) < len(tracks) * 0.6):
        collection = tracks

    state["title"] = title
    state["size"] = max_sector * 2048  # DVD sector size = 2048
    state["tracks"] = [(t["ix"], t["length"]) for t in collection]

def copy(state, device, directory):
    """Mirror the DVD with ``dvdbackup``, yielding progress as it goes.

    ``dvdbackup --mirror --progress`` is started in *directory* and its
    combined stdout/stderr is read line by line. Progress lines report how
    much of the current title has been copied; when that figure drops, a
    new title is assumed to have started and the previous title's final
    size is added to the running total.

    Args:
        state: Dict providing ``"title"`` (used as the backup name) and
            ``"size"`` (expected total in bytes, used as the denominator).
        device: Input device/path passed to ``dvdbackup --input``.
        directory: Working directory in which ``dvdbackup`` runs.

    Yields:
        After every output line (progress line or not), the bytes copied
        so far divided by ``state["size"]``.
    """
    p = subprocess.Popen(
        [
            "dvdbackup",
            "--input=" + device,
            "--name=" + state["title"],
            "--mirror",
            "--progress",
        ],
        encoding="utf-8",
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        cwd=directory,
    )
    total_bytes = title_size = last_title_size = 0
    # The prefix must be non-greedy: a greedy ".*" swallows all but the last
    # character of the copied amount, so "123.4/273.0 MiB" would be read as 4.
    progress_re = re.compile(r"^Copying.*?([0-9.]+)/[0-9.]+ (MiB|KiB)")
    unit_bytes = {"MiB": 1024 * 1024, "KiB": 1024}
    for line in p.stdout:
        m = progress_re.search(line.strip())
        if m:
            title_size = float(m[1]) * unit_bytes[m[2]]
        if title_size < last_title_size:
            # The counter went backwards: a new title has started.
            total_bytes += last_title_size
        last_title_size = title_size
        yield (total_bytes + title_size) / state["size"]
    # Output is exhausted; reap the child so it does not linger as a zombie.
    p.wait()


def encode(state, directory):
    """Encode every selected track to an MKV file with HandBrakeCLI.

    For each ``(track, length)`` in ``state["tracks"]`` HandBrakeCLI is run
    (under ``nice``) against ``<directory>/VIDEO_TS``. The output is written
    to ``<directory>/<title>-<track>.mkv`` and, once HandBrakeCLI has
    exited, moved up to the parent of *directory*. While encoding,
    ``state["complete"]`` is updated with the overall fraction done,
    weighting each track by its length.

    Args:
        state: Dict providing ``"title"`` and ``"tracks"``; receives
            ``"complete"``.
        directory: Directory containing ``VIDEO_TS``.

    Raises:
        OSError: If the encoded file cannot be renamed (for example because
            HandBrakeCLI failed to produce it).
    """
    title = state["title"]
    logging.info("encoding: %s (%s)" % (title, directory))

    tracks = state["tracks"]
    total_length = sum(length for _, length in tracks)
    finished_length = 0

    # HandBrakeCLI spits out sort of JSON.
    # But Python has no built-in way to stream JSON objects.
    # Hence this kludge. The pattern is loop-invariant, so compile it once.
    progress_re = re.compile(r'^"Progress": ([0-9.]+),')

    for index, (track, length) in enumerate(tracks):
        outfn = "%s-%d.mkv" % (title, track)
        tmppath = os.path.join(directory, outfn)
        outpath = os.path.join(directory, "..", outfn)
        p = subprocess.Popen(
            [
                "nice",
                "HandBrakeCLI",
                "--json",
                "--input", "%s/VIDEO_TS" % directory,
                "--output", tmppath,
                "--title", str(track),
                "--native-language", "eng",
                "--markers",
                "--loose-anamorphic",
                "--all-subtitles",
                "--all-audio",
                "--aencoder", "copy",
                "--audio-copy-mask", "aac,ac3,mp3",
                "--audio-fallback", "aac",
            ],
            encoding="utf-8",
            stdout=subprocess.PIPE,
            stderr=None,
        )

        for line in p.stdout:
            m = progress_re.search(line.strip())
            if not m:
                continue
            progress = float(m[1])
            if total_length > 0:
                complete = (finished_length + progress * length) / total_length
            else:
                # Every track reports zero length; weight them equally
                # rather than dividing by zero.
                complete = (index + progress) / len(tracks)
            state["complete"] = complete

        # Reap the encoder before touching its output file.
        returncode = p.wait()
        if returncode != 0:
            logging.warning(
                "HandBrakeCLI exited with status %d on track %d"
                % (returncode, track)
            )

        finished_length += length
        os.rename(
            src=tmppath,
            dst=outpath,
        )
        logging.info("Finished track %d; length %d" % (track, length))


def clean(state, directory):
    """Placeholder for a cleanup step; does nothing and returns None.

    Both arguments are accepted but ignored.
    """
    return None


if __name__ == "__main__":
    import pprint
    import sys

    # The previous driver referenced a ``Video`` class that is not defined
    # in this file, so running the module raised NameError. Scan the path
    # named on the command line instead (defaulting to ".", the argument
    # the old code used) and dump the resulting state.
    state = {}
    scan(state, sys.argv[1] if len(sys.argv) > 1 else ".")
    pprint.pprint(state)


# vi: sw=4 ts=4 et ai