media-sucker/src/dvd.py

169 lines
4.9 KiB
Python

#! /usr/bin/python3
import subprocess
import time
import logging
import re
import os
# Time units, expressed in seconds.
SECOND = 1
MINUTE = SECOND * 60
HOUR = MINUTE * 60
def collect(collection, track):
    """Return *collection* with *track* merged in, de-duplicating by length.

    Two tracks with exactly the same ``"length"`` are assumed to be the same
    content, and only the richer one is kept:

    * If an existing track of the same length has more ``"audio"`` or more
      ``"subp"`` entries than *track*, the original *collection* object is
      returned unchanged.
    * Otherwise every existing track of that length is dropped and *track*
      is appended in its place.

    Tracks of other lengths are always preserved, in their original order.
    The input list is never mutated.
    """
    kept = []
    for existing in collection:
        if existing["length"] != track["length"]:
            kept.append(existing)
            continue
        # Same length: assume it's the same track and keep whichever one
        # carries the most streams.
        fewer_audio = len(track["audio"]) < len(existing["audio"])
        fewer_subp = len(track["subp"]) < len(existing["subp"])
        if fewer_audio or fewer_subp:
            return collection
        # `existing` is superseded by `track`, so it is not carried over.
    kept.append(track)
    return kept
def scan(state, device):
    """Inspect the DVD in *device* and record what should be ripped.

    Runs ``lsdvd -Oy -x <device>``, parses its Python-literal output, and
    stores the result in *state*:

    * ``state["title"]``  -- the disc title (falling back to the provider id,
      then to ``"DVD"``) followed by a local timestamp.
    * ``state["size"]``   -- the highest sector referenced by any cell,
      multiplied by the 2048-byte DVD sector size.
    * ``state["tracks"]`` -- a list of ``(track index, length)`` tuples for
      the tracks selected for encoding.

    Returns None. If no cell references any sector, a message is logged and
    the function returns without modifying *state*.
    """
    import ast  # function-scope import: only this function needs it

    p = subprocess.run(
        ["lsdvd", "-Oy", "-x", device],
        encoding="utf-8",
        capture_output=True,
    )

    # lsdvd prints "lsdvd = {...}"; drop the assignment and parse the dict.
    # The text originates from the disc, so it is parsed with
    # ast.literal_eval (literals only) instead of eval(), which would execute
    # arbitrary expressions embedded in the output.
    prefix = "lsdvd = "
    text = p.stdout
    if text.startswith(prefix):
        text = text[len(prefix):]
    lsdvd = ast.literal_eval(text)

    title = lsdvd["title"]
    if title in ('No', 'unknown'):
        title = lsdvd["provider_id"]
    if title == "$PACKAGE_STRING":
        title = "DVD"

    now = time.strftime(r"%Y-%m-%dT%H:%M:%S")
    title = "%s %s" % (title, now)

    # Go through all the tracks, looking for the largest referenced sector
    # and the longest track.
    max_sector = 0
    max_length = 0
    tracks = lsdvd["track"]
    for track in tracks:
        max_length = max(track["length"], max_length)
        for cell in track["cell"]:
            max_sector = max(cell["last_sector"], max_sector)
    if max_sector == 0:
        logging.info("Media size = 0; aborting")
        return

    # Make a guess about what's on this DVD.
    # We will categorize it into one of three types:
    # * A feature, which has one track much longer than any other
    # * A collection of shows, which has several long tracks, more or less
    #   the same lengths
    # * Something else
    collection = []
    for track in tracks:
        # Guard against max_length == 0 so the ratio cannot divide by zero.
        if max_length > 0 and track["length"] / max_length > 0.80:
            collection = collect(collection, track)

    # Short longest track and only a minority of tracks selected:
    # treat it as "something else" and take every track.
    if (max_length < 20 * MINUTE) and (len(collection) < len(tracks) * 0.6):
        collection = tracks

    state["title"] = title
    state["size"] = max_sector * 2048  # DVD sector size = 2048
    state["tracks"] = [(t["ix"], t["length"]) for t in collection]
def copy(state, device, directory):
    """Mirror the DVD with dvdbackup, yielding progress after each output line.

    This is a generator. It runs ``dvdbackup --mirror --progress`` with
    *directory* as the working directory, reading from *device* and naming
    the copy ``state["title"]``. For every line dvdbackup prints, it yields
    the number of bytes copied so far divided by ``state["size"]``.

    The child process is waited on when the generator finishes or is closed.
    """
    # The prefix is non-greedy so the capture group receives the whole
    # "copied" figure; a greedy ".*" would leave it only the final digit.
    progress_re = re.compile(r"^Copying.*?([0-9.]+)/[0-9.]+ (MiB|KiB)")
    unit_bytes = {"MiB": 1024 * 1024, "KiB": 1024}

    total_bytes = title_size = last_title_size = 0
    with subprocess.Popen(
        [
            "dvdbackup",
            "--input=" + device,
            "--name=" + state["title"],
            "--mirror",
            "--progress",
        ],
        encoding="utf-8",
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        cwd=directory,
    ) as p:
        for line in p.stdout:
            m = progress_re.search(line.strip())
            if m:
                title_size = float(m[1]) * unit_bytes[m[2]]
            # The per-file counter dropping means dvdbackup moved on to the
            # next file, so bank the size the previous one reached.
            if title_size < last_title_size:
                total_bytes += last_title_size
            last_title_size = title_size
            yield (total_bytes + title_size) / state["size"]
def encode(state, directory):
    """Transcode every track in ``state["tracks"]`` with HandBrakeCLI.

    For each ``(track, length)`` pair, HandBrakeCLI reads
    ``<directory>/VIDEO_TS`` and writes ``<title>-<track>.mkv`` inside
    *directory*. When HandBrakeCLI has exited, the file is moved to the
    parent of *directory*.

    While encoding, ``state["complete"]`` is updated with the overall
    fraction done, weighting each track by its length. Returns None.
    """
    title = state["title"]
    logging.info("encoding: %s (%s)", title, directory)

    # HandBrakeCLI spits out sort of JSON.
    # But Python has no built-in way to stream JSON objects.
    # Hence this kludge: pick the progress lines out with a regex.
    progress_re = re.compile(r'^"Progress": ([0-9.]+),')

    total_length = sum(t[1] for t in state["tracks"])
    finished_length = 0
    for track, length in state["tracks"]:
        outfn = "%s-%d.mkv" % (title, track)
        tmppath = os.path.join(directory, outfn)
        outpath = os.path.join(directory, "..", outfn)

        # The context manager closes the pipe and waits for HandBrakeCLI,
        # so the output file is complete before it gets renamed.
        with subprocess.Popen(
            [
                "nice",
                "HandBrakeCLI",
                "--json",
                "--input", "%s/VIDEO_TS" % directory,
                "--output", tmppath,
                "--title", str(track),
                "--native-language", "eng",
                "--markers",
                "--loose-anamorphic",
                "--all-subtitles",
                "--all-audio",
                "--aencoder", "copy",
                "--audio-copy-mask", "aac,ac3,mp3",
                "--audio-fallback", "aac",
            ],
            encoding="utf-8",
            stdout=subprocess.PIPE,
            stderr=None,
        ) as p:
            for line in p.stdout:
                m = progress_re.search(line.strip())
                if not m:
                    continue
                progress = float(m[1])
                done = finished_length + progress * length
                # Avoid dividing by zero when every track has length 0.
                state["complete"] = done / total_length if total_length else 0.0

        if p.returncode:
            logging.warning(
                "HandBrakeCLI exited with status %d for track %d",
                p.returncode,
                track,
            )

        finished_length += length
        os.rename(tmppath, outpath)
        logging.info("Finished track %d; length %d", track, length)
def clean(state, directory):
    """Cleanup step; currently a no-op that ignores both arguments."""
    return None
if __name__ == "__main__":
    from pprint import pprint

    # NOTE(review): `Video` is not defined or imported anywhere in the
    # visible source, so running this module as a script looks like it
    # would raise NameError -- confirm whether this block is a leftover.
    vts = Video(".")
    pprint(vts.dict)
# vi: sw=4 ts=4 et ai