media-sucker/src/dvd.py

180 lines
5.8 KiB
Python
Raw Normal View History

2022-08-21 15:58:11 -06:00
#! /usr/bin/python3
import subprocess
import time
import logging
import re
2022-08-21 17:22:06 -06:00
import os
2022-08-21 15:58:11 -06:00
# Durations, all expressed in seconds.
SECOND = 1
MINUTE = SECOND * 60
HOUR = MINUTE * 60
class Copier:
    """Scan a DVD with ``lsdvd`` and mirror it to disk with ``dvdbackup``.

    Results and progress are published by writing into the caller-supplied
    ``status`` mapping, using the keys ``state``, ``title``, ``size``,
    ``tracks`` and ``complete``.
    """

    # `lsdvd -Oy` prints a Python assignment; this is the text that
    # precedes the literal itself.
    _LSDVD_PREFIX = "lsdvd = "

    # Matches a dvdbackup progress line and captures (amount copied, unit).
    # The leading `.*?` must be lazy: a greedy `.*` swallows every digit of
    # the amount except the last one.
    _PROGRESS_RE = re.compile(r"^Copying.*?([0-9.]+)/[0-9.]+ (MiB|KiB)")

    # Bytes per unit reported in a progress line.
    _UNIT_BYTES = {"MiB": 1024 * 1024, "KiB": 1024}

    def __init__(self, device, status):
        """Remember the device and status mapping, then scan the disc.

        Args:
            device: path handed to ``lsdvd`` and ``dvdbackup``.
            status: mutable mapping that receives scan/copy results.
        """
        self.device = device
        self.status = status
        self.scan()

    def collect(self, track):
        """Add ``track`` to ``self.collection``, de-duplicating by length.

        Two tracks with exactly the same length are assumed to be the same
        content.  If ``track`` has fewer audio streams, or fewer subpicture
        streams, than an already collected track of the same length, it is
        discarded.  Otherwise it replaces that track.
        """
        kept = []
        for existing in self.collection:
            if existing["length"] != track["length"]:
                kept.append(existing)
                continue
            if len(track["audio"]) < len(existing["audio"]):
                return
            if len(track["subp"]) < len(existing["subp"]):
                return
            # `track` is at least as rich as `existing`: drop `existing`
            # so that only one copy of the content is kept.
        kept.append(track)
        self.collection = kept

    def _select_tracks(self, tracks, max_length, short_limit=None):
        """Fill ``self.collection`` with the tracks worth encoding.

        Guess what is on the disc:
          * a feature, which has one track much longer than any other;
          * a collection of shows, which has several long tracks of more
            or less the same length;
          * something else, in which case every track is kept.

        Args:
            tracks: list of track dicts as produced by ``lsdvd``.
            max_length: length of the longest track.
            short_limit: discs whose longest track is shorter than this
                (seconds) may fall into the "something else" category.
                Defaults to 20 minutes.
        """
        if short_limit is None:
            short_limit = 20 * MINUTE
        for track in tracks:
            # Guard against a disc whose tracks all report zero length.
            if max_length > 0 and track["length"] / max_length > 0.80:
                self.collect(track)
        # Compare against the number of tracks on the disc (not the number
        # of keys in a single track dict).
        if (max_length < short_limit) and (len(self.collection) < len(tracks) * 0.6):
            self.collection = tracks

    def scan(self):
        """Run ``lsdvd`` on the device and record what was found.

        Sets ``status["state"]`` to ``"scanning"``.  When the disc
        references at least one sector, also sets ``status["title"]``,
        ``status["size"]`` (bytes) and ``status["tracks"]`` (a list of
        ``(track index, length)`` pairs).  When no sector is referenced,
        logs a message and returns without setting those keys.
        """
        import ast  # local import: only needed to parse lsdvd output

        self.status["state"] = "scanning"
        self.collection = []
        p = subprocess.run(
            [
                "lsdvd",
                "-Oy",
                "-x",
                self.device,
            ],
            encoding="utf-8",
            capture_output=True,
        )
        # The output embeds strings read from the disc, so parse it as a
        # literal rather than executing it with eval().
        lsdvd = ast.literal_eval(p.stdout[len(self._LSDVD_PREFIX):])

        title = lsdvd["title"]
        if title in ('No', 'unknown'):
            title = lsdvd["provider_id"]
            if title == "$PACKAGE_STRING":
                title = "DVD"
        now = time.strftime("%Y-%m-%dT%H_%M_%S")
        title = "%s %s" % (title, now)

        # Go through all the tracks, looking for the largest referenced sector.
        max_sector = 0
        max_length = 0
        tracks = lsdvd["track"]
        for track in tracks:
            max_length = max(track["length"], max_length)
            for cell in track["cell"]:
                max_sector = max(cell["last_sector"], max_sector)
        if max_sector == 0:
            logging.info("Media size = 0; aborting")
            return

        self._select_tracks(tracks, max_length)

        self.status["title"] = title
        self.status["size"] = max_sector * 2048  # DVD sector size = 2048
        self.status["tracks"] = [(t["ix"], t["length"]) for t in self.collection]

    @classmethod
    def _progress_bytes(cls, line):
        """Return the bytes copied according to one progress line.

        Returns ``None`` when ``line`` is not a progress line.
        """
        m = cls._PROGRESS_RE.search(line)
        if not m:
            return None
        return float(m[1]) * cls._UNIT_BYTES[m[2]]

    def copy(self, directory):
        """Mirror the disc into ``directory`` using ``dvdbackup``.

        Sets ``status["state"]`` to ``"copying"`` and keeps
        ``status["complete"]`` updated with the fraction of
        ``status["size"]`` copied so far.  Returns once dvdbackup has
        exited; a non-zero exit status is logged.
        """
        self.status["state"] = "copying"
        total_bytes = title_size = last_title_size = 0
        # The context manager closes the pipe and waits for the child, so
        # no zombie process is left behind.
        with subprocess.Popen(
            [
                "dvdbackup",
                "--input=" + self.device,
                "--name=" + self.status["title"],
                "--mirror",
                "--progress",
            ],
            encoding="utf-8",
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=directory,
        ) as p:
            for line in p.stdout:
                copied = self._progress_bytes(line.strip())
                if copied is not None:
                    title_size = copied
                # The per-title counter going backwards means dvdbackup
                # moved on to the next title: bank the finished one.
                if title_size < last_title_size:
                    total_bytes += last_title_size
                last_title_size = title_size
                self.status["complete"] = (total_bytes + title_size) / self.status["size"]
        if p.returncode != 0:
            logging.error("dvdbackup exited with status %d", p.returncode)
2022-08-21 17:22:06 -06:00
2022-08-21 15:58:11 -06:00
class Encoder:
    """Transcode the tracks of a mirrored DVD to ``.mkv`` with HandBrakeCLI.

    Overall progress is published as a fraction in ``status["complete"]``.
    """

    # HandBrakeCLI spits out sort of JSON.
    # But Python has no built-in way to stream JSON objects.
    # Hence this kludge: pick the progress value out line by line.
    _PROGRESS_RE = re.compile(r'^"Progress": ([0-9.]+),')

    def __init__(self, basedir, status):
        """Remember the working directory and the status mapping.

        Args:
            basedir: directory containing the ``VIDEO_TS`` tree; encoded
                files are written here and then moved to its parent.
            status: mutable mapping that receives ``complete``.
        """
        self.basedir = basedir
        self.status = status

    @classmethod
    def _parse_progress(cls, line):
        """Return the progress value on ``line``, or ``None`` if absent."""
        m = cls._PROGRESS_RE.search(line)
        if not m:
            return None
        return float(m[1])

    @staticmethod
    def _overall(finished_length, progress, length, total_length):
        """Return the fraction of all tracks encoded so far.

        Args:
            finished_length: summed length of the tracks already done.
            progress: progress (0..1) of the track being encoded.
            length: length of the track being encoded.
            total_length: summed length of every track.
        """
        if not total_length:
            # Every track reports zero length; avoid dividing by zero.
            return 0.0
        return (finished_length + progress * length) / total_length

    def encode(self, obj):
        """Encode every track listed in ``obj`` and move it out of basedir.

        Args:
            obj: mapping with ``"title"`` (used to name the output files)
                and ``"tracks"`` (iterable of ``(track number, length)``).

        Each track is written to ``<basedir>/<title>-<track>.mkv`` and then
        renamed into the parent of ``basedir``.  A non-zero HandBrakeCLI
        exit status is logged.
        """
        title = obj["title"]
        logging.info("encoding: %s (%s)", title, self.basedir)

        total_length = sum(t[1] for t in obj["tracks"])
        finished_length = 0
        for track, length in obj["tracks"]:
            outfn = "%s-%d.mkv" % (title, track)
            tmppath = os.path.join(self.basedir, outfn)
            outpath = os.path.join(self.basedir, "..", outfn)
            command = [
                "nice",
                "HandBrakeCLI",
                "--json",
                "--input", "%s/VIDEO_TS" % self.basedir,
                "--output", tmppath,
                "--title", str(track),
                "--native-language", "eng",
                "--markers",
                "--loose-anamorphic",
                "--all-subtitles",
                "--all-audio",
                "--aencoder", "copy",
                "--audio-copy-mask", "aac,ac3,mp3",
                "--audio-fallback", "aac",
            ]
            # The context manager waits for HandBrakeCLI to exit, so the
            # output file is complete before it is renamed.
            with subprocess.Popen(
                command,
                encoding="utf-8",
                stdout=subprocess.PIPE,
                stderr=None,
            ) as p:
                for line in p.stdout:
                    progress = self._parse_progress(line.strip())
                    if progress is not None:
                        self.status["complete"] = self._overall(
                            finished_length, progress, length, total_length
                        )
            if p.returncode != 0:
                logging.error(
                    "HandBrakeCLI exited with status %d on track %d",
                    p.returncode,
                    track,
                )
            finished_length += length
            os.rename(tmppath, outpath)
            logging.info("Finished track %d; length %d", track, length)
2022-08-21 15:58:11 -06:00
if __name__ == "__main__":
    import pprint

    # Manual smoke test: scan the current directory and dump the result.
    # The name `Video` used here previously is not defined in this module.
    # NOTE(review): assumes lsdvd accepts a directory holding VIDEO_TS as
    # its device argument -- confirm.
    status = {}
    Copier(".", status)
    pprint.pprint(status)
# vi: sw=4 ts=4 et ai