Ripping DVDs works

This commit is contained in:
Neale Pickett 2022-08-21 15:58:11 -06:00
parent c40c65ab65
commit 9f738936bb
4 changed files with 251 additions and 112 deletions

172
src/dvd.py Normal file
View File

@ -0,0 +1,172 @@
#! /usr/bin/python3
import ast
import logging
import os
import re
import subprocess
import time
# Durations, expressed in seconds.
SECOND = 1
MINUTE = SECOND * 60
HOUR = MINUTE * 60
class Copier:
    """Scan a DVD with ``lsdvd`` and mirror it to disk with ``dvdbackup``.

    ``status`` is a mapping-like object (item assignment and lookup). It
    receives the keys ``state``, ``title``, ``size``, ``tracks`` and
    ``complete`` as work progresses. The disc is scanned on construction.
    """

    # ``lsdvd -Oy`` prints a Python assignment; this is the part to drop.
    _LSDVD_PREFIX = "lsdvd = "
    # Matches "Copying ... <done>/<total> MiB|KiB". The prefix is matched
    # non-greedily so that group 1 holds the whole <done> figure rather than
    # only its last character.
    _PROGRESS_RE = re.compile(r"^Copying.*?([0-9.]+)/[0-9.]+ (MiB|KiB)")
    _UNIT_BYTES = {"MiB": 1024 * 1024, "KiB": 1024}

    def __init__(self, device, status):
        """Remember ``device`` and ``status``, then scan the disc."""
        self.device = device
        self.status = status
        self.scan()

    def collect(self, track):
        """Add ``track`` to ``self.collection``, de-duplicating by length.

        Tracks with exactly the same length are assumed to be the same
        content. The one with more audio streams / subpictures wins: a poorer
        newcomer is discarded, a richer (or equal) newcomer replaces the
        track already collected.
        """
        kept = []
        for t in self.collection:
            if t["length"] == track["length"]:
                if len(track["audio"]) < len(t["audio"]):
                    return
                if len(track["subp"]) < len(t["subp"]):
                    return
                # The new track is at least as rich; drop the old duplicate.
                continue
            kept.append(t)
        kept.append(track)
        self.collection = kept

    @classmethod
    def _parse_lsdvd(cls, output):
        """Turn the text printed by ``lsdvd -Oy`` into a dictionary."""
        # The output embeds strings read from the disc, so it is parsed as a
        # literal instead of being handed to eval().
        return ast.literal_eval(output[len(cls._LSDVD_PREFIX):])

    @staticmethod
    def _pick_title(lsdvd):
        """Build a timestamped title from the lsdvd dictionary."""
        title = lsdvd["title"]
        if title in ("No", "unknown"):
            title = lsdvd["provider_id"]
        if title == "$PACKAGE_STRING":
            title = "DVD"
        now = time.strftime("%Y-%m-%dT%H:%M:%S")
        return "%s %s" % (title, now)

    @classmethod
    def _title_bytes(cls, line):
        """Return the bytes copied so far per a progress line, else None."""
        m = cls._PROGRESS_RE.search(line)
        if not m:
            return None
        return float(m[1]) * cls._UNIT_BYTES[m[2]]

    def scan(self):
        """Run ``lsdvd`` and fill in ``title``, ``size`` and ``tracks``.

        Returns early, leaving those keys unset, when no track references
        any sector.
        """
        self.status["state"] = "scanning"
        self.collection = []
        p = subprocess.run(
            [
                "lsdvd",
                "-Oy",
                "-x",
                self.device,
            ],
            encoding="utf-8",
            capture_output=True,
        )
        lsdvd = self._parse_lsdvd(p.stdout)
        title = self._pick_title(lsdvd)

        # Go through all the tracks, looking for the largest referenced sector.
        max_sector = 0
        max_length = 0
        tracks = lsdvd["track"]
        for track in tracks:
            max_length = max(track["length"], max_length)
            for cell in track["cell"]:
                max_sector = max(cell["last_sector"], max_sector)
        if max_sector == 0:
            logging.info("Media size = 0; aborting")
            return

        # Make a guess about what's on this DVD.
        # We will categorize it into one of three types:
        # * A feature, which has one track much longer than any other
        # * A collection of shows, which has several long tracks,
        #   more or less the same lengths
        # * Something else
        for track in tracks:
            # max_length can be 0 if every track reports a zero length.
            if max_length > 0 and track["length"] / max_length > 0.80:
                self.collect(track)
        # "Something else": short material where the long tracks are a
        # minority of the disc. Compare against the number of tracks.
        if (max_length < 20 * MINUTE) and (len(self.collection) < len(tracks) * 0.6):
            self.collection = tracks

        self.status["title"] = title
        self.status["size"] = max_sector * 2048  # DVD sector size = 2048
        self.status["tracks"] = [t["ix"] for t in self.collection]

    def copy(self, directory):
        """Mirror the disc into ``directory`` and update ``complete``.

        ``dvdbackup`` reports progress per title; whenever the reported size
        drops, the previous title is taken to be finished and its size is
        added to the running total.
        """
        self.status["state"] = "copying"
        total_bytes = title_size = last_title_size = 0
        with subprocess.Popen(
            [
                "dvdbackup",
                "--input=" + self.device,
                "--name=" + self.status["title"],
                "--mirror",
                "--progress",
            ],
            encoding="utf-8",
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=directory,
        ) as p:
            for line in p.stdout:
                copied = self._title_bytes(line.strip())
                if copied is not None:
                    title_size = copied
                if title_size < last_title_size:
                    total_bytes += last_title_size
                last_title_size = title_size
                self.status["complete"] = (total_bytes + title_size) / self.status["size"]
        # Leaving the with-block waits for dvdbackup to exit.
        if p.returncode:
            logging.warning("dvdbackup exited with status %d", p.returncode)
class Encoder:
    """Transcode the titles of a ripped DVD to ``.mkv`` with HandBrakeCLI.

    ``basedir`` is used as the directory that holds ``VIDEO_TS`` and receives
    the output files; ``status`` is a mapping-like object that supplies
    ``tracks`` and receives ``state``, ``title`` and ``complete``.
    """

    # HandBrakeCLI spits out sort of JSON.
    # But Python has no built-in way to stream JSON objects.
    # Hence this kludge.
    _PROGRESS_RE = re.compile(r'^"Progress": ([0-9.]+),')

    def __init__(self, basedir, status):
        """Remember the working directory and the shared status mapping."""
        self.basedir = basedir
        self.status = status

    def encode(self, track):
        """Encode every title listed in ``status["tracks"]``.

        NOTE(review): ``track`` is accepted for interface compatibility but is
        not used; the original loop variable shadowed it, so all listed
        tracks were (and still are) encoded. Confirm the intended meaning.
        """
        self.status["state"] = "encoding"
        # NOTE(review): the original referenced an undefined ``fdir``; the
        # directory given to the constructor is assumed to be what was meant.
        fdir = self.basedir
        title = os.path.basename(fdir)
        self.status["title"] = title
        for ix in self.status["tracks"]:
            logging.info("encoding: %s (%s)", title, fdir)
            outfn = "%s-%d.mkv" % (title, ix)
            tmppath = os.path.join(fdir, outfn)
            with subprocess.Popen(
                [
                    "nice",
                    "HandBrakeCLI",
                    "--json",
                    "--input", "%s/VIDEO_TS" % fdir,
                    "--output", tmppath,
                    # Popen arguments must be strings, not ints.
                    "--title", str(ix),
                    "--native-language", "eng",
                    "--markers",
                    "--loose-anamorphic",
                    "--all-subtitles",
                    "--all-audio",
                    "--aencoder", "copy",
                    "--audio-copy-mask", "aac,ac3,mp3",
                    "--audio-fallback", "aac",
                ],
                encoding="utf-8",
                stdout=subprocess.PIPE,
                stderr=None,
            ) as p:
                for line in p.stdout:
                    m = self._PROGRESS_RE.search(line.strip())
                    if m:
                        self.status["complete"] = float(m[1])
            # Leaving the with-block waits for HandBrakeCLI to exit.
            if p.returncode:
                logging.warning("HandBrakeCLI exited with status %d", p.returncode)
if __name__ == "__main__":
    # Ad-hoc manual check: build an object for the current directory and
    # pretty-print its ``dict`` attribute.
    # NOTE(review): no ``Video`` class is defined in the visible part of this
    # module (only Copier and Encoder), so this presumably raises NameError
    # when run as a script -- confirm, then replace or remove.
    import pprint
    vts = Video(".")
    pprint.pprint(vts.dict)
# vi: sw=4 ts=4 et ai

View File

@ -16,9 +16,7 @@ class Encoder(threading.Thread):
def __init__(self, directory=None, **kwargs):
self.status = {}
self.directory = directory
for d in ("audio", "video"):
os.makedirs(os.path.join(directory, d), exist_ok=True)
return super().__init__(**kwargs)
] return super().__init__(**kwargs)
def run(self):
while True:
@ -48,50 +46,52 @@ class Encoder(threading.Thread):
self.status["state"] = "encoding"
title = os.path.basename(fdir)
self.status["title"] = title
logging.info("encoding: %s (%s)" % (title, fdir))
outfn = "%s.mkv" % title
tmppath = os.path.join(fdir, outfn)
outpath = os.path.join(self.directory, outfn)
p = subprocess.Popen(
[
"nice",
"HandBrakeCLI",
"--json",
"--input", "%s/VIDEO_TS" % fdir,
"--output", tmppath,
"--main-feature",
"--native-language", "eng",
"--markers",
"--loose-anamorphic",
"--all-subtitles",
"--all-audio",
"--aencoder", "copy",
"--audio-copy-mask", "aac,ac3,mp3",
"--audio-fallback", "aac",
],
encoding="utf-8",
stdout=subprocess.PIPE,
stderr=None,
)
num_tracks = len(self.status["tracks"])
for track in self.status["tracks"]:
# HandBrakeCLI spits out sort of JSON.
# But Python has no built-in way to stream JSON objects.
# Hence this kludge.
progressRe = re.compile(r'^"Progress": ([0-9.]+),')
for line in p.stdout:
line = line.strip()
m = progressRe.search(line)
if m:
progress = float(m[1])
self.status["complete"] = progress
logging.info("encoding: %s (%s)" % (title, fdir))
os.rename(
src=tmppath,
dst=outpath,
)
outfn = "%s-%d.mkv" % (title, track)
tmppath = os.path.join(fdir, outfn)
outpath = os.path.join(self.directory, outfn)
p = subprocess.Popen(
[
"nice",
"HandBrakeCLI",
"--json",
"--input", "%s/VIDEO_TS" % fdir,
"--output", tmppath,
"--title", track,
"--native-language", "eng",
"--markers",
"--loose-anamorphic",
"--all-subtitles",
"--all-audio",
"--aencoder", "copy",
"--audio-copy-mask", "aac,ac3,mp3",
"--audio-fallback", "aac",
],
encoding="utf-8",
stdout=subprocess.PIPE,
stderr=None,
)
# HandBrakeCLI spits out sort of JSON.
# But Python has no built-in way to stream JSON objects.
# Hence this kludge.
progressRe = re.compile(r'^"Progress": ([0-9.]+),')
for line in p.stdout:
line = line.strip()
m = progressRe.search(line)
if m:
progress = float(m[1])
self.status["complete"] = progress
os.rename(
src=tmppath,
dst=outpath,
)
pass
# vi: sw=4 ts=4 et ai

View File

@ -7,8 +7,9 @@ import time
import re
import fcntl
import traceback
import json
import json
import logging
import dvd
CDROM_DRIVE_STATUS = 0x5326
CDS_NO_INFO = 0
@ -82,6 +83,10 @@ class Reader(threading.Thread):
def eject(self):
self.status["state"] = "ejecting"
print("FAKE EJECT")
time.sleep(60*60*2)
return
for i in range(20):
try:
fcntl.ioctl(self.drive, CDROM_LOCKDOOR, 0)
@ -93,7 +98,7 @@ class Reader(threading.Thread):
def finished(self, **kwargs):
self.status["state"] = "finished read"
fn = os.path.join(self.directory, "video", self.status["title"], "sucker.json")
fn = os.path.join(self.directory, self.status["title"], "sucker.json")
newfn = fn + ".new"
with open(newfn, "w") as fout:
json.dump(obj=self.status, fp=fout)
@ -103,72 +108,8 @@ class Reader(threading.Thread):
pass # XXX
def handle_data(self):
    # Handle a data disc: scan it, copy it, then record completion.
    # NOTE(review): this span comes from a diff view with no +/- markers, so
    # the video_scan()/video_copy() calls and the dvd-module calls below are
    # presumably the old and new versions of the same step -- confirm which
    # lines survive the commit.
    self.video_scan()
    self.video_copy()
    # NOTE(review): the visible src/dvd.py defines Copier and Encoder but no
    # Reader; verify that dvd.Reader exists or whether dvd.Copier was meant.
    src = dvd.Reader(self.device, self.status)
    src.copy(self.directory)
    self.finished()
def video_scan(self):
    """Ask ``dvdbackup --info`` for the disc title and total size.

    Stores ``title`` in the status mapping, and ``size`` (in bytes) as well
    unless the sizes found add up to zero.
    """
    self.status["state"] = "Scanning for DVD title"
    command = ["dvdbackup", "--input=" + self.device, "--info"]
    result = subprocess.run(command, encoding="utf-8", capture_output=True)

    # Size lines end in a unit, with the figure as the word before it.
    unit_scale = (("MiB", 1024 * 1024), ("KiB", 1024))
    total = 0
    name = "Unknown DVD"
    for row in result.stdout.split("\n"):
        if row.startswith("DVD-Video information"):
            # The title is the first double-quoted field on this line.
            name = row.split('"')[1]
            continue
        for suffix, scale in unit_scale:
            if row.endswith(suffix):
                total += float(row.split()[-2]) * scale
                break

    if name == "No Label":
        name = time.strftime("Unknown %Y-%m-%dT%H:%M:%S")
    self.status["title"] = name

    if total == 0:
        logging.info("Media size = 0; aborting")
        return
    self.status["size"] = total
def video_copy(self):
    """Mirror the disc with ``dvdbackup`` and track overall progress.

    Reads ``size`` and ``title`` from the status mapping, writes into the
    ``video`` subdirectory of ``self.directory`` and keeps ``complete``
    updated as a fraction of ``size``.
    """
    self.status["state"] = "copying"
    mediaSize = self.status["size"]
    title = self.status["title"]
    basedir = os.path.join(self.directory, "video")

    # The prefix is matched non-greedily so that group 1 holds the whole
    # copied-size figure rather than only its last character.
    progressRe = re.compile(r"^Copying.*?([0-9.]+)/[0-9.]+ (MiB|KiB)")
    unitBytes = {"MiB": 1024 * 1024, "KiB": 1024}
    totalBytes = titleSize = lastTitleSize = 0
    with subprocess.Popen(
        [
            "dvdbackup",
            "--input=" + self.device,
            "--name=" + title,
            "--mirror",
            "--progress",
        ],
        encoding="utf-8",
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        cwd=basedir,
    ) as p:
        for line in p.stdout:
            m = progressRe.search(line.strip())
            if m:
                titleSize = float(m[1]) * unitBytes[m[2]]
            # A drop in the reported size means a new title has started;
            # bank the size of the one that just finished.
            if titleSize < lastTitleSize:
                totalBytes += lastTitleSize
            lastTitleSize = titleSize
            self.status["complete"] = (totalBytes + titleSize) / mediaSize
    # Leaving the with-block waits for dvdbackup, so it is always reaped.
# vi: sw=4 ts=4 et ai

26
src/status.py Normal file
View File

@ -0,0 +1,26 @@
import json
class Status:
    """Dictionary-like status record persisted as JSON at ``path``.

    The file's contents, if it exists, are loaded on construction. Changes
    made through item assignment stay in memory until ``flush`` is called.
    """

    def __init__(self, path):
        """Start empty, then load whatever is already stored at ``path``."""
        self.dict = {}
        self.path = path
        self.update()

    def __setitem__(self, key, value):
        self.dict[key] = value

    def __getitem__(self, key):
        return self.dict[key]

    def update(self):
        """Merge the JSON object stored at ``self.path`` into ``self.dict``.

        A missing file is not an error; the in-memory state is left alone.
        """
        try:
            # The context manager closes the handle even if parsing fails.
            with open(self.path, "r") as f:
                loaded = json.load(f)
        except FileNotFoundError:
            return
        self.dict.update(loaded)

    def flush(self):
        """Write ``self.dict`` to ``self.path`` as JSON."""
        # Closing the file guarantees the data has left Python's buffer.
        with open(self.path, "w") as f:
            json.dump(self.dict, f)