Got guidebook parsed, now to calculate placement

This commit is contained in:
Neale Pickett 2022-10-10 19:20:09 -06:00
parent 9f4b2c989c
commit 6d0ebb6194
7 changed files with 535 additions and 239 deletions

View File

@ -0,0 +1,131 @@
/**
 * Award points by placing.
 *
 * The table is zero-indexed: awardPoints[0] is the value for 1st place and
 * awardPoints[99] is the value for 100th place.
 *
 * @type {Array.<Number>}
 */
let awardPoints = [
    // places 1 - 10
    100, 75, 65, 60, 56, 53, 50, 47, 45, 43,
    // places 11 - 20
    41, 39, 38, 37, 36, 35, 34, 33, 32, 31,
    // places 21 - 30
    30, 29, 28, 27, 26, 25, 24, 23, 22, 21,
    // places 31 - 40
    20, 19, 18, 17, 16, 15, 14, 13, 12, 11,
    // places 41 - 50
    10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
    // places 51 - 60
    0.75, 0.65, 0.60, 0.56, 0.53, 0.50, 0.47, 0.45, 0.43, 0.41,
    // places 61 - 70
    0.39, 0.38, 0.37, 0.36, 0.35, 0.34, 0.33, 0.32, 0.31, 0.30,
    // places 71 - 80
    0.29, 0.28, 0.27, 0.26, 0.25, 0.24, 0.23, 0.22, 0.21, 0.20,
    // places 81 - 90
    0.19, 0.18, 0.17, 0.16, 0.15, 0.14, 0.13, 0.12, 0.11, 0.10,
    // places 91 - 100
    0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0.03, 0.02, 0.01, 0.00,
]
/**
 * Given a score, calculate what placings could have gotten it.
 *
 * For every tie size t, from `tied` down to 1, each run of t consecutive
 * entries in awardPoints is summed and compared against score * t. In other
 * words the score is treated as the mean of t consecutive award values.
 * A match contributes the 1-based placing of the first entry in the run.
 *
 * Results are ordered by tie size (largest first), then by placing.
 * A score that matches nothing (including NaN) yields an empty list.
 *
 * @param {Number} score Score we're going to guess
 * @param {Number} tied Highest number n-way tie to consider
 * @returns {Array.<Number>} List of possible placings
 */
function guessPlacing(score, tied=3) {
    // Sums of decimal award values (0.41 + 0.39, ...) pick up floating-point
    // error, so exact equality can miss real matches. The tolerance is far
    // below the 0.01 granularity of the table, so it cannot create new ones.
    const tolerance = 1e-9
    const placings = []

    for (let t = tied; t > 0; t--) {
        const totalPoints = score * t
        // start + t <= length lets a run end on the very last table entry
        for (let start = 0; start + t <= awardPoints.length; start++) {
            let acc = 0
            for (let i = 0; i < t; i++) {
                acc += awardPoints[start + i]
            }
            if (Math.abs(acc - totalPoints) < tolerance) {
                placings.push(start + 1)
            }
        }
    }
    return placings
}
export {
awardPoints,
guessPlacing,
}

View File

@ -1,7 +1,12 @@
.clrg-dataset tbody *:nth-child(3n) { .clrg-dataset {
border-right: thin solid black; max-width: 100%;
overflow-x: auto;
} }
.clrg-dataset tbody .new-round { .clrg-dataset tbody td.new-adjudication {
border-left: thin solid black;
}
.clrg-dataset tbody td.new-round {
border-left: thick solid black; border-left: thick solid black;
} }

View File

@ -1,245 +1,98 @@
/** /**
* @typedef Result * Feis Dataset Importer
* @type {object}
* @param {String} name Competitor's name
* @param {Number} number Competitor's bib number
* @param {String} school Competitor's school
* @param {Number} overallPoints Overall award points for this competitor
* @param {Number} overallRank Overall ranking for this competitor
* @param {String} qualifier Any qualifiers this ranking earned
* @param {Array.<Round>} rounds How this competitor was judged in each round
*/ */
/** import * as FeisWorx from "./feisworx.mjs"
* @typedef Round import * as Guidebook from "./guidebook.mjs"
* @type {Array.<Adjudication>}
*/
/** /**
* @typedef Adjudication * @typedef {import("./types.mjs").Results} Results
* @type {object} * @typedef {import("./types.mjs").Result} Result
* @param {String} adjudicator Adjudicator who recorded this score * @typedef {import("./types.mjs").Round} Round
* @param {Number} raw Raw score * @typedef {import("./types.mjs").Adjudication} Adjudication
* @param {Number} placing Placing relative to this adjudicator's other scores * @typedef {Array.<Array.<String>>} RawData
* @param {Number} points Award points
*/ */
/** /**
* Creates a new element and appends it to parent * Creates a new element and appends it to parent
* *
* @param {Element} parent * @param {Element} parent Element to append to
* @param {String} type * @param {String} type Type of element to create
* @param {Object} [dataset] Data fields to set
* @returns {Element} * @returns {Element}
*/ */
function newElement(parent, type) { function newElement(parent, type, dataset={}) {
return parent.appendChild(document.createElement(type)) let child = parent.appendChild(document.createElement(type))
for (let k in dataset) {
child.dataset[k] = dataset[k]
}
return child
} }
class Dataset { /**
/** * Load a file and parse it into Results.
* *
* @param {String} url URL to load * @param {URL|String} url Location of file to load
*/ * @returns {Results} Parsed results
constructor(url) { */
if (url) { async function loadData(url) {
this.loadData(url) let resp = await fetch(url)
} let contentType = resp.headers.get("Content-Type")
if (! contentType.includes("/xml")) {
console.error(`Cannot load data with content-type ${contentType}`)
return
} }
async loadData(url) { let text = await resp.text()
let resp = await fetch(url) let doc = new DOMParser().parseFromString(text, "text/xml")
let contentType = resp.headers.get("Content-Type") let rawData = parseXMLDocument(doc)
if (! contentType.includes("/xml")) { return parseRawData(rawData)
console.error(`Cannot load data with content-type ${contentType}`) }
return
/**
* Parse an XML document of feis results into a 2D array of strings
*
* @param {Document} doc XML Document
* @returns {RawData} Raw data
*/
function parseXMLDocument(doc) {
let table = doc.querySelector("Table")
let rawData = []
for (let dataRow of table.children) {
if (! ["tr"].includes(dataRow.tagName.toLowerCase())) {
console.warn(`Warning: unexpected XML tag ${dataRow.tagName}, expecting tr`)
continue
} }
let text = await resp.text()
this.doc = new DOMParser().parseFromString(text, "text/xml")
this.rawData = this.parseXMLDocument(this.doc)
this.results = this.parseRawData(this.rawData)
}
parseXMLDocument(doc) { let row = []
let table = doc.querySelector("Table") for (let dataCell of dataRow.children) {
let rawData = [] if (! ["th", "td"].includes(dataCell.tagName.toLowerCase())) {
console.warn(`Warning: unexpected XML tag ${dataRow.tagName}, expecting th/td`)
for (let dataRow of table.children) {
if (! ["tr"].includes(dataRow.tagName.toLowerCase())) {
console.warn(`Warning: unexpected XML tag ${dataRow.tagName}, expecting tr`)
continue continue
} }
row.push(dataCell.textContent)
let row = []
for (let dataCell of dataRow.children) {
if (! ["th", "td"].includes(dataCell.tagName.toLowerCase())) {
console.warn(`Warning: unexpected XML tag ${dataRow.tagName}, expecting th/td`)
continue
}
row.push(dataCell.textContent)
}
rawData.push(row)
}
return rawData
}
/**
* @typedef ParsedData
* @type {object}
* @property {Array.<String>} adjudicators List of adjudicators
* @property {Array.<Result>} results List of results
*/
/**
* Parse raw data into a list of adjudicators and results
*
* @param {Array.<Array.<String>>} rawData Raw data
* @returns {Array.<Result>}
*/
parseRawData(rawData) {
let cellA1 = rawData[0][0].trim().toLowerCase()
switch (cellA1) {
case "place awd pts":
return this.parseFeisWorx2017(rawData)
}
console.error("Cell A1 doesn't resemble anything I can cope with", rawData[0])
}
/**
* Parse FeisWorx 2017 data
*
* This is the output of Adobe Reader saving the PDF as XML.
*
* @param {Array.<Array.<String>>} rawData Raw data
* @returns {Array.<Result>}
*/
parseFeisWorx2017(rawData) {
let adjudicators = []
let results = []
let numRounds = 0
let adjudicatorsPerRound = 0
for (let rowIndex = 0; rowIndex < rawData.length; rowIndex++) {
let cells = rawData[rowIndex]
// Is it a page heading?
if ((cells.length >= 11) && (cells[0].trim().toLowerCase().startsWith("place"))) {
if (numRounds == 0) {
for (let cell of cells) {
if (cell.toLowerCase().startsWith("round")) {
numRounds++
}
}
}
continue
}
if (adjudicators.length == 0) {
let fishy = false
for (let adjudicator of cells) {
if (Number(adjudicator) > 0) {
fishy = true
}
adjudicators.push(adjudicator.trim())
}
if (fishy) {
console.warn("Adjudicators row doesn't look right", cells)
}
adjudicatorsPerRound = adjudicators.length / numRounds
if (! Number.isSafeInteger(adjudicatorsPerRound)) {
console.error(`Irrational number of adjudicators for number of rounds: (${adjudicators.length}/${numRounds})`)
}
continue
}
// Is this just a list of adjudicators again?
if (cells.length >= adjudicators.length) {
let lenDiff = cells.length - adjudicators.length
let same = true
for (let i = adjudicators.length-1; i >= 0; i--) {
if (adjudicators[i] != cells[i+lenDiff].trim()) {
same = false
break
}
}
if (same) {
continue
}
}
let row = {}
{
let parts = cells[0].trim().split(/\s+/)
row.overallRank = Number(parts[0])
row.overallPoints = Number(parts[1])
}
{
let match = cells[1].trim().match(/(\d+) - (.+) \((.+) *\)[ -]*(.+)?/)
if (match) {
row.number = Number(match[1])
row.name = match[2]
row.school = match[3]
row.qualifier = match[4]
}
}
row.rounds = []
let round = []
for (let cellIndex = 2; cellIndex < cells.length; cellIndex++) {
let cell = cells[cellIndex]
let adjudication = {}
let parts = cell.trim().split(/ - ?|\s/)
adjudication.adjudicator = adjudicators[cellIndex - 2]
if ((parts.length == 5) && (parts[3] == "AP")) {
parts.splice(3, 0, "NaN")
}
if ((parts.length == 7) && (parts[4] == "T")) {
adjudication.tie = true
parts.splice(4, 1)
} else {
adjudication.tie = false
}
if (parts.length != 6) {
console.error(`Wrong number of fields in row ${rowIndex} cell ${cellIndex}:`, parts, cells)
break
}
for (let i = 0; i < parts.length; i += 2) {
let key = parts[i]
let val = Number(parts[i+1])
switch (key) {
case "Raw":
adjudication.raw = val
break
case "Plc":
adjudication.placing = val
break
case "AP":
adjudication.points = val
break
default:
console.error(`Unknown key ${key} in row ${rowIndex} cell ${cellIndex}:`, cell)
break
}
}
round.push(adjudication)
if (round.length == adjudicatorsPerRound) {
row.rounds.push(round)
round = []
}
}
results.push(row)
} }
return results rawData.push(row)
} }
return rawData
}
/**
* Parse raw data into a list of adjudicators and results
*
* @param {RawData} rawData Raw data
* @returns {Results} Parsed Results
*/
function parseRawData(rawData) {
let firstRow = rawData[0]
if (firstRow[0].trim().toLowerCase() == "place awd pts") {
return FeisWorx.parse(rawData)
}
if (firstRow[firstRow.length-1].trim().toLowerCase() == "total ip *") {
return Guidebook.parse(rawData)
}
console.error("First row doesn't resemble anything I can cope with", firstRow)
} }
/** /**
@ -247,7 +100,7 @@ class Dataset {
* Fills a table element with some results * Fills a table element with some results
* *
* @param {Element} table Table to fill in * @param {Element} table Table to fill in
* @param {Array.<Result>} results Results to fill with * @param {Results} results Results to fill with
*/ */
function fillTable(table, results) { function fillTable(table, results) {
let head = newElement(table, "thead") let head = newElement(table, "thead")
@ -255,10 +108,9 @@ function fillTable(table, results) {
let row1 = newElement(head, "tr") let row1 = newElement(head, "tr")
let row2 = newElement(head, "tr") let row2 = newElement(head, "tr")
newElement(row0, "th").colSpan = 3 newElement(row0, "th").colSpan = 2
newElement(row1, "th").colSpan = 3 newElement(row1, "th").colSpan = 2
newElement(row2, "th").textContent = "Name" newElement(row2, "th").textContent = "Name"
newElement(row2, "th").textContent = "Points"
newElement(row2, "th").textContent = "Rank" newElement(row2, "th").textContent = "Rank"
let roundNumber = 0 let roundNumber = 0
@ -283,7 +135,6 @@ function fillTable(table, results) {
let row = newElement(body, "tr") let row = newElement(body, "tr")
newElement(row, "th").textContent = result.name newElement(row, "th").textContent = result.name
newElement(row, "th").textContent = result.overallPoints
newElement(row, "th").textContent = result.overallRank newElement(row, "th").textContent = result.overallRank
let i = 0 let i = 0
@ -292,6 +143,7 @@ function fillTable(table, results) {
for (let adjudication of round) { for (let adjudication of round) {
let raw = newElement(row, "td") let raw = newElement(row, "td")
raw.textContent = adjudication.raw raw.textContent = adjudication.raw
raw.classList.add("new-adjudication")
if (first) { if (first) {
raw.classList.add("new-round") raw.classList.add("new-round")
first = false first = false
@ -307,12 +159,11 @@ function fillTable(table, results) {
async function init() { async function init() {
for (let div of document.querySelectorAll(".clrg-dataset")) { for (let div of document.querySelectorAll(".clrg-dataset")) {
let dataset = new Dataset() let results = await loadData(div.dataset.url)
await dataset.loadData(div.dataset.url)
let table = newElement(div, "table") let table = newElement(div, "table")
fillTable(table, dataset.results) fillTable(table, results)
console.log(dataset) console.log(results)
} }
} }
@ -324,5 +175,7 @@ if (document.readyState === "loading") {
} }
export { export {
Dataset, loadData,
parseXMLDocument,
parseRawData,
} }

View File

@ -0,0 +1,153 @@
/**
* FeisWorx parser
*
* This is the output of Adobe Reader saving the PDF as XML.
*/
/**
* @typedef {import("./types.mjs").Results} Results
* @typedef {import("./types.mjs").Result} Result
* @typedef {import("./types.mjs").Round} Round
* @typedef {import("./types.mjs").Adjudication} Adjudication
*/
/**
 * Parse FeisWorx data
 *
 * Each row is classified in this order:
 *  - Page heading (11 or more cells, first cell starting with "place").
 *    While no rounds are known yet, cells starting with "round" are counted
 *    to establish the number of rounds.
 *  - The first non-heading row is taken as the list of adjudicators.
 *  - A later row whose trailing cells repeat that list is skipped.
 *  - Anything else is one competitor's result.
 *
 * Rows with fewer than two cells cannot hold a result; they are skipped
 * with a warning instead of aborting the whole parse.
 *
 * @param {Array.<Array.<String>>} rawData Raw data
 * @returns {Results}
 */
function parse(rawData) {
    /** @type {Results} */
    const results = []
    const adjudicators = []
    let numRounds = 0
    let adjudicatorsPerRound = 0

    for (let rowIndex = 0; rowIndex < rawData.length; rowIndex++) {
        const cells = rawData[rowIndex]

        // Is it a page heading?
        if ((cells.length >= 11) && (cells[0].trim().toLowerCase().startsWith("place"))) {
            if (numRounds === 0) {
                for (const cell of cells) {
                    if (cell.toLowerCase().startsWith("round")) {
                        numRounds++
                    }
                }
            }
            continue
        }

        // The first non-heading row names the adjudicators
        if (adjudicators.length === 0) {
            let fishy = false
            for (const adjudicator of cells) {
                // A positive number is not a plausible adjudicator name
                if (Number(adjudicator) > 0) {
                    fishy = true
                }
                adjudicators.push(adjudicator.trim())
            }
            if (fishy) {
                console.warn("Adjudicators row doesn't look right", cells)
            }
            adjudicatorsPerRound = adjudicators.length / numRounds
            if (! Number.isSafeInteger(adjudicatorsPerRound)) {
                console.error(`Irrational number of adjudicators for number of rounds: (${adjudicators.length}/${numRounds})`)
            }
            continue
        }

        // Is this just a list of adjudicators again?
        // Compare right-aligned, so extra leading cells are tolerated.
        if (cells.length >= adjudicators.length) {
            const lenDiff = cells.length - adjudicators.length
            let same = true
            for (let i = adjudicators.length-1; i >= 0; i--) {
                if (adjudicators[i] !== cells[i+lenDiff].trim()) {
                    same = false
                    break
                }
            }
            if (same) {
                continue
            }
        }

        // A result needs at least the rank/points cell and the competitor cell
        if (cells.length < 2) {
            console.warn(`Skipping row ${rowIndex}: too few cells to be a result`, cells)
            continue
        }

        const row = {}

        // First cell: "<rank> <points>"
        const rankParts = cells[0].trim().split(/\s+/)
        row.overallRank = Number(rankParts[0])
        row.overallPoints = Number(rankParts[1])

        // Second cell: "<number> - <name> (<school>)" with an optional trailing qualifier
        const match = cells[1].trim().match(/(\d+) - (.+) \((.+) *\)[ -]*(.+)?/)
        if (match) {
            row.number = Number(match[1])
            row.name = match[2]
            row.school = match[3]
            row.qualifier = match[4]
        }

        /** @type {Round} */
        let round = []
        /** @type {Array.<Round>} */
        row.rounds = []
        for (let cellIndex = 2; cellIndex < cells.length; cellIndex++) {
            const cell = cells[cellIndex]
            /** @type {Adjudication} */
            const adjudication = {}
            const parts = cell.trim().split(/ - ?|\s/)
            adjudication.adjudicator = adjudicators[cellIndex - 2]

            // Five fields with "AP" fourth: the placing value is missing, so
            // insert a placeholder that converts to NaN
            if ((parts.length === 5) && (parts[3] === "AP")) {
                parts.splice(3, 0, "NaN")
            }
            // A "T" after the placing marks a tie
            if ((parts.length === 7) && (parts[4] === "T")) {
                adjudication.tie = true
                parts.splice(4, 1)
            } else {
                adjudication.tie = false
            }
            if (parts.length !== 6) {
                console.error(`Wrong number of fields in row ${rowIndex} cell ${cellIndex}:`, parts, cells)
                // Give up on the rest of this row; rounds completed so far are kept
                break
            }

            // parts is now three key/value pairs
            for (let i = 0; i < parts.length; i += 2) {
                const key = parts[i]
                const val = Number(parts[i+1])
                switch (key) {
                    case "Raw":
                        adjudication.raw = val
                        break
                    case "Plc":
                        adjudication.placing = val
                        break
                    case "AP":
                        adjudication.points = val
                        break
                    default:
                        console.error(`Unknown key ${key} in row ${rowIndex} cell ${cellIndex}:`, cell)
                        break
                }
            }

            round.push(adjudication)
            if (round.length === adjudicatorsPerRound) {
                row.rounds.push(round)
                round = []
            }
        }
        results.push(row)
    }
    return results
}
export {
parse
}

View File

@ -0,0 +1,116 @@
/**
* Guidebook parser
*
* We're not actually sure what generated these PDFs.
* But we got them from Guidebook, so there you go.
*
* This is the output of Adobe Reader saving the PDF as XML.
*/
import {awardPoints, guessPlacing} from "./awardPoints.mjs"
/**
* @typedef {import("./types.mjs").Results} Results
* @typedef {import("./types.mjs").Result} Result
* @typedef {import("./types.mjs").Round} Round
* @typedef {import("./types.mjs").Adjudication} Adjudication
*/
/**
 * Parse Guidebook data
 *
 * Each row is classified in this order:
 *  - Empty rows are skipped.
 *  - Page headings (first cell "card") are skipped.
 *  - A row whose last cell is "total ip *" lists the adjudicators. Every such
 *    row restarts both the adjudicator list and the round count, so a heading
 *    repeated on a later page does not inflate the number of rounds.
 *  - Anything else is one competitor's result.
 *
 * The input rows are never modified.
 *
 * @param {Array.<Array.<String>>} rawData Raw data
 * @returns {Results}
 */
function parse(rawData) {
    /** @type {Results} */
    const results = []
    let adjudicators = []
    let numRounds = 0
    let adjudicatorsPerRound = 0

    for (let rowIndex = 0; rowIndex < rawData.length; rowIndex++) {
        const cells = rawData[rowIndex]

        // Nothing to classify
        if (cells.length === 0) {
            continue
        }

        // Is it a page heading?
        if ((cells[0].trim().toLowerCase() === "card")) {
            continue
        }

        // Is it a list of adjudicators?
        if (cells[cells.length-1].trim().toLowerCase() === "total ip *") {
            // Work on a copy without cells 0 - 4 (blank) and the last cell (total IP *)
            const headings = cells.slice(5, cells.length-1)
            adjudicators = []
            numRounds = 0
            for (const heading of headings) {
                const label = heading.trim()
                const lowered = label.toLowerCase()
                if (lowered.includes("rounds 1")) {
                    // skip it
                } else if (lowered.includes("round total")) {
                    numRounds++
                } else {
                    adjudicators.push(label)
                }
            }
            adjudicatorsPerRound = adjudicators.length / numRounds
            if (! Number.isSafeInteger(adjudicatorsPerRound)) {
                console.error(`Irrational number of adjudicators for number of rounds: (${adjudicators.length}/${numRounds})`)
            }
            continue
        }

        // A result needs cells 0 - 4: number, recall position, rank, name/school, qualifier
        if (cells.length < 5) {
            console.warn(`Skipping row ${rowIndex}: too few cells to be a result`, cells)
            continue
        }

        const row = {}
        row.number = Number(cells[0])
        // cells[1]: Position at recall
        row.overallRank = Number(cells[2])

        // cells[3] is "<name and school> : <region>"; the region is ignored.
        // We're going to take a wild-ass guess here that the dancer only has two names
        const nameSchool = cells[3].trim().split(/\s:\s/)[0]
        const words = nameSchool.split(/\s+/)
        row.name = words.slice(0, 2).join(" ")
        row.school = words.slice(2).join(" ")

        row.qualifier = cells[4].trim()

        /** @type {Round} */
        let round = []
        /** @type {Array.<Round>} */
        row.rounds = []
        let adjudicatorNumber = 0
        for (let cellIndex = 5; cellIndex < cells.length; cellIndex++) {
            const cell = cells[cellIndex].trim()
            // Only "<raw>/<points>" cells are adjudications
            if (! cell.includes("/")) {
                continue
            }
            /** @type {Adjudication} */
            const adjudication = {}
            adjudication.adjudicator = adjudicators[adjudicatorNumber++]

            const parts = cell.split("/")
            adjudication.raw = Number(parts[0])
            adjudication.points = Number(parts[1])
            // Guidebook reports don't list every dancer: we'll guess placing later.
            // NOTE(review): guessPlacing is documented as returning a list, so
            // placing holds candidate placings here rather than a single number.
            adjudication.placing = guessPlacing(adjudication.points)

            round.push(adjudication)
            if (round.length === adjudicatorsPerRound) {
                row.rounds.push(round)
                round = []
            }
        }
        results.push(row)
    }
    return results
}
export {
parse,
}

View File

@ -0,0 +1,34 @@
/**
* A collection of results
* @typedef {Array.<Result>} Results
*/
/**
* A single result
*
* @typedef {Object} Result
* @property {String} name Competitor's name
* @property {Number} number Competitor's bib number
* @property {String} school Competitor's school
* @property {Number} overallPoints Overall award points for this competitor
* @property {Number} overallRank Overall ranking for this competitor
* @property {String} qualifier Any qualifiers this ranking earned
* @property {Array.<Round>} rounds How this competitor was judged in each round
*/
/**
* The results for one dancer for one round
*
* @typedef Round
* @type {Array.<Adjudication>}
*/
/**
* One adjudicator's results for one dancer for one round
* @typedef {Object} Adjudication
* @property {String} adjudicator Adjudicator who recorded this score
* @property {Number} raw Raw score
* @property {Number} placing Placing relative to this adjudicator's other scores
* @property {Number} points Award points
* @property {Boolean} tie Whether this score was a tie
*/

View File

@ -6,7 +6,11 @@ scripts:
- dataset.mjs - dataset.mjs
--- ---
wat? <h1>2021 Irish Dance North Americans 21A</h1>
<div class="clrg-dataset" data-url="2022-10-10 2021 Irish dance north Americans 21A.xml"></div>
<h1>2017 11 AB Wro</h1>
<div class="clrg-dataset" data-url="2017-11 AB Wro.xml"></div> <div class="clrg-dataset" data-url="2017-11 AB Wro.xml"></div>
<div class="clrg-dataset" data-url="2022-10-10 Wro2019-09.xml"></div>
<h1>2019 09 Wro</h1>
<div class="clrg-dataset" data-url="2022-10-10 Wro2019-09.xml"></div>