// Files
// radio-scraper/index.ts
// 2021-03-14 21:02:22 -06:00
//
// 545 lines
// 19 KiB
// TypeScript

const { spawn, exec } = require('child_process')
const fs = require('fs')
const youtubedl = require('youtube-dl-exec')
import {FileResult, fileSync} from 'tmp'
// const {fileSync} = require('tmp')
const playlist = require('./playlist-search')
const scraper = require('./playlist-scrape')
const {getNewPoll} = require('./poll-master')
// Live-stream page that getYoutubeStream hands to youtube-dl.
const STREAM_URL = "https://www.youtube.com/henrikomagnifico/live"
// Matches OCR text shaped like `M:SS/M:SS`. Callers in this file treat capture
// group 1 as the playback position and capture group 2 as the track duration.
const DURATION_REGEX = /(\d{1,2}:\d{2})\/(\d{1,2}:\d{2})/
// Captures the 10-digit number that follows `/expire/` in the resolved stream URL.
// updateStreamUrl multiplies it by 1000 and stores it as resolvedUrl.expires.
const EXPIRATION_REGEX = /.+\/expire\/([0-9]{10})\/.+/
// Per-threshold vote counters keyed by threshold value; updated by logVotingResults
// and iterated by the poll-based voting functions.
const thresholdMap = [20, 17.5, 15, 12.5, 10, 7.5, 5].reduce((obj, t) => {
obj[t] = {trackVotes: {correct: 0, skipped: 0}, positionVotes: {correct: 0, skipped: 0}}
return obj
}, {})
// Array form of the same thresholds with their own, separate counters; tallyVotes
// indexes it by worker position and updateTrackInfo prints the statistics.
const thresholds = [20, 17.5, 15, 12.5, 10, 7.5, 5].map(t => ({threshold: t, trackVotes: {correct: 0, skipped: 0}, positionVotes: {correct: 0, skipped: 0}}))
// Most recently identified playlist entry (assigned by the update functions).
let currentTrack = {}
// Last playback position that was read, plus the capture time of the frame it came from.
let currentTrackPosition = {
position: 0,
positionTimestamp: 0
}
// Reset to false at the start of updateTrackInfo_v2 and set to true once track voting yields a result.
let trackIdentified = false
// Written by updateTrackInfo and registerNextTrackCheck; printed as a date when reporting the track.
let nextTrackTimestamp = 0
// Number of consecutive low-confidence retries in updateTrackInfo; reset to 0 on success.
let readAttempts = 0
// Incremented once per tallyVotes call; denominator for the threshold statistics.
let totalVotes = 0
// Timer handle for the fallback re-check that updateTrackInfo_v2 schedules when the
// track is known but the position is not; cleared once the position is identified.
let preemptiveCallback = null
// Direct stream URL resolved by youtube-dl and the time (ms since epoch) at which it expires.
let resolvedUrl = {
url: '',
expires: 0
}
// Statistics files are opened in append mode, so existing rows are kept between runs.
const trackStatsFileStream = fs.createWriteStream("track.csv", {flags: 'a'});
const positionStatsFileStream = fs.createWriteStream("position.csv", {flags: 'a'});
// Write a header row (each threshold followed by a comma) to both files on every startup.
thresholds.forEach(t => {
trackStatsFileStream.write(t.threshold + ",")
positionStatsFileStream.write(t.threshold + ",")
})
trackStatsFileStream.write('\n')
positionStatsFileStream.write('\n')
/**
 * Runs the `tesseract` CLI on an image and returns the recognised text.
 *
 * The temp file is released (`removeCallback`) as soon as tesseract exits,
 * whether it succeeded or not, so the caller must not reuse it afterwards.
 *
 * @param tmpFile Temp file holding the image to read; consumed by this call.
 * @param charWhitelist Optional characters passed to tesseract as
 *   `tessedit_char_whitelist`; omitted from the command when null or empty.
 * @returns Tesseract's stdout with all newlines and form feeds removed.
 *   Rejects with the `exec` error (after logging stderr) when the command fails.
 */
function readText(tmpFile: FileResult, charWhitelist: string | null = null): Promise<string> {
    return new Promise<string>((resolve, reject) => {
        let command = `tesseract ${tmpFile.name} -`
        if (charWhitelist) {
            command += ` -c tessedit_char_whitelist="${charWhitelist}"`
        }
        exec(command, (error: Error | null, stdout: string, stderr: string) => {
            // The image is no longer needed once tesseract has exited.
            tmpFile.removeCallback()
            if (error) {
                console.error(stderr)
                reject(error)
                return
            }
            resolve(stdout.replace(/[\n\f]/g, ''))
        })
    })
}
/**
 * Writes a thresholded copy of an image to a new temp file using ImageMagick
 * `convert` (`-white-threshold <threshold>%`, then the B channel of HSB is separated).
 *
 * @param image Temp file holding the PNG to process; it is left untouched.
 * @param threshold Percentage passed to `-white-threshold`.
 * @returns A promise for the temp file containing the processed image. When
 *   `convert` fails, stderr is logged, the new temp file is released and the
 *   promise rejects with the `exec` error.
 */
function thresholdImage(image, threshold): Promise<FileResult> {
    return new Promise((resolve, reject) => {
        const output = fileSync()
        const command = `convert png:${image.name} -white-threshold ${threshold}% -colorspace HSB -channel B -separate ${output.name}`
        exec(command, (error, stdout, stderr) => {
            if (!error) {
                resolve(output)
                return
            }
            console.error(stderr)
            output.removeCallback()
            reject(error)
        })
    })
}
/**
 * Crops a rectangle out of a captured frame with ImageMagick `convert`,
 * inverting the image (`-negate`) in the same command.
 *
 * @param frame Temp file holding the PNG frame to crop from; it is left untouched.
 * @param x Horizontal offset of the crop rectangle.
 * @param y Vertical offset of the crop rectangle.
 * @param width Width of the crop rectangle.
 * @param height Height of the crop rectangle.
 * @returns A promise for a new temp file containing the cropped PNG. When
 *   `convert` fails, stderr is logged, the new temp file is released and the
 *   promise rejects with the `exec` error.
 */
function getRegion(frame, x, y, width, height): Promise<FileResult> {
    return new Promise((resolve, reject) => {
        const cropped = fileSync()
        const geometry = `${width}x${height}+${x}+${y}`
        const command = `convert png:${frame.name} -negate -crop ${geometry} png:${cropped.name}`
        exec(command, (error, stdout, stderr) => {
            if (!error) {
                resolve(cropped)
                return
            }
            console.error(stderr)
            cropped.removeCallback()
            reject(error)
        })
    })
}
/**
 * Grabs a single video frame from a stream with `ffmpeg` and stores it as a
 * PNG in a new temp file.
 *
 * @param url Stream URL passed to `ffmpeg -i`.
 * @returns A promise for the temp file containing the frame. When ffmpeg
 *   fails, stderr is logged, the temp file is released and the promise rejects
 *   with the `exec` error.
 */
function getFrame(url): Promise<FileResult> {
    return new Promise((resolve, reject) => {
        const frameFile = fileSync()
        // exec runs the command through a shell, so the URL is quoted: characters such
        // as `&`, `?` or `;` in a stream URL would otherwise be interpreted by the shell.
        exec(`ffmpeg -i "${url}" -y -f image2 -c:v png -frames:v 1 ${frameFile.name}`, (err, stdout, stderr) => {
            if (err) {
                console.error(stderr)
                frameFile.removeCallback()
                reject(err)
            } else {
                resolve(frameFile)
            }
        })
    })
}
/**
 * Asks youtube-dl for the JSON description of STREAM_URL (format "best") and
 * returns the `url` field of that description.
 *
 * @returns A promise for the `url` value; rejects with whatever error
 *   youtube-dl reports.
 */
async function getYoutubeStream() {
    const streamInfo = await youtubedl(STREAM_URL, {
        dumpJson: true,
        format: "best"
    })
    return streamInfo.url
}
// Placeholder with an empty body: calling it does nothing and returns undefined.
// Nothing visible in this file calls it.
function getYoutubeAudioUrl() {
}
/**
 * Turns each worker's OCR output into a vote by searching the playlist.
 *
 * @param ocrResults One entry per worker, each `[title, album, position/duration]`
 *   as raw OCR text.
 * @returns One vote per worker: `result` is whatever `playlist.search` returned
 *   for the trimmed album/title and the captured duration, and `position` is the
 *   captured playback position (an empty string when the position/duration text
 *   did not match DURATION_REGEX).
 */
function getVotes(ocrResults) {
    return ocrResults.map((text, index) => {
        // Logged directly: the previous `.then(console.debug(...))` evaluated the log
        // eagerly anyway and only passed `undefined` on to `then`.
        console.debug(`Worker ${index} input: ${text}`)
        const title = text[0].trim()
        const album = text[1].trim()
        const positionDuration = text[2].trim()
        // Group 1 is the position, group 2 the duration; fall back to empty strings.
        const captured = positionDuration.match(DURATION_REGEX) || ['', '', '']
        return {
            result: playlist.search(album, title, captured[2]),
            position: captured[1]
        }
    })
}
/**
 * Counts the workers' votes and picks the winning track and playback position.
 *
 * Side effects: increments `totalVotes`, updates the per-threshold counters in
 * `thresholds` (worker `i` is assumed to correspond to `thresholds[i]`), and
 * appends one comma-joined accuracy record to each statistics stream. No
 * counters or records are written when fewer than half of the workers found a track.
 *
 * @param votes One `{result, position}` vote per worker, as produced by getVotes.
 * @returns The winning position and playlist item with their confidences.
 *   `position`/`result` are null (confidence 0) when there is no winner; in
 *   particular `position` is null and `positionConfidence` is 0 when no worker
 *   read a position, rather than `undefined`/`NaN`.
 */
function tallyVotes(votes) {
    const trackResults: Record<string, number> = {}
    const positionResults: Record<string, number> = {}
    const voters: string[] = []
    const durationVotes: string[] = []
    let overallVotes = 0
    totalVotes++

    // Adds one vote for `key` to a tally.
    const addVote = (tally: Record<string, number>, key: string) => {
        tally[key] = Object.prototype.hasOwnProperty.call(tally, key) ? tally[key] + 1 : 1
    }
    // Finds the entry with the most votes; the earliest key wins ties.
    const mostVoted = (tally: Record<string, number>) => {
        let key: string | null = null
        let count = 0
        for (const candidate of Object.keys(tally)) {
            if (tally[candidate] > count) {
                key = candidate
                count = tally[candidate]
            }
        }
        return {key, count}
    }

    votes.forEach(vote => {
        if (vote.result != null) {
            overallVotes++
            voters.push("y")
            addVote(trackResults, `${vote.result.refIndex}`)
        } else {
            voters.push("n")
        }
        if (vote.position) {
            durationVotes.push("y")
            addVote(positionResults, `${vote.position}`)
        } else {
            durationVotes.push("n")
        }
    })
    console.debug("\nDid workers vote?")
    console.debug("Result vote:", JSON.stringify(voters))
    console.debug("Duration vote:", JSON.stringify(durationVotes))

    const voterConfidence = overallVotes / votes.length
    if (overallVotes === 0 || voterConfidence < 0.5) {
        console.warn(overallVotes === 0 ? "no workers voted" : "not enough workers voted")
        return {
            position: null,
            positionConfidence: 0,
            result: null,
            resultConfidence: 0,
            voterConfidence
        }
    }

    // Retrieve votes
    console.debug("Initial index:", Object.keys(trackResults)[0], "All vote results:", trackResults)
    const trackWinner = mostVoted(trackResults)
    const positionWinner = mostVoted(positionResults)
    const trackIndex = trackWinner.key
    const position = positionWinner.key
    console.debug(`\nVote was index`, trackIndex)
    console.debug("Worker results:")

    // 'Y' = voted for the winner, 'n' = voted for something else, '_' = did not vote.
    const voterTrackAccuracy = voters.map((voted, i) => {
        if (voted !== 'y') {
            thresholds[i].trackVotes.skipped++
            return '_'
        }
        if (`${votes[i].result.refIndex}` === trackIndex) {
            thresholds[i].trackVotes.correct++
            return 'Y'
        }
        return 'n'
    })
    const voterPositionAccuracy = durationVotes.map((voted, i) => {
        if (voted !== 'y') {
            thresholds[i].positionVotes.skipped++
            return '_'
        }
        if (votes[i].position === position) {
            thresholds[i].positionVotes.correct++
            return 'Y'
        }
        return 'n'
    })
    console.debug("Track accuracy:", JSON.stringify(voterTrackAccuracy))
    trackStatsFileStream.write(voterTrackAccuracy.join(','))
    console.debug("Position accuracy:", JSON.stringify(voterPositionAccuracy))
    positionStatsFileStream.write(voterPositionAccuracy.join(','))

    const actualResult = votes.find(v => v.result != null && `${v.result.refIndex}` === trackIndex).result.item
    return {
        position: position,
        positionConfidence: positionWinner.count / votes.length,
        result: actualResult,
        resultConfidence: trackWinner.count / overallVotes,
        voterConfidence
    }
}
/**
 * Computes how long the current track still has to play.
 *
 * @param position Current playback position as `MM:SS`.
 * @param duration Total track duration as `MM:SS`.
 * @returns `duration - position` in milliseconds.
 */
function millisToNextTrack(position, duration) {
    // The duration is parsed first, so it is also the first to throw when both are malformed.
    const totalSeconds = timeToSeconds(duration)
    const elapsedSeconds = timeToSeconds(position)
    const remainingSeconds = totalSeconds - elapsedSeconds
    return remainingSeconds * 1000
}
/**
 * Converts a `MM:SS` time string into a number of seconds.
 *
 * The first `M:SS` / `MM:SS` occurrence found in the input is used.
 *
 * @param time Text containing a time such as `"3:45"`.
 * @returns Minutes * 60 + seconds.
 * @throws Error when the input contains no `MM:SS` time.
 */
function timeToSeconds(time: string): number {
    const parts = /(\d{1,2}):(\d{2})/.exec(time)
    if (parts === null) {
        // Throw a real Error (not a bare string) so callers get a stack trace.
        throw new Error(`${time} does not match MM:SS format`)
    }
    return parseInt(parts[1], 10) * 60 + parseInt(parts[2], 10)
}
/**
 * Runs OCR on the title, album and position/duration crops once per entry in
 * `thresholds`, lets every threshold vote, and returns the tallied winner.
 *
 * Every failure (cropping, thresholding, OCR or voting) rejects the returned
 * promise. Previously the inner promise chain was not returned, so such
 * failures left the promise pending forever.
 *
 * @param titleCroppedPromise Promise for the cropped title image.
 * @param albumCroppedPromise Promise for the cropped album image.
 * @param durationCroppedPromise Promise for the cropped position/duration image.
 * @returns A promise for the object produced by tallyVotes.
 */
async function performQuorumProcessing(titleCroppedPromise, albumCroppedPromise, durationCroppedPromise) {
    // Wait for all three crops before starting any thresholding.
    const [titleImage, albumImage, durationImage] = await Promise.all([
        titleCroppedPromise,
        albumCroppedPromise,
        durationCroppedPromise
    ])
    // One worker per threshold; each yields [title, album, position/duration] text.
    const allOcrResults = await Promise.all(thresholds.map(t => Promise.all([
        thresholdImage(titleImage, t.threshold).then(file => readText(file)),
        thresholdImage(albumImage, t.threshold).then(file => readText(file)),
        thresholdImage(durationImage, t.threshold).then(file => readText(file, "1234567890:/"))
    ])))
    // Cast votes (playlist search per worker), then tally them.
    const votes = getVotes(allOcrResults)
    const winner = tallyVotes(votes)
    console.debug('\nPosition confidence:', winner.positionConfidence)
    console.debug('Result confidence:', winner.resultConfidence)
    console.debug('Voter confidence:', winner.voterConfidence)
    return winner
}
/**
 * Updates the per-threshold counters in `thresholdMap` after a poll.
 *
 * For each key in `votes`: a null/undefined vote counts as skipped, a vote
 * equal to `result` counts as correct, and any other vote changes nothing.
 *
 * @param votes Votes keyed by threshold.
 * @param result Winning value of the poll (null when there was no winner).
 * @param voteType Name of the counter group to update, e.g. 'trackVotes' or 'positionVotes'.
 */
function logVotingResults(votes, result, voteType) {
    for (const threshold of Object.keys(votes)) {
        const stats = thresholdMap[threshold]
        const cast = votes[threshold]
        if (cast == null) {
            stats[voteType].skipped++
            continue
        }
        if (cast === result) {
            stats[voteType].correct++
        }
    }
}
/**
 * Reads the position/duration crop once per threshold and polls the thresholds
 * for either the track duration or the playback position.
 *
 * The previous implementation used an undefined `poll` object, which threw
 * inside an async Promise executor and left the returned promise pending. It
 * now obtains a poll from `getNewPoll`, the same way trackVoting does.
 * NOTE(review): the `getNewPoll(count, onResult, onNoResult)` / `vote` /
 * `skipVote` usage mirrors trackVoting; confirm against poll-master.
 *
 * @param croppedDuration Temp file holding the cropped position/duration image.
 * @param voteOnDuration When true the thresholds vote on the duration
 *   (DURATION_REGEX group 2), otherwise on the position (group 1).
 * @returns A promise for the winning value, or null when the poll has no winner.
 */
async function positionDurationVoting(croppedDuration, voteOnDuration) {
    const matchIndex = voteOnDuration ? 2 : 1
    return new Promise((resolve, reject) => {
        const thresholdKeys = Object.keys(thresholdMap)
        const poll = getNewPoll(thresholdKeys.length, (result, votes) => {
            // Log votes
            logVotingResults(votes, result, 'positionVotes')
            // Return result
            resolve(result)
        }, (votes) => {
            // Log votes
            logVotingResults(votes, null, 'positionVotes')
            resolve(null)
        })
        const castVotes = async () => {
            for (const t of thresholdKeys) {
                let ocrText
                try {
                    const thresholded = await thresholdImage(croppedDuration, t)
                    ocrText = await readText(thresholded, "1234567890:/")
                } catch (err) {
                    // A failed worker abstains so the poll can still complete.
                    console.error(`Position OCR failed at threshold ${t}:`, err)
                    poll.skipVote(t)
                    continue
                }
                const captured = ocrText.match(DURATION_REGEX)
                if (captured) {
                    poll.vote(t, captured[matchIndex])
                } else {
                    poll.skipVote(t)
                }
            }
        }
        // Anything unexpected (e.g. the poll itself throwing) rejects the promise
        // instead of disappearing as an unhandled rejection.
        castVotes().catch(reject)
    })
}
/**
 * Reads the title and album crops once per threshold, searches the playlist
 * with each reading, and polls the thresholds for the matching track.
 *
 * A threshold whose thresholding/OCR/search fails now abstains (`skipVote`)
 * and the error is logged. Previously the rejection was unhandled and that
 * threshold never reported to the poll.
 *
 * @param croppedTitle Temp file holding the cropped title image.
 * @param croppedAlbum Temp file holding the cropped album image.
 * @param duration Track duration passed to `playlist.search` (may be null).
 * @returns A promise for `playlist.getAt(winningIndex)`, or null when the poll
 *   has no winner.
 */
async function trackVoting(croppedTitle, croppedAlbum, duration) {
    return new Promise((resolve) => {
        const thresholdKeys = Object.keys(thresholdMap)
        const poll = getNewPoll(thresholdKeys.length, (result, votes) => {
            logVotingResults(votes, result, 'trackVotes')
            console.log('trackVoteResult', result)
            resolve(playlist.getAt(result))
        }, (votes) => {
            logVotingResults(votes, null, 'trackVotes')
            resolve(null)
        })
        // All thresholds are processed concurrently; each casts exactly one vote.
        for (const t of thresholdKeys) {
            Promise.all([
                thresholdImage(croppedTitle, t).then(file => readText(file)),
                thresholdImage(croppedAlbum, t).then(file => readText(file))
            ]).then(trackOcr => {
                const title = trackOcr[0].trim()
                const album = trackOcr[1].trim()
                const track = playlist.search(album, title, duration)
                if (track != null) {
                    poll.vote(t, track.refIndex)
                } else {
                    poll.skipVote(t)
                }
            }).catch(err => {
                console.error(`Track OCR failed at threshold ${t}:`, err)
                poll.skipVote(t)
            })
        }
    })
}
/**
 * Resolves a fresh direct stream URL and records it, together with its expiry
 * time, in `resolvedUrl`.
 *
 * The expiry is the 10-digit value captured by EXPIRATION_REGEX, multiplied by
 * 1000. `resolvedUrl` is only modified once the URL has been validated.
 *
 * @throws Error when the resolved URL contains no `/expire/<timestamp>/` segment.
 */
async function updateStreamUrl() {
    const ytStreamUrl = await getYoutubeStream()
    const expiry = typeof ytStreamUrl === 'string' ? ytStreamUrl.match(EXPIRATION_REGEX) : null
    if (expiry === null) {
        throw new Error(`Could not read an expiration timestamp from stream URL: ${ytStreamUrl}`)
    }
    resolvedUrl.url = ytStreamUrl
    resolvedUrl.expires = parseInt(expiry[1], 10) * 1000
}
/**
 * One iteration of the original (quorum based) track-detection loop.
 *
 * Refreshes the stream URL when it has expired (or in the background when it
 * expires within 30 minutes), grabs a frame, crops the title/album/position
 * regions, lets all thresholds vote, and then either retries immediately (low
 * confidence or no position) or schedules itself for shortly after the
 * expected end of the current track.
 *
 * Side effects: updates `currentTrack`, `readAttempts` and
 * `nextTrackTimestamp`, terminates the current row of both statistics files,
 * and releases the frame and crop temp files once voting has settled.
 */
async function updateTrackInfo() {
    const urlExpiresSoon = resolvedUrl.expires < (Date.now() + (30 * 60 * 1000))
    if (resolvedUrl.expires < Date.now()) {
        await updateStreamUrl()
        console.log("Got stream info")
    } else if (urlExpiresSoon) {
        // Fire-and-forget refresh; a failure is logged and retried on a later iteration.
        updateStreamUrl()
            .then(() => console.log("Updated stream info in the background"))
            .catch(err => console.error("Background stream info update failed:", err))
    }
    console.log(`Now: ${Date.now()}\tExpiration: ${resolvedUrl.expires}\tAttempt: ${readAttempts}`)
    const processingStart = Date.now()
    const frame = await getFrame(resolvedUrl.url)
    const frameTime = Date.now()
    const regionPromises = [
        getRegion(frame, 432, 850, 1487, 100), // title
        getRegion(frame, 440, 957, 1487, 32), // album
        getRegion(frame, 0, 1028, 235, 34) // position/duration
    ]
    // Releases the crops once they exist, then the frame they were cut from.
    const releaseTempFiles = () => {
        const released = regionPromises.map(region =>
            // A failed crop has no file to release and is already reported through voting.
            region.then(file => file.removeCallback()).catch(() => undefined))
        Promise.all(released)
            .then(() => frame.removeCallback())
            .catch(err => console.error("Failed to clean up temp files:", err))
    }
    let result
    try {
        result = await performQuorumProcessing(regionPromises[0], regionPromises[1], regionPromises[2])
    } finally {
        releaseTempFiles()
    }
    // Close the statistics rows started by tallyVotes with the attempt number.
    trackStatsFileStream.write(',' + readAttempts + '\n')
    positionStatsFileStream.write(',' + readAttempts + '\n')
    const votingTime = Date.now()
    if (result.resultConfidence < 0.5) {
        console.debug("Result confidence not high enough, retrying")
        setTimeout(() => updateTrackInfo(), 0)
        readAttempts++
        return;
    }
    readAttempts = 0
    currentTrack = result.result
    console.debug("\nCurrent threshold statistics: ")
    thresholds.forEach(t => {
        const skippedTrackRatio = (t.trackVotes.skipped / totalVotes) * 100
        const skippedPositionRatio = (t.positionVotes.skipped / totalVotes) * 100
        const accurateTrackRatio = (t.trackVotes.correct / (totalVotes - t.trackVotes.skipped)) * 100
        const accuratePositionRatio = (t.positionVotes.correct / (totalVotes - t.positionVotes.skipped)) * 100
        const trackEffectiveness = accurateTrackRatio - skippedTrackRatio
        const positionEffectiveness = accuratePositionRatio - skippedPositionRatio
        console.log(`${t.threshold}%:\tTrack: [Skip: ${skippedTrackRatio.toFixed(2)}% Correct: ${accurateTrackRatio.toFixed(2)}% Eff.: ${trackEffectiveness.toFixed(2)}%]\tPos: [Skip: ${skippedPositionRatio.toFixed(2)}% Correct: ${accuratePositionRatio.toFixed(2)}% Eff.: ${positionEffectiveness.toFixed(2)}%]`)
    })
    if (result.position) {
        // millisToNextTrack already returns milliseconds; do not scale it again.
        const millisUntilNext = millisToNextTrack(result.position, currentTrack.duration)
        const processingTime = Date.now() - processingStart
        const timeInFrame = Date.now() - frameTime
        nextTrackTimestamp = frameTime + millisUntilNext
        // Re-check 750ms after the expected track change, minus the time already spent on this frame.
        setTimeout(() => updateTrackInfo(), millisUntilNext - timeInFrame + 750)
        console.log(`\nFrame processing took ${processingTime}ms (Frame retrieval: ${frameTime - processingStart}ms) (Processing & voting: ${votingTime - frameTime}ms)`)
        console.log(`${currentTrack.album}: ${currentTrack.track} (${result.position}/${currentTrack.duration})`)
        console.log(`Next track should be at: ${new Date(nextTrackTimestamp).toLocaleString()}`)
    } else {
        console.log(`${currentTrack.album}: ${currentTrack.track}`)
        console.warn("Workers did not vote on current position, retrying...")
        setTimeout(() => updateTrackInfo(), 0)
    }
}
/**
 * One iteration of the poll-based (v2) track-detection loop.
 *
 * After refreshing the stream URL if needed and grabbing a frame, two things
 * run concurrently:
 *  - track identification: vote on the duration, then on the track;
 *  - position identification (only when the stored position is stale).
 * Whichever finishes second schedules the next iteration through
 * registerNextTrackCheck. If the track is known but the position is not, a
 * fallback check is scheduled one full track duration ahead and cancelled once
 * the position arrives.
 *
 * Changes from the previous version: errors in either branch are logged
 * instead of being swallowed, a missing position is retried with a fresh frame
 * whose result is actually used, and the position is stored even when the
 * track was identified first.
 *
 * NOTE(review): the frame and crop temp files created here are not released.
 * Whether a poll can resolve before every threshold has finished reading the
 * crops is not visible in this file, so cleanup timing must be confirmed first.
 */
async function updateTrackInfo_v2() {
    trackIdentified = false
    const urlExpiresSoon = resolvedUrl.expires < (Date.now() + (30 * 60 * 1000))
    if (resolvedUrl.expires < Date.now()) {
        await updateStreamUrl()
        console.log("Got stream info")
    } else if (urlExpiresSoon) {
        // Fire-and-forget refresh; a failure is logged and retried on a later iteration.
        updateStreamUrl()
            .then(() => console.log("Updated stream info in the background"))
            .catch(err => console.error("Background stream info update failed:", err))
    }
    console.log(`V2: Now: ${Date.now()}\tExpiration: ${resolvedUrl.expires}\tAttempt: ${readAttempts}`)
    const processingStart = Date.now()
    const frame = await getFrame(resolvedUrl.url)
    const frameTime = Date.now()
    const croppedDuration = await getRegion(frame, 0, 1028, 235, 34)

    // Reads the position from the given frame and stores it; retries on a miss.
    function resolvePosition(positionFrame: FileResult, positionFrameTime: number) {
        updatePosition(positionFrame, positionFrameTime).then(position => {
            if (position == null) {
                console.log("position missing")
                setTimeout(retryPosition, 0)
                return
            }
            console.log("identified position")
            currentTrackPosition.position = position
            currentTrackPosition.positionTimestamp = positionFrameTime
            if (trackIdentified) {
                registerNextTrackCheck(position, currentTrack.duration, positionFrameTime)
                printTrack(positionFrameTime, processingStart)
                if (preemptiveCallback != null) {
                    // The precise check above replaces the full-duration fallback.
                    clearTimeout(preemptiveCallback)
                    preemptiveCallback = null
                }
            }
        }).catch(err => {
            console.error("Position identification failed:", err)
        })
    }

    // Grabs a fresh frame so that the retried position is paired with its own capture time.
    function retryPosition() {
        getFrame(resolvedUrl.url)
            .then(freshFrame => resolvePosition(freshFrame, Date.now()))
            .catch(err => console.error("Could not grab a frame for the position retry:", err))
    }

    positionDurationVoting(croppedDuration, true).then(async duration => {
        // Not checking if duration was null is fine because we can search without it
        const title = await getRegion(frame, 432, 850, 1487, 100)
        const album = await getRegion(frame, 440, 957, 1487, 32)
        const trackResult = await trackVoting(title, album, duration)
        if (trackResult == null) {
            // Didn't get a result, try again
            setTimeout(updateTrackInfo_v2, 0)
        } else {
            console.log("identified track")
            currentTrack = trackResult
            trackIdentified = true
            if (positionIsIdentified()) {
                registerNextTrackCheck(currentTrackPosition.position, currentTrack.duration, frameTime)
                printTrack(frameTime, processingStart)
            } else {
                // We know that a song will only run as long as the duration,
                // so we can at least register a callback for that far out
                const timeoutLength = millisToNextTrack("0:00", currentTrack.duration) - (Date.now() - frameTime) + 750
                preemptiveCallback = setTimeout(updateTrackInfo_v2, timeoutLength)
            }
        }
    }).catch(err => {
        console.error("Track identification failed:", err)
    })

    if (!positionIsIdentified()) {
        resolvePosition(frame, frameTime)
    }
}
/**
 * Logs timing information for the frame that was just processed, the current
 * track with its stored position, and the value of `nextTrackTimestamp`
 * formatted as a local date.
 *
 * @param frameTime Time (ms since epoch) at which the frame was captured.
 * @param processingStart Time (ms since epoch) at which processing began.
 */
function printTrack(frameTime, processingStart) {
    const finishedAt = Date.now()
    const totalMs = finishedAt - processingStart
    const retrievalMs = frameTime - processingStart
    const votingMs = finishedAt - frameTime
    const nextTrackAt = new Date(nextTrackTimestamp).toLocaleString()
    console.log(`\nFrame processing took ${totalMs}ms (Frame retrieval: ${retrievalMs}ms) (Processing & voting: ${votingMs}ms)`)
    console.log(`${currentTrack.album}: ${currentTrack.track} (${currentTrackPosition.position}/${currentTrack.duration})`)
    console.log(`Next track should be at: ${nextTrackAt}`)
}
/**
 * Records when the current track is expected to end and schedules the next
 * v2 update for 750ms after that moment.
 *
 * `nextTrackTimestamp` is stored as an absolute time (ms since epoch), because
 * it is printed as a date and compared with `positionTimestamp` elsewhere in
 * this file. Previously the relative delay was stored in it.
 *
 * @param position Playback position (`MM:SS`) read from the frame.
 * @param duration Track duration (`MM:SS`).
 * @param frameTime Time (ms since epoch) at which the frame was captured.
 */
function registerNextTrackCheck(position, duration, frameTime) {
    nextTrackTimestamp = frameTime + millisToNextTrack(position, duration)
    // Same delay as before: remaining play time minus the time elapsed since the frame, plus 750ms.
    setTimeout(updateTrackInfo_v2, nextTrackTimestamp - Date.now() + 750)
}
/**
 * Reports whether the stored playback position was captured after
 * `nextTrackTimestamp`.
 *
 * @returns true when `currentTrackPosition.positionTimestamp` is strictly
 *   greater than `nextTrackTimestamp`.
 */
function positionIsIdentified() {
    const { positionTimestamp } = currentTrackPosition
    return positionTimestamp > nextTrackTimestamp
}
/**
 * Reads the current playback position from a frame.
 *
 * The unreachable statements that used to follow the `return` have been
 * removed; the function only returns the position, and storing it or retrying
 * is left to the caller.
 *
 * @param frame Temp file holding the frame to read; when null/undefined a new
 *   frame is grabbed from `resolvedUrl.url` (the URL is not refreshed here).
 * @param frameTime Capture time of `frame`. Kept for signature compatibility;
 *   it is not used by this function.
 * @returns A promise for the voted position, or null when the poll had no winner.
 */
async function updatePosition(frame, frameTime) {
    // Stream URL update shouldn't need to be called from here
    const sourceFrame = frame == null ? await getFrame(resolvedUrl.url) : frame
    const durationFrame = await getRegion(sourceFrame, 0, 1028, 235, 34)
    return positionDurationVoting(durationFrame, false)
}
// getYoutubeStream()
// .then(url => getFrame(url))
// .then(frame => {
// const titlePromise = getRegion(frame, 432, 906, 1487, 54)
// const albumPromise = getRegion(frame, 440, 957, 1487, 32)
// const durationPromise = getRegion(frame, 0, 1028, 235, 34)
//
// return performQuorumProcessing(titlePromise, albumPromise, durationPromise)
// })
// .then(value => {
// console.log('Processing complete, voted result was:')
// console.log(`${value.result.album}: ${value.result.track} (${value.position}/${value.result.duration})`)
// console.log(`Time until next track: ${timeUntilNextTrack(value.position, value.result.duration)}`)
// })
// .catch(err => console.error(err))
// Entry point: start the v2 detection loop. A failure of the very first iteration
// is reported explicitly and reflected in the exit code instead of surfacing as an
// unhandled promise rejection.
updateTrackInfo_v2().catch(err => {
    console.error("Initial track update failed:", err)
    process.exitCode = 1
})