Files
radio-scraper/index.js
2021-03-08 16:59:33 -07:00

363 lines
13 KiB
JavaScript

const { spawn, exec, execFile } = require('child_process')
const fs = require('fs')
const youtubedl = require('youtube-dl-exec')
const tmp = require('tmp')
const playlist = require('./playlist-search')
const scraper = require('./playlist-scrape')
// Live stream whose frames are captured and read with OCR.
const STREAM_URL = "https://www.youtube.com/henrikomagnifico/live"
// Captures a "position/duration" pair such as 1:23/4:56 (group 1 = position, group 2 = duration).
const DURATION_REGEX = /(\d{1,2}:\d{2})\/(\d{1,2}:\d{2})/
// Captures the 10-digit value that follows /expire/ in a resolved stream URL.
const EXPIRATION_REGEX = /.+\/expire\/([0-9]{10})\/.+/
// One "worker" per white-threshold percentage, each with running correct/skipped counters.
const thresholds = [20, 17.5, 15, 12.5, 10, 7.5, 5].map(function (level) {
  return {
    threshold: level,
    trackVotes: { correct: 0, skipped: 0 },
    positionVotes: { correct: 0, skipped: 0 }
  }
})
let albumWhitelistCharacters = playlist.getValidAlbumCharacters()
let trackWhitelistCharacters = playlist.getValidTrackCharacters()
// Mutable scraper state shared by the functions in this file.
let currentTrack = {}
let nextTrackTimestamp = 0
let readAttempts = 0
let totalVotes = 0
let resolvedUrl = {
  url: '',
  expires: 0
}
// Per-threshold accuracy logs, opened in append mode.
const trackStatsFileStream = fs.createWriteStream("track.csv", { flags: 'a' })
const positionStatsFileStream = fs.createWriteStream("position.csv", { flags: 'a' })
// Each start appends a header row listing the threshold levels to both files.
for (const entry of thresholds) {
  trackStatsFileStream.write(entry.threshold + ",")
  positionStatsFileStream.write(entry.threshold + ",")
}
trackStatsFileStream.write('\n')
positionStatsFileStream.write('\n')
/**
 * Runs the `tesseract` command on an image file and yields the text it prints.
 *
 * @param {{name: string, removeCallback: Function}} tmpfile - temp-file handle of the image;
 *   its removeCallback() is invoked as soon as the command finishes, success or not.
 * @param {string} [charWhitelist] - when truthy, passed as tessedit_char_whitelist.
 * @returns {Promise<string>} stdout with every newline and form-feed removed;
 *   rejects with the exec error (after logging stderr) when the command fails.
 */
function readText(tmpfile, charWhitelist) {
  return new Promise((resolve, reject) => {
    const commandParts = [`tesseract ${tmpfile.name} -`]
    if (charWhitelist) {
      commandParts.push(`-c tessedit_char_whitelist="${charWhitelist}"`)
    }
    exec(commandParts.join(' '), (error, stdout, stderr) => {
      // The input image is no longer needed once tesseract has exited.
      tmpfile.removeCallback()
      if (error) {
        console.error(stderr)
        reject(error)
        return
      }
      resolve(stdout.replace(/[\n\f]/g, ''))
    })
  })
}
/**
 * Writes a thresholded copy of an image to a new temp file using the `convert` command
 * (-white-threshold at the given percentage, then the separated B channel of HSB).
 *
 * @param {{name: string}} image - handle whose `name` is the path of the source PNG.
 * @param {number} threshold - percentage handed to -white-threshold.
 * @returns {Promise<object>} the tmp file handle of the output; on failure stderr is logged,
 *   the output temp file is removed and the promise rejects with the exec error.
 */
function thresholdImage(image, threshold) {
  return new Promise((resolve, reject) => {
    const output = tmp.fileSync()
    const command = `convert png:${image.name} -white-threshold ${threshold}% -colorspace HSB -channel B -separate ${output.name}`
    exec(command, (error, stdout, stderr) => {
      if (!error) {
        resolve(output)
        return
      }
      console.error(stderr)
      output.removeCallback()
      reject(error)
    })
  })
}
/**
 * Crops a rectangle out of a frame into a new temp PNG using the `convert` command;
 * the command also applies -negate to the image.
 *
 * @param {{name: string}} frame - handle whose `name` is the path of the source PNG.
 * @param {number} x - horizontal offset of the crop.
 * @param {number} y - vertical offset of the crop.
 * @param {number} width - crop width.
 * @param {number} height - crop height.
 * @returns {Promise<object>} the tmp file handle of the cropped image (not a Buffer); on failure
 *   stderr is logged, the output temp file is removed and the promise rejects with the exec error.
 */
function getRegion(frame, x, y, width, height) {
  return new Promise((resolve, reject) => {
    const cropped = tmp.fileSync()
    const geometry = `${width}x${height}+${x}+${y}`
    exec(`convert png:${frame.name} -negate -crop ${geometry} png:${cropped.name}`, (error, stdout, stderr) => {
      if (!error) {
        resolve(cropped)
        return
      }
      console.error(stderr)
      cropped.removeCallback()
      reject(error)
    })
  })
}
/**
 * Grabs a single video frame from a stream URL with `ffmpeg` and stores it as a PNG temp file.
 *
 * The URL comes from an external service, so ffmpeg is started with execFile and an argument
 * array instead of a shell command line: characters such as `&`, `?`, `;` or `$` in the URL are
 * passed to ffmpeg verbatim and can neither split nor extend the command.
 *
 * @param {string} url - stream URL handed to ffmpeg's -i option.
 * @returns {Promise<object>} the tmp file handle of the captured frame; on failure stderr is
 *   logged, the temp file is removed and the promise rejects with the execFile error.
 */
function getFrame(url) {
  return new Promise((resolve, reject) => {
    const frameFile = tmp.fileSync()
    const ffmpegArgs = ['-i', url, '-y', '-f', 'image2', '-c:v', 'png', '-frames:v', '1', frameFile.name]
    execFile('ffmpeg', ffmpegArgs, (err, stdout, stderr) => {
      if (err) {
        console.log(stderr)
        frameFile.removeCallback()
        reject(err)
      } else {
        resolve(frameFile)
      }
    })
  })
}
/**
 * Asks youtube-dl for the metadata of STREAM_URL (JSON dump, "best" format) and yields
 * the `url` field of the result.
 *
 * @returns {Promise<string>} the resolved media URL; rejects with whatever youtubedl rejects with.
 */
async function getYoutubeStream() {
  const streamInfo = await youtubedl(STREAM_URL, {
    dumpJson: true,
    format: "best"
  })
  return streamInfo.url
}
// Placeholder with an empty body: calling it does nothing and returns undefined.
function getYoutubeAudioUrl() {
}
/**
 * Turns the OCR output of every threshold worker into a vote.
 *
 * @param {Array<[string, string, string]>} ocrResults - one [title, album, "position/duration"]
 *   triple per worker.
 * @returns {Array<{result: *, position: string}>} per worker: whatever playlist.search returns for
 *   (album, title, duration), plus the captured position ('' when the third string does not
 *   match DURATION_REGEX).
 */
function getVotes(ocrResults) {
  return ocrResults.map((text, index) => {
    // Logged directly; wrapping the call in a Promise and handing its result to .then() added nothing.
    console.debug(`Worker ${index} input: ${text}`)
    const title = text[0].trim()
    const album = text[1].trim()
    const positionDuration = text[2].trim()
    // match() already yields null on a miss, so a separate test() pass is unnecessary.
    const captured = positionDuration.match(DURATION_REGEX) || ['', '', '']
    return {
      result: playlist.search(album, title, captured[2]),
      position: captured[1]
    }
  })
}
/** Adds one vote for `key` to the plain-object tally `counts`. */
function countVote(counts, key) {
  const previous = Object.prototype.hasOwnProperty.call(counts, key) ? counts[`${key}`] : 0
  counts[`${key}`] = previous + 1
}

/** Finds the key with the most votes in `counts`; the first key in Object.keys order wins ties. */
function pickMostVoted(counts) {
  let key
  let votes = 0
  for (const candidate of Object.keys(counts)) {
    if (counts[candidate] > votes) {
      key = candidate
      votes = counts[candidate]
    }
  }
  return { key, votes }
}

/**
 * Picks the majority track and majority playback position from the workers' votes.
 *
 * Side effects: increments the module-level totalVotes on every call. When at least half of the
 * workers produced a track result it also updates the per-threshold correct/skipped counters in
 * `thresholds` (worker i maps to thresholds[i]) and appends one comma-joined accuracy row, without
 * a line terminator, to each stats stream.
 *
 * @param {Array<{result: ?{refIndex: number, item: *}, position: string}>} votes - one vote per worker.
 * @returns {{position: ?string, positionConfidence: number, result: *, resultConfidence: number,
 *   voterConfidence: number}} the winning position and track item with their confidences.
 *   `position` is null and `positionConfidence` is 0 when no worker read a position; everything
 *   except `voterConfidence` is null/0 when fewer than half of the workers voted for a track.
 */
function tallyVotes(votes) {
  const trackResults = {}
  const positionResults = {}
  let overallVotes = 0
  const voters = []
  const durationVotes = []
  totalVotes++
  votes.forEach(vote => {
    if (vote.result != null) {
      overallVotes++
      voters.push("y")
      countVote(trackResults, vote.result.refIndex)
    } else {
      voters.push("n")
    }
    if (vote.position) {
      durationVotes.push("y")
      countVote(positionResults, vote.position)
    } else {
      durationVotes.push("n")
    }
  })
  console.debug("\nDid workers vote?")
  console.debug("Result vote:", JSON.stringify(voters))
  console.debug("Duration vote:", JSON.stringify(durationVotes))
  // An empty vote list would otherwise yield 0/0 = NaN.
  const voterConfidence = votes.length === 0 ? 0 : overallVotes / votes.length
  if (overallVotes === 0 || voterConfidence < 0.5) {
    console.warn(overallVotes === 0 ? "no workers voted" : "not enough workers voted")
    return {
      position: null,
      positionConfidence: 0,
      result: null,
      resultConfidence: 0,
      voterConfidence: voterConfidence
    }
  }
  // Retrieve votes
  console.debug("Initial index:", Object.keys(trackResults)[0], "All vote results:", trackResults)
  const trackWinner = pickMostVoted(trackResults)
  const positionWinner = pickMostVoted(positionResults)
  const trackIndex = trackWinner.key
  const position = positionWinner.key
  console.debug(`\nVote was index`, trackIndex)
  console.debug("Worker results:")
  // 'Y' = agreed with the winner, 'n' = voted for something else, '_' = did not vote.
  const voterTrackAccuracy = voters.map((voted, i) => {
    if (voted !== 'y') {
      thresholds[i].trackVotes.skipped++
      return '_'
    }
    if (`${votes[i].result.refIndex}` === trackIndex) {
      thresholds[i].trackVotes.correct++
      return 'Y'
    }
    return 'n'
  })
  const voterPositionAccuracy = durationVotes.map((voted, i) => {
    if (voted !== 'y') {
      thresholds[i].positionVotes.skipped++
      return '_'
    }
    if (votes[i].position === position) {
      thresholds[i].positionVotes.correct++
      return 'Y'
    }
    return 'n'
  })
  console.debug("Track accuracy:", JSON.stringify(voterTrackAccuracy))
  trackStatsFileStream.write(voterTrackAccuracy.join(','))
  console.debug("Position accuracy:", JSON.stringify(voterPositionAccuracy))
  positionStatsFileStream.write(voterPositionAccuracy.join(','))
  const actualResult = votes.find(v => v.result != null && `${v.result.refIndex}` === trackIndex).result.item
  return {
    // With no position votes the winner key is undefined and its count is 0, which gives
    // null / 0 here instead of undefined / NaN, matching the early-return shape above.
    position: position === undefined ? null : position,
    positionConfidence: positionWinner.votes / votes.length,
    result: actualResult,
    resultConfidence: trackWinner.votes / overallVotes,
    voterConfidence: voterConfidence
  }
}
/**
 * Computes how many seconds of the track remain.
 *
 * @param {string} position - elapsed time containing an m:ss / mm:ss stamp.
 * @param {string} duration - track length containing an m:ss / mm:ss stamp.
 * @returns {number} duration minus position, in seconds (negative if position is past duration).
 * @throws {TypeError} when either argument contains no m:ss stamp.
 */
function timeUntilNextTrack(position, duration) {
  const stampPattern = /(\d{1,2}):(\d{2})/
  // Both stamps are matched (duration first) before either is converted.
  const matches = [duration, position].map(stamp => stamp.match(stampPattern))
  const [durationInSeconds, positionInSeconds] = matches.map(
    parts => (Number.parseInt(parts[1], 10) * 60) + Number.parseInt(parts[2], 10)
  )
  return durationInSeconds - positionInSeconds
}
/**
 * Runs OCR on the three cropped regions once per threshold level and lets the levels vote.
 *
 * Every step is awaited in one chain, so a failure while cropping, thresholding, reading text or
 * voting rejects the returned promise. Previously the inner chain was detached from the outer
 * `.catch`, which left the returned promise pending forever after such a failure.
 *
 * @param {Promise<object>} titleCroppedPromise - resolves to the tmp file of the title crop.
 * @param {Promise<object>} albumCroppedPromise - resolves to the tmp file of the album crop.
 * @param {Promise<object>} durationCroppedPromise - resolves to the tmp file of the position/duration crop.
 * @returns {Promise<object>} the value returned by tallyVotes for the collected votes.
 */
async function performQuorumProcessing(titleCroppedPromise, albumCroppedPromise, durationCroppedPromise) {
  // Wait for all crops; each entry is the tmp file handle of one region.
  const [titleImage, albumImage, durationImage] = await Promise.all([
    titleCroppedPromise,
    albumCroppedPromise,
    durationCroppedPromise
  ])
  // One [title, album, position/duration] text triple per threshold level.
  const allOcrResults = await Promise.all(thresholds.map(t => Promise.all([
    thresholdImage(titleImage, t.threshold).then(file => readText(file)),
    thresholdImage(albumImage, t.threshold).then(file => readText(file)),
    // Only digits, ':' and '/' can appear in the time display.
    thresholdImage(durationImage, t.threshold).then(file => readText(file, "1234567890:/"))
  ])))
  // Cast votes (playlist search per worker), then tally them.
  const votes = getVotes(allOcrResults)
  const winner = tallyVotes(votes)
  console.debug('\nPosition confidence:', winner.positionConfidence)
  console.debug('Result confidence:', winner.resultConfidence)
  console.debug('Voter confidence:', winner.voterConfidence)
  return winner
}
/**
 * Resolves a fresh stream URL and stores it, together with its expiry, in `resolvedUrl`.
 * The expiry is the number captured by EXPIRATION_REGEX multiplied by 1000.
 *
 * The URL is stored before the expiry is parsed, so a URL without an /expire/ segment
 * still replaces `resolvedUrl.url` and then makes the returned promise reject with a TypeError.
 *
 * @returns {Promise<void>}
 */
async function updateStreamUrl() {
  const freshUrl = await getYoutubeStream()
  resolvedUrl.url = freshUrl
  const expiryMatch = freshUrl.match(EXPIRATION_REGEX)
  const expirySeconds = parseInt(expiryMatch[1])
  resolvedUrl.expires = expirySeconds * 1000
}
/**
 * Performs one scrape pass and schedules the next one.
 *
 * Steps: refresh the stream URL if it has expired (awaited) or expires within 30 minutes
 * (in the background), capture one frame, crop the title / album / time regions, let the
 * threshold workers vote, then
 *   - retry immediately when the track confidence is below 0.5 or no position was read,
 *   - otherwise store the track in `currentTrack` and schedule the next pass for shortly
 *     after the current track should end.
 *
 * Fixes relative to the previous version:
 *   - the frame and the three crop temp files are removed once voting has finished
 *     (they used to accumulate on every pass),
 *   - position.csv rows are terminated the same way track.csv rows are,
 *   - a failing background URL refresh is logged instead of becoming an unhandled rejection.
 *
 * @returns {Promise<void>} rejects if stream resolution, frame capture or OCR fails; nothing is
 *   rescheduled in that case.
 */
async function updateTrackInfo() {
  const urlExpiresSoon = resolvedUrl.expires < (Date.now() + (30 * 60 * 1000))
  if (resolvedUrl.expires < Date.now()) {
    await updateStreamUrl()
    console.log("Got stream info")
  } else if (urlExpiresSoon) {
    // Still valid for this pass, so refresh without blocking.
    updateStreamUrl()
      .then(() => console.log("Updated stream info in the background"))
      .catch(err => console.error("Background stream info update failed:", err))
  }
  console.log(`Now: ${Date.now()}\tExpiration: ${resolvedUrl.expires}\tAttempt: ${readAttempts}`)
  const processingStart = Date.now()
  const frame = await getFrame(resolvedUrl.url)
  const frameTime = Date.now()
  // Pixel rectangles (x, y, width, height) of the title, album and time regions in the frame.
  const regionPromises = [
    getRegion(frame, 432, 850, 1487, 100),
    getRegion(frame, 440, 957, 1487, 32),
    getRegion(frame, 0, 1028, 235, 34)
  ]
  const discard = file => {
    try {
      file.removeCallback()
    } catch (err) {
      console.warn("Could not remove temp file:", err)
    }
  }
  let result
  try {
    result = await performQuorumProcessing(regionPromises[0], regionPromises[1], regionPromises[2])
  } finally {
    // Wait for every crop to settle before deleting the frame it is read from.
    await Promise.all(regionPromises.map(region => region.then(discard, () => {})))
    discard(frame)
  }
  // Both stats files get the attempt number appended, which also terminates the row.
  trackStatsFileStream.write(',' + readAttempts + '\n')
  positionStatsFileStream.write(',' + readAttempts + '\n')
  const votingTime = Date.now()
  if (result.resultConfidence < 0.5) {
    console.debug("Result confidence not high enough, retrying")
    setTimeout(() => updateTrackInfo(), 0)
    readAttempts++
    return;
  }
  readAttempts = 0
  currentTrack = result.result
  console.debug("\nCurrent threshold statistics: ")
  thresholds.forEach(t => {
    const skippedTrackRatio = (t.trackVotes.skipped / totalVotes) * 100
    const skippedPositionRatio = (t.positionVotes.skipped / totalVotes) * 100
    const accurateTrackRatio = (t.trackVotes.correct / (totalVotes - t.trackVotes.skipped)) * 100
    const accuratePositionRatio = (t.positionVotes.correct / (totalVotes - t.positionVotes.skipped)) * 100
    const trackEffectiveness = accurateTrackRatio - skippedTrackRatio
    const positionEffectiveness = accuratePositionRatio - skippedPositionRatio
    console.log(`${t.threshold}%:\tTrack: [Skip: ${skippedTrackRatio.toFixed(2)}% Correct: ${accurateTrackRatio.toFixed(2)}% Eff.: ${trackEffectiveness.toFixed(2)}%]\tPos: [Skip: ${skippedPositionRatio.toFixed(2)}% Correct: ${accuratePositionRatio.toFixed(2)}% Eff.: ${positionEffectiveness.toFixed(2)}%]`)
  })
  if (result.position) {
    const secondsUntilNext = timeUntilNextTrack(result.position, currentTrack.duration)
    const processingTime = Date.now() - processingStart
    const timeInFrame = Date.now() - frameTime
    nextTrackTimestamp = frameTime + (secondsUntilNext * 1000)
    // Subtract the time already spent since the frame was taken, then add a 750 ms margin.
    setTimeout(() => updateTrackInfo(), (secondsUntilNext * 1000) - timeInFrame + 750)
    console.log(`\nFrame processing took ${processingTime}ms (Frame retrieval: ${frameTime - processingStart}ms) (Processing & voting: ${votingTime - frameTime}ms)`)
    console.log(`${currentTrack.album}: ${currentTrack.track} (${result.position}/${currentTrack.duration})`)
    console.log(`Next track should be at: ${new Date(nextTrackTimestamp).toLocaleString()}`)
  } else {
    console.log(`${currentTrack.album}: ${currentTrack.track}`)
    console.warn("Workers did not vote on current position, retrying...")
    setTimeout(() => updateTrackInfo(), 0)
  }
}
// getYoutubeStream()
// .then(url => getFrame(url))
// .then(frame => {
// const titlePromise = getRegion(frame, 432, 906, 1487, 54)
// const albumPromise = getRegion(frame, 440, 957, 1487, 32)
// const durationPromise = getRegion(frame, 0, 1028, 235, 34)
//
// return performQuorumProcessing(titlePromise, albumPromise, durationPromise)
// })
// .then(value => {
// console.log('Processing complete, voted result was:')
// console.log(`${value.result.album}: ${value.result.track} (${value.position}/${value.result.duration})`)
// console.log(`Time until next track: ${timeUntilNextTrack(value.position, value.result.duration)}`)
// })
// .catch(err => console.error(err))
// Start the scrape loop. updateTrackInfo re-arms itself with setTimeout after each pass;
// the promise returned by this first call is not awaited and has no rejection handler.
updateTrackInfo()