2023-08-30 11:36:09 +00:00
|
|
|
const fs = require('fs')
|
|
|
|
|
|
|
|
/**
 * Convert the text of an .srt subtitle file into a single markdown string.
 *
 * Each subtitle entry becomes `==<start>==<text>==<end>==`. When the text of
 * an entry begins with one of the given speaker names followed by a colon,
 * that label is rendered in bold (`**Name:**`), and a blank line is inserted
 * before the entry if the labelled speaker differs from the speaker recorded
 * for the previous entry.
 *
 * A warning is printed with `console.warn` for every requested speaker that
 * never appears as a label.
 *
 * @param {string} srtContent - Full text of the .srt file.
 * @param {string[]} names - Speaker names to look for (matched case-insensitively;
 *   surrounding whitespace and empty items are ignored).
 * @returns {string} The converted entries joined with single spaces.
 */
function importTranscript(srtContent, names) {
  // Names are user input, so escape regex metacharacters: "Dr. Who" must not
  // match "DrX Who", and "Bob (host)" or "C++" must not break the pattern.
  const escapeRegExp = text => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

  // Normalise CRLF / lone CR line endings first; otherwise the blank-line
  // split below never fires and the whole file looks like a single entry.
  const entries = srtContent.replace(/\r\n?/g, '\n').split(/\n\n+/)

  // Lowercased, trimmed, de-duplicated names. Empty items (e.g. from
  // ''.split(',')) are dropped so they neither match ":"-prefixed text nor
  // trigger a bogus "not found" warning.
  const nameSet = new Set(
    names
      .map(name => name.trim().toLowerCase())
      .filter(name => name.length > 0)
  )

  // Build each speaker pattern once instead of once per subtitle entry.
  const speakerPatterns = [...nameSet].map(name => ({
    name,
    regex: new RegExp(`^\\s*(${escapeRegExp(name)}):`, 'i')
  }))

  const foundSpeakers = new Set()
  let lastFoundSpeaker = null
  const mdParts = []

  entries.forEach(entry => {
    // Expected layout: index line, timing line, then one or more text lines.
    const lines = entry.split(/\n/)
    if (lines.length < 3) return

    const timeParts = lines[1].split(' --> ')
    if (timeParts.length !== 2) return

    let content = lines.slice(2).join(' ')
    let currentFoundSpeaker = null

    for (const { name, regex } of speakerPatterns) {
      if (regex.test(content)) {
        // $1 is the label as written in the subtitle, so its casing is kept.
        content = content.replace(regex, '**$1:**')
        foundSpeakers.add(name)
        currentFoundSpeaker = name
        break
      }
    }

    // Add a line break if the speaker changed
    if (currentFoundSpeaker && currentFoundSpeaker !== lastFoundSpeaker) {
      mdParts.push("\n\n")
    }

    mdParts.push(`==${timeParts[0]}==${content}==${timeParts[1]}==`)

    // Unlabelled entries reset this to null, so the next labelled entry
    // always starts a new paragraph.
    lastFoundSpeaker = currentFoundSpeaker
  })

  nameSet.forEach(name => {
    if (!foundSpeakers.has(name)) {
      console.warn(`Warning: Speaker ${name} wasn't found. Did you misspell their name?`)
    }
  })

  return mdParts.join(' ')
}
|
|
|
|
|
|
|
|
// Command-line entry point: parse the options, convert the .srt file, then
// either write the result to the output file or print it to stdout.
let srtFileName
let mdFileName
let names = ''

// process.argv[0] and [1] are the node binary and the script path.
for (let i = 2; i < process.argv.length; i++) {
  switch (process.argv[i]) {
    case '--input':
    case '-i':
      srtFileName = process.argv[++i]
      break
    case '--output':
    case '-o':
      mdFileName = process.argv[++i]
      break
    case '--speakers':
      // If `--speakers` is the last argument there is no value to read; fall
      // back to '' so the split below does not throw on undefined.
      names = process.argv[++i] || ''
      break
  }
}

// The input file is the only mandatory option; without it, show usage and fail.
if (!srtFileName) {
  console.log("This utility converts valid .srt files to NDC compatible transcripts. Usage: node import-transcripts.js --input <input.srt> --output <output.md> --speakers 'Name1,Name2,...'")
  process.exit(1)
}

const srtContent = fs.readFileSync(srtFileName, 'utf8')
const nameList = names.split(',')
const mdContent = importTranscript(srtContent, nameList)

if (mdFileName) {
  fs.writeFileSync(mdFileName, mdContent, 'utf8')
  console.log(`Converted content written to ${mdFileName}`)
} else {
  // No output file requested: emit the converted transcript on stdout.
  console.log(mdContent)
}
|