/**
 * Command-line utility that converts an .srt subtitle file into a Markdown
 * transcript in which every cue is wrapped in its start/end timestamps and
 * known speaker labels are rendered in bold.
 *
 * Usage (see the message printed when no input is given):
 *   node import-transcripts.js --input in.srt --output out.md --speakers 'Name1,Name2'
 */

/**
 * Escapes every regular-expression metacharacter in `text` so the result
 * matches `text` literally when embedded in a RegExp source string.
 *
 * @param {string} text - Arbitrary text (here: a speaker name).
 * @returns {string} `text` with metacharacters backslash-escaped.
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}

/**
 * Converts the text of an .srt file into a single Markdown string.
 *
 * Each cue (index line, `start --> end` line, one or more text lines) becomes
 * `==start==text==end==`, with multi-line cue text joined by single spaces.
 * If the cue text starts with `<speaker>:` (case-insensitive, leading
 * whitespace allowed) for one of the given names, that label is rewritten to
 * `**<speaker>:**`. A paragraph break ("\n\n") is emitted before a cue whose
 * detected speaker differs from the speaker detected on the previous cue.
 * All pieces are finally joined with a single space.
 *
 * A warning is printed via `console.warn` for every requested speaker that
 * never appears as a label.
 *
 * @param {string} srtContent - Raw contents of the .srt file (LF or CRLF).
 * @param {string[]} [names=[]] - Speaker names to look for; blank entries and
 *   case-insensitive duplicates are ignored.
 * @returns {string} The Markdown transcript.
 */
function importTranscript(srtContent, names = []) {
  // .srt files are very often saved with CRLF line endings; without this the
  // blank-line split below would never separate the cues.
  const entries = srtContent.replace(/\r\n?/g, '\n').split(/\n\n+/)

  // Build one matcher per distinct, non-empty, lower-cased name. The pattern
  // is compiled once here rather than once per cue, and the name is escaped
  // so characters such as "." or "(" are matched literally.
  const speakers = []
  const knownNames = new Set()
  for (const rawName of names) {
    const name = rawName.trim().toLowerCase()
    if (name === '' || knownNames.has(name)) continue
    knownNames.add(name)
    speakers.push({
      name,
      pattern: new RegExp(`^\\s*(${escapeRegExp(name)}):`, 'i')
    })
  }

  const foundSpeakers = new Set()
  const mdParts = []
  let lastFoundSpeaker = null

  for (const entry of entries) {
    const lines = entry.split('\n')
    // A cue needs at least: index line, timing line, one line of text.
    if (lines.length < 3) continue

    const timeParts = lines[1].split(' --> ')
    if (timeParts.length !== 2) continue

    let content = lines.slice(2).join(' ')
    let currentFoundSpeaker = null

    // The patterns carry no `g` flag, so `test` is stateless and safe to reuse.
    const speaker = speakers.find(({ pattern }) => pattern.test(content))
    if (speaker) {
      // `$1` keeps the label's capitalisation exactly as written in the cue.
      content = content.replace(speaker.pattern, '**$1:**')
      foundSpeakers.add(speaker.name)
      currentFoundSpeaker = speaker.name
    }

    // Start a new paragraph whenever a labelled cue follows a cue that had a
    // different label (or none at all).
    if (currentFoundSpeaker && currentFoundSpeaker !== lastFoundSpeaker) {
      mdParts.push('\n\n')
    }

    mdParts.push(`==${timeParts[0]}==${content}==${timeParts[1]}==`)
    lastFoundSpeaker = currentFoundSpeaker
  }

  for (const { name } of speakers) {
    if (!foundSpeakers.has(name)) {
      console.warn(`Warning: Speaker ${name} wasn't found. Did you misspell their name?`)
    }
  }

  return mdParts.join(' ')
}

/**
 * Parses the command-line arguments that follow the script name.
 *
 * @param {string[]} argv - Arguments, e.g. `process.argv.slice(2)`.
 * @returns {{srtFileName: (string|undefined), mdFileName: (string|undefined), names: string}}
 *   The input path, optional output path, and the raw comma-separated
 *   speaker list (empty string when not supplied).
 */
function parseArgs(argv) {
  const options = { srtFileName: undefined, mdFileName: undefined, names: '' }

  for (let i = 0; i < argv.length; i++) {
    switch (argv[i]) {
      case '--input':
      case '-i':
        options.srtFileName = argv[++i]
        break
      case '--output':
      case '-o':
        options.mdFileName = argv[++i]
        break
      case '--speakers':
        // A trailing `--speakers` with no value must not leave `names`
        // undefined, or the later `split(',')` would throw.
        options.names = argv[++i] || ''
        break
    }
  }

  return options
}

/**
 * CLI entry point: reads the .srt file, converts it, and writes the result to
 * the output file, or to stdout when no output file was given. Prints the
 * usage text and exits with status 1 when no input file was given.
 *
 * @param {string[]} argv - Arguments, e.g. `process.argv.slice(2)`.
 */
function main(argv) {
  const { srtFileName, mdFileName, names } = parseArgs(argv)

  if (!srtFileName) {
    console.log("This utility converts valid .srt files to NDC compatible transcripts. Usage: node import-transcripts.js --input --output --speakers 'Name1,Name2,...'")
    process.exit(1)
  }

  // Required here rather than at module scope so that loading this file for
  // `importTranscript` alone has no filesystem dependency.
  const fs = require('fs')

  const srtContent = fs.readFileSync(srtFileName, 'utf8')
  const mdContent = importTranscript(srtContent, names.split(','))

  if (mdFileName) {
    fs.writeFileSync(mdFileName, mdContent, 'utf8')
    console.log(`Converted content written to ${mdFileName}`)
  } else {
    console.log(mdContent)
  }
}

// Expose the converter for programmatic use and run the CLI only when this
// file is executed directly. The `typeof` checks keep the file loadable in
// contexts where the CommonJS globals are not defined.
if (typeof module !== 'undefined' && typeof require !== 'undefined') {
  module.exports = { importTranscript }
  if (require.main === module) {
    main(process.argv.slice(2))
  }
}