// ndc-content-tools/import-transcript.js

const fs = require('fs')

/**
 * Escapes every regular-expression metacharacter in `text` so the result can
 * be embedded in a RegExp source and matched literally.
 *
 * @param {string} text - Raw text (here: a speaker name).
 * @returns {string} The text with metacharacters backslash-escaped.
 */
function escapeRegExp(text) {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
}

/**
 * Converts the text of an .srt subtitle file into a single markdown string.
 *
 * Every cue (index line, `start --> end` line, one or more text lines)
 * becomes `==start==text==end==`, with multi-line text joined by spaces.
 * When the text starts with `<speaker>:` for one of the given names
 * (case-insensitive, leading whitespace ignored) the prefix is rewritten to
 * `**<speaker>:**`. A "\n\n" part is emitted before a cue whose recognised
 * speaker differs from the previous cue's speaker (the previous speaker is
 * null when that cue had no recognised prefix). All parts are joined with a
 * single space.
 *
 * A warning is printed with console.warn for every name that never matched.
 *
 * @param {string} srtContent - Full contents of the .srt file (LF or CRLF).
 * @param {string[]} names - Speaker names; trimmed and lowercased here,
 *     blank entries are ignored.
 * @returns {string} The converted transcript.
 */
function importTranscript(srtContent, names) {
    // Normalise names and drop blanks: an empty name would build the pattern
    // `^\s*():`, which matches any cue starting with ':' and also triggers a
    // meaningless "wasn't found" warning.
    const nameSet = new Set(
        names
            .map(name => name.trim().toLowerCase())
            .filter(name => name.length > 0)
    )

    // Build each speaker pattern once instead of once per cue. Names are
    // escaped so characters such as '.', '+' or '(' are matched literally
    // (and cannot make the RegExp constructor throw).
    const speakerPatterns = Array.from(nameSet, name => ({
        name,
        regex: new RegExp(`^\\s*(${escapeRegExp(name)}):`, 'i'),
    }))

    const foundSpeakers = new Set()
    const mdParts = []
    let lastFoundSpeaker = null

    // Accept both LF and CRLF line endings; cues are separated by one or
    // more blank lines.
    const entries = srtContent.split(/(?:\r?\n){2,}/)

    for (const entry of entries) {
        const lines = entry.split(/\r?\n/)
        // A cue needs an index line, a timing line and at least one text line.
        if (lines.length < 3) continue

        const timeParts = lines[1].split(' --> ')
        if (timeParts.length !== 2) continue

        let content = lines.slice(2).join(' ')
        let currentFoundSpeaker = null

        // The first matching name wins; $1 keeps the casing used in the file.
        for (const { name, regex } of speakerPatterns) {
            if (regex.test(content)) {
                content = content.replace(regex, `**$1:**`)
                foundSpeakers.add(name)
                currentFoundSpeaker = name
                break
            }
        }

        // Add a line break if the speaker changed
        if (currentFoundSpeaker && currentFoundSpeaker !== lastFoundSpeaker) {
            mdParts.push("\n\n")
        }

        mdParts.push(`==${timeParts[0]}==${content}==${timeParts[1]}==`)

        lastFoundSpeaker = currentFoundSpeaker
    }

    nameSet.forEach(name => {
        if (!foundSpeakers.has(name)) {
            console.warn(`Warning: Speaker ${name} wasn't found. Did you misspell their name?`)
        }
    })

    return mdParts.join(' ')
}

// Command-line entry point: reads the .srt file given with --input/-i,
// converts it with importTranscript, and writes the result to the file given
// with --output/-o, or prints it to stdout when no output file is given.
// --speakers takes a comma-separated list of speaker names.
let srtFileName
let mdFileName
let names = ''

for (let i = 2; i < process.argv.length; i++) {
    switch (process.argv[i]) {
        case '--input':
        case '-i':
            srtFileName = process.argv[++i]
            break
        case '--output':
        case '-o':
            mdFileName = process.argv[++i]
            break
        case '--speakers':
            // A trailing `--speakers` with no value reads past the end of
            // argv (undefined); fall back to "no speakers" instead of
            // crashing on names.split below.
            names = process.argv[++i] || ''
            break
    }
}

// Without an input file there is nothing to convert: show usage and fail.
if (!srtFileName) {
    console.log("This utility converts valid .srt files to NDC compatible transcripts. Usage: node import-transcript.js --input <input.srt> --output <output.md> --speakers 'Name1,Name2,...'")
    process.exit(1)
}

const srtContent = fs.readFileSync(srtFileName, 'utf8')
// ''.split(',') yields [''], so drop blank entries (also covers "A,,B" and
// a trailing comma) before handing the list to the converter.
const nameList = names
    .split(',')
    .map(name => name.trim())
    .filter(name => name.length > 0)
const mdContent = importTranscript(srtContent, nameList)

if (mdFileName) {
    fs.writeFileSync(mdFileName, mdContent, 'utf8')
    console.log(`Converted content written to ${mdFileName}`)
} else {
    console.log(mdContent)
}
Transcript converter 2023-08-30 11:36:09 +00:00			`const fs = require('fs')`

			`function importTranscript(srtContent, names) {`
Updated to handle things properly 2023-08-30 11:56:52 +00:00			`const entries = srtContent.split(/\n\n+/)`
			`const nameSet = new Set(names.map(name => name.trim().toLowerCase())) // Ensure we have lowercased names`
Transcript converter 2023-08-30 11:36:09 +00:00
Updated to handle things properly 2023-08-30 11:56:52 +00:00			`let foundSpeakers = new Set()`
			`let lastFoundSpeaker = null`
			`let mdParts = []`
Transcript converter 2023-08-30 11:36:09 +00:00
Updated to handle things properly 2023-08-30 11:56:52 +00:00			`entries.forEach(entry => {`
			`const lines = entry.split(/\n/)`
			`if (lines.length < 3) return`

			`const timeParts = lines[1].split(' --> ')`
			`if (timeParts.length !== 2) return`

			`let content = lines.slice(2).join(' ')`
			`let currentFoundSpeaker = null`

			`for (const name of nameSet) {`
			const regex = new RegExp(`^\\s*(${name}):`, 'i') // Adjusted regex
			`if (content.match(regex)) {`
			content = content.replace(regex, `$1:`)
			`foundSpeakers.add(name.toLowerCase())`
			`currentFoundSpeaker = name`
			`break`
			`}`
			`}`
Transcript converter 2023-08-30 11:36:09 +00:00
Updated to handle things properly 2023-08-30 11:56:52 +00:00			`// Add a line break if the speaker changed`
			`if (currentFoundSpeaker && currentFoundSpeaker !== lastFoundSpeaker) {`
			`mdParts.push("\n\n")`
			`}`

			mdParts.push(`==${timeParts[0]}==${content}==${timeParts[1]}==`)

			`lastFoundSpeaker = currentFoundSpeaker`
			`})`

			`nameSet.forEach(name => {`
			`if (!foundSpeakers.has(name)) {`
			console.warn(`Warning: Speaker ${name} wasn't found. Did you misspell their name?`)
			`}`
			`})`

			`return mdParts.join(' ')`
Transcript converter 2023-08-30 11:36:09 +00:00			`}`

			`let srtFileName, mdFileName, names = ''`

			`for (let i = 2; i < process.argv.length; i++) {`
Updated to handle things properly 2023-08-30 11:56:52 +00:00			`switch (process.argv[i]) {`
			`case '--input':`
			`case '-i':`
			`srtFileName = process.argv[++i]`
			`break`
			`case '--output':`
			`case '-o':`
			`mdFileName = process.argv[++i]`
			`break`
			`case '--speakers':`
			`names = process.argv[++i]`
			`break`
			`}`
Transcript converter 2023-08-30 11:36:09 +00:00			`}`

			`if (!srtFileName) {`
Updated to handle things properly 2023-08-30 11:56:52 +00:00			`console.log("This utility converts valid .srt files to NDC compatible transcripts. Usage: node import-transcripts.js --input <input.srt> --output <output.md> --speakers 'Name1,Name2,...'")`
			`process.exit(1)`
Transcript converter 2023-08-30 11:36:09 +00:00			`}`

			`const srtContent = fs.readFileSync(srtFileName, 'utf8')`
			`const nameList = names.split(',')`
			`const mdContent = importTranscript(srtContent, nameList)`

			`if (mdFileName) {`
Updated to handle things properly 2023-08-30 11:56:52 +00:00			`fs.writeFileSync(mdFileName, mdContent, 'utf8')`
			console.log(`Converted content written to ${mdFileName}`)
Transcript converter 2023-08-30 11:36:09 +00:00			`} else {`
Updated to handle things properly 2023-08-30 11:56:52 +00:00			`console.log(mdContent)`
Transcript converter 2023-08-30 11:36:09 +00:00			`}`