Transcript converter
This commit is contained in:
		
						commit
						ee4fc5d9c9
					
				
							
								
								
									
										72
									
								
								import-transcript.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								import-transcript.js
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,72 @@
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
const fs = require('fs')
 | 
			
		||||
 | 
			
		||||
/**
 * Convert the text of an .srt subtitle file into transcript markup.
 *
 * Each cue (index line, timing line, one or more text lines) becomes
 * `==<start>==<text>==<end>==`; cues are joined with a blank line. Cues with
 * fewer than three lines or without exactly one ` --> ` separator on the
 * timing line are dropped.
 *
 * When a cue's text starts with `<name>:` for one of the given speaker names
 * (compared case-insensitively), that prefix is rewritten to `**<name>**:`
 * using the spelling supplied in `names`. A warning is printed with
 * `console.warn` for every supplied name that never matched a cue.
 *
 * @param {string} srtContent - Full contents of the .srt file.
 * @param {string[]} names - Speaker names; surrounding whitespace is trimmed
 *   and empty entries are ignored.
 * @returns {string} The converted transcript.
 */
function importTranscript(srtContent, names) {
    // Names are user input, so neutralise regex metacharacters before
    // building a pattern from them.
    const escapeRegExp = text => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

    // Files saved on Windows use CRLF; normalise so the blank-line split and
    // the per-line split below behave the same on every platform.
    const normalized = srtContent.replace(/\r\n?/g, '\n').trim()
    // Treat whitespace-only lines as cue separators as well.
    const entries = normalized.split(/\n(?:[ \t]*\n)+/)

    // De-duplicate and drop empty names (e.g. from splitting an empty string),
    // which would otherwise match cues starting with ':' and trigger a
    // meaningless "not found" warning.
    const speakers = [...new Set(names.map(name => name.trim()).filter(Boolean))]
    // Build each pattern once instead of once per cue.
    const matchers = speakers.map(name => ({
        name,
        regex: new RegExp(`^${escapeRegExp(name)}:`, 'i')
    }))

    const foundSpeakers = new Set()

    const mdEntries = entries.map(entry => {
        const lines = entry.split(/\n/)
        if (lines.length < 3) return null

        const timeParts = lines[1].split(' --> ')
        if (timeParts.length !== 2) return null

        let content = lines.slice(2).join(' ').trim()
        for (const { name, regex } of matchers) {
            const match = regex.exec(content)
            if (match) {
                // Rewrite exactly the matched prefix (minus its colon) rather
                // than searching for the name again: the match ignores case,
                // so a case-sensitive search could miss it or hit a later
                // mention of the same name inside the sentence.
                content = `**${name}**${content.slice(match[0].length - 1)}`
                foundSpeakers.add(name)
                break
            }
        }

        return `==${timeParts[0].trim()}==${content}==${timeParts[1].trim()}==`
    }).filter(Boolean)

    for (const name of speakers) {
        if (!foundSpeakers.has(name)) {
            console.warn(`Warning: Speaker ${name} wasn't found. Did you misspell their name?`)
        }
    }

    return mdEntries.join('\n\n')
}
 | 
			
		||||
 | 
			
		||||
// Command-line entry point.
//   --input / -i   path of the .srt file to read (required)
//   --output / -o  path to write the converted transcript to; when omitted
//                  the result is printed to stdout
//   --speakers     comma-separated list of speaker names
let srtFileName
let mdFileName
let names = ''

// argv[0] is the node binary and argv[1] the script path, so options start at 2.
for (let i = 2; i < process.argv.length; i++) {
    switch (process.argv[i]) {
        case '--input':
        case '-i':
            srtFileName = process.argv[++i]
            break
        case '--output':
        case '-o':
            mdFileName = process.argv[++i]
            break
        case '--speakers':
            // A trailing `--speakers` with no value would leave this
            // undefined and make the split below throw.
            names = process.argv[++i] || ''
            break
    }
}

if (!srtFileName) {
    console.log("This utility converts valid .srt files to NDC compatible transcripts. Usage: node import-transcript.js --input <input.srt> --output <output.md> --speakers 'Name1,Name2,...'")
    process.exit(1)
}

const srtContent = fs.readFileSync(srtFileName, 'utf8')
// Splitting an empty string yields [''], so discard blank entries to avoid
// passing a nameless speaker to the converter.
const nameList = names
    .split(',')
    .map(name => name.trim())
    .filter(name => name.length > 0)
const mdContent = importTranscript(srtContent, nameList)

if (mdFileName) {
    fs.writeFileSync(mdFileName, mdContent, 'utf8')
    console.log(`Converted content written to ${mdFileName}`)
} else {
    console.log(mdContent)
}
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user