@@ -100,10 +100,26 @@ function findMarkdownFiles(dir) {
100100 return files . sort ( ) ;
101101}
102102
/**
 * Count tokens in dialogue text using the Gemini API.
 *
 * @param {string} dialogue - Dialogue script to measure
 * @returns {Promise<number>} Total token count reported by the TTS model
 */
async function countDialogueTokens(dialogue) {
  const request = {
    contents: [{ role: 'user', parts: [{ text: dialogue }] }],
  };

  // Count against the TTS model so the total matches its input limit.
  const tokenCounter = genAI.getGenerativeModel({ model: TTS_MODEL });
  const { totalTokens } = await tokenCounter.countTokens(request);
  return totalTokens;
}
118+
103119/**
104120 * Generate conversational dialogue from technical content
105121 */
106- async function generateDialogue ( content , fileName ) {
122+ async function generateDialogue ( content , fileName , maxTokens = null ) {
107123 console . log ( ` 🎭 Generating dialogue script...` ) ;
108124
109125 const model = genAI . getGenerativeModel ( { model : DIALOGUE_MODEL } ) ;
@@ -115,21 +131,30 @@ async function generateDialogue(content, fileName) {
115131SPECIAL INSTRUCTIONS FOR THIS EPISODE:
116132This is the course introduction. When discussing the section about how the course was developed using AI (including the podcast itself), have Alex and Sam briefly acknowledge in a natural, professional way that they themselves are AI-generated voices. This should feel like a thoughtful meta-moment, not gimmicky. Keep it concise - one or two exchanges where they note the self-referential nature of AI-generated hosts discussing AI-generated content. Make it feel authentic to how senior engineers would react to this realization.` : '' ;
117133
134+ const lengthConstraint = maxTokens ? `
135+
136+ CRITICAL LENGTH REQUIREMENT:
137+ The dialogue MUST be under ${ maxTokens } tokens (approximately ${ Math . floor ( maxTokens * 4 ) } characters or ${ Math . floor ( maxTokens * 0.75 ) } words). This is a hard constraint due to TTS API limitations. Prioritize the most important concepts and keep explanations concise while maintaining quality.` : `
138+
139+ LENGTH GUIDELINE:
140+ Aim for a concise dialogue under 7,000 tokens to ensure it fits within TTS API limits while maintaining quality.` ;
141+
118142 const prompt = `You are converting technical course content into a natural, engaging two-person podcast conversation.
119143
120144Speakers:
121- - Alex: The instructor - knowledgeable, clear, engaging teacher
122- - Sam: Senior software engineer - curious , asks clarifying questions, relates concepts to real-world scenarios
145+ - Alex: The instructor - knowledgeable, clear, measured teaching style
146+ - Sam: Senior software engineer - thoughtful , asks clarifying questions, relates concepts to real-world scenarios
123147
124148Guidelines:
125- - Keep the conversation natural and flowing
126- - Sam should ask relevant questions that a senior engineer would ask
149+ - Keep the conversation natural and flowing, but maintain professional composure
150+ - Conversational yet measured - avoid excessive enthusiasm or exclamations
151+ - Sam should ask relevant, thoughtful questions that a senior engineer would ask
127152- Alex should explain concepts clearly but not patronizingly
128- - Include brief moments of insight or "aha" moments
153+ - Include brief moments of insight or understanding
129154- Keep technical accuracy - don't dumb down the content
130- - Make it engaging but professional
155+ - Make it engaging but professional - prioritize clarity over energy
131156- Break down complex concepts through dialogue
132- - Reference real-world scenarios and examples${ metaCommentary }
157+ - Reference real-world scenarios and examples${ metaCommentary } ${ lengthConstraint }
133158
134159Technical Content Title: ${ fileName }
135160
@@ -180,46 +205,107 @@ function createWavHeader(pcmDataLength) {
180205 return header ;
181206}
182207
/**
 * Retry an async function with exponential backoff for transient errors.
 *
 * Transient failures (network errors, HTTP 429/500/503/504) are retried
 * with doubling delays of 1s, 2s, 4s, ... between attempts; with the
 * default of 4 attempts at most 3 delays occur. Permanent client errors
 * (400/401/403/404) and unclassified errors fail immediately.
 *
 * @param {Function} fn - Async function to retry
 * @param {number} maxAttempts - Maximum total attempts (default: 4)
 * @returns {Promise} Result of the first successful call
 * @throws The permanent or non-retryable error immediately, or the last
 *   error once all attempts are exhausted
 */
async function retryWithBackoff(fn, maxAttempts = 4) {
  let lastError;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;

      // Guard against non-Error throws (e.g. `throw null`): extract
      // message/status safely instead of dereferencing the error directly.
      const message = error?.message ?? '';
      const status = error?.status;

      // Transient network/API errors are worth retrying.
      const isRetryable =
        message.includes('fetch failed') ||
        message.includes('ECONNRESET') ||
        message.includes('ETIMEDOUT') ||
        message.includes('ENOTFOUND') ||
        status === 429 || // Rate limit
        status === 500 || // Internal server error
        status === 503 || // Service unavailable
        status === 504;   // Gateway timeout

      // Client-side errors will not succeed on retry (auth, permission,
      // bad request, not found).
      const isPermanent =
        status === 400 || // Bad request
        status === 401 || // Unauthorized
        status === 403 || // Forbidden
        status === 404;   // Not found

      if (isPermanent) {
        throw error; // Fail fast on permanent errors
      }

      if (!isRetryable || attempt === maxAttempts - 1) {
        throw lastError; // Last attempt or non-retryable error
      }

      // Exponential backoff before the next attempt: 1s, 2s, 4s, ...
      const delay = Math.pow(2, attempt) * 1000;
      console.log(`   ⏳ Retry ${attempt + 1}/${maxAttempts} after ${delay}ms (${message})`);
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }

  throw lastError;
}
258+
183259/**
184260 * Convert dialogue text to audio using multi-speaker TTS
185261 */
186262async function generateAudio ( dialogue , outputPath ) {
187263 console . log ( ` 🎙️ Synthesizing audio...` ) ;
188264
189- const model = genAI . getGenerativeModel ( {
190- model : TTS_MODEL ,
191- } ) ;
192-
193- const result = await model . generateContent ( {
194- contents : [ {
195- role : 'user' ,
196- parts : [ { text : dialogue } ]
197- } ] ,
198- generationConfig : {
199- responseModalities : [ 'AUDIO' ] ,
200- speechConfig : {
201- multiSpeakerVoiceConfig : {
202- speakerVoiceConfigs : [
203- {
204- speaker : 'Alex' ,
205- voiceConfig : {
206- prebuiltVoiceConfig : {
207- voiceName : 'Kore' // Firm, professional voice
265+ // Wrap TTS API call with retry logic
266+ const result = await retryWithBackoff ( async ( ) => {
267+ const model = genAI . getGenerativeModel ( {
268+ model : TTS_MODEL ,
269+ } ) ;
270+
271+ const response = await model . generateContent ( {
272+ contents : [ {
273+ role : 'user' ,
274+ parts : [ { text : dialogue } ]
275+ } ] ,
276+ generationConfig : {
277+ responseModalities : [ 'AUDIO' ] ,
278+ speechConfig : {
279+ multiSpeakerVoiceConfig : {
280+ speakerVoiceConfigs : [
281+ {
282+ speaker : 'Alex' ,
283+ voiceConfig : {
284+ prebuiltVoiceConfig : {
285+ voiceName : 'Kore' // Firm, professional voice
286+ }
208287 }
209- }
210- } ,
211- {
212- speaker : 'Sam' ,
213- voiceConfig : {
214- prebuiltVoiceConfig : {
215- voiceName : 'Puck' // Upbeat, curious voice
288+ } ,
289+ {
290+ speaker : 'Sam' ,
291+ voiceConfig : {
292+ prebuiltVoiceConfig : {
293+ voiceName : 'Charon' // Neutral, professional voice
294+ }
216295 }
217296 }
218- }
219- ]
297+ ]
298+ }
220299 }
221300 }
301+ } ) ;
302+
303+ // Guarded response parsing - validate structure before accessing
304+ if ( ! response ?. response ?. candidates ?. [ 0 ] ?. content ?. parts ?. [ 0 ] ?. inlineData ?. data ) {
305+ throw new Error ( 'TTS API returned malformed response - missing inlineData.data' ) ;
222306 }
307+
308+ return response ;
223309 } ) ;
224310
225311 const audioData = result . response . candidates [ 0 ] . content . parts [ 0 ] . inlineData ;
@@ -238,6 +324,8 @@ async function generateAudio(dialogue, outputPath) {
238324
239325 writeFileSync ( outputPath , wavBuffer ) ;
240326
327+ console . log ( ` ✅ Audio synthesized successfully` ) ;
328+
241329 return {
242330 size : wavBuffer . length ,
243331 format : 'audio/wav'
@@ -263,8 +351,46 @@ async function processFile(filePath, manifest) {
263351 return ;
264352 }
265353
266- // Generate dialogue
267- const dialogue = await generateDialogue ( content , fileName ) ;
354+ // Generate dialogue with retry logic for token limit
355+ const TOKEN_LIMIT = 8192 ;
356+ const TOKEN_SAFETY_MARGIN = 500 ;
357+ const MAX_TOKENS = TOKEN_LIMIT - TOKEN_SAFETY_MARGIN ;
358+
359+ const retryLimits = [
360+ { maxTokens : null , attempt : 0 } , // First try: soft guideline (7,000 tokens)
361+ { maxTokens : 7000 , attempt : 1 } , // Retry 1: 7,000 tokens
362+ { maxTokens : 6000 , attempt : 2 } , // Retry 2: 6,000 tokens
363+ { maxTokens : 5500 , attempt : 3 } , // Retry 3: 5,500 tokens (last resort)
364+ ] ;
365+
366+ let dialogue ;
367+ let tokenCount ;
368+ let attemptSucceeded = false ;
369+
370+ for ( const { maxTokens, attempt } of retryLimits ) {
371+ if ( attempt > 0 ) {
372+ console . log ( ` 🔄 Retry ${ attempt } : Regenerating with ${ maxTokens } token limit...` ) ;
373+ }
374+
375+ dialogue = await generateDialogue ( content , fileName , maxTokens ) ;
376+ tokenCount = await countDialogueTokens ( dialogue ) ;
377+
378+ console . log ( ` 📊 Token count: ${ tokenCount } / ${ MAX_TOKENS } (${ ( ( tokenCount / MAX_TOKENS ) * 100 ) . toFixed ( 1 ) } %)` ) ;
379+
380+ if ( tokenCount <= MAX_TOKENS ) {
381+ attemptSucceeded = true ;
382+ if ( attempt > 0 ) {
383+ console . log ( ` ✅ Success on attempt ${ attempt + 1 } ` ) ;
384+ }
385+ break ;
386+ } else {
387+ console . log ( ` ⚠️ Exceeds limit by ${ tokenCount - MAX_TOKENS } tokens` ) ;
388+ }
389+ }
390+
391+ if ( ! attemptSucceeded ) {
392+ throw new Error ( `Failed to generate dialogue within token limit after ${ retryLimits . length } attempts. Final count: ${ tokenCount } tokens` ) ;
393+ }
268394
269395 // Determine output path
270396 const outputFileName = `${ fileName } .wav` ;
@@ -279,18 +405,53 @@ async function processFile(filePath, manifest) {
279405 audioUrl,
280406 size : audioInfo . size ,
281407 format : audioInfo . format ,
408+ tokenCount : tokenCount ,
282409 generatedAt : new Date ( ) . toISOString ( )
283410 } ;
284411
285412 console . log ( ` ✅ Generated: ${ audioUrl } ` ) ;
286- console . log ( ` 📊 Size: ${ ( audioInfo . size / 1024 / 1024 ) . toFixed ( 2 ) } MB` ) ;
413+ console . log ( ` 📊 Audio size: ${ ( audioInfo . size / 1024 / 1024 ) . toFixed ( 2 ) } MB` ) ;
414+ console . log ( ` 📊 Tokens: ${ tokenCount } ` ) ;
287415
288416 } catch ( error ) {
289417 console . error ( ` ❌ Error: ${ error . message } ` ) ;
290418 console . error ( ` Skipping this file and continuing...` ) ;
291419 }
292420}
293421
/**
 * Process files with a bounded concurrency limit.
 *
 * Files are handled in sequential batches of `concurrency`; files within a
 * batch run in parallel. Per-file failures are logged and tallied rather
 * than thrown, so one bad file never aborts the run.
 *
 * @param {string[]} files - Array of file paths to process
 * @param {Object} manifest - Manifest object to update
 * @param {number} concurrency - Max concurrent operations (default: 3)
 * @returns {Promise<{processed: number, failed: number}>} Outcome counts
 */
async function processFilesWithConcurrency(files, manifest, concurrency = 3) {
  let processed = 0;
  let failed = 0;
  const totalBatches = Math.ceil(files.length / concurrency);

  for (let start = 0; start < files.length; start += concurrency) {
    const batch = files.slice(start, start + concurrency);
    const batchNumber = Math.floor(start / concurrency) + 1;

    console.log(`\n🔄 Processing batch ${batchNumber}/${totalBatches} (${batch.length} files concurrently)...`);

    // Launch the whole batch, then wait for every file to settle before
    // moving on to the next batch.
    const inFlight = batch.map(async (file) => {
      try {
        await processFile(file, manifest);
        processed++;
      } catch (error) {
        console.error(`\n❌ Failed to process ${file}:`, error.message);
        failed++;
      }
    });
    await Promise.all(inFlight);
  }

  return { processed, failed };
}
454+
294455/**
295456 * Main execution
296457 */
@@ -312,25 +473,8 @@ async function main() {
312473 console . log ( `📋 Loaded existing manifest with ${ Object . keys ( manifest ) . length } entries\n` ) ;
313474 }
314475
315- // Process files sequentially (to avoid rate limits)
316- let processed = 0 ;
317- let skipped = 0 ;
318- let failed = 0 ;
319-
320- for ( const file of files ) {
321- try {
322- await processFile ( file , manifest ) ;
323- processed ++ ;
324-
325- // Rate limiting - wait 2 seconds between files
326- if ( processed < files . length ) {
327- await new Promise ( resolve => setTimeout ( resolve , 2000 ) ) ;
328- }
329- } catch ( error ) {
330- console . error ( `Failed to process ${ file } :` , error . message ) ;
331- failed ++ ;
332- }
333- }
476+ // Process files with concurrency limit of 3
477+ const results = await processFilesWithConcurrency ( files , manifest , 3 ) ;
334478
335479 // Save manifest
336480 mkdirSync ( dirname ( MANIFEST_PATH ) , { recursive : true } ) ;
@@ -339,9 +483,9 @@ async function main() {
339483 console . log ( '\n' + '=' . repeat ( 60 ) ) ;
340484 console . log ( '✨ Podcast generation complete!\n' ) ;
341485 console . log ( `📊 Summary:` ) ;
342- console . log ( ` ✅ Processed: ${ processed } ` ) ;
343- console . log ( ` ⚠️ Skipped : ${ skipped } ` ) ;
344- console . log ( ` ❌ Failed : ${ failed } ` ) ;
486+ console . log ( ` ✅ Processed: ${ results . processed } ` ) ;
487+ console . log ( ` ❌ Failed : ${ results . failed } ` ) ;
488+ console . log ( ` 📁 Total files : ${ files . length } ` ) ;
345489 console . log ( `\n📋 Manifest saved to: ${ MANIFEST_PATH } ` ) ;
346490 console . log ( '=' . repeat ( 60 ) ) ;
347491}
0 commit comments