
Commit 4af7912

Updated podcasts
1 parent 6640402 commit 4af7912

15 files changed (+310 -103 lines)

scripts/README.md

Lines changed: 1 addition & 1 deletion
@@ -89,7 +89,7 @@ node generate-podcast.js
 
 ### Speakers
 - **Alex**: "Kore" voice (firm, professional)
-- **Sam**: "Puck" voice (upbeat, curious)
+- **Sam**: "Charon" voice (neutral, professional)
 
 ### Rate Limiting
 - 2-second delay between files to avoid API rate limits
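For reference, the Speakers list above corresponds to the `speakerVoiceConfigs` array that `scripts/generate-podcast.js` passes to the TTS request; a minimal sketch of that mapping (structure abbreviated, not the full request body, which lives in `generateAudio()`):

```js
// Minimal sketch of the speaker-to-voice mapping documented above.
const speakerVoiceConfigs = [
  { speaker: 'Alex', voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } } },   // firm, professional
  { speaker: 'Sam',  voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Charon' } } }  // neutral, professional
];
```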

scripts/generate-podcast.js

Lines changed: 205 additions & 61 deletions
@@ -100,10 +100,26 @@ function findMarkdownFiles(dir) {
   return files.sort();
 }
 
+/**
+ * Count tokens in dialogue text using Gemini API
+ */
+async function countDialogueTokens(dialogue) {
+  const model = genAI.getGenerativeModel({ model: TTS_MODEL });
+
+  const result = await model.countTokens({
+    contents: [{
+      role: 'user',
+      parts: [{ text: dialogue }]
+    }]
+  });
+
+  return result.totalTokens;
+}
+
 /**
  * Generate conversational dialogue from technical content
  */
-async function generateDialogue(content, fileName) {
+async function generateDialogue(content, fileName, maxTokens = null) {
   console.log(` 🎭 Generating dialogue script...`);
 
   const model = genAI.getGenerativeModel({ model: DIALOGUE_MODEL });
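A brief usage sketch of the new `countDialogueTokens` helper (hypothetical standalone call; it assumes the same `genAI` client and `TTS_MODEL` constant used elsewhere in this script):

```js
// Hypothetical usage of countDialogueTokens(), defined above.
// The count is later compared against the TTS token budget in processFile().
const sampleDialogue = 'Alex: Welcome back.\nSam: Good to be here.';
const tokens = await countDialogueTokens(sampleDialogue);
console.log(`Dialogue is ${tokens} tokens`);
```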
@@ -115,21 +131,30 @@ async function generateDialogue(content, fileName) {
 SPECIAL INSTRUCTIONS FOR THIS EPISODE:
 This is the course introduction. When discussing the section about how the course was developed using AI (including the podcast itself), have Alex and Sam briefly acknowledge in a natural, professional way that they themselves are AI-generated voices. This should feel like a thoughtful meta-moment, not gimmicky. Keep it concise - one or two exchanges where they note the self-referential nature of AI-generated hosts discussing AI-generated content. Make it feel authentic to how senior engineers would react to this realization.` : '';
 
+  const lengthConstraint = maxTokens ? `
+
+CRITICAL LENGTH REQUIREMENT:
+The dialogue MUST be under ${maxTokens} tokens (approximately ${Math.floor(maxTokens * 4)} characters or ${Math.floor(maxTokens * 0.75)} words). This is a hard constraint due to TTS API limitations. Prioritize the most important concepts and keep explanations concise while maintaining quality.` : `
+
+LENGTH GUIDELINE:
+Aim for a concise dialogue under 7,000 tokens to ensure it fits within TTS API limits while maintaining quality.`;
+
   const prompt = `You are converting technical course content into a natural, engaging two-person podcast conversation.
 
 Speakers:
-- Alex: The instructor - knowledgeable, clear, engaging teacher
-- Sam: Senior software engineer - curious, asks clarifying questions, relates concepts to real-world scenarios
+- Alex: The instructor - knowledgeable, clear, measured teaching style
+- Sam: Senior software engineer - thoughtful, asks clarifying questions, relates concepts to real-world scenarios
 
 Guidelines:
-- Keep the conversation natural and flowing
-- Sam should ask relevant questions that a senior engineer would ask
+- Keep the conversation natural and flowing, but maintain professional composure
+- Conversational yet measured - avoid excessive enthusiasm or exclamations
+- Sam should ask relevant, thoughtful questions that a senior engineer would ask
 - Alex should explain concepts clearly but not patronizingly
-- Include brief moments of insight or "aha" moments
+- Include brief moments of insight or understanding
 - Keep technical accuracy - don't dumb down the content
-- Make it engaging but professional
+- Make it engaging but professional - prioritize clarity over energy
 - Break down complex concepts through dialogue
-- Reference real-world scenarios and examples${metaCommentary}
+- Reference real-world scenarios and examples${metaCommentary}${lengthConstraint}
 
 Technical Content Title: ${fileName}
 
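The character and word figures in the length constraint come from rough heuristics of about 4 characters and 0.75 words per token; a worked example for a 7,000-token cap:

```js
// Worked example of the approximations used in the length-constraint prompt.
// ~4 characters per token and ~0.75 words per token are heuristics, not exact values.
const maxTokens = 7000;
const approxChars = Math.floor(maxTokens * 4);    // 28000
const approxWords = Math.floor(maxTokens * 0.75); // 5250
console.log(`${maxTokens} tokens ≈ ${approxChars} characters ≈ ${approxWords} words`);
```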
@@ -180,46 +205,107 @@ function createWavHeader(pcmDataLength) {
   return header;
 }
 
+/**
+ * Retry a function with exponential backoff for transient errors
+ * @param {Function} fn - Async function to retry
+ * @param {number} maxAttempts - Maximum retry attempts (default: 4)
+ * @returns {Promise} - Result of successful function call
+ */
+async function retryWithBackoff(fn, maxAttempts = 4) {
+  let lastError;
+
+  for (let attempt = 0; attempt < maxAttempts; attempt++) {
+    try {
+      return await fn();
+    } catch (error) {
+      lastError = error;
+
+      // Check if error is retryable (transient network/API errors)
+      const isRetryable =
+        error.message?.includes('fetch failed') ||
+        error.message?.includes('ECONNRESET') ||
+        error.message?.includes('ETIMEDOUT') ||
+        error.message?.includes('ENOTFOUND') ||
+        error.status === 429 || // Rate limit
+        error.status === 500 || // Internal server error
+        error.status === 503 || // Service unavailable
+        error.status === 504;   // Gateway timeout
+
+      // Don't retry permanent errors (auth, permission, not found)
+      const isPermanent =
+        error.status === 400 || // Bad request
+        error.status === 401 || // Unauthorized
+        error.status === 403 || // Forbidden
+        error.status === 404;   // Not found
+
+      if (isPermanent) {
+        throw error; // Fail fast on permanent errors
+      }
+
+      if (!isRetryable || attempt === maxAttempts - 1) {
+        throw lastError; // Last attempt or non-retryable error
+      }
+
+      // Exponential backoff: 1s, 2s, 4s, 8s
+      const delay = Math.pow(2, attempt) * 1000;
+      console.log(` ⏳ Retry ${attempt + 1}/${maxAttempts} after ${delay}ms (${error.message})`);
+      await new Promise(resolve => setTimeout(resolve, delay));
+    }
+  }
+
+  throw lastError;
+}
+
 /**
  * Convert dialogue text to audio using multi-speaker TTS
  */
 async function generateAudio(dialogue, outputPath) {
   console.log(` 🎙️ Synthesizing audio...`);
 
-  const model = genAI.getGenerativeModel({
-    model: TTS_MODEL,
-  });
-
-  const result = await model.generateContent({
-    contents: [{
-      role: 'user',
-      parts: [{ text: dialogue }]
-    }],
-    generationConfig: {
-      responseModalities: ['AUDIO'],
-      speechConfig: {
-        multiSpeakerVoiceConfig: {
-          speakerVoiceConfigs: [
-            {
-              speaker: 'Alex',
-              voiceConfig: {
-                prebuiltVoiceConfig: {
-                  voiceName: 'Kore' // Firm, professional voice
+  // Wrap TTS API call with retry logic
+  const result = await retryWithBackoff(async () => {
+    const model = genAI.getGenerativeModel({
+      model: TTS_MODEL,
+    });
+
+    const response = await model.generateContent({
+      contents: [{
+        role: 'user',
+        parts: [{ text: dialogue }]
+      }],
+      generationConfig: {
+        responseModalities: ['AUDIO'],
+        speechConfig: {
+          multiSpeakerVoiceConfig: {
+            speakerVoiceConfigs: [
+              {
+                speaker: 'Alex',
+                voiceConfig: {
+                  prebuiltVoiceConfig: {
+                    voiceName: 'Kore' // Firm, professional voice
+                  }
                 }
-              }
-            },
-            {
-              speaker: 'Sam',
-              voiceConfig: {
-                prebuiltVoiceConfig: {
-                  voiceName: 'Puck' // Upbeat, curious voice
+              },
+              {
+                speaker: 'Sam',
+                voiceConfig: {
+                  prebuiltVoiceConfig: {
+                    voiceName: 'Charon' // Neutral, professional voice
+                  }
                 }
               }
-            }
-          ]
+            ]
+          }
         }
       }
+    });
+
+    // Guarded response parsing - validate structure before accessing
+    if (!response?.response?.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data) {
+      throw new Error('TTS API returned malformed response - missing inlineData.data');
     }
+
+    return response;
   });
 
   const audioData = result.response.candidates[0].content.parts[0].inlineData;
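`retryWithBackoff` is generic, so it could wrap any transient-failure-prone call the same way it wraps the TTS request above; a hypothetical sketch (this particular wrapping is not part of the script):

```js
// Hypothetical example: wrapping the dialogue call with the same retry helper.
// With maxAttempts = 4, failed attempts are separated by 1s, 2s and 4s waits;
// the fourth failure is rethrown to the caller.
const dialogue = await retryWithBackoff(
  () => generateDialogue(content, fileName, 7000),
  4
);
```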
@@ -238,6 +324,8 @@ async function generateAudio(dialogue, outputPath) {
 
   writeFileSync(outputPath, wavBuffer);
 
+  console.log(` ✅ Audio synthesized successfully`);
+
   return {
     size: wavBuffer.length,
     format: 'audio/wav'
@@ -263,8 +351,46 @@ async function processFile(filePath, manifest) {
     return;
   }
 
-  // Generate dialogue
-  const dialogue = await generateDialogue(content, fileName);
+  // Generate dialogue with retry logic for token limit
+  const TOKEN_LIMIT = 8192;
+  const TOKEN_SAFETY_MARGIN = 500;
+  const MAX_TOKENS = TOKEN_LIMIT - TOKEN_SAFETY_MARGIN;
+
+  const retryLimits = [
+    { maxTokens: null, attempt: 0 }, // First try: soft guideline (7,000 tokens)
+    { maxTokens: 7000, attempt: 1 }, // Retry 1: 7,000 tokens
+    { maxTokens: 6000, attempt: 2 }, // Retry 2: 6,000 tokens
+    { maxTokens: 5500, attempt: 3 }, // Retry 3: 5,500 tokens (last resort)
+  ];
+
+  let dialogue;
+  let tokenCount;
+  let attemptSucceeded = false;
+
+  for (const { maxTokens, attempt } of retryLimits) {
+    if (attempt > 0) {
+      console.log(` 🔄 Retry ${attempt}: Regenerating with ${maxTokens} token limit...`);
+    }
+
+    dialogue = await generateDialogue(content, fileName, maxTokens);
+    tokenCount = await countDialogueTokens(dialogue);
+
+    console.log(` 📊 Token count: ${tokenCount} / ${MAX_TOKENS} (${((tokenCount / MAX_TOKENS) * 100).toFixed(1)}%)`);
+
+    if (tokenCount <= MAX_TOKENS) {
+      attemptSucceeded = true;
+      if (attempt > 0) {
+        console.log(` ✅ Success on attempt ${attempt + 1}`);
+      }
+      break;
+    } else {
+      console.log(` ⚠️ Exceeds limit by ${tokenCount - MAX_TOKENS} tokens`);
+    }
+  }
+
+  if (!attemptSucceeded) {
+    throw new Error(`Failed to generate dialogue within token limit after ${retryLimits.length} attempts. Final count: ${tokenCount} tokens`);
+  }
 
   // Determine output path
   const outputFileName = `${fileName}.wav`;
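The effective budget works out to 8192 - 500 = 7692 tokens; a worked example of the check above, assuming a hypothetical first draft of 8,100 tokens:

```js
// Worked numbers for the budget check above (hypothetical draft size of 8,100 tokens).
const MAX_TOKENS = 8192 - 500;   // 7692 usable tokens
const tokenCount = 8100;         // hypothetical oversized first draft
console.log(`${tokenCount} / ${MAX_TOKENS} (${((tokenCount / MAX_TOKENS) * 100).toFixed(1)}%)`); // 105.3%
console.log(`Exceeds limit by ${tokenCount - MAX_TOKENS} tokens`);                               // 408
// The loop would then retry generateDialogue() with the explicit 7,000-token cap.
```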
@@ -279,18 +405,53 @@ async function processFile(filePath, manifest) {
       audioUrl,
       size: audioInfo.size,
       format: audioInfo.format,
+      tokenCount: tokenCount,
       generatedAt: new Date().toISOString()
     };
 
     console.log(` ✅ Generated: ${audioUrl}`);
-    console.log(` 📊 Size: ${(audioInfo.size / 1024 / 1024).toFixed(2)} MB`);
+    console.log(` 📊 Audio size: ${(audioInfo.size / 1024 / 1024).toFixed(2)} MB`);
+    console.log(` 📊 Tokens: ${tokenCount}`);
 
   } catch (error) {
     console.error(` ❌ Error: ${error.message}`);
     console.error(` Skipping this file and continuing...`);
   }
 }
 
+/**
+ * Process files with concurrency limit
+ * @param {string[]} files - Array of file paths to process
+ * @param {Object} manifest - Manifest object to update
+ * @param {number} concurrency - Max concurrent operations (default: 3)
+ * @returns {Promise<{processed: number, failed: number}>}
+ */
+async function processFilesWithConcurrency(files, manifest, concurrency = 3) {
+  const results = { processed: 0, failed: 0 };
+
+  // Process files in batches of `concurrency`
+  for (let i = 0; i < files.length; i += concurrency) {
+    const batch = files.slice(i, i + concurrency);
+
+    console.log(`\n🔄 Processing batch ${Math.floor(i / concurrency) + 1}/${Math.ceil(files.length / concurrency)} (${batch.length} files concurrently)...`);
+
+    // Process batch concurrently
+    await Promise.all(
+      batch.map(async (file) => {
+        try {
+          await processFile(file, manifest);
+          results.processed++;
+        } catch (error) {
+          console.error(`\n❌ Failed to process ${file}:`, error.message);
+          results.failed++;
+        }
+      })
+    );
+  }
+
+  return results;
+}
+
 /**
  * Main execution
  */
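The batching is a simple chunked `Promise.all` rather than a rolling worker pool: each group of `concurrency` files is awaited in full before the next group starts. A small illustration of how the batches fall out (hypothetical file names):

```js
// Hypothetical illustration of the batch split used by processFilesWithConcurrency():
// 7 files with concurrency 3 produce batches of 3, 3 and 1.
const files = ['a.md', 'b.md', 'c.md', 'd.md', 'e.md', 'f.md', 'g.md'];
const concurrency = 3;
for (let i = 0; i < files.length; i += concurrency) {
  console.log(`batch ${Math.floor(i / concurrency) + 1}:`, files.slice(i, i + concurrency));
}
// batch 1: [ 'a.md', 'b.md', 'c.md' ]
// batch 2: [ 'd.md', 'e.md', 'f.md' ]
// batch 3: [ 'g.md' ]
```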
@@ -312,25 +473,8 @@ async function main() {
     console.log(`📋 Loaded existing manifest with ${Object.keys(manifest).length} entries\n`);
   }
 
-  // Process files sequentially (to avoid rate limits)
-  let processed = 0;
-  let skipped = 0;
-  let failed = 0;
-
-  for (const file of files) {
-    try {
-      await processFile(file, manifest);
-      processed++;
-
-      // Rate limiting - wait 2 seconds between files
-      if (processed < files.length) {
-        await new Promise(resolve => setTimeout(resolve, 2000));
-      }
-    } catch (error) {
-      console.error(`Failed to process ${file}:`, error.message);
-      failed++;
-    }
-  }
+  // Process files with concurrency limit of 3
+  const results = await processFilesWithConcurrency(files, manifest, 3);
 
   // Save manifest
   mkdirSync(dirname(MANIFEST_PATH), { recursive: true });
@@ -339,9 +483,9 @@ async function main() {
   console.log('\n' + '='.repeat(60));
   console.log('✨ Podcast generation complete!\n');
   console.log(`📊 Summary:`);
-  console.log(` ✅ Processed: ${processed}`);
-  console.log(` ⚠️ Skipped: ${skipped}`);
-  console.log(` ❌ Failed: ${failed}`);
+  console.log(` ✅ Processed: ${results.processed}`);
+  console.log(` ❌ Failed: ${results.failed}`);
+  console.log(` 📁 Total files: ${files.length}`);
   console.log(`\n📋 Manifest saved to: ${MANIFEST_PATH}`);
   console.log('='.repeat(60));
 }
