Commit 70a4fe8

feat(evoai): enhance media message handling and transcription capabilities
- Added support for audio message detection and transcription via OpenAI's Whisper API.
- Integrated media downloading (Baileys downloadMediaMessage) for both audio and image messages, with error handling for each path.
- Updated payload logging to redact base64 file bytes.
- Threaded the raw message object (msg) through the existing controller and service methods so media can be fetched and transcribed.
1 parent 7112475 commit 70a4fe8
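
For context, the transcription path added in this commit depends on two values resolved through the project's ConfigService: an OpenAI key under OPENAI.API_KEY and a LANGUAGE code. A minimal sketch of that lookup, mirroring the guards used in the service diff below (the helper name is illustrative and not part of the codebase):

import { ConfigService, Language } from '@config/env.config';

// Hypothetical helper mirroring the checks the service performs before calling Whisper.
// Returns null when no API key is configured, matching the service's early exit.
function whisperConfig(config: ConfigService): { apiKey: string; lang: string } | null {
  const apiKey = config.get<any>('OPENAI')?.API_KEY;
  if (!apiKey) return null;
  const language = config.get<Language>('LANGUAGE');
  const lang = language.includes('pt') ? 'pt' : language; // e.g. pt-BR collapses to 'pt' for Whisper
  return { apiKey, lang };
}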

2 files changed (+98 -34 lines)

src/api/integrations/chatbot/evoai/controllers/evoai.controller.ts

Lines changed: 2 additions & 0 deletions
@@ -848,6 +848,7 @@ export class EvoaiController extends ChatbotController implements ChatbotControl
           },
           debouncedContent,
           msg?.pushName,
+          msg,
         );
       });
     } else {
@@ -872,6 +873,7 @@ export class EvoaiController extends ChatbotController implements ChatbotControl
         },
         content,
         msg?.pushName,
+        msg,
       );
     }

src/api/integrations/chatbot/evoai/services/evoai.service.ts

Lines changed: 96 additions & 34 deletions
@@ -3,18 +3,20 @@ import { InstanceDto } from '@api/dto/instance.dto';
 import { PrismaRepository } from '@api/repository/repository.service';
 import { WAMonitoringService } from '@api/services/monitor.service';
 import { Integration } from '@api/types/wa.types';
+import { ConfigService, Language } from '@config/env.config';
 import { Logger } from '@config/logger.config';
 import { Evoai, EvoaiSetting, IntegrationSession } from '@prisma/client';
 import { sendTelemetry } from '@utils/sendTelemetry';
 import axios from 'axios';
-import path from 'path';
-import { Readable } from 'stream';
+import { downloadMediaMessage } from 'baileys';
+import FormData from 'form-data';
 import { v4 as uuidv4 } from 'uuid';

 export class EvoaiService {
   constructor(
     private readonly waMonitor: WAMonitoringService,
     private readonly prismaRepository: PrismaRepository,
+    private readonly configService: ConfigService,
   ) {}

   private readonly logger = new Logger('EvoaiService');
@@ -45,12 +47,34 @@ export class EvoaiService {
     return content.includes('imageMessage');
   }

-  private isJSON(str: string): boolean {
+  private isAudioMessage(content: string) {
+    return content.includes('audioMessage');
+  }
+
+  private async speechToText(audioBuffer: Buffer): Promise<string | null> {
     try {
-      JSON.parse(str);
-      return true;
-    } catch (e) {
-      return false;
+      const apiKey = this.configService.get<any>('OPENAI')?.API_KEY;
+      if (!apiKey) {
+        this.logger.error('[EvoAI] No OpenAI API key set for Whisper transcription');
+        return null;
+      }
+      const lang = this.configService.get<Language>('LANGUAGE').includes('pt')
+        ? 'pt'
+        : this.configService.get<Language>('LANGUAGE');
+      const formData = new FormData();
+      formData.append('file', audioBuffer, 'audio.ogg');
+      formData.append('model', 'whisper-1');
+      formData.append('language', lang);
+      const response = await axios.post('https://api.openai.com/v1/audio/transcriptions', formData, {
+        headers: {
+          ...formData.getHeaders(),
+          Authorization: `Bearer ${apiKey}`,
+        },
+      });
+      return response?.data?.text || null;
+    } catch (err) {
+      this.logger.error(`[EvoAI] Whisper transcription failed: ${err}`);
+      return null;
     }
   }
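
To exercise the Whisper request above in isolation (outside the service), a standalone sketch using the same form-data + axios approach could look like the following; the helper name and file path are illustrative, and an OGG/Opus voice note is assumed as input:

import axios from 'axios';
import FormData from 'form-data';
import { readFileSync } from 'fs';

// Hypothetical standalone test of the transcription request built in speechToText().
async function transcribeFile(filePath: string, apiKey: string): Promise<string | null> {
  const form = new FormData();
  form.append('file', readFileSync(filePath), 'audio.ogg'); // WhatsApp voice notes are ogg/opus
  form.append('model', 'whisper-1');
  const response = await axios.post('https://api.openai.com/v1/audio/transcriptions', form, {
    headers: { ...form.getHeaders(), Authorization: `Bearer ${apiKey}` },
  });
  return response.data?.text ?? null;
}

// Example: transcribeFile('./voice-note.ogg', process.env.OPENAI_API_KEY ?? '').then(console.log);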

@@ -62,6 +86,7 @@ export class EvoaiService {
     remoteJid: string,
     pushName: string,
     content: string,
+    msg?: any,
   ) {
     try {
       const endpoint: string = evoai.agentUrl;
@@ -76,27 +101,52 @@ export class EvoaiService {
         },
       ];

-      // If content indicates an image/file, add as a file part
-      if (this.isImageMessage(content)) {
-        const contentSplit = content.split('|');
-        const fileUrl = contentSplit[1].split('?')[0];
-        const textPart = contentSplit[2] || content;
-        parts[0].text = textPart;
-
-        // Try to fetch the file and encode as base64
-        try {
-          const fileResponse = await axios.get(fileUrl, { responseType: 'arraybuffer' });
-          const fileContent = Buffer.from(fileResponse.data).toString('base64');
-          const fileName = path.basename(fileUrl);
-          parts.push({
-            type: 'file',
-            file: {
-              name: fileName,
-              bytes: fileContent,
-            },
-          });
-        } catch (fileErr) {
-          this.logger.error(`Failed to fetch or encode file for EvoAI: ${fileErr}`);
+      // If content indicates an image/file, fetch and encode as base64, then send as a file part
+      if ((this.isImageMessage(content) || this.isAudioMessage(content)) && msg) {
+        const isImage = this.isImageMessage(content);
+        const isAudio = this.isAudioMessage(content);
+        this.logger.debug(`[EvoAI] Media message detected: ${content}`);
+
+        let transcribedText = null;
+        if (isAudio) {
+          try {
+            this.logger.debug(`[EvoAI] Downloading audio for Whisper transcription`);
+            const mediaBuffer = await downloadMediaMessage({ key: msg.key, message: msg.message }, 'buffer', {});
+            transcribedText = await this.speechToText(mediaBuffer);
+            if (transcribedText) {
+              parts[0].text = transcribedText;
+            } else {
+              parts[0].text = '[Audio message could not be transcribed]';
+            }
+          } catch (err) {
+            this.logger.error(`[EvoAI] Failed to transcribe audio: ${err}`);
+            parts[0].text = '[Audio message could not be transcribed]';
+          }
+        } else if (isImage) {
+          const contentSplit = content.split('|');
+          parts[0].text = contentSplit[2] || content;
+          let fileContent = null,
+            fileName = null,
+            mimeType = null;
+          try {
+            this.logger.debug(
+              `[EvoAI] Fetching image using downloadMediaMessage with msg.key: ${JSON.stringify(msg.key)}`,
+            );
+            const mediaBuffer = await downloadMediaMessage({ key: msg.key, message: msg.message }, 'buffer', {});
+            fileContent = Buffer.from(mediaBuffer).toString('base64');
+            fileName = contentSplit[2] || `${msg.key.id}.jpg`;
+            mimeType = 'image/jpeg';
+            parts.push({
+              type: 'file',
+              file: {
+                name: fileName,
+                bytes: fileContent,
+                mimeType: mimeType,
+              },
+            });
+          } catch (fileErr) {
+            this.logger.error(`[EvoAI] Failed to fetch or encode image for EvoAI: ${fileErr}`);
+          }
         }
       }
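
For an image message, the branch above leaves the parts array roughly in the shape sketched below before it is embedded in the JSON-RPC payload (values are illustrative; the first text part is built earlier in the method):

const parts = [
  { type: 'text', text: 'caption taken from contentSplit[2], or the raw content' },
  {
    type: 'file',
    file: {
      name: 'ABCD1234.jpg',         // contentSplit[2] or `${msg.key.id}.jpg`
      bytes: '/9j/4AAQSkZJRgABAQ',  // base64 of the buffer returned by downloadMediaMessage
      mimeType: 'image/jpeg',
    },
  },
];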

@@ -115,7 +165,17 @@ export class EvoaiService {
       };

       this.logger.debug(`[EvoAI] Sending request to: ${endpoint}`);
-      this.logger.debug(`[EvoAI] Payload: ${JSON.stringify(payload)}`);
+      // Redact base64 file bytes from payload log
+      const redactedPayload = JSON.parse(JSON.stringify(payload));
+      if (redactedPayload?.params?.message?.parts) {
+        redactedPayload.params.message.parts = redactedPayload.params.message.parts.map((part) => {
+          if (part.type === 'file' && part.file && part.file.bytes) {
+            return { ...part, file: { ...part.file, bytes: '[base64 omitted]' } };
+          }
+          return part;
+        });
+      }
+      this.logger.debug(`[EvoAI] Payload: ${JSON.stringify(redactedPayload)}`);

       if (instance.integration === Integration.WHATSAPP_BAILEYS) {
         await instance.client.presenceSubscribe(remoteJid);
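
As an illustration of the redaction above, a file part like the first object below is logged as the second; only the debug output changes, while the payload actually sent keeps the full base64 string (sample data, not from the commit):

// Part inside the payload (bytes shortened here for display):
const sent = { type: 'file', file: { name: 'photo.jpg', bytes: '/9j/4AAQSkZJRg', mimeType: 'image/jpeg' } };

// What the [EvoAI] Payload debug line contains after redaction:
const logged = { type: 'file', file: { name: 'photo.jpg', bytes: '[base64 omitted]', mimeType: 'image/jpeg' } };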
@@ -129,7 +189,7 @@ export class EvoaiService {
         },
       });

-      this.logger.debug(`[EvoAI] Response: ${JSON.stringify(response.data)}`);
+      this.logger.debug(`[EvoAI] Response: ${JSON.stringify(response.data.status)}`);

       if (instance.integration === Integration.WHATSAPP_BAILEYS)
         await instance.client.sendPresenceUpdate('paused', remoteJid);
@@ -341,6 +401,7 @@ export class EvoaiService {
     session: IntegrationSession,
     content: string,
     pushName?: string,
+    msg?: any,
   ) {
     const data = await this.createNewSession(instance, {
       remoteJid,
@@ -352,7 +413,7 @@ export class EvoaiService {
       session = data.session;
     }

-    await this.sendMessageToBot(instance, session, settings, evoai, remoteJid, pushName, content);
+    await this.sendMessageToBot(instance, session, settings, evoai, remoteJid, pushName, content, msg);

     return;
   }
@@ -365,6 +426,7 @@ export class EvoaiService {
     settings: EvoaiSetting,
     content: string,
     pushName?: string,
+    msg?: any,
   ) {
     if (session && session.status !== 'opened') {
       return;
@@ -398,13 +460,13 @@ export class EvoaiService {
           });
         }

-        await this.initNewSession(instance, remoteJid, evoai, settings, session, content, pushName);
+        await this.initNewSession(instance, remoteJid, evoai, settings, session, content, pushName, msg);
         return;
       }
     }

     if (!session) {
-      await this.initNewSession(instance, remoteJid, evoai, settings, session, content, pushName);
+      await this.initNewSession(instance, remoteJid, evoai, settings, session, content, pushName, msg);
       return;
     }

@@ -455,7 +517,7 @@ export class EvoaiService {
       return;
     }

-    await this.sendMessageToBot(instance, session, settings, evoai, remoteJid, pushName, content);
+    await this.sendMessageToBot(instance, session, settings, evoai, remoteJid, pushName, content, msg);

     return;
   }
