diff --git a/api/db/migrations/0007_rich_trauma.sql b/api/db/migrations/0007_rich_trauma.sql
new file mode 100644
index 00000000..592889af
--- /dev/null
+++ b/api/db/migrations/0007_rich_trauma.sql
@@ -0,0 +1,6 @@
+CREATE TABLE IF NOT EXISTS "ai_prompts" (
+  "hash" text PRIMARY KEY NOT NULL,
+  "prompt" text NOT NULL,
+  "response" text NOT NULL,
+  "record_imported_at" text DEFAULT CURRENT_TIMESTAMP NOT NULL
+);
diff --git a/api/db/migrations/meta/0007_snapshot.json b/api/db/migrations/meta/0007_snapshot.json
new file mode 100644
index 00000000..57c701ed
--- /dev/null
+++ b/api/db/migrations/meta/0007_snapshot.json
@@ -0,0 +1,516 @@
+{
+  "id": "47729acd-56ae-4a65-931b-5a22f8d471dd",
+  "prevId": "54b5fefc-b314-4ce9-ad6e-be5314d2c4f4",
+  "version": "7",
+  "dialect": "postgresql",
+  "tables": {
+    "public.ai_prompts": {
+      "name": "ai_prompts",
+      "schema": "",
+      "columns": {
+        "hash": {
+          "name": "hash",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "prompt": {
+          "name": "prompt",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "response": {
+          "name": "response",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "record_imported_at": {
+          "name": "record_imported_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "CURRENT_TIMESTAMP"
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {}
+    },
+    "public.contributions": {
+      "name": "contributions",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "record_imported_at": {
+          "name": "record_imported_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "CURRENT_TIMESTAMP"
+        },
+        "title_ar": {
+          "name": "title_ar",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "title_en": {
+          "name": "title_en",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "updated_at": {
+          "name": "updated_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "url": {
+          "name": "url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "type": {
+          "name": "type",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "activity_count": {
+          "name": "activity_count",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "repository_id": {
+          "name": "repository_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "contributor_id": {
+          "name": "contributor_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "contributions_repository_id_repositories_id_fk": {
+          "name": "contributions_repository_id_repositories_id_fk",
+          "tableFrom": "contributions",
+          "tableTo": "repositories",
+          "columnsFrom": [
+            "repository_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "contributions_contributor_id_contributors_id_fk": {
+          "name": "contributions_contributor_id_contributors_id_fk",
+          "tableFrom": "contributions",
+          "tableTo": "contributors",
+          "columnsFrom": [
+            "contributor_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "contributions_url_unique": {
+          "name": "contributions_url_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "url"
+          ]
+        }
+      }
+    },
+    "public.contributor_repository_relation": {
+      "name": "contributor_repository_relation",
+      "schema": "",
+      "columns": {
+        "contributor_id": {
+          "name": "contributor_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "repository_id": {
+          "name": "repository_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "record_imported_at": {
+          "name": "record_imported_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "CURRENT_TIMESTAMP"
+        },
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "score": {
+          "name": "score",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "contributor_repository_relation_contributor_id_contributors_id_fk": {
+          "name": "contributor_repository_relation_contributor_id_contributors_id_fk",
+          "tableFrom": "contributor_repository_relation",
+          "tableTo": "contributors",
+          "columnsFrom": [
+            "contributor_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        },
+        "contributor_repository_relation_repository_id_repositories_id_fk": {
+          "name": "contributor_repository_relation_repository_id_repositories_id_fk",
+          "tableFrom": "contributor_repository_relation",
+          "tableTo": "repositories",
+          "columnsFrom": [
+            "repository_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "contributor_repository_relation_pk": {
+          "name": "contributor_repository_relation_pk",
+          "columns": [
+            "contributor_id",
+            "repository_id"
+          ]
+        }
+      },
+      "uniqueConstraints": {}
+    },
+    "public.contributors": {
+      "name": "contributors",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "record_imported_at": {
+          "name": "record_imported_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "CURRENT_TIMESTAMP"
+        },
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name_ar": {
+          "name": "name_ar",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name_en": {
+          "name": "name_en",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "username": {
+          "name": "username",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "url": {
+          "name": "url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "avatar_url": {
+          "name": "avatar_url",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "contributors_url_unique": {
+          "name": "contributors_url_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "url"
+          ]
+        }
+      }
+    },
+    "public.project_tag_relation": {
+      "name": "project_tag_relation",
+      "schema": "",
+      "columns": {
+        "project_id": {
+          "name": "project_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "tag_id": {
+          "name": "tag_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "record_imported_at": {
+          "name": "record_imported_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "CURRENT_TIMESTAMP"
+        },
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "project_tag_relation_project_id_projects_id_fk": {
+          "name": "project_tag_relation_project_id_projects_id_fk",
+          "tableFrom": "project_tag_relation",
+          "tableTo": "projects",
+          "columnsFrom": [
+            "project_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {
+        "project_tag_relation_pk": {
+          "name": "project_tag_relation_pk",
+          "columns": [
+            "project_id",
+            "tag_id"
+          ]
+        }
+      },
+      "uniqueConstraints": {}
+    },
+    "public.projects": {
+      "name": "projects",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "record_imported_at": {
+          "name": "record_imported_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "CURRENT_TIMESTAMP"
+        },
+        "name_ar": {
+          "name": "name_ar",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name_en": {
+          "name": "name_en",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {}
+    },
+    "public.repositories": {
+      "name": "repositories",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "record_imported_at": {
+          "name": "record_imported_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "CURRENT_TIMESTAMP"
+        },
+        "provider": {
+          "name": "provider",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "owner": {
+          "name": "owner",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "name": {
+          "name": "name",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "project_id": {
+          "name": "project_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        },
+        "stars": {
+          "name": "stars",
+          "type": "integer",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {
+        "repositories_project_id_projects_id_fk": {
+          "name": "repositories_project_id_projects_id_fk",
+          "tableFrom": "repositories",
+          "tableTo": "projects",
+          "columnsFrom": [
+            "project_id"
+          ],
+          "columnsTo": [
+            "id"
+          ],
+          "onDelete": "no action",
+          "onUpdate": "no action"
+        }
+      },
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {
+        "repositories_provider_owner_name_unique": {
+          "name": "repositories_provider_owner_name_unique",
+          "nullsNotDistinct": false,
+          "columns": [
+            "provider",
+            "owner",
+            "name"
+          ]
+        }
+      }
+    },
+    "public.tags": {
+      "name": "tags",
+      "schema": "",
+      "columns": {
+        "id": {
+          "name": "id",
+          "type": "text",
+          "primaryKey": true,
+          "notNull": true
+        },
+        "record_imported_at": {
+          "name": "record_imported_at",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true,
+          "default": "CURRENT_TIMESTAMP"
+        },
+        "run_id": {
+          "name": "run_id",
+          "type": "text",
+          "primaryKey": false,
+          "notNull": true
+        }
+      },
+      "indexes": {},
+      "foreignKeys": {},
+      "compositePrimaryKeys": {},
+      "uniqueConstraints": {}
+    }
+  },
+  "enums": {},
+  "schemas": {},
+  "sequences": {},
+  "_meta": {
+    "columns": {},
+    "schemas": {},
+    "tables": {}
+  }
+}
\ No newline at end of file
diff --git a/api/db/migrations/meta/_journal.json b/api/db/migrations/meta/_journal.json
index ba1dc9c9..f8b7c340 100644
--- a/api/db/migrations/meta/_journal.json
+++ b/api/db/migrations/meta/_journal.json
@@ -50,6 +50,13 @@
       "when": 1735688289807,
       "tag": "0006_amazing_stark_industries",
       "breakpoints": true
+    },
+    {
+      "idx": 7,
+      "version": "7",
+      "when": 1735852741252,
+      "tag": "0007_rich_trauma",
+      "breakpoints": true
     }
   ]
 }
\ No newline at end of file
diff --git a/api/src/_utils/hash.ts b/api/src/_utils/hash.ts
new file mode 100644
index 00000000..8bca0e21
--- /dev/null
+++ b/api/src/_utils/hash.ts
@@ -0,0 +1,12 @@
+import { createHash } from "crypto";
+
+/**
+ * Derives a short, deterministic fingerprint of `input`.
+ *
+ * @param input - Text to fingerprint.
+ * @returns The first 16 hex characters (64 bits) of the SHA-256 digest of `input`.
+ */
+export function generateShortHash(input: string): string {
+  const hash = createHash("sha256").update(input).digest("hex");
+  return hash.substring(0, 16); // First 16 chars (64 bits) should be sufficient
+}
diff --git a/api/src/ai/repository.ts b/api/src/ai/repository.ts
new file mode 100644
index 00000000..6b060868
--- /dev/null
+++ b/api/src/ai/repository.ts
@@ -0,0 +1,65 @@
+import { PostgresService } from "src/postgres/service";
+import { Service } from "typedi";
+import { aiPromptsTable, AIPromptRow } from "./table";
+import { sql } from "drizzle-orm";
+import { camelCaseObject } from "src/_utils/case";
+import { generateShortHash } from "src/_utils/hash";
+
+/**
+ * Data access for the `ai_prompts` table: AI responses stored under a short hash of the
+ * JSON-serialized prompt.
+ */
+@Service()
+export class AiPromptRepository {
+  constructor(private readonly postgresService: PostgresService) {}
+
+  /**
+   * Stores a prompt/response pair.
+   *
+   * @param promptObj - Prompt payload; JSON-serialized, then hashed to form the row's `hash`.
+   * @param responseObj - Response payload; stored JSON-serialized.
+   * @returns The insert's `returning` result, i.e. the `hash` of what was inserted.
+   */
+  public async insert(promptObj: unknown, responseObj: unknown) {
+    const prompt = JSON.stringify(promptObj);
+    const hash = generateShortHash(prompt);
+    const response = JSON.stringify(responseObj);
+
+    return await this.postgresService.db
+      .insert(aiPromptsTable)
+      .values({ hash, prompt, response })
+      .returning({ hash: aiPromptsTable.hash });
+  }
+
+  /**
+   * Looks up the stored response for a prompt.
+   *
+   * @param promptObj - Prompt payload; serialized and hashed the same way as in `insert`.
+   * @returns The matching row's `response`, or `undefined` when no row has that hash.
+   */
+  public async findPromptResponse(
+    promptObj: unknown,
+  ): Promise<Pick<AIPromptRow, "response"> | undefined> {
+    const prompt = JSON.stringify(promptObj);
+    const shortHash = generateShortHash(prompt);
+
+    const statement = sql`
+    SELECT
+        response
+    FROM
+        ai_prompts
+    WHERE
+        hash = ${shortHash}
+    `;
+
+    const raw = await this.postgresService.db.execute(statement);
+    const entries = Array.from(raw);
+    const entry = entries[0];
+    // Cache miss: return early so callers see `undefined` rather than whatever
+    // `camelCaseObject` would produce for a missing row.
+    if (!entry) return undefined;
+
+    const camelCased = camelCaseObject(entry) as Pick<AIPromptRow, "response">;
+    return camelCased;
+  }
+}
diff --git a/api/src/ai/service.ts b/api/src/ai/service.ts
index 5ac6ce97..c1a720bf 100644
--- a/api/src/ai/service.ts
+++ b/api/src/ai/service.ts
@@ -5,6 +5,8 @@ import { targetConstructorToSchema } from "class-validator-jsonschema";
 import { FetchService } from "src/fetch/service";
 import { ClassConstructor, plainToClass } from "class-transformer";
 import { validateSync } from "class-validator";
+import { AiPromptRepository } from "./repository";
+import { captureException } from "@sentry/node";
 
 type AIChat = { role: "user" | "system"; content: string };
 
@@ -18,6 +20,7 @@ export class AIService {
     private readonly configService: ConfigService,
     private readonly logger: LoggerService,
     private readonly fetchService: FetchService,
+    private readonly aiPromptRepository: AiPromptRepository,
   ) {}
 
   public query = async (
@@ -34,18 +37,22 @@ export class AIService {
       ...payload,
     ];
 
+    const cachedResponse = await this.aiPromptRepository.findPromptResponse(
+      payloadWithValidationPrompt,
+    );
+    if (cachedResponse) return JSON.parse(cachedResponse.response);
+
+    const body = { model: "gpt-4o", messages: payloadWithValidationPrompt };
+
+    this.logger.info({ message: "Cached response not found, querying AI..." });
+    // todo-zm: change to captureEvent
+    captureException("AI Query", { tags: { type: "CRON" }, extra: { body } });
+
     const { OPENAI_KEY } = this.configService.env();
 
-    // todo: cache response
     const res = await this.fetchService.post(
       "https://api.openai.com/v1/chat/completions",
-      {
-        headers: { Authorization: `Bearer ${OPENAI_KEY}` },
-        body: {
-          model: "gpt-4o",
-          messages: payloadWithValidationPrompt,
-        },
-      },
+      { headers: { Authorization: `Bearer ${OPENAI_KEY}` }, body },
     );
 
     const chatResponseUnchecked = JSON.parse(res.choices[0].message.content) as T;
@@ -61,6 +68,8 @@ export class AIService {
         )}`,
       );
 
+    await this.aiPromptRepository.insert(payloadWithValidationPrompt, output);
+
     return output;
   };
 }
diff --git a/api/src/ai/table.ts b/api/src/ai/table.ts
new file mode 100644
index 00000000..dc46016f
--- /dev/null
+++ b/api/src/ai/table.ts
@@ -0,0 +1,19 @@
+import { AiPromptEntity } from "@dzcode.io/models/dist/ai-prompt";
+import { sql } from "drizzle-orm";
+import { pgTable, text } from "drizzle-orm/pg-core";
+
+/** Drizzle definition of the `ai_prompts` table; `hash` is the primary key. */
+export const aiPromptsTable = pgTable("ai_prompts", {
+  hash: text("hash").notNull().primaryKey(),
+  prompt: text("prompt").notNull(),
+  response: text("response").notNull(),
+  recordImportedAt: text("record_imported_at")
+    .notNull()
+    .default(sql`CURRENT_TIMESTAMP`),
+});
+
+// Type-level check only: a selected row must satisfy the shared `AiPromptEntity` model.
+aiPromptsTable.$inferSelect satisfies AiPromptEntity;
+
+/** Row shape inferred for inserts into `ai_prompts` (`$inferInsert`). */
+export type AIPromptRow = typeof aiPromptsTable.$inferInsert;
diff --git a/api/src/digest/cron.ts b/api/src/digest/cron.ts
index 4941e120..f5bedcbd 100644
--- a/api/src/digest/cron.ts
+++ b/api/src/digest/cron.ts
@@ -84,10 +84,12 @@ export class DigestCron {
     // or uncomment to skip the cron
     // if (Math.random()) return;
 
-    const projectTitleSystemPrompt = `user will give you an open-source project name, and you will translate it to Arabic.`;
+    const projectTitleSystemPrompt = `user will give you an open-source project name, and you will translate it to Arabic.
+it may contain non-translatable parts like acronyms, keep them as is.`;
     const contributorNameSystemPrompt = `user will give you an open-source contributor name, and you will translate it to Arabic.
 if the name contain both english and arabic only keep the parts related to the language.`;
-    const issueTitleSystemPrompt = `user will give you an open-source issue/PR title, and you will translate it to Arabic.`;
+    const issueTitleSystemPrompt = `user will give you an open-source issue/PR title, and you will translate it to Arabic.
+it may contain non-translatable parts like acronyms, keep them as is.`;
 
     for (const project of projectsFromDataFolder) {
       let name_en = project.name;
diff --git a/packages/models/src/ai-prompt/index.ts b/packages/models/src/ai-prompt/index.ts
new file mode 100644
index 00000000..c42b1b39
--- /dev/null
+++ b/packages/models/src/ai-prompt/index.ts
@@ -0,0 +1,6 @@
+/** Fields of a stored AI prompt record. */
+export type AiPromptEntity = {
+  hash: string;
+  prompt: string;
+  response: string;
+};