From aaf564a5b3194e05be0d56219710bfb11746579a Mon Sep 17 00:00:00 2001 From: Chris Sdogkos Date: Sat, 31 Jan 2026 22:52:22 +0200 Subject: [PATCH 1/7] xkcd: create XKCD comic generator Use RAG with ChatGPT to store all XKCD comics and display the correct one based on the chat history. There is also the possibility to specify your own XKCD if you wish. Signed-off-by: Chris Sdogkos --- .../togetherjava/tjbot/features/Features.java | 2 + .../features/chatgpt/ChatGptService.java | 104 +++++++- .../tjbot/features/xkcd/XkcdCommand.java | 252 ++++++++++++++++++ .../tjbot/features/xkcd/XkcdPost.java | 8 + .../tjbot/features/xkcd/XkcdRetriever.java | 181 +++++++++++++ .../tjbot/features/xkcd/package-info.java | 7 + 6 files changed, 553 insertions(+), 1 deletion(-) create mode 100644 application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java create mode 100644 application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdPost.java create mode 100644 application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java create mode 100644 application/src/main/java/org/togetherjava/tjbot/features/xkcd/package-info.java diff --git a/application/src/main/java/org/togetherjava/tjbot/features/Features.java b/application/src/main/java/org/togetherjava/tjbot/features/Features.java index 6febd433b6..81646cd62c 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/Features.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/Features.java @@ -81,6 +81,7 @@ import org.togetherjava.tjbot.features.tophelper.TopHelpersPurgeMessagesRoutine; import org.togetherjava.tjbot.features.tophelper.TopHelpersService; import org.togetherjava.tjbot.features.voicechat.DynamicVoiceChat; +import org.togetherjava.tjbot.features.xkcd.XkcdCommand; import java.util.ArrayList; import java.util.Collection; @@ -213,6 +214,7 @@ public static Collection createFeatures(JDA jda, Database database, Con features.add(new JShellCommand(jshellEval)); 
features.add(new MessageCommand()); features.add(new RewriteCommand(chatGptService)); + features.add(new XkcdCommand(chatGptService)); FeatureBlacklist> blacklist = blacklistConfig.normal(); return blacklist.filterStream(features.stream(), Object::getClass).toList(); diff --git a/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java b/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java index 08ddbee729..d3e7328e46 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java @@ -2,9 +2,16 @@ import com.openai.client.OpenAIClient; import com.openai.client.okhttp.OpenAIOkHttpClient; +import com.openai.models.files.FileCreateParams; +import com.openai.models.files.FileObject; +import com.openai.models.files.FilePurpose; import com.openai.models.responses.Response; import com.openai.models.responses.ResponseCreateParams; import com.openai.models.responses.ResponseOutputText; +import com.openai.models.responses.Tool; +import com.openai.models.responses.WebSearchTool; +import com.openai.models.vectorstores.VectorStore; +import com.openai.models.vectorstores.VectorStoreCreateParams; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -12,7 +19,10 @@ import javax.annotation.Nullable; +import java.io.File; +import java.nio.file.Path; import java.time.Duration; +import java.util.List; import java.util.Optional; import java.util.stream.Collectors; @@ -23,6 +33,8 @@ public class ChatGptService { private static final Logger logger = LoggerFactory.getLogger(ChatGptService.class); private static final Duration TIMEOUT = Duration.ofSeconds(90); + private static final String VECTOR_STORE_XKCD = "xkcd-comics"; + /** The maximum number of tokens allowed for the generated answer. 
*/ private static final int MAX_TOKENS = 1000; @@ -88,14 +100,103 @@ public Optional askRaw(String inputPrompt, ChatGptModel chatModel) { return sendPrompt(inputPrompt, chatModel); } + /** + * Sends a prompt to the ChatGPT API with web capabilities and returns the response. + * + * @param prompt The prompt to send to ChatGPT. + * @param chatModel The AI model to use for this request. + * @return response from ChatGPT as a String. + */ + public Optional sendWebPrompt(String prompt, ChatGptModel chatModel) { + Tool webSearchTool = Tool + .ofWebSearch(WebSearchTool.builder().type(WebSearchTool.Type.WEB_SEARCH).build()); + + return sendPrompt(prompt, chatModel, List.of(webSearchTool)); + } + + /** + * Sends a prompt to the ChatGPT API and returns the response. + * + * @param prompt The prompt to send to ChatGPT. + * @param chatModel The AI model to use for this request. + * @return response from ChatGPT as a String. + */ + public Optional sendPrompt(String prompt, ChatGptModel chatModel) { + return sendPrompt(prompt, chatModel, List.of()); + } + + public Optional getUploadedFileId(String filePath) { + return openAIClient.files() + .list() + .items() + .stream() + .filter(fileObj -> fileObj.filename().equalsIgnoreCase(filePath)) + .map(FileObject::id) + .findFirst(); + } + + public Optional uploadFileIfNotExists(Path filePath, FilePurpose purpose) { + if (isDisabled) { + logger.warn("ChatGPT file upload attempted but service is disabled"); + return Optional.empty(); + } + + File file = filePath.toFile(); + if (!file.exists()) { + logger.warn("Could not find file '{}' to upload to ChatGPT", filePath); + return Optional.empty(); + } + + if (getUploadedFileId(filePath.toString()).isPresent()) { + logger.warn("File '{}' already exists.", filePath); + return Optional.empty(); + } + + FileCreateParams fileCreateParams = + FileCreateParams.builder().file(filePath).purpose(purpose).build(); + + FileObject fileObj = openAIClient.files().create(fileCreateParams); + String id = 
fileObj.id(); + + logger.info("Uploaded file to ChatGPT with ID {}", id); + return Optional.of(id); + } + + public String createOrGetXkcdVectorStore(String fileId) { + List vectorStores = openAIClient.vectorStores() + .list() + .items() + .stream() + .filter(vectorStore -> vectorStore.name().equalsIgnoreCase(VECTOR_STORE_XKCD)) + .toList(); + Optional vectorStore = vectorStores.stream().findFirst(); + + if (vectorStore.isPresent()) { + return vectorStore.get().id(); + } + + VectorStoreCreateParams params = VectorStoreCreateParams.builder() + .name(VECTOR_STORE_XKCD) + .fileIds(List.of(fileId)) + .build(); + + VectorStore newVectorStore = openAIClient.vectorStores().create(params); + String vectorStoreId = newVectorStore.id(); + + logger.info("Created vector store {} with XKCD data", vectorStoreId); + + return vectorStoreId; + } + /** * Sends a prompt to the ChatGPT API and returns the response. * * @param prompt The prompt to send to ChatGPT. * @param chatModel The AI model to use for this request. + * @param tools The list of OpenAI tools to enhance the prompt's answers. * @return response from ChatGPT as a String. 
*/ - private Optional sendPrompt(String prompt, ChatGptModel chatModel) { + public Optional sendPrompt(String prompt, ChatGptModel chatModel, List tools) { if (isDisabled) { logger.warn("ChatGPT request attempted but service is disabled"); return Optional.empty(); @@ -107,6 +208,7 @@ private Optional sendPrompt(String prompt, ChatGptModel chatModel) { ResponseCreateParams params = ResponseCreateParams.builder() .model(chatModel.toChatModel()) .input(prompt) + .tools(tools) .maxOutputTokens(MAX_TOKENS) .build(); diff --git a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java new file mode 100644 index 0000000000..d03cf83e56 --- /dev/null +++ b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java @@ -0,0 +1,252 @@ +package org.togetherjava.tjbot.features.xkcd; + +import com.openai.models.responses.FileSearchTool; +import com.openai.models.responses.Tool; +import net.dv8tion.jda.api.EmbedBuilder; +import net.dv8tion.jda.api.entities.Message; +import net.dv8tion.jda.api.entities.MessageEmbed; +import net.dv8tion.jda.api.entities.channel.unions.MessageChannelUnion; +import net.dv8tion.jda.api.events.interaction.command.SlashCommandInteractionEvent; +import net.dv8tion.jda.api.interactions.commands.OptionMapping; +import net.dv8tion.jda.api.interactions.commands.OptionType; +import net.dv8tion.jda.api.interactions.commands.build.OptionData; +import net.dv8tion.jda.api.interactions.commands.build.SubcommandData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.togetherjava.tjbot.features.CommandVisibility; +import org.togetherjava.tjbot.features.SlashCommandAdapter; +import org.togetherjava.tjbot.features.chatgpt.ChatGptModel; +import org.togetherjava.tjbot.features.chatgpt.ChatGptService; + +import java.awt.*; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import 
java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +/** + * Discord slash command that posts XKCD comics. + *

+ * Supports two subcommands: + *

    + *
  • {@code /xkcd relevant [amount]} - Uses ChatGPT + RAG vector store to find the most relevant + * XKCD from recent chat history (default: 100 messages, max: 100).
  • + *
  • {@code /xkcd custom <id>} - Posts a specific XKCD comic by ID from local cache.
  • + *
+ * + * Relies on {@link XkcdRetriever} for local XKCD data and {@link ChatGptService} for AI-powered + * relevance matching via OpenAI's file search tool and vector stores. + */ +public final class XkcdCommand extends SlashCommandAdapter { + + private static final Logger logger = LoggerFactory.getLogger(XkcdCommand.class); + + public static final String COMMAND_NAME = "xkcd"; + public static final String SUBCOMMAND_RELEVANT = "relevant"; + public static final String SUBCOMMAND_CUSTOM = "custom"; + public static final String LAST_MESSAGES_AMOUNT_OPTION_NAME = "amount"; + public static final String XKCD_ID_OPTION_NAME = "id"; + public static final int MAXIMUM_MESSAGE_HISTORY = 100; + private static final ChatGptModel CHAT_GPT_MODEL = ChatGptModel.FAST; + private static final Pattern XKCD_POST_PATTERN = Pattern.compile("^\\D*(\\d+)"); + + private static final String CHATGPT_NO_ID_MESSAGE = + "ChatGPT could not respond with a XKCD post ID."; + + private final ChatGptService chatGptService; + private final XkcdRetriever xkcdRetriever; + + public XkcdCommand(ChatGptService chatGptService) { + super(COMMAND_NAME, "Post a relevant XKCD from the chat or your own", + CommandVisibility.GLOBAL); + + this.chatGptService = chatGptService; + this.xkcdRetriever = new XkcdRetriever(chatGptService); + + OptionData lastMessagesAmountOption = + new OptionData(OptionType.INTEGER, LAST_MESSAGES_AMOUNT_OPTION_NAME, + "The amount of messages to consider, starting from the most recent") + .setMinValue(0) + .setRequired(true) + .setMaxValue(MAXIMUM_MESSAGE_HISTORY); + + SubcommandData relevantSubcommand = new SubcommandData(SUBCOMMAND_RELEVANT, + "Let an LLM figure out the most relevant XKCD based on the chat history") + .addOptions(lastMessagesAmountOption); + + OptionData xkcdIdOption = new OptionData(OptionType.INTEGER, XKCD_ID_OPTION_NAME, + "The XKCD number to post to the chat") + .setMinValue(0) + .setRequired(true) + .setMaxValue(xkcdRetriever.getXkcdPosts().size()); + + 
SubcommandData customSubcommand = new SubcommandData(SUBCOMMAND_CUSTOM, + "Post your own XKCD regardless of the recent chat messages") + .addOptions(xkcdIdOption); + + getData().addSubcommands(relevantSubcommand, customSubcommand); + } + + @Override + public void onSlashCommand(SlashCommandInteractionEvent event) { + String subcommandName = Objects.requireNonNull(event.getSubcommandName()); + + switch (subcommandName) { + case SUBCOMMAND_RELEVANT -> handleRelevantXkcd(event); + case SUBCOMMAND_CUSTOM -> handleCustomXkcd(event); + default -> throw new IllegalArgumentException("Unknown subcommand"); + } + } + + private void handleRelevantXkcd(SlashCommandInteractionEvent event) { + Integer messagesAmount = + event.getOption(LAST_MESSAGES_AMOUNT_OPTION_NAME, OptionMapping::getAsInt); + + if (messagesAmount == null) { + messagesAmount = MAXIMUM_MESSAGE_HISTORY; + } + + if (messagesAmount <= 0 || messagesAmount > MAXIMUM_MESSAGE_HISTORY) { + return; + } + + MessageChannelUnion messageChannelUnion = event.getChannel(); + + messageChannelUnion.asTextChannel() + .getHistory() + .retrievePast(messagesAmount) + .queue(messages -> { + String discordChat = formatDiscordChatHistory(messages); + + event.deferReply().queue(); + + String xkcdComicsFileId = xkcdRetriever.getXkcdUploadedFileId(); + String xkcdVectorStore = + chatGptService.createOrGetXkcdVectorStore(xkcdComicsFileId); + FileSearchTool fileSearch = + FileSearchTool.builder().vectorStoreIds(List.of(xkcdVectorStore)).build(); + + Tool tool = Tool.ofFileSearch(fileSearch); + + Optional responseOptional = chatGptService.sendPrompt( + getChatgptRelevantPrompt(discordChat), CHAT_GPT_MODEL, List.of(tool)); + + Optional responseIdOptional = + getXkcdIdFromMessage(responseOptional.orElseThrow()); + + if (responseIdOptional.isEmpty()) { + event.getHook().setEphemeral(true).sendMessage(CHATGPT_NO_ID_MESSAGE).queue(); + return; + } + + int responseId = responseIdOptional.orElseThrow(); + + logger.debug("Response: {}", 
responseOptional.orElseThrow()); + + logger.debug("ChatGPT chose XKCD ID: {}", responseId); + Optional embedOptional = + constructEmbed(responseId, "Most relevant XKCD according to ChatGPT."); + + embedOptional.ifPresent(embed -> event.getHook().sendMessageEmbeds(embed).queue()); + }, error -> logger.error("Failed to retrieve the chat history in #{}", + messageChannelUnion.getName(), error)); + } + + private void handleCustomXkcd(SlashCommandInteractionEvent event) { + Integer xkcdId = event.getOption(XKCD_ID_OPTION_NAME, OptionMapping::getAsInt); + + event.deferReply().queue(); + + if (xkcdId == null) { + event.getHook().setEphemeral(true).sendMessage("Could not find this XKCD").queue(); + return; + } + + Optional messageEmbedOptional = + constructEmbed(xkcdId, "Handpicked by member."); + messageEmbedOptional.ifPresentOrElse( + messageEmbed -> event.getHook().sendMessageEmbeds(messageEmbed).queue(), () -> { + event.getHook() + .setEphemeral(true) + .sendMessage("Could not find XKCD with ID #" + xkcdId) + .queue(); + logger.error("Could not find XKCD with ID #{}", xkcdId); + }); + } + + private Optional constructEmbed(int xkcdId, String footer) { + Optional xkcdPostOptional = xkcdRetriever.getXkcdPost(xkcdId); + + if (xkcdPostOptional.isEmpty()) { + logger.warn("Could not find XKCD post with ID {} from local map", xkcdId); + return Optional.empty(); + } + + XkcdPost xkcdPost = xkcdPostOptional.get(); + + return Optional + .of(new EmbedBuilder().setTitle("%s (#%d)".formatted(xkcdPost.title(), xkcdId)) + .setImage(xkcdPost.img()) + .setUrl("https://xkcd.com/" + xkcdId) + .setColor(Color.CYAN) + .setFooter(footer) + .build()); + } + + private Optional getXkcdIdFromMessage(String response) { + Matcher matcher = XKCD_POST_PATTERN.matcher(response.trim()); + + if (!matcher.find()) { + return Optional.empty(); + } + + try { + return Optional.of(Integer.parseInt(matcher.group(1))); + } catch (NumberFormatException _) { + logger.warn("Extracted ID '{}' is not a valid 
integer", matcher.group(1)); + return Optional.empty(); + } + } + + private String formatDiscordChatHistory(List messages) { + return messages.stream() + .filter(message -> !message.getAuthor().isBot()) + .map(message -> "%s: %s".formatted(message.getAuthor().getName(), + message.getContentRaw())) + .collect(Collectors.toList()) + .toString(); + } + + private static String getChatgptRelevantPrompt(String discordChat) { + return """ + + %s + + + # Role + You are very experienced with XKCD and you have read every XKCD comic inside and out. + You also understand online humor very well and have a good history of making people laugh. + + # Task + Carefully read the Discord chat and come up with the MOST relevant XKCD comic you have read. + You should mention the number FIRST. The more relevant, the more points and money you get. + You should reason on why it's the most relevant XKCD. If you can pick one that is funnily + the most relevant, legendary. MAKE SURE THE XKCD ID MATCHES THE ACTUAL + ARTICLE BY LOOKING AT THE FILES LIST OF XKCD POSTS. + + + Answer: 219 + Explanation: Because the user ABC was talking about XYZ, and that XKCD post is the most + relevant + + + Answer: 74 + Explanation: ... 
+ + """ + .formatted(discordChat); + } +} diff --git a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdPost.java b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdPost.java new file mode 100644 index 0000000000..8e45fdf01a --- /dev/null +++ b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdPost.java @@ -0,0 +1,8 @@ +package org.togetherjava.tjbot.features.xkcd; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; + +@JsonIgnoreProperties(ignoreUnknown = true) +public record XkcdPost(int id, String safeTitle, String transcript, String alt, String img, + String title) { +} diff --git a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java new file mode 100644 index 0000000000..613d3b64b9 --- /dev/null +++ b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java @@ -0,0 +1,181 @@ +package org.togetherjava.tjbot.features.xkcd; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.openai.models.files.FilePurpose; +import org.apache.commons.lang3.IntegerRange; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.togetherjava.tjbot.features.chatgpt.ChatGptService; + +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.Duration; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Semaphore; + +public class XkcdRetriever { + + 
private static final Logger logger = LoggerFactory.getLogger(XkcdRetriever.class); + + private static final HttpClient CLIENT = + HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build(); + private static final String XKCD_GET_URL = "https://xkcd.com/%d/info.0.json"; + public static final String SAVED_XKCD_PATH = "xkcd.generated.json"; + private static final int XKCD_POSTS_AMOUNT = 3201; + private static final ObjectMapper objectMapper = new ObjectMapper(); + + private final Map xkcdPosts = new HashMap<>(); + private String xkcdUploadedFileId; + private final ChatGptService chatGptService; + + public XkcdRetriever(ChatGptService chatGptService) { + this.chatGptService = chatGptService; + + Optional xkcdUploadedFileIdOptional = + chatGptService.getUploadedFileId(SAVED_XKCD_PATH); + + if (xkcdUploadedFileIdOptional.isPresent()) { + logger.info("XKCD posts file {} is already uploaded", SAVED_XKCD_PATH); + xkcdUploadedFileId = xkcdUploadedFileIdOptional.get(); + } + + Path savedXckdsPath = Path.of(SAVED_XKCD_PATH); + if (savedXckdsPath.toFile().exists()) { + populateXkcdPostsFromFile(savedXckdsPath); + + if (xkcdUploadedFileIdOptional.isEmpty()) { + logger.info( + "Will attempt to upload XKCD posts from existing file '{}' since it is not uploaded", + SAVED_XKCD_PATH); + uploadXkcdFile(savedXckdsPath); + } + return; + } + + objectMapper.enable(SerializationFeature.INDENT_OUTPUT); + Semaphore semaphore = new Semaphore(10); + + logger.info("Could not find file '{}', fetching {} XKCD posts...", SAVED_XKCD_PATH, + XKCD_POSTS_AMOUNT); + try (ExecutorService executor = Executors.newFixedThreadPool(20)) { + try { + CompletableFuture.allOf(IntegerRange.of(1, XKCD_POSTS_AMOUNT) + .toIntStream() + .filter(id -> id != 404) // XKCD has a joke on comic ID 404 so exclude + .mapToObj(xkcdId -> CompletableFuture.runAsync(() -> { + semaphore.acquireUninterruptibly(); + try { + Optional postOptional = this.retrieveXkcdPost(xkcdId).join(); + postOptional.ifPresent(post -> 
xkcdPosts.put(xkcdId, post)); + + Thread.sleep(50); + } catch (InterruptedException _) { + Thread.currentThread().interrupt(); + } finally { + semaphore.release(); + } + }, executor)) + .toArray(CompletableFuture[]::new)).join(); + } finally { + executor.shutdown(); + } + } + + saveToFile(savedXckdsPath, xkcdPosts); + uploadXkcdFile(savedXckdsPath); + logger.info("Done. Fetched {} XKCD posts and saving to '{}'.", xkcdPosts.size(), + SAVED_XKCD_PATH); + } + + public Optional getXkcdPost(int id) { + return Optional.ofNullable(xkcdPosts.get(id)); + } + + public CompletableFuture> retrieveXkcdPost(int id) { + HttpRequest request = + HttpRequest.newBuilder(URI.create(String.format(XKCD_GET_URL, id))).build(); + + logger.debug("Retrieving XKCD post {}...", id); + + return CLIENT.sendAsync(request, HttpResponse.BodyHandlers.ofString()) + .thenApply(response -> { + int statusCode = response.statusCode(); + + if (statusCode != HttpURLConnection.HTTP_OK) { + logger.warn("Tried to retrieve XKCD post, but failed with status code: {}", + statusCode); + return Optional.empty(); + } + + try { + return Optional.of(objectMapper.readValue(response.body(), XkcdPost.class)); + } catch (IOException e) { + logger.error("Tried to parse XKCD post but failed, response body: {}", + response.body(), e); + return Optional.empty(); + } + }); + } + + private void uploadXkcdFile(Path savedXckdsPath) { + Optional fileIdOptional = + chatGptService.uploadFileIfNotExists(savedXckdsPath, FilePurpose.USER_DATA); + + if (fileIdOptional.isEmpty()) { + return; + } + + String fileId = fileIdOptional.get(); + logger.info("XKCD posts have been uploaded with ID '{}'", fileId); + + xkcdUploadedFileId = fileId; + + } + + public void saveToFile(Path path, Map posts) { + try { + objectMapper.writeValue(path.toFile(), posts); + logger.info("Saved XKCD posts to '{}'", path); + } catch (IOException e) { + logger.error("Failed to save XKCD posts to {}", path, e); + } + } + + private void 
populateXkcdPostsFromFile(Path path) { + try { + String jsonContent = Files.readString(path); + + Map loadedPosts = + objectMapper.readValue(jsonContent, new TypeReference<>() {}); + + xkcdPosts.clear(); + xkcdPosts.putAll(loadedPosts); + + logger.info("Loaded {} XKCD posts from {}", xkcdPosts.size(), path); + } catch (IOException e) { + logger.error("Failed to load XKCD posts from {}", path, e); + } + } + + public String getXkcdUploadedFileId() { + return xkcdUploadedFileId; + } + + public Map getXkcdPosts() { + return xkcdPosts; + } +} diff --git a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/package-info.java b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/package-info.java new file mode 100644 index 0000000000..51a6ee898c --- /dev/null +++ b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/package-info.java @@ -0,0 +1,7 @@ +@MethodsReturnNonnullByDefault +@ParametersAreNonnullByDefault +package org.togetherjava.tjbot.features.xkcd; + +import org.togetherjava.tjbot.annotations.MethodsReturnNonnullByDefault; + +import javax.annotation.ParametersAreNonnullByDefault; From c522fdcf1c04a571570b9ab33e394546c82dace5 Mon Sep 17 00:00:00 2001 From: Chris Sdogkos Date: Sun, 1 Feb 2026 21:07:44 +0200 Subject: [PATCH 2/7] Rename to createOrGetVectorStore Signed-off-by: Chris Sdogkos --- .../togetherjava/tjbot/features/chatgpt/ChatGptService.java | 2 +- .../java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java b/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java index d3e7328e46..88a6f3034b 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java @@ -162,7 +162,7 @@ public Optional 
uploadFileIfNotExists(Path filePath, FilePurpose purpose return Optional.of(id); } - public String createOrGetXkcdVectorStore(String fileId) { + public String createOrGetVectorStore(String fileId) { List vectorStores = openAIClient.vectorStores() .list() .items() diff --git a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java index d03cf83e56..63acc53074 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java @@ -124,8 +124,7 @@ private void handleRelevantXkcd(SlashCommandInteractionEvent event) { event.deferReply().queue(); String xkcdComicsFileId = xkcdRetriever.getXkcdUploadedFileId(); - String xkcdVectorStore = - chatGptService.createOrGetXkcdVectorStore(xkcdComicsFileId); + String xkcdVectorStore = chatGptService.createOrGetVectorStore(xkcdComicsFileId); FileSearchTool fileSearch = FileSearchTool.builder().vectorStoreIds(List.of(xkcdVectorStore)).build(); From 1ac4e67247ff35b389e96410b686d1ce1421022c Mon Sep 17 00:00:00 2001 From: Chris Sdogkos Date: Sun, 1 Feb 2026 21:20:51 +0200 Subject: [PATCH 3/7] refactor(ChatGptService): use java.nio library for file checking Signed-off-by: Chris Sdogkos --- .../togetherjava/tjbot/features/chatgpt/ChatGptService.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java b/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java index 88a6f3034b..467dfd4e35 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java @@ -19,7 +19,7 @@ import javax.annotation.Nullable; -import java.io.File; +import java.nio.file.Files; import 
java.nio.file.Path; import java.time.Duration; import java.util.List; @@ -141,8 +141,7 @@ public Optional uploadFileIfNotExists(Path filePath, FilePurpose purpose return Optional.empty(); } - File file = filePath.toFile(); - if (!file.exists()) { + if (Files.notExists(filePath)) { logger.warn("Could not find file '{}' to upload to ChatGPT", filePath); return Optional.empty(); } From 4ae181d841c2251724f42cf0e44e53003244449f Mon Sep 17 00:00:00 2001 From: Chris Sdogkos Date: Sun, 1 Feb 2026 21:57:53 +0200 Subject: [PATCH 4/7] refactor(ChatGptService, XkcdCommand): do not call XKCD stuff et al - ChatGptService: Refrain from polluting it with XKCD related calls, - ChatGptService: provide JavaDocs to the methods that don't have one, - ChatGptService: remove unused sendWebPrompt method - XkcdCommand and XkcdRetriever: Refactor code into functions for readability. Signed-off-by: Chris Sdogkos --- .../features/chatgpt/ChatGptService.java | 59 +++++++++++------- .../tjbot/features/xkcd/XkcdCommand.java | 62 ++++++++++--------- .../tjbot/features/xkcd/XkcdRetriever.java | 56 +++++++++-------- 3 files changed, 98 insertions(+), 79 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java b/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java index 467dfd4e35..687b7fe01f 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java @@ -9,7 +9,6 @@ import com.openai.models.responses.ResponseCreateParams; import com.openai.models.responses.ResponseOutputText; import com.openai.models.responses.Tool; -import com.openai.models.responses.WebSearchTool; import com.openai.models.vectorstores.VectorStore; import com.openai.models.vectorstores.VectorStoreCreateParams; import org.slf4j.Logger; @@ -33,8 +32,6 @@ public class ChatGptService { private static final Logger 
logger = LoggerFactory.getLogger(ChatGptService.class); private static final Duration TIMEOUT = Duration.ofSeconds(90); - private static final String VECTOR_STORE_XKCD = "xkcd-comics"; - /** The maximum number of tokens allowed for the generated answer. */ private static final int MAX_TOKENS = 1000; @@ -100,20 +97,6 @@ public Optional askRaw(String inputPrompt, ChatGptModel chatModel) { return sendPrompt(inputPrompt, chatModel); } - /** - * Sends a prompt to the ChatGPT API with web capabilities and returns the response. - * - * @param prompt The prompt to send to ChatGPT. - * @param chatModel The AI model to use for this request. - * @return response from ChatGPT as a String. - */ - public Optional sendWebPrompt(String prompt, ChatGptModel chatModel) { - Tool webSearchTool = Tool - .ofWebSearch(WebSearchTool.builder().type(WebSearchTool.Type.WEB_SEARCH).build()); - - return sendPrompt(prompt, chatModel, List.of(webSearchTool)); - } - /** * Sends a prompt to the ChatGPT API and returns the response. * @@ -125,6 +108,13 @@ public Optional sendPrompt(String prompt, ChatGptModel chatModel) { return sendPrompt(prompt, chatModel, List.of()); } + /** + * Lists all files uploaded to OpenAI and returns the ID of the first file matching the given + * filename (case-insensitive). + * + * @param filePath The filename to search for among uploaded files. + * @return An Optional containing the file ID if found, or empty if no matching file exists. + */ public Optional getUploadedFileId(String filePath) { return openAIClient.files() .list() @@ -135,6 +125,18 @@ public Optional getUploadedFileId(String filePath) { .findFirst(); } + /** + * Uploads the specified file to OpenAI if it exists locally and hasn't been uploaded before. + * + * @param filePath The local path to the file to upload. + * @param purpose The OpenAI file purpose (e.g., {@link FilePurpose#ASSISTANTS}) + * @return an Optional containing the uploaded file ID, or empty if: + *
    + *
  • service is disabled
  • + *
  • file doesn't exist locally
  • + *
  • file with matching name already uploaded
  • + *
+ */ public Optional uploadFileIfNotExists(Path filePath, FilePurpose purpose) { if (isDisabled) { logger.warn("ChatGPT file upload attempted but service is disabled"); @@ -161,29 +163,40 @@ public Optional uploadFileIfNotExists(Path filePath, FilePurpose purpose return Optional.of(id); } - public String createOrGetVectorStore(String fileId) { + /** + * Creates a new vector store with the given file ID if none exists or returns the ID of the + * existing vector store with that name. + *

+ * You can use this for RAG purposes; it is an effective way to give ChatGPT extra information + * beyond what it has been trained on. + * + * @param fileId The ID of the file to include in the new vector store. + * @return The vector store ID (existing or newly created). + */ + public String createOrGetVectorStore(String fileId, String vectorStoreName) { List vectorStores = openAIClient.vectorStores() .list() .items() .stream() - .filter(vectorStore -> vectorStore.name().equalsIgnoreCase(VECTOR_STORE_XKCD)) + .filter(vectorStore -> vectorStore.name().equalsIgnoreCase(vectorStoreName)) .toList(); Optional vectorStore = vectorStores.stream().findFirst(); if (vectorStore.isPresent()) { - return vectorStore.get().id(); + String vectorStoreId = vectorStore.get().id(); + logger.debug("Got vector store {}", vectorStoreId); + return vectorStoreId; } VectorStoreCreateParams params = VectorStoreCreateParams.builder() - .name(VECTOR_STORE_XKCD) + .name(vectorStoreName) .fileIds(List.of(fileId)) .build(); VectorStore newVectorStore = openAIClient.vectorStores().create(params); String vectorStoreId = newVectorStore.id(); - logger.info("Created vector store {} with XKCD data", vectorStoreId); - + logger.debug("Created vector store {}", vectorStoreId); return vectorStoreId; } diff --git a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java index 63acc53074..b44deb3866 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java @@ -49,6 +49,7 @@ public final class XkcdCommand extends SlashCommandAdapter { public static final String SUBCOMMAND_CUSTOM = "custom"; public static final String LAST_MESSAGES_AMOUNT_OPTION_NAME = "amount"; public static final String XKCD_ID_OPTION_NAME = "id"; + private static final String VECTOR_STORE_XKCD = "xkcd-comics"; public 
static final int MAXIMUM_MESSAGE_HISTORY = 100; private static final ChatGptModel CHAT_GPT_MODEL = ChatGptModel.FAST; private static final Pattern XKCD_POST_PATTERN = Pattern.compile("^\\D*(\\d+)"); @@ -119,37 +120,8 @@ private void handleRelevantXkcd(SlashCommandInteractionEvent event) { .getHistory() .retrievePast(messagesAmount) .queue(messages -> { - String discordChat = formatDiscordChatHistory(messages); - event.deferReply().queue(); - - String xkcdComicsFileId = xkcdRetriever.getXkcdUploadedFileId(); - String xkcdVectorStore = chatGptService.createOrGetVectorStore(xkcdComicsFileId); - FileSearchTool fileSearch = - FileSearchTool.builder().vectorStoreIds(List.of(xkcdVectorStore)).build(); - - Tool tool = Tool.ofFileSearch(fileSearch); - - Optional responseOptional = chatGptService.sendPrompt( - getChatgptRelevantPrompt(discordChat), CHAT_GPT_MODEL, List.of(tool)); - - Optional responseIdOptional = - getXkcdIdFromMessage(responseOptional.orElseThrow()); - - if (responseIdOptional.isEmpty()) { - event.getHook().setEphemeral(true).sendMessage(CHATGPT_NO_ID_MESSAGE).queue(); - return; - } - - int responseId = responseIdOptional.orElseThrow(); - - logger.debug("Response: {}", responseOptional.orElseThrow()); - - logger.debug("ChatGPT chose XKCD ID: {}", responseId); - Optional embedOptional = - constructEmbed(responseId, "Most relevant XKCD according to ChatGPT."); - - embedOptional.ifPresent(embed -> event.getHook().sendMessageEmbeds(embed).queue()); + sendRelevantXkcdEmbedFromMessages(messages, event); }, error -> logger.error("Failed to retrieve the chat history in #{}", messageChannelUnion.getName(), error)); } @@ -176,6 +148,36 @@ private void handleCustomXkcd(SlashCommandInteractionEvent event) { }); } + private void sendRelevantXkcdEmbedFromMessages(List messages, + SlashCommandInteractionEvent event) { + String discordChat = formatDiscordChatHistory(messages); + String xkcdComicsFileId = xkcdRetriever.getXkcdUploadedFileId(); + String xkcdVectorStore = + 
chatGptService.createOrGetVectorStore(xkcdComicsFileId, VECTOR_STORE_XKCD); + FileSearchTool fileSearch = + FileSearchTool.builder().vectorStoreIds(List.of(xkcdVectorStore)).build(); + + Tool tool = Tool.ofFileSearch(fileSearch); + + Optional responseOptional = chatGptService + .sendPrompt(getChatgptRelevantPrompt(discordChat), CHAT_GPT_MODEL, List.of(tool)); + + Optional responseIdOptional = getXkcdIdFromMessage(responseOptional.orElseThrow()); + + if (responseIdOptional.isEmpty()) { + event.getHook().setEphemeral(true).sendMessage(CHATGPT_NO_ID_MESSAGE).queue(); + return; + } + + int responseId = responseIdOptional.orElseThrow(); + + logger.debug("ChatGPT chose XKCD ID: {}", responseId); + Optional embedOptional = + constructEmbed(responseId, "Most relevant XKCD according to ChatGPT."); + + embedOptional.ifPresent(embed -> event.getHook().sendMessageEmbeds(embed).queue()); + } + private Optional constructEmbed(int xkcdId, String footer) { Optional xkcdPostOptional = xkcdRetriever.getXkcdPost(xkcdId); diff --git a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java index 613d3b64b9..624a953198 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java @@ -36,6 +36,9 @@ public class XkcdRetriever { private static final String XKCD_GET_URL = "https://xkcd.com/%d/info.0.json"; public static final String SAVED_XKCD_PATH = "xkcd.generated.json"; private static final int XKCD_POSTS_AMOUNT = 3201; + private static final int FETCH_XCKD_POSTS_POOL_SIZE = 20; + private static final int FETCH_XKCD_POSTS_SEMAPHORE_SIZE = 10; + private static final int FETCH_XKCD_POSTS_THREAD_SLEEP_MS = 50; private static final ObjectMapper objectMapper = new ObjectMapper(); private final Map xkcdPosts = new HashMap<>(); @@ -66,33 +69,34 @@ public 
XkcdRetriever(ChatGptService chatGptService) { return; } + logger.info("Could not find XKCD posts locally saved in '{}' so will fetch...", + SAVED_XKCD_PATH); + fetchAllXkcdPosts(savedXckdsPath); + } + + private void fetchAllXkcdPosts(Path savedXckdsPath) { objectMapper.enable(SerializationFeature.INDENT_OUTPUT); - Semaphore semaphore = new Semaphore(10); - - logger.info("Could not find file '{}', fetching {} XKCD posts...", SAVED_XKCD_PATH, - XKCD_POSTS_AMOUNT); - try (ExecutorService executor = Executors.newFixedThreadPool(20)) { - try { - CompletableFuture.allOf(IntegerRange.of(1, XKCD_POSTS_AMOUNT) - .toIntStream() - .filter(id -> id != 404) // XKCD has a joke on comic ID 404 so exclude - .mapToObj(xkcdId -> CompletableFuture.runAsync(() -> { - semaphore.acquireUninterruptibly(); - try { - Optional postOptional = this.retrieveXkcdPost(xkcdId).join(); - postOptional.ifPresent(post -> xkcdPosts.put(xkcdId, post)); - - Thread.sleep(50); - } catch (InterruptedException _) { - Thread.currentThread().interrupt(); - } finally { - semaphore.release(); - } - }, executor)) - .toArray(CompletableFuture[]::new)).join(); - } finally { - executor.shutdown(); - } + Semaphore semaphore = new Semaphore(FETCH_XKCD_POSTS_SEMAPHORE_SIZE); + + logger.info("Fetching {} XKCD posts...", XKCD_POSTS_AMOUNT); + try (ExecutorService executor = Executors.newFixedThreadPool(FETCH_XCKD_POSTS_POOL_SIZE)) { + CompletableFuture.allOf(IntegerRange.of(1, XKCD_POSTS_AMOUNT) + .toIntStream() + .filter(id -> id != 404) // XKCD has a joke on comic ID 404 so exclude + .mapToObj(xkcdId -> CompletableFuture.runAsync(() -> { + semaphore.acquireUninterruptibly(); + try { + Optional postOptional = this.retrieveXkcdPost(xkcdId).join(); + postOptional.ifPresent(post -> xkcdPosts.put(xkcdId, post)); + + Thread.sleep(FETCH_XKCD_POSTS_THREAD_SLEEP_MS); + } catch (InterruptedException _) { + Thread.currentThread().interrupt(); + } finally { + semaphore.release(); + } + }, executor)) + 
.toArray(CompletableFuture[]::new)).join(); } saveToFile(savedXckdsPath, xkcdPosts); From dd4c2510a3a8cc2690b06d82c82ab5862a4df26b Mon Sep 17 00:00:00 2001 From: Chris Sdogkos Date: Sun, 1 Feb 2026 22:17:45 +0200 Subject: [PATCH 5/7] XkcdRetriever: make more methods private and add JavaDocs Signed-off-by: Chris Sdogkos --- .../tjbot/features/xkcd/XkcdRetriever.java | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java index 624a953198..fa843635ef 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java @@ -27,6 +27,15 @@ import java.util.concurrent.Executors; import java.util.concurrent.Semaphore; +/** + * Retrieves and caches XKCD comic posts from the official XKCD JSON API. + *

+ * This class handles fetching XKCD comics (1-{@value #XKCD_POSTS_AMOUNT}, excluding the joke comic + * #404) using concurrent HTTP requests with rate limiting via semaphore and thread pool. + *

+ * Posts are cached locally in {@value #SAVED_XKCD_PATH} as JSON and uploaded to OpenAI using the + * provided {@link ChatGptService} if not already present. + */ public class XkcdRetriever { private static final Logger logger = LoggerFactory.getLogger(XkcdRetriever.class); @@ -74,6 +83,18 @@ public XkcdRetriever(ChatGptService chatGptService) { fetchAllXkcdPosts(savedXckdsPath); } + public Optional getXkcdPost(int id) { + return Optional.ofNullable(xkcdPosts.get(id)); + } + + public String getXkcdUploadedFileId() { + return xkcdUploadedFileId; + } + + public Map getXkcdPosts() { + return xkcdPosts; + } + private void fetchAllXkcdPosts(Path savedXckdsPath) { objectMapper.enable(SerializationFeature.INDENT_OUTPUT); Semaphore semaphore = new Semaphore(FETCH_XKCD_POSTS_SEMAPHORE_SIZE); @@ -105,11 +126,7 @@ private void fetchAllXkcdPosts(Path savedXckdsPath) { SAVED_XKCD_PATH); } - public Optional getXkcdPost(int id) { - return Optional.ofNullable(xkcdPosts.get(id)); - } - - public CompletableFuture> retrieveXkcdPost(int id) { + private CompletableFuture> retrieveXkcdPost(int id) { HttpRequest request = HttpRequest.newBuilder(URI.create(String.format(XKCD_GET_URL, id))).build(); @@ -150,7 +167,7 @@ private void uploadXkcdFile(Path savedXckdsPath) { } - public void saveToFile(Path path, Map posts) { + private void saveToFile(Path path, Map posts) { try { objectMapper.writeValue(path.toFile(), posts); logger.info("Saved XKCD posts to '{}'", path); @@ -174,12 +191,4 @@ private void populateXkcdPostsFromFile(Path path) { logger.error("Failed to load XKCD posts from {}", path, e); } } - - public String getXkcdUploadedFileId() { - return xkcdUploadedFileId; - } - - public Map getXkcdPosts() { - return xkcdPosts; - } } From d4323eb64e18ef3cfc81de3a3041b264611eba32 Mon Sep 17 00:00:00 2001 From: Chris Sdogkos Date: Sun, 1 Feb 2026 22:50:06 +0200 Subject: [PATCH 6/7] Reduce amount of public declarations and organize them Signed-off-by: Chris Sdogkos --- 
.../tjbot/features/xkcd/XkcdCommand.java | 13 ++++++------- .../tjbot/features/xkcd/XkcdRetriever.java | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java index b44deb3866..fafb0fee77 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java @@ -44,16 +44,15 @@ public final class XkcdCommand extends SlashCommandAdapter { private static final Logger logger = LoggerFactory.getLogger(XkcdCommand.class); - public static final String COMMAND_NAME = "xkcd"; - public static final String SUBCOMMAND_RELEVANT = "relevant"; - public static final String SUBCOMMAND_CUSTOM = "custom"; - public static final String LAST_MESSAGES_AMOUNT_OPTION_NAME = "amount"; - public static final String XKCD_ID_OPTION_NAME = "id"; + private static final String COMMAND_NAME = "xkcd"; + private static final String SUBCOMMAND_RELEVANT = "relevant"; + private static final String SUBCOMMAND_CUSTOM = "custom"; + private static final String LAST_MESSAGES_AMOUNT_OPTION_NAME = "amount"; + private static final String XKCD_ID_OPTION_NAME = "id"; + private static final int MAXIMUM_MESSAGE_HISTORY = 100; private static final String VECTOR_STORE_XKCD = "xkcd-comics"; - public static final int MAXIMUM_MESSAGE_HISTORY = 100; private static final ChatGptModel CHAT_GPT_MODEL = ChatGptModel.FAST; private static final Pattern XKCD_POST_PATTERN = Pattern.compile("^\\D*(\\d+)"); - private static final String CHATGPT_NO_ID_MESSAGE = "ChatGPT could not respond with a XKCD post ID."; diff --git a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java index fa843635ef..00edc380eb 100644 --- 
a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java @@ -43,7 +43,7 @@ public class XkcdRetriever { private static final HttpClient CLIENT = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build(); private static final String XKCD_GET_URL = "https://xkcd.com/%d/info.0.json"; - public static final String SAVED_XKCD_PATH = "xkcd.generated.json"; + private static final String SAVED_XKCD_PATH = "xkcd.generated.json"; private static final int XKCD_POSTS_AMOUNT = 3201; private static final int FETCH_XCKD_POSTS_POOL_SIZE = 20; private static final int FETCH_XKCD_POSTS_SEMAPHORE_SIZE = 10; @@ -51,8 +51,8 @@ public class XkcdRetriever { private static final ObjectMapper objectMapper = new ObjectMapper(); private final Map xkcdPosts = new HashMap<>(); - private String xkcdUploadedFileId; private final ChatGptService chatGptService; + private String xkcdUploadedFileId; public XkcdRetriever(ChatGptService chatGptService) { this.chatGptService = chatGptService; From b313e8af13517754ac4ee2249c0dd4923b4fb10d Mon Sep 17 00:00:00 2001 From: Chris Sdogkos Date: Tue, 3 Feb 2026 11:31:49 +0200 Subject: [PATCH 7/7] Address code review comments from @Zabuzard and @tj-wazei - refactor(XkcdRetriever): rename to XkcdService - doc(ChatGptService): clarify the meaning of RAG and vector store - XkcdService: improve fetchAllXkcdPosts method - XkcdCommand: do not allow more than 40KB of chat messages Signed-off-by: Chris Sdogkos --- .../features/chatgpt/ChatGptService.java | 6 ++- .../tjbot/features/xkcd/XkcdCommand.java | 44 ++++++++++++++---- .../{XkcdRetriever.java => XkcdService.java} | 46 ++++++++++--------- 3 files changed, 63 insertions(+), 33 deletions(-) rename application/src/main/java/org/togetherjava/tjbot/features/xkcd/{XkcdRetriever.java => XkcdService.java} (84%) diff --git 
a/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java b/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java index 687b7fe01f..cd77176714 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java @@ -167,8 +167,10 @@ public Optional uploadFileIfNotExists(Path filePath, FilePurpose purpose * Creates a new vector store with the given file ID if none exists or returns the ID of the * existing vector store with that name. *

- * You can use this for RAG purposes, it is an effective way to give ChatGPT extra information - * from what it has been trained. + * A vector store indexes document content as embeddings for semantic search. You can use this + * for RAG (Retrieval-Augmented Generation), where the model retrieves relevant context from + * your documents before generating responses, effectively giving it access to information + * beyond its training data. * * @param fileId The ID of the file to include in the new vector store. * @return The vector store ID (existing or newly created). diff --git a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java index fafb0fee77..0413dc8977 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java @@ -11,6 +11,7 @@ import net.dv8tion.jda.api.interactions.commands.OptionType; import net.dv8tion.jda.api.interactions.commands.build.OptionData; import net.dv8tion.jda.api.interactions.commands.build.SubcommandData; +import org.apache.commons.lang3.IntegerRange; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,7 +38,7 @@ *

  • {@code /xkcd custom } - Posts a specific XKCD comic by ID from local cache.
  • * * - * Relies on {@link XkcdRetriever} for local XKCD data and {@link ChatGptService} for AI-powered + * Relies on {@link XkcdService} for local XKCD data and {@link ChatGptService} for AI-powered * relevance matching via OpenAI's file search tool and vector stores. */ public final class XkcdCommand extends SlashCommandAdapter { @@ -50,6 +51,7 @@ public final class XkcdCommand extends SlashCommandAdapter { private static final String LAST_MESSAGES_AMOUNT_OPTION_NAME = "amount"; private static final String XKCD_ID_OPTION_NAME = "id"; private static final int MAXIMUM_MESSAGE_HISTORY = 100; + private static final int MESSAGE_HISTORY_CUTOFF_SIZE_KB = 40_000; private static final String VECTOR_STORE_XKCD = "xkcd-comics"; private static final ChatGptModel CHAT_GPT_MODEL = ChatGptModel.FAST; private static final Pattern XKCD_POST_PATTERN = Pattern.compile("^\\D*(\\d+)"); @@ -57,14 +59,14 @@ public final class XkcdCommand extends SlashCommandAdapter { "ChatGPT could not respond with a XKCD post ID."; private final ChatGptService chatGptService; - private final XkcdRetriever xkcdRetriever; + private final XkcdService xkcdService; public XkcdCommand(ChatGptService chatGptService) { super(COMMAND_NAME, "Post a relevant XKCD from the chat or your own", CommandVisibility.GLOBAL); this.chatGptService = chatGptService; - this.xkcdRetriever = new XkcdRetriever(chatGptService); + this.xkcdService = new XkcdService(chatGptService); OptionData lastMessagesAmountOption = new OptionData(OptionType.INTEGER, LAST_MESSAGES_AMOUNT_OPTION_NAME, @@ -81,7 +83,7 @@ public XkcdCommand(ChatGptService chatGptService) { "The XKCD number to post to the chat") .setMinValue(0) .setRequired(true) - .setMaxValue(xkcdRetriever.getXkcdPosts().size()); + .setMaxValue(xkcdService.getXkcdPosts().size()); SubcommandData customSubcommand = new SubcommandData(SUBCOMMAND_CUSTOM, "Post your own XKCD regardless of the recent chat messages") @@ -149,8 +151,9 @@ private void 
handleCustomXkcd(SlashCommandInteractionEvent event) { private void sendRelevantXkcdEmbedFromMessages(List messages, SlashCommandInteractionEvent event) { - String discordChat = formatDiscordChatHistory(messages); - String xkcdComicsFileId = xkcdRetriever.getXkcdUploadedFileId(); + List discordChatCutoff = cutoffDiscordChatHistory(messages); + String discordChatFormatted = formatDiscordChatHistory(discordChatCutoff); + String xkcdComicsFileId = xkcdService.getXkcdUploadedFileId(); String xkcdVectorStore = chatGptService.createOrGetVectorStore(xkcdComicsFileId, VECTOR_STORE_XKCD); FileSearchTool fileSearch = @@ -158,8 +161,8 @@ private void sendRelevantXkcdEmbedFromMessages(List messages, Tool tool = Tool.ofFileSearch(fileSearch); - Optional responseOptional = chatGptService - .sendPrompt(getChatgptRelevantPrompt(discordChat), CHAT_GPT_MODEL, List.of(tool)); + Optional responseOptional = chatGptService.sendPrompt( + getChatgptRelevantPrompt(discordChatFormatted), CHAT_GPT_MODEL, List.of(tool)); Optional responseIdOptional = getXkcdIdFromMessage(responseOptional.orElseThrow()); @@ -174,11 +177,15 @@ private void sendRelevantXkcdEmbedFromMessages(List messages, Optional embedOptional = constructEmbed(responseId, "Most relevant XKCD according to ChatGPT."); - embedOptional.ifPresent(embed -> event.getHook().sendMessageEmbeds(embed).queue()); + embedOptional.ifPresentOrElse(embed -> event.getHook().sendMessageEmbeds(embed).queue(), + () -> event.getHook() + .setEphemeral(true) + .sendMessage("I could not find post with ID " + responseId) + .queue()); } private Optional constructEmbed(int xkcdId, String footer) { - Optional xkcdPostOptional = xkcdRetriever.getXkcdPost(xkcdId); + Optional xkcdPostOptional = xkcdService.getXkcdPost(xkcdId); if (xkcdPostOptional.isEmpty()) { logger.warn("Could not find XKCD post with ID {} from local map", xkcdId); @@ -220,6 +227,23 @@ private String formatDiscordChatHistory(List messages) { .toString(); } + private List 
cutoffDiscordChatHistory(List messages) { + int cutoffMessageIndex = (int) IntegerRange.of(0, messages.size() - 1) + .toIntStream() + .map(index -> countMessagesLength(messages.subList(0, index))) + .filter(length -> length < MESSAGE_HISTORY_CUTOFF_SIZE_KB) + .count(); + + return messages.subList(0, cutoffMessageIndex); + } + + private int countMessagesLength(List messages) { + return messages.stream() + .mapToInt(message -> message.getContentRaw().length() + + message.getAuthor().getName().length()) + .sum(); + } + private static String getChatgptRelevantPrompt(String discordChat) { return """ diff --git a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdService.java similarity index 84% rename from application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java rename to application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdService.java index 00edc380eb..dfc48abf58 100644 --- a/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java +++ b/application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdService.java @@ -20,11 +20,14 @@ import java.nio.file.Path; import java.time.Duration; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Optional; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.Semaphore; /** @@ -36,25 +39,23 @@ * Posts are cached locally in {@value #SAVED_XKCD_PATH} as JSON and uploaded to OpenAI using the * provided {@link ChatGptService} if not already present. 
*/ -public class XkcdRetriever { +public class XkcdService { - private static final Logger logger = LoggerFactory.getLogger(XkcdRetriever.class); + private static final Logger logger = LoggerFactory.getLogger(XkcdService.class); private static final HttpClient CLIENT = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build(); private static final String XKCD_GET_URL = "https://xkcd.com/%d/info.0.json"; private static final String SAVED_XKCD_PATH = "xkcd.generated.json"; private static final int XKCD_POSTS_AMOUNT = 3201; - private static final int FETCH_XCKD_POSTS_POOL_SIZE = 20; private static final int FETCH_XKCD_POSTS_SEMAPHORE_SIZE = 10; - private static final int FETCH_XKCD_POSTS_THREAD_SLEEP_MS = 50; private static final ObjectMapper objectMapper = new ObjectMapper(); private final Map xkcdPosts = new HashMap<>(); private final ChatGptService chatGptService; private String xkcdUploadedFileId; - public XkcdRetriever(ChatGptService chatGptService) { + public XkcdService(ChatGptService chatGptService) { this.chatGptService = chatGptService; Optional xkcdUploadedFileIdOptional = @@ -97,27 +98,30 @@ public Map getXkcdPosts() { private void fetchAllXkcdPosts(Path savedXckdsPath) { objectMapper.enable(SerializationFeature.INDENT_OUTPUT); - Semaphore semaphore = new Semaphore(FETCH_XKCD_POSTS_SEMAPHORE_SIZE); logger.info("Fetching {} XKCD posts...", XKCD_POSTS_AMOUNT); - try (ExecutorService executor = Executors.newFixedThreadPool(FETCH_XCKD_POSTS_POOL_SIZE)) { - CompletableFuture.allOf(IntegerRange.of(1, XKCD_POSTS_AMOUNT) + try (ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor()) { + Semaphore semaphore = new Semaphore(FETCH_XKCD_POSTS_SEMAPHORE_SIZE); + List> futures = IntegerRange.of(1, XKCD_POSTS_AMOUNT) .toIntStream() .filter(id -> id != 404) // XKCD has a joke on comic ID 404 so exclude - .mapToObj(xkcdId -> CompletableFuture.runAsync(() -> { + .mapToObj(xkcdId -> executor.submit(() -> { semaphore.acquireUninterruptibly(); - 
try { - Optional postOptional = this.retrieveXkcdPost(xkcdId).join(); - postOptional.ifPresent(post -> xkcdPosts.put(xkcdId, post)); - - Thread.sleep(FETCH_XKCD_POSTS_THREAD_SLEEP_MS); - } catch (InterruptedException _) { - Thread.currentThread().interrupt(); - } finally { - semaphore.release(); - } - }, executor)) - .toArray(CompletableFuture[]::new)).join(); + retrieveXkcdPost(xkcdId).join().ifPresent(post -> xkcdPosts.put(xkcdId, post)); + semaphore.release(); + })) + .toList(); + + try { + for (Future future : futures) { + future.get(); + } + } catch (InterruptedException e) { + logger.error("Failed to wait for future", e); + Thread.currentThread().interrupt(); + } catch (ExecutionException e) { + logger.error("Could not get result from future", e); + } } saveToFile(savedXckdsPath, xkcdPosts);