From 2c5676bfa59c9684d95d3eb37c9e33d8c7862061 Mon Sep 17 00:00:00 2001 From: liutao <821580467@qq.com> Date: Wed, 12 Nov 2025 14:05:04 +0800 Subject: [PATCH 1/7] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E6=9C=AC=E5=9C=B0?= =?UTF-8?q?=E8=BF=90=E8=A1=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bun.lock | 1 + start.bat | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/bun.lock b/bun.lock index 20e895e7..9ece8757 100644 --- a/bun.lock +++ b/bun.lock @@ -1,5 +1,6 @@ { "lockfileVersion": 1, + "configVersion": 0, "workspaces": { "": { "name": "copilot-api", diff --git a/start.bat b/start.bat index 1a0f8cb8..d101ebd8 100644 --- a/start.bat +++ b/start.bat @@ -15,6 +15,6 @@ echo The usage viewer page will open automatically after the server starts echo. start "" "https://ericc-ch.github.io/copilot-api?endpoint=http://localhost:4141/usage" -bun run dev +bun run --watch ./src/main.ts start pause From d5c538d412c616b29ffa440d7a0b223164549cba Mon Sep 17 00:00:00 2001 From: liutao <821580467@qq.com> Date: Wed, 12 Nov 2025 16:22:34 +0800 Subject: [PATCH 2/7] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E8=BD=AC=E6=8D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib/model-consumption.json | 44 ++++++ src/lib/model-matcher.ts | 139 ++++++++++++++++++ src/routes/chat-completions/handler.ts | 14 ++ src/routes/messages/handler.ts | 14 ++ src/routes/models/route.ts | 43 ++++-- src/server.ts | 5 + .../copilot/create-chat-completions.ts | 24 ++- src/services/copilot/get-models.ts | 8 +- src/start.ts | 54 ++++++- tsconfig.json | 1 + 10 files changed, 334 insertions(+), 12 deletions(-) create mode 100644 src/lib/model-consumption.json create mode 100644 src/lib/model-matcher.ts diff --git a/src/lib/model-consumption.json b/src/lib/model-consumption.json new file mode 100644 index 00000000..3db475ee --- /dev/null +++ b/src/lib/model-consumption.json @@ -0,0 +1,44 @@ +{ + "models": [ + { + "name": "GPT-4.1", + "consumption": "0x" + }, + { + "name": "GPT-4o", + "consumption": "0x" + }, + { + "name": "GPT-5 mini", + "consumption": "0x" + }, + { + "name": "Grok Code Fast 1", + "consumption": "0x" + }, + { + "name": "Claude Haiku 4.5", + "consumption": "0.33x" + }, + { + "name": "Claude Sonnet 4", + "consumption": "1x" + }, + { + "name": "Claude Sonnet 4.5", + "consumption": "1x" + }, + { + "name": "Gemini 2.5 Pro", + "consumption": "1x" + }, + { + "name": "GPT-5", + "consumption": "1x" + }, + { + "name": "GPT-5-Codex (Preview)", + "consumption": "1x" + } + ] +} diff --git a/src/lib/model-matcher.ts b/src/lib/model-matcher.ts new file mode 100644 index 00000000..0084906a --- /dev/null +++ b/src/lib/model-matcher.ts @@ -0,0 +1,139 @@ +import consola from "consola" + +import { state } from "./state" + +/** + * Find a matching model from available models + * If exact match exists, return it + * If no exact match, try to find by prefix (e.g., claude-haiku-4-5-xxx -> claude-haiku-4.5) + */ +export function findMatchingModel(requestedModel: string): string | null { + const availableModels = state.models?.data.filter( + (m) => typeof m.capabilities?.limits?.max_context_window_tokens === "number", + ) + + if (!availableModels || availableModels.length === 0) { + return null + } + + const availableModelIds = availableModels.map((m) => m.id) + + consola.debug(`Looking for match for: ${requestedModel}`) + consola.debug(`Available models: ${availableModelIds.join(", ")}`) + + // Try exact match first + if (availableModelIds.includes(requestedModel)) { + return requestedModel + } + + // Normalize the requested model + // 1. Replace underscores with hyphens + // 2. Remove date suffix (8 digits at the end) + // 3. Replace version numbers: 4-5 -> 4.5 + let normalizedRequested = requestedModel + .toLowerCase() + .replace(/_/g, "-") + .replace(/-(\d{8})$/, "") // Remove -20251001 style suffix + .replace(/(\d)-(\d)/g, "$1.$2") // Replace 4-5 with 4.5 + + consola.debug(`Normalized requested: ${normalizedRequested}`) + + // Try exact match after normalization + for (const availableId of availableModelIds) { + if (availableId.toLowerCase() === normalizedRequested) { + consola.info( + `🔄 Model normalized match: '${requestedModel}' -> '${availableId}'`, + ) + return availableId + } + } + + // Try prefix matching + for (const availableId of availableModelIds) { + const normalizedAvailable = availableId.toLowerCase() + + // Check if they start with each other + if ( + normalizedAvailable.startsWith(normalizedRequested) || + normalizedRequested.startsWith(normalizedAvailable) + ) { + consola.info( + `🔄 Model prefix match: '${requestedModel}' -> '${availableId}'`, + ) + return availableId + } + } + + // Try fuzzy matching by comparing main parts + const requestedParts = normalizedRequested.split("-") + for (const availableId of availableModelIds) { + const normalizedAvailable = availableId.toLowerCase() + const availableParts = normalizedAvailable.split("-") + + // Match by comparing first N-1 parts (everything except version) + if (requestedParts.length >= 3 && availableParts.length >= 3) { + const requestedBase = requestedParts.slice(0, -1).join("-") + const availableBase = availableParts.slice(0, -1).join("-") + + if (requestedBase === availableBase) { + consola.info( + `🔄 Model base match: '${requestedModel}' -> '${availableId}'`, + ) + return availableId + } + } + } + + consola.debug(`No match found for: ${requestedModel}`) + return null +} + +/** + * Validate and potentially replace the requested model + * Returns the validated model ID or throws/returns error info + */ +export function validateAndReplaceModel(requestedModel: string): { + success: boolean + model?: string + error?: { + message: string + code: string + param: string + type: string + } +} { + const availableModels = state.models?.data.filter( + (m) => typeof m.capabilities?.limits?.max_context_window_tokens === "number", + ) + const availableModelIds = availableModels?.map((m) => m.id) || [] + + const matchedModel = findMatchingModel(requestedModel) + + if (!matchedModel) { + consola.error(`❌ Model not available: ${requestedModel}`) + consola.error(`Available models: ${availableModelIds.join(", ")}`) + + return { + success: false, + error: { + message: `The requested model '${requestedModel}' is not supported. Available models: ${availableModelIds.join(", ")}`, + code: "model_not_supported", + param: "model", + type: "invalid_request_error", + }, + } + } + + if (matchedModel !== requestedModel) { + consola.success( + `✓ Model matched and replaced: ${requestedModel} -> ${matchedModel}`, + ) + } else { + consola.success(`✓ Model validated: ${matchedModel}`) + } + + return { + success: true, + model: matchedModel, + } +} diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index 04a5ae9e..faa67791 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -4,6 +4,7 @@ import consola from "consola" import { streamSSE, type SSEMessage } from "hono/streaming" import { awaitApproval } from "~/lib/approval" +import { validateAndReplaceModel } from "~/lib/model-matcher" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" @@ -20,6 +21,19 @@ export async function handleCompletion(c: Context) { let payload = await c.req.json() consola.debug("Request payload:", JSON.stringify(payload).slice(-400)) + // Log the requested model + consola.info(`Requested model: ${payload.model}`) + + // Validate and potentially replace model + const validation = validateAndReplaceModel(payload.model) + + if (!validation.success) { + return c.json({ error: validation.error }, 400) + } + + // Replace model if a match was found + payload.model = validation.model! + // Find the selected model const selectedModel = state.models?.data.find( (model) => model.id === payload.model, diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 85dbf624..19c61a47 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -4,6 +4,7 @@ import consola from "consola" import { streamSSE } from "hono/streaming" import { awaitApproval } from "~/lib/approval" +import { validateAndReplaceModel } from "~/lib/model-matcher" import { checkRateLimit } from "~/lib/rate-limit" import { state } from "~/lib/state" import { @@ -34,6 +35,19 @@ export async function handleCompletion(c: Context) { JSON.stringify(openAIPayload), ) + // Log the requested model + consola.info(`Requested model: ${openAIPayload.model}`) + + // Validate and potentially replace model + const validation = validateAndReplaceModel(openAIPayload.model) + + if (!validation.success) { + return c.json({ error: validation.error }, 400) + } + + // Replace model if a match was found + openAIPayload.model = validation.model! + if (state.manualApprove) { await awaitApproval() } diff --git a/src/routes/models/route.ts b/src/routes/models/route.ts index 5254e2af..a3ce0110 100644 --- a/src/routes/models/route.ts +++ b/src/routes/models/route.ts @@ -3,6 +3,7 @@ import { Hono } from "hono" import { forwardError } from "~/lib/error" import { state } from "~/lib/state" import { cacheModels } from "~/lib/utils" +import modelConsumptionData from "~/lib/model-consumption.json" export const modelRoutes = new Hono() @@ -13,15 +14,39 @@ modelRoutes.get("/", async (c) => { await cacheModels() } - const models = state.models?.data.map((model) => ({ - id: model.id, - object: "model", - type: "model", - created: 0, // No date available from source - created_at: new Date(0).toISOString(), // No date available from source - owned_by: model.vendor, - display_name: model.name, - })) + // Create a map for quick consumption lookup + const consumptionMap = new Map( + modelConsumptionData.models.map((m) => [m.name, m.consumption]), + ) + + // Helper function to convert consumption string to number for sorting + const consumptionToNumber = (consumption: string): number => { + if (consumption === "N/A") return 999 // Put N/A at the end + const match = consumption.match(/^([\d.]+)x$/) + return match ? Number.parseFloat(match[1]) : 999 + } + + // Filter to only include models with context window information (Available models) + const models = state.models?.data + .filter((model) => { + const maxTokens = model.capabilities?.limits?.max_context_window_tokens + return typeof maxTokens === "number" + }) + .map((model) => ({ + model, + consumption: consumptionMap.get(model.name) || "N/A", + })) + .sort((a, b) => consumptionToNumber(a.consumption) - consumptionToNumber(b.consumption)) + .map((item) => ({ + id: item.model.id, + object: "model", + type: "model", + created: 0, // No date available from source + created_at: new Date(0).toISOString(), // No date available from source + owned_by: item.model.vendor, + display_name: item.model.name, + max_context_length: item.model.capabilities?.limits?.max_context_window_tokens, + })) return c.json({ object: "list", diff --git a/src/server.ts b/src/server.ts index 462a278f..ce897ed6 100644 --- a/src/server.ts +++ b/src/server.ts @@ -27,5 +27,10 @@ server.route("/v1/chat/completions", completionRoutes) server.route("/v1/models", modelRoutes) server.route("/v1/embeddings", embeddingRoutes) +// Compatibility with tools that expect api/v0/ prefix +server.route("/api/v0/models", modelRoutes) +server.route("/api/v0/chat/completions", completionRoutes) +server.route("/api/v0/embeddings", embeddingRoutes) + // Anthropic compatible endpoints server.route("/v1/messages", messageRoutes) diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts index 8534151d..94966946 100644 --- a/src/services/copilot/create-chat-completions.ts +++ b/src/services/copilot/create-chat-completions.ts @@ -35,7 +35,29 @@ export const createChatCompletions = async ( }) if (!response.ok) { - consola.error("Failed to create chat completions", response) + const errorBody = await response.text() + consola.error(`Failed to create chat completions for model: ${payload.model}`) + consola.error(`Response status: ${response.status} ${response.statusText}`) + consola.error(`Response body: ${errorBody}`) + + // Try to parse error details + try { + const errorJson = JSON.parse(errorBody) + if (errorJson.error?.message) { + consola.error(`Error message: ${errorJson.error.message}`) + + // If model not supported, list available models + if (errorJson.error.code === "model_not_supported") { + const availableModels = state.models?.data + .filter((m) => typeof m.capabilities?.limits?.max_context_window_tokens === "number") + .map((m) => m.id) + consola.error(`Available models: ${availableModels?.join(", ")}`) + } + } + } catch { + // If parsing fails, we already logged the raw body + } + throw new HTTPError("Failed to create chat completions", response) } diff --git a/src/services/copilot/get-models.ts b/src/services/copilot/get-models.ts index 3cfa30af..42d96b3f 100644 --- a/src/services/copilot/get-models.ts +++ b/src/services/copilot/get-models.ts @@ -9,7 +9,12 @@ export const getModels = async () => { if (!response.ok) throw new HTTPError("Failed to get models", response) - return (await response.json()) as ModelsResponse + const result = await response.json() as ModelsResponse + result.data = result.data.filter( + (model: any) => + model.model_picker_category !== undefined && model.model_picker_enabled === true + ) + return result } export interface ModelsResponse { @@ -48,6 +53,7 @@ export interface Model { preview: boolean vendor: string version: string + model_picker_category?: string policy?: { state: string terms: string diff --git a/src/start.ts b/src/start.ts index 14abbbdf..38b31b1e 100644 --- a/src/start.ts +++ b/src/start.ts @@ -13,6 +13,7 @@ import { state } from "./lib/state" import { setupCopilotToken, setupGitHubToken } from "./lib/token" import { cacheModels, cacheVSCodeVersion } from "./lib/utils" import { server } from "./server" +import modelConsumptionData from "./lib/model-consumption.json" interface RunServerOptions { port: number @@ -60,8 +61,59 @@ export async function runServer(options: RunServerOptions): Promise { await setupCopilotToken() await cacheModels() + // consola.info( + // `Full Model Info:\n${ + // state.models?.data + // ?.filter(model => model.model_picker_enabled === true) + // .map(model => { + // const { + // capabilities, + // policy, + // vendor, + // preview, + // model_picker_enabled, + // object, + // ...rest } = model; + // let fullInfo = JSON.stringify(rest, null, 2); + // return `- ${model.id}\n${fullInfo}`; + // }).join("\n") + // }` + // ) + + // Create a map for quick consumption lookup + const consumptionMap = new Map( + modelConsumptionData.models.map(m => [m.name, m.consumption]) + ); + + // Helper function to convert consumption string to number for sorting + const consumptionToNumber = (consumption: string): number => { + if (consumption === "N/A") return 999; // Put N/A at the end + const match = consumption.match(/^([\d.]+)x$/); + return match ? Number.parseFloat(match[1]) : 999; + }; + consola.info( - `Available models: \n${state.models?.data.map((model) => `- ${model.id}`).join("\n")}`, + `Available models:\n${ + state.models?.data + .map(model => { + let maxTokens = model.capabilities?.limits?.max_context_window_tokens; + let maxTokensStr = "N/A"; + if (typeof maxTokens === "number") { + maxTokensStr = maxTokens >= 1000 ? `${maxTokens / 1000}k` : `${maxTokens}`; + } + const consumption = consumptionMap.get(model.name) || "N/A"; + return { model, maxTokensStr, consumption }; + }) + .filter(item => item.maxTokensStr !== "N/A") + .sort((a, b) => consumptionToNumber(a.consumption) - consumptionToNumber(b.consumption)) + .map((item, i) => { + const consumptionStr = `(${item.consumption})`.padEnd(8, " "); + const idStr = item.model.id.padEnd(24, " "); + const nameStr = item.model.name.padEnd(32, " "); + const contextStr = `context: ${item.maxTokensStr}`; + return `${i + 1}.\t${consumptionStr}${idStr}\t${nameStr}\t${contextStr}`; + }).join("\n") + }` ) const serverUrl = `http://localhost:${options.port}` diff --git a/tsconfig.json b/tsconfig.json index 8ff821ca..5f43fef1 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -6,6 +6,7 @@ "skipLibCheck": true, "allowJs": true, + "resolveJsonModule": true, "moduleResolution": "Bundler", "moduleDetection": "force", "erasableSyntaxOnly": true, From 2ff9a4b9989737781264f861e662ac1a68c34921 Mon Sep 17 00:00:00 2001 From: liutao <821580467@qq.com> Date: Wed, 12 Nov 2025 19:17:38 +0800 Subject: [PATCH 3/7] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=E6=83=85=E5=86=B5=E5=88=B7=E6=96=B0=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=EF=BC=8C=E4=BC=98=E5=8C=96=E6=A8=A1=E5=9E=8B=E5=8C=B9=E9=85=8D?= =?UTF-8?q?=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib/model-matcher.ts | 79 ++++++++++++++++++++++++-- src/lib/refresh-usage.ts | 69 ++++++++++++++++++++++ src/lib/state.ts | 4 ++ src/routes/chat-completions/handler.ts | 4 ++ src/routes/messages/handler.ts | 4 ++ src/start.ts | 13 +++++ 6 files changed, 168 insertions(+), 5 deletions(-) create mode 100644 src/lib/refresh-usage.ts diff --git a/src/lib/model-matcher.ts b/src/lib/model-matcher.ts index 0084906a..795a2d6e 100644 --- a/src/lib/model-matcher.ts +++ b/src/lib/model-matcher.ts @@ -1,11 +1,54 @@ import consola from "consola" +import modelConsumptionData from "./model-consumption.json" import { state } from "./state" +/** + * Get model consumption value + */ +function getModelConsumption(modelName: string): number { + const consumptionMap = new Map( + modelConsumptionData.models.map((m) => [m.name, m.consumption]), + ) + const consumption = consumptionMap.get(modelName) || "N/A" + + if (consumption === "N/A") return 999 + const match = consumption.match(/^([\d.]+)x$/) + return match ? Number.parseFloat(match[1]) : 999 +} + +/** + * Check if premium interactions usage is high (>50%) + */ +function isPremiumUsageHigh(): boolean { + if (!state.premiumInteractions) { + return false + } + + const usagePercent = 100 - state.premiumInteractions.percent_remaining + return usagePercent > 50 +} + +/** + * Get all 0x consumption models + */ +function getZeroConsumptionModels(): string[] { + const availableModels = state.models?.data.filter( + (m) => typeof m.capabilities?.limits?.max_context_window_tokens === "number", + ) + + if (!availableModels) return [] + + return availableModels + .filter((m) => getModelConsumption(m.name) === 0) + .map((m) => m.id) +} + /** * Find a matching model from available models * If exact match exists, return it * If no exact match, try to find by prefix (e.g., claude-haiku-4-5-xxx -> claude-haiku-4.5) + * If premium usage >50%, only match to 0x consumption models */ export function findMatchingModel(requestedModel: string): string | null { const availableModels = state.models?.data.filter( @@ -16,16 +59,34 @@ export function findMatchingModel(requestedModel: string): string | null { return null } - const availableModelIds = availableModels.map((m) => m.id) + const highUsage = isPremiumUsageHigh() + const zeroConsumptionModels = highUsage ? getZeroConsumptionModels() : [] + const allAvailableModelIds = availableModels.map((m) => m.id) consola.debug(`Looking for match for: ${requestedModel}`) - consola.debug(`Available models: ${availableModelIds.join(", ")}`) - - // Try exact match first - if (availableModelIds.includes(requestedModel)) { + consola.debug(`All available models: ${allAvailableModelIds.join(", ")}`) + + // Try exact match first (always allow exact match, even if high usage) + if (allAvailableModelIds.includes(requestedModel)) { + // If high usage and model is not 0x, warn but still allow + if (highUsage && !zeroConsumptionModels.includes(requestedModel)) { + consola.warn( + `⚠️ Premium usage >50%, but exact match found: ${requestedModel}`, + ) + } return requestedModel } + // For fuzzy matching when usage is high, only consider 0x models + let availableModelIds = allAvailableModelIds + if (highUsage && zeroConsumptionModels.length > 0) { + consola.info( + `⚠️ Premium usage >50%, restricting fuzzy matching to 0x consumption models`, + ) + availableModelIds = zeroConsumptionModels + consola.debug(`0x models for matching: ${availableModelIds.join(", ")}`) + } + // Normalize the requested model // 1. Replace underscores with hyphens // 2. Remove date suffix (8 digits at the end) @@ -84,6 +145,14 @@ export function findMatchingModel(requestedModel: string): string | null { } } + // Fallback: if high usage and no match found, use first 0x model + if (highUsage && zeroConsumptionModels.length > 0) { + consola.warn( + `⚠️ No matching 0x model found, falling back to: ${zeroConsumptionModels[0]}`, + ) + return zeroConsumptionModels[0] + } + consola.debug(`No match found for: ${requestedModel}`) return null } diff --git a/src/lib/refresh-usage.ts b/src/lib/refresh-usage.ts new file mode 100644 index 00000000..eef4fddc --- /dev/null +++ b/src/lib/refresh-usage.ts @@ -0,0 +1,69 @@ +import consola from "consola" + +import { state } from "./state" +import { getCopilotUsage } from "~/services/github/get-copilot-usage" + +// Cache configuration +const USAGE_CACHE_TTL_MS = 60 * 1000 // 1 minute cache +let lastUsageFetchTime = 0 +let isFetching = false + +/** + * Refresh premium interactions usage information + * with cache to avoid excessive API calls + */ +export async function refreshUsage(): Promise { + const now = Date.now() + + // Check if cache is still valid + if (now - lastUsageFetchTime < USAGE_CACHE_TTL_MS) { + consola.debug( + `Using cached usage info (cached ${Math.floor((now - lastUsageFetchTime) / 1000)}s ago)`, + ) + return + } + + // Prevent concurrent fetches + if (isFetching) { + consola.debug("Usage fetch already in progress, skipping") + return + } + + try { + isFetching = true + consola.debug("Fetching latest usage information...") + + const usage = await getCopilotUsage() + state.premiumInteractions = usage.quota_snapshots.premium_interactions + + lastUsageFetchTime = now + + const usagePercent = 100 - state.premiumInteractions.percent_remaining + consola.debug( + `✓ Usage refreshed: ${usagePercent.toFixed(1)}% (${state.premiumInteractions.remaining}/${state.premiumInteractions.entitlement} remaining)`, + ) + } catch (error) { + consola.warn("Failed to refresh usage information:", error) + // Continue with existing state - don't block the main flow + } finally { + isFetching = false + } +} + +/** + * Force refresh usage (bypass cache) + */ +export async function forceRefreshUsage(): Promise { + lastUsageFetchTime = 0 + await refreshUsage() +} + +/** + * Get current usage percentage + */ +export function getCurrentUsagePercent(): number | null { + if (!state.premiumInteractions) { + return null + } + return 100 - state.premiumInteractions.percent_remaining +} diff --git a/src/lib/state.ts b/src/lib/state.ts index 5ba4dc1d..b38f83f2 100644 --- a/src/lib/state.ts +++ b/src/lib/state.ts @@ -1,4 +1,5 @@ import type { ModelsResponse } from "~/services/copilot/get-models" +import type { QuotaDetail } from "~/services/github/get-copilot-usage" export interface State { githubToken?: string @@ -15,6 +16,9 @@ export interface State { // Rate limiting configuration rateLimitSeconds?: number lastRequestTimestamp?: number + + // Usage information + premiumInteractions?: QuotaDetail } export const state: State = { diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts index faa67791..b503ae5d 100644 --- a/src/routes/chat-completions/handler.ts +++ b/src/routes/chat-completions/handler.ts @@ -6,6 +6,7 @@ import { streamSSE, type SSEMessage } from "hono/streaming" import { awaitApproval } from "~/lib/approval" import { validateAndReplaceModel } from "~/lib/model-matcher" import { checkRateLimit } from "~/lib/rate-limit" +import { refreshUsage } from "~/lib/refresh-usage" import { state } from "~/lib/state" import { getTokenCount } from "~/lib/tokenizer" import { isNullish } from "~/lib/utils" @@ -16,6 +17,9 @@ import { } from "~/services/copilot/create-chat-completions" export async function handleCompletion(c: Context) { + // Refresh usage information before processing request + await refreshUsage() + await checkRateLimit(state) let payload = await c.req.json() diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts index 19c61a47..594aa094 100644 --- a/src/routes/messages/handler.ts +++ b/src/routes/messages/handler.ts @@ -6,6 +6,7 @@ import { streamSSE } from "hono/streaming" import { awaitApproval } from "~/lib/approval" import { validateAndReplaceModel } from "~/lib/model-matcher" import { checkRateLimit } from "~/lib/rate-limit" +import { refreshUsage } from "~/lib/refresh-usage" import { state } from "~/lib/state" import { createChatCompletions, @@ -24,6 +25,9 @@ import { import { translateChunkToAnthropicEvents } from "./stream-translation" export async function handleCompletion(c: Context) { + // Refresh usage information before processing request + await refreshUsage() + await checkRateLimit(state) const anthropicPayload = await c.req.json() diff --git a/src/start.ts b/src/start.ts index 38b31b1e..bc03521b 100644 --- a/src/start.ts +++ b/src/start.ts @@ -61,6 +61,19 @@ export async function runServer(options: RunServerOptions): Promise { await setupCopilotToken() await cacheModels() + // Initial fetch of usage information for premium interactions tracking + const { forceRefreshUsage, getCurrentUsagePercent } = await import( + "./lib/refresh-usage" + ) + await forceRefreshUsage() + + const usagePercent = getCurrentUsagePercent() + if (usagePercent !== null && state.premiumInteractions) { + consola.info( + `Premium interactions usage: ${usagePercent.toFixed(1)}% (${state.premiumInteractions.remaining}/${state.premiumInteractions.entitlement} remaining)`, + ) + } + // consola.info( // `Full Model Info:\n${ // state.models?.data From a4bfefa0ce795ca4bd8a327e5c182ce2b8257542 Mon Sep 17 00:00:00 2001 From: liutao <821580467@qq.com> Date: Fri, 14 Nov 2025 14:24:27 +0800 Subject: [PATCH 4/7] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E6=B6=88=E8=80=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib/model-consumption.json | 59 +++++++++------------------------- 1 file changed, 16 insertions(+), 43 deletions(-) diff --git a/src/lib/model-consumption.json b/src/lib/model-consumption.json index 3db475ee..ed730485 100644 --- a/src/lib/model-consumption.json +++ b/src/lib/model-consumption.json @@ -1,44 +1,17 @@ { - "models": [ - { - "name": "GPT-4.1", - "consumption": "0x" - }, - { - "name": "GPT-4o", - "consumption": "0x" - }, - { - "name": "GPT-5 mini", - "consumption": "0x" - }, - { - "name": "Grok Code Fast 1", - "consumption": "0x" - }, - { - "name": "Claude Haiku 4.5", - "consumption": "0.33x" - }, - { - "name": "Claude Sonnet 4", - "consumption": "1x" - }, - { - "name": "Claude Sonnet 4.5", - "consumption": "1x" - }, - { - "name": "Gemini 2.5 Pro", - "consumption": "1x" - }, - { - "name": "GPT-5", - "consumption": "1x" - }, - { - "name": "GPT-5-Codex (Preview)", - "consumption": "1x" - } - ] -} +"models": [ +{"name": "GPT-4.1", "consumption": "0x"}, +{"name": "GPT-4o", "consumption": "0x"}, +{"name": "GPT-5 mini", "consumption": "0x"}, +{"name": "Grok Code Fast 1", "consumption": "0x"}, +{"name": "Claude Haiku 4.5", "consumption": "0.33x"}, +{"name": "Claude Sonnet 4", "consumption": "1x"}, +{"name": "Claude Sonnet 4.5", "consumption": "1x"}, +{"name": "Gemini 2.5 Pro", "consumption": "1x"}, +{"name": "GPT-5", "consumption": "1x"}, +{"name": "GPT-5-Codex (Preview)", "consumption": "1x"}, +{"name": "GPT-5.1", "consumption": "1x"}, +{"name": "GPT-5.1-Codex", "consumption": "1x"}, +{"name": "GPT-5.1-Codex-Mini", "consumption": "0.33x"} +] +} \ No newline at end of file From f4e1eb88b15f9bacbc3f3cbc62c8bb97460704bb Mon Sep 17 00:00:00 2001 From: liutao <821580467@qq.com> Date: Fri, 14 Nov 2025 22:17:30 +0800 Subject: [PATCH 5/7] =?UTF-8?q?=E7=A7=BB=E9=99=A4=E7=B2=BE=E7=A1=AE?= =?UTF-8?q?=E5=8C=B9=E9=85=8D=E5=8F=AF=E4=BB=A5=E8=B7=B3=E8=BF=87=E9=99=90?= =?UTF-8?q?=E5=88=B6=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib/model-matcher.ts | 167 +++++++++++++++++++++++++++------------ 1 file changed, 118 insertions(+), 49 deletions(-) diff --git a/src/lib/model-matcher.ts b/src/lib/model-matcher.ts index 795a2d6e..ebf8894f 100644 --- a/src/lib/model-matcher.ts +++ b/src/lib/model-matcher.ts @@ -4,162 +4,225 @@ import modelConsumptionData from "./model-consumption.json" import { state } from "./state" /** - * Get model consumption value + * 获取模型消耗值 + * 从配置文件中查询指定模型的消耗系数 + * + * @param modelName - 模型名称 + * @returns 消耗系数(如 1.0, 2.0 等),未找到或解析失败返回 999 + * + * @example + * getModelConsumption("claude-3.5-sonnet") // 返回 1.0 + * getModelConsumption("gpt-4") // 返回 2.0 + * getModelConsumption("unknown-model") // 返回 999 */ function getModelConsumption(modelName: string): number { + // 将模型消耗数据转换为 Map 结构,方便快速查询 const consumptionMap = new Map( modelConsumptionData.models.map((m) => [m.name, m.consumption]), ) + + // 获取消耗值,未找到则返回 "N/A" const consumption = consumptionMap.get(modelName) || "N/A" + // 如果模型不在配置中,返回最大值 999 if (consumption === "N/A") return 999 + + // 解析消耗值(格式如 "1.0x", "2.5x") const match = consumption.match(/^([\d.]+)x$/) return match ? Number.parseFloat(match[1]) : 999 } /** - * Check if premium interactions usage is high (>50%) + * 检查高级交互配额使用率是否过高(超过 50%) + * + * 当高级模型使用率超过 50% 时,系统会倾向于使用 0x 消耗的模型, + * 以避免快速耗尽配额 + * + * @returns true 表示使用率 >50%,false 表示使用率 ≤50% 或未初始化 */ function isPremiumUsageHigh(): boolean { + // 如果没有高级交互数据,认为使用率不高 if (!state.premiumInteractions) { return false } + // 计算已使用百分比:100% - 剩余百分比 = 已使用百分比 const usagePercent = 100 - state.premiumInteractions.percent_remaining return usagePercent > 50 } /** - * Get all 0x consumption models + * 获取所有 0x 消耗的模型列表 + * + * 这些模型不计入高级交互配额,可以在配额紧张时优先使用 + * + * @returns 0x 消耗模型的 ID 数组 */ function getZeroConsumptionModels(): string[] { + // 筛选出有效的可用模型(必须有上下文窗口限制配置) const availableModels = state.models?.data.filter( (m) => typeof m.capabilities?.limits?.max_context_window_tokens === "number", ) if (!availableModels) return [] + // 过滤出消耗值为 0 的模型,返回其 ID return availableModels .filter((m) => getModelConsumption(m.name) === 0) .map((m) => m.id) } /** - * Find a matching model from available models - * If exact match exists, return it - * If no exact match, try to find by prefix (e.g., claude-haiku-4-5-xxx -> claude-haiku-4.5) - * If premium usage >50%, only match to 0x consumption models + * 从可用模型中查找匹配的模型 + * + * 匹配策略: + * 1. 标准化匹配:标准化后的模型名称匹配(下划线转连字符、版本号格式化) + * 2. 前缀匹配:模型名称前缀匹配 + * 3. 基础名称匹配:忽略版本号后的基础名称匹配 + * + * 配额保护: + * - 当高级交互使用率 >50% 时,所有匹配仅限于 0x 消耗模型 + * - 无匹配时降级到第一个 0x 模型 + * + * @param requestedModel - 请求的模型标识符 + * @returns 匹配的模型 ID,未找到返回 null + * + * @example + * findMatchingModel("claude-3-5-sonnet") // 返回 "claude-3.5-sonnet" + * findMatchingModel("gpt-4-20240101") // 返回 "gpt-4" */ export function findMatchingModel(requestedModel: string): string | null { + // 获取所有有效的可用模型(必须配置了上下文窗口限制) const availableModels = state.models?.data.filter( (m) => typeof m.capabilities?.limits?.max_context_window_tokens === "number", ) + // 如果没有可用模型,直接返回 null if (!availableModels || availableModels.length === 0) { return null } + // 检查是否处于高使用率状态 const highUsage = isPremiumUsageHigh() + + // 如果使用率高,获取 0x 消耗模型列表用于限制匹配范围 const zeroConsumptionModels = highUsage ? getZeroConsumptionModels() : [] + + // 提取所有可用模型的 ID const allAvailableModelIds = availableModels.map((m) => m.id) - consola.debug(`Looking for match for: ${requestedModel}`) - consola.debug(`All available models: ${allAvailableModelIds.join(", ")}`) + consola.debug(`正在查找匹配模型:${requestedModel}`) + consola.debug(`所有可用模型:${allAvailableModelIds.join(", ")}`) - // Try exact match first (always allow exact match, even if high usage) - if (allAvailableModelIds.includes(requestedModel)) { - // If high usage and model is not 0x, warn but still allow - if (highUsage && !zeroConsumptionModels.includes(requestedModel)) { - consola.warn( - `⚠️ Premium usage >50%, but exact match found: ${requestedModel}`, - ) - } - return requestedModel - } - - // For fuzzy matching when usage is high, only consider 0x models + // ========== 配额保护:高使用率时限制模糊匹配范围 ========== let availableModelIds = allAvailableModelIds if (highUsage && zeroConsumptionModels.length > 0) { consola.info( - `⚠️ Premium usage >50%, restricting fuzzy matching to 0x consumption models`, + `⚠️ 高级交互使用率 >50%,模糊匹配仅限 0x 消耗模型`, ) availableModelIds = zeroConsumptionModels - consola.debug(`0x models for matching: ${availableModelIds.join(", ")}`) + consola.debug(`用于匹配的 0x 模型:${availableModelIds.join(", ")}`) } - // Normalize the requested model - // 1. Replace underscores with hyphens - // 2. Remove date suffix (8 digits at the end) - // 3. Replace version numbers: 4-5 -> 4.5 + // ========== 标准化处理:统一模型名称格式 ========== + // 1. 转换为小写 + // 2. 下划线转连字符(claude_3_5 -> claude-3-5) + // 3. 移除日期后缀(-20251001 等 8 位数字) + // 4. 版本号格式化(4-5 -> 4.5) let normalizedRequested = requestedModel .toLowerCase() - .replace(/_/g, "-") - .replace(/-(\d{8})$/, "") // Remove -20251001 style suffix - .replace(/(\d)-(\d)/g, "$1.$2") // Replace 4-5 with 4.5 + .replace(/_/g, "-") // 下划线转连字符 + .replace(/-(\d{8})$/, "") // 移除 -20251001 风格的日期后缀 + .replace(/(\d)-(\d)/g, "$1.$2") // 版本号:4-5 -> 4.5 - consola.debug(`Normalized requested: ${normalizedRequested}`) + consola.debug(`标准化后的请求模型:${normalizedRequested}`) - // Try exact match after normalization + // ========== 策略 1:标准化后精确匹配 ========== for (const availableId of availableModelIds) { if (availableId.toLowerCase() === normalizedRequested) { consola.info( - `🔄 Model normalized match: '${requestedModel}' -> '${availableId}'`, + `🔄 标准化匹配成功:'${requestedModel}' -> '${availableId}'`, ) return availableId } } - // Try prefix matching + // ========== 策略 2:前缀匹配 ========== + // 检查请求的模型和可用模型是否有前缀关系 + // 例如:claude-3.5 可以匹配 claude-3.5-sonnet-20241022 for (const availableId of availableModelIds) { const normalizedAvailable = availableId.toLowerCase() - // Check if they start with each other + // 双向前缀检查:请求模型是可用模型的前缀,或可用模型是请求模型的前缀 if ( normalizedAvailable.startsWith(normalizedRequested) || normalizedRequested.startsWith(normalizedAvailable) ) { consola.info( - `🔄 Model prefix match: '${requestedModel}' -> '${availableId}'`, + `🔄 前缀匹配成功:'${requestedModel}' -> '${availableId}'`, ) return availableId } } - // Try fuzzy matching by comparing main parts + // ========== 策略 3:基础名称匹配(忽略版本号) ========== + // 将模型名称按 "-" 分割,比较除最后一部分外的所有部分 + // 例如:claude-3-5-sonnet-v2 和 claude-3-5-sonnet-v1 的基础名称都是 claude-3-5-sonnet const requestedParts = normalizedRequested.split("-") for (const availableId of availableModelIds) { const normalizedAvailable = availableId.toLowerCase() const availableParts = normalizedAvailable.split("-") - // Match by comparing first N-1 parts (everything except version) + // 只对至少有 3 个部分的模型名称进行基础匹配(避免过于宽泛) if (requestedParts.length >= 3 && availableParts.length >= 3) { + // 提取基础名称(去掉最后一个部分,通常是版本号或日期) const requestedBase = requestedParts.slice(0, -1).join("-") const availableBase = availableParts.slice(0, -1).join("-") if (requestedBase === availableBase) { consola.info( - `🔄 Model base match: '${requestedModel}' -> '${availableId}'`, + `🔄 基础名称匹配成功:'${requestedModel}' -> '${availableId}'`, ) return availableId } } } - // Fallback: if high usage and no match found, use first 0x model + // ========== 降级策略:使用率高时降级到第一个 0x 模型 ========== if (highUsage && zeroConsumptionModels.length > 0) { consola.warn( - `⚠️ No matching 0x model found, falling back to: ${zeroConsumptionModels[0]}`, + `⚠️ 未找到匹配的 0x 模型,降级到:${zeroConsumptionModels[0]}`, ) return zeroConsumptionModels[0] } - consola.debug(`No match found for: ${requestedModel}`) + // 所有策略都失败,返回 null + consola.debug(`未找到匹配模型:${requestedModel}`) return null } /** - * Validate and potentially replace the requested model - * Returns the validated model ID or throws/returns error info + * 验证并替换请求的模型 + * + * 该函数是模型匹配的主要入口点,负责: + * 1. 调用 findMatchingModel 查找匹配的模型 + * 2. 验证模型是否可用 + * 3. 返回验证结果或错误信息 + * + * @param requestedModel - 用户请求的模型标识符 + * @returns 包含验证结果的对象 + * - success: true 表示验证成功,false 表示失败 + * - model: 匹配的模型 ID(成功时) + * - error: 错误详情(失败时) + * + * @example + * // 成功匹配 + * validateAndReplaceModel("claude-3-5-sonnet") + * // 返回:{ success: true, model: "claude-3.5-sonnet" } + * + * // 匹配失败 + * validateAndReplaceModel("unknown-model") + * // 返回:{ success: false, error: { ... } } */ export function validateAndReplaceModel(requestedModel: string): { success: boolean @@ -171,21 +234,24 @@ export function validateAndReplaceModel(requestedModel: string): { type: string } } { + // 获取所有有效的可用模型列表 const availableModels = state.models?.data.filter( (m) => typeof m.capabilities?.limits?.max_context_window_tokens === "number", ) const availableModelIds = availableModels?.map((m) => m.id) || [] + // 尝试查找匹配的模型 const matchedModel = findMatchingModel(requestedModel) + // ========== 验证失败:未找到匹配的模型 ========== if (!matchedModel) { - consola.error(`❌ Model not available: ${requestedModel}`) - consola.error(`Available models: ${availableModelIds.join(", ")}`) + consola.error(`❌ 模型不可用:${requestedModel}`) + consola.error(`可用模型列表:${availableModelIds.join(", ")}`) return { success: false, error: { - message: `The requested model '${requestedModel}' is not supported. Available models: ${availableModelIds.join(", ")}`, + message: `请求的模型 '${requestedModel}' 不受支持。可用模型:${availableModelIds.join(", ")}`, code: "model_not_supported", param: "model", type: "invalid_request_error", @@ -193,12 +259,15 @@ export function validateAndReplaceModel(requestedModel: string): { } } + // ========== 验证成功:记录结果 ========== if (matchedModel !== requestedModel) { + // 模型被替换(通过模糊匹配找到) consola.success( - `✓ Model matched and replaced: ${requestedModel} -> ${matchedModel}`, + `✓ 模型匹配并替换:${requestedModel} -> ${matchedModel}`, ) } else { - consola.success(`✓ Model validated: ${matchedModel}`) + // 精确匹配 + consola.success(`✓ 模型验证通过:${matchedModel}`) } return { From 62a73b70178df06d098731a845db2525b861efb8 Mon Sep 17 00:00:00 2001 From: liutao <821580467@qq.com> Date: Mon, 24 Nov 2025 19:32:16 +0800 Subject: [PATCH 6/7] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=B6=88=E8=80=97?= =?UTF-8?q?=E4=BF=A1=E6=81=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib/model-consumption.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lib/model-consumption.json b/src/lib/model-consumption.json index ed730485..cb1c570f 100644 --- a/src/lib/model-consumption.json +++ b/src/lib/model-consumption.json @@ -12,6 +12,8 @@ {"name": "GPT-5-Codex (Preview)", "consumption": "1x"}, {"name": "GPT-5.1", "consumption": "1x"}, {"name": "GPT-5.1-Codex", "consumption": "1x"}, -{"name": "GPT-5.1-Codex-Mini", "consumption": "0.33x"} +{"name": "GPT-5.1-Codex-Mini", "consumption": "0.33x"}, +{"name": "Gemini 3 Pro (Preview)", "consumption": "1x"}, +{"name": "Raptor mini (Preview)", "consumption": "0x"} ] } \ No newline at end of file From 61326bde5ac27a8ba85d976bdab92fff764ade8f Mon Sep 17 00:00:00 2001 From: liutao <821580467@qq.com> Date: Fri, 5 Dec 2025 19:41:54 +0800 Subject: [PATCH 7/7] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E6=B6=88=E8=80=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lib/model-consumption.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lib/model-consumption.json b/src/lib/model-consumption.json index cb1c570f..98a406d1 100644 --- a/src/lib/model-consumption.json +++ b/src/lib/model-consumption.json @@ -14,6 +14,8 @@ {"name": "GPT-5.1-Codex", "consumption": "1x"}, {"name": "GPT-5.1-Codex-Mini", "consumption": "0.33x"}, {"name": "Gemini 3 Pro (Preview)", "consumption": "1x"}, -{"name": "Raptor mini (Preview)", "consumption": "0x"} +{"name": "Raptor mini (Preview)", "consumption": "0x"}, +{"name": "GPT-5.1-Codex-Max (Preview)", "consumption": "1x"}, +{"name": "Claude Opus 4.5 (Preview)", "consumption": "1x"} ] } \ No newline at end of file