From 9738d78f4f7be2e56e486ef10e3e52910d5fff61 Mon Sep 17 00:00:00 2001 From: Jacob Chung Date: Wed, 26 Nov 2025 12:29:29 -0800 Subject: [PATCH 1/3] feat: use dynamic token limits from listAvailableModels API --- .../agenticChat/agenticChatController.test.ts | 152 ++++++++++++++---- .../agenticChat/agenticChatController.ts | 88 +++++++--- .../agenticChat/constants/constants.ts | 7 - .../agenticChat/constants/modelSelection.ts | 7 +- .../utils/tokenLimitsCalculator.test.ts | 150 +++++++++++++++++ .../utils/tokenLimitsCalculator.ts | 64 ++++++++ .../chat/chatSessionService.ts | 20 +++ 7 files changed, 428 insertions(+), 60 deletions(-) create mode 100644 server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.test.ts create mode 100644 server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.ts diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts index f136047a7c..6004826534 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts @@ -35,6 +35,7 @@ import { ChatUpdateParams, ConnectionMetadata, } from '@aws/language-server-runtimes/server-interface' +import { Model } from '@aws/language-server-runtimes/protocol' import { TestFeatures } from '@aws/language-server-runtimes/testing' import * as assert from 'assert' import { createIterableResponse, setCredentialsForAmazonQTokenServiceManagerFactory } from '../../shared/testUtils' @@ -58,7 +59,8 @@ import { LocalProjectContextController } from '../../shared/localProjectContextC import { CancellationError } from '@aws/lsp-core' import { ToolApprovalException } from './tools/toolShared' import * as constants from './constants/constants' -import { GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT, GENERIC_ERROR_MS } from './constants/constants' +import { GENERIC_ERROR_MS } from './constants/constants' +import { TokenLimitsCalculator } from './utils/tokenLimitsCalculator' import { MISSING_BEARER_TOKEN_ERROR } from '../../shared/constants' import { AmazonQError, @@ -1194,8 +1196,9 @@ describe('AgenticChatController', () => { assert.strictEqual(typedChatResult.body, errorMsg) }) - it('truncate input to 500k character ', async function () { - const input = 'X'.repeat(GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT + 10) + it('truncate input to dynamic input limit', async function () { + const defaultLimits = TokenLimitsCalculator.calculate() + const input = 'X'.repeat(defaultLimits.inputLimit + 10) generateAssistantResponseStub.restore() generateAssistantResponseStub = sinon.stub(CodeWhispererStreaming.prototype, 'generateAssistantResponse') generateAssistantResponseStub.callsFake(() => {}) @@ -1205,7 +1208,7 @@ describe('AgenticChatController', () => { generateAssistantResponseStub.firstCall.firstArg assert.deepStrictEqual( calledRequestInput.conversationState?.currentMessage?.userInputMessage?.content?.length, - GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT + defaultLimits.inputLimit ) }) it('shows generic errorMsg on internal errors', async function () { @@ -1576,23 +1579,27 @@ describe('AgenticChatController', () => { }) }) describe('truncateRequest', () => { + // Use dynamic input limit from TokenLimitsCalculator for all truncation tests + const defaultLimits = TokenLimitsCalculator.calculate() + const inputLimit = 
defaultLimits.inputLimit // 490_000 for default 200K tokens + it('should truncate user input message if exceeds limit', () => { const request: GenerateAssistantResponseCommandInput = { conversationState: { currentMessage: { userInputMessage: { - content: 'a'.repeat(590_000), + content: 'a'.repeat(inputLimit + 100_000), userInputMessageContext: { editorState: { relevantDocuments: [ { relativeFilePath: '', - text: 'a'.repeat(490_000), + text: 'a'.repeat(inputLimit - 10_000), }, ], document: { relativeFilePath: '', - text: 'a'.repeat(490_000), + text: 'a'.repeat(inputLimit - 10_000), }, }, }, @@ -1601,7 +1608,7 @@ describe('AgenticChatController', () => { history: [ { userInputMessage: { - content: 'a'.repeat(490_000), + content: 'a'.repeat(inputLimit - 10_000), }, }, ], @@ -1609,7 +1616,7 @@ describe('AgenticChatController', () => { }, } const result = chatController.truncateRequest(request) - assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.content?.length, 500_000) + assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.content?.length, inputLimit) assert.strictEqual( request.conversationState?.currentMessage?.userInputMessage?.userInputMessageContext?.editorState ?.document?.text?.length || 0, @@ -1641,11 +1648,13 @@ describe('AgenticChatController', () => { }) it('should truncate relevant documents if combined length exceeds remaining budget', () => { + // Use content that leaves room for some docs but not all + const contentLength = 400_000 const request: GenerateAssistantResponseCommandInput = { conversationState: { currentMessage: { userInputMessage: { - content: 'a'.repeat(400_000), + content: 'a'.repeat(contentLength), userInputMessageContext: { editorState: { relevantDocuments: [ @@ -1664,7 +1673,7 @@ describe('AgenticChatController', () => { ], document: { relativeFilePath: '', - text: 'a'.repeat(490_000), + text: 'a'.repeat(inputLimit - 10_000), }, }, }, @@ -1673,7 +1682,7 @@ describe('AgenticChatController', () => { history: [ { userInputMessage: { - content: 'a'.repeat(490_000), + content: 'a'.repeat(inputLimit - 10_000), }, }, ], @@ -1681,7 +1690,10 @@ describe('AgenticChatController', () => { }, } const result = chatController.truncateRequest(request) - assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.content?.length, 400_000) + assert.strictEqual( + request.conversationState?.currentMessage?.userInputMessage?.content?.length, + contentLength + ) assert.strictEqual( request.conversationState?.currentMessage?.userInputMessage?.userInputMessageContext?.editorState ?.document?.text?.length || 0, @@ -1693,7 +1705,8 @@ describe('AgenticChatController', () => { 2 ) assert.strictEqual(request.conversationState?.history?.length || 0, 1) - assert.strictEqual(result, 99700) + // Remaining budget = inputLimit - contentLength - 100 - 200 = 490_000 - 400_000 - 300 = 89_700 + assert.strictEqual(result, inputLimit - contentLength - 100 - 200) }) it('should truncate current editor if combined length exceeds remaining budget', () => { const request: GenerateAssistantResponseCommandInput = { @@ -1756,26 +1769,30 @@ describe('AgenticChatController', () => { assert.strictEqual(request.conversationState?.history?.length || 0, 3) }) it('should return remaining budget for history', () => { + const contentLength = 100_000 + const docLength = 100_000 + const relevantDoc1Length = 1000 + const relevantDoc2Length = 1000 const request: GenerateAssistantResponseCommandInput = { conversationState: { currentMessage: { 
userInputMessage: { - content: 'a'.repeat(100_000), + content: 'a'.repeat(contentLength), userInputMessageContext: { editorState: { relevantDocuments: [ { relativeFilePath: '', - text: 'a'.repeat(1000), + text: 'a'.repeat(relevantDoc1Length), }, { relativeFilePath: '', - text: 'a'.repeat(1000), + text: 'a'.repeat(relevantDoc2Length), }, ], document: { relativeFilePath: '', - text: 'a'.repeat(100_000), + text: 'a'.repeat(docLength), }, }, }, @@ -1802,11 +1819,14 @@ describe('AgenticChatController', () => { }, } const result = chatController.truncateRequest(request) - assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.content?.length, 100_000) + assert.strictEqual( + request.conversationState?.currentMessage?.userInputMessage?.content?.length, + contentLength + ) assert.strictEqual( request.conversationState?.currentMessage?.userInputMessage?.userInputMessageContext?.editorState ?.document?.text?.length || 0, - 100_000 + docLength ) assert.strictEqual( request.conversationState?.currentMessage?.userInputMessage?.userInputMessageContext?.editorState @@ -1814,15 +1834,20 @@ describe('AgenticChatController', () => { 2 ) assert.strictEqual(request.conversationState?.history?.length || 0, 3) - assert.strictEqual(result, 298000) + // Remaining budget = inputLimit - contentLength - relevantDoc1Length - relevantDoc2Length - docLength + // = 490_000 - 100_000 - 1000 - 1000 - 100_000 = 288_000 + assert.strictEqual(result, inputLimit - contentLength - relevantDoc1Length - relevantDoc2Length - docLength) }) it('should truncate images when they exceed budget', () => { + // Content that leaves small room for images + const contentLength = inputLimit - 6_600 // Leave room for small images but not large one + const smallImageChars = 3.3 // 1000 bytes * 3.3 / 1000 const request: GenerateAssistantResponseCommandInput = { conversationState: { currentMessage: { userInputMessage: { - content: 'a'.repeat(493_400), + content: 'a'.repeat(contentLength), images: [ { format: 'png', @@ -1852,15 +1877,17 @@ describe('AgenticChatController', () => { // Should only keep the first and third images (small ones) assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.images?.length, 2) - assert.strictEqual(result, 500000 - 493400 - 3.3 - 3.3) // remaining budget after content and images + assert.strictEqual(result, inputLimit - contentLength - smallImageChars - smallImageChars) // remaining budget after content and images }) it('should handle images without bytes', () => { + const contentLength = 400_000 + const smallImageChars = 3.3 // 1000 bytes * 3.3 / 1000 const request: GenerateAssistantResponseCommandInput = { conversationState: { currentMessage: { userInputMessage: { - content: 'a'.repeat(400_000), + content: 'a'.repeat(contentLength), images: [ { format: 'png', @@ -1884,7 +1911,7 @@ describe('AgenticChatController', () => { // Should keep both images since the first one has 0 chars assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.images?.length, 2) - assert.strictEqual(result, 500000 - 400000 - 3.3) // remaining budget after content and second image + assert.strictEqual(result, inputLimit - contentLength - smallImageChars) // remaining budget after content and second image }) it('should truncate relevantDocuments and images together with equal priority', () => { @@ -1925,7 +1952,7 @@ describe('AgenticChatController', () => { 1 ) assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.images?.length, 1) - 
assert.strictEqual(result, 500000 - 400000 - 100 - 3.3) + assert.strictEqual(result, inputLimit - 400000 - 100 - 3.3) }) it('should respect additionalContext order for mixed file and image truncation', () => { @@ -2048,8 +2075,8 @@ describe('AgenticChatController', () => { assert.strictEqual(keptDoc?.relativeFilePath, 'file1.ts') // docs[0] assert.strictEqual(keptDoc?.text, 'a'.repeat(30_000)) - // Remaining budget should be 20.5k (100k - 33k - 30k - 16.5k) - assert.strictEqual(result, 500000 - 400000 - 33000 - 30000 - 16500) + // Remaining budget = inputLimit - 400000 - 33000 - 30000 - 16500 + assert.strictEqual(result, inputLimit - 400000 - 33000 - 30000 - 16500) }) }) @@ -3001,6 +3028,59 @@ ${' '.repeat(8)}} setModelIdStub.restore() }) + + it('should recalculate token limits when model changes', () => { + const mockTabId = 'tab-1' + const initialModelId = 'model-1' + const newModelId = 'model-2' + const setModelIdStub = sinon.stub(ChatDatabase.prototype, 'setModelId') + + // Mock getCachedModels to return models with different token limits + const cachedModels: Model[] = [ + { + id: 'model-1', + name: 'Model 1', + description: 'Test', + tokenLimits: { maxInputTokens: 200000 }, + }, + { + id: 'model-2', + name: 'Model 2', + description: 'Test', + tokenLimits: { maxInputTokens: 300000 }, + }, + ] + const getCachedModelsStub = sinon.stub(ChatDatabase.prototype, 'getCachedModels').returns({ + models: cachedModels, + defaultModelId: 'model-1', + timestamp: Date.now(), + }) + + // Create a session and set initial model + chatController.onTabAdd({ tabId: mockTabId }) + const session = chatSessionManagementService.getSession(mockTabId).data! + session.modelId = initialModelId + + // Get initial token limits (default 200K) + const initialLimits = session.tokenLimits + assert.strictEqual(initialLimits.maxInputTokens, 200000) + + // Switch to a model with different token limits + chatController.onPromptInputOptionChange({ + tabId: mockTabId, + optionsValues: { 'model-selection': newModelId }, + }) + + // Verify token limits were recalculated based on new model's maxInputTokens (300K) + const newLimits = session.tokenLimits + assert.strictEqual(newLimits.maxInputTokens, 300000) + assert.strictEqual(newLimits.maxOverallCharacters, Math.floor(300000 * 3.5)) + assert.strictEqual(newLimits.inputLimit, Math.floor(0.7 * newLimits.maxOverallCharacters)) + assert.strictEqual(newLimits.compactionThreshold, Math.floor(0.7 * newLimits.maxOverallCharacters)) + + setModelIdStub.restore() + getCachedModelsStub.restore() + }) }) describe('onListAvailableModels', () => { @@ -3137,11 +3217,13 @@ ${' '.repeat(8)}} modelId: 'claude-3-sonnet', modelName: 'Claude 3 Sonnet', description: 'Advanced AI model', + tokenLimits: { maxInputTokens: 200000 }, }, 'claude-4-sonnet': { modelId: 'claude-4-sonnet', modelName: 'Claude 4 Sonnet', description: 'Latest AI model', + tokenLimits: { maxInputTokens: 300000 }, }, }, defaultModel: { modelId: 'claude-3-sonnet' }, @@ -3160,8 +3242,18 @@ ${' '.repeat(8)}} assert.strictEqual(result.tabId, mockTabId) assert.strictEqual(result.models.length, 2) assert.deepStrictEqual(result.models, [ - { id: 'claude-3-sonnet', name: 'Claude 3 Sonnet', description: 'Advanced AI model' }, - { id: 'claude-4-sonnet', name: 'Claude 4 Sonnet', description: 'Latest AI model' }, + { + id: 'claude-3-sonnet', + name: 'Claude 3 Sonnet', + description: 'Advanced AI model', + tokenLimits: { maxInputTokens: 200000 }, + }, + { + id: 'claude-4-sonnet', + name: 'Claude 4 Sonnet', + description: 'Latest AI model', + 
tokenLimits: { maxInputTokens: 300000 }, + }, ]) // Verify cache was updated diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts index f9471c153f..a9543d336a 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts @@ -167,6 +167,7 @@ import { ExecuteBash, ExecuteBashParams } from './tools/executeBash' import { ExplanatoryParams, InvokeOutput, ToolApprovalException } from './tools/toolShared' import { validatePathBasic, validatePathExists, validatePaths as validatePathsSync } from './utils/pathValidation' import { calculateModifiedLines } from './utils/fileModificationMetrics' +import { TokenLimitsCalculator } from './utils/tokenLimitsCalculator' import { GrepSearch, SanitizedRipgrepOutput } from './tools/grepSearch' import { FileSearch, FileSearchParams, isFileSearchParams } from './tools/fileSearch' import { FsReplace, FsReplaceParams } from './tools/fsReplace' @@ -175,7 +176,6 @@ import { diffLines } from 'diff' import { GENERIC_ERROR_MS, LOADING_THRESHOLD_MS, - GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT, OUTPUT_LIMIT_EXCEEDS_PARTIAL_MSG, RESPONSE_TIMEOUT_MS, RESPONSE_TIMEOUT_PARTIAL_MSG, @@ -187,8 +187,6 @@ import { DEFAULT_WINDOW_REJECT_SHORTCUT, DEFAULT_MACOS_STOP_SHORTCUT, DEFAULT_WINDOW_STOP_SHORTCUT, - COMPACTION_CHARACTER_THRESHOLD, - MAX_OVERALL_CHARACTERS, FSREAD_MEMORY_BANK_MAX_PER_FILE, FSREAD_MEMORY_BANK_MAX_TOTAL, } from './constants/constants' @@ -726,11 +724,18 @@ export class AgenticChatController implements ChatHandlers { // Wait for the response to be completed before proceeding this.#log('Model Response: ', JSON.stringify(responseResult, null, 2)) if (responseResult.models) { - models = Object.values(responseResult.models).map(({ modelId, modelName, description }) => ({ - id: modelId ?? 'unknown', - name: modelName ?? modelId ?? 'unknown', - description: description ?? '', - })) + models = Object.values(responseResult.models).map( + ({ modelId, modelName, description, tokenLimits }) => ({ + id: modelId ?? 'unknown', + name: modelName ?? modelId ?? 'unknown', + description: description ?? '', + tokenLimits: tokenLimits + ? 
{ + maxInputTokens: tokenLimits.maxInputTokens, + } + : undefined, + }) + ) } defaultModelId = responseResult.defaultModel?.modelId @@ -775,6 +780,14 @@ export class AgenticChatController implements ChatHandlers { // Handle error cases by returning default model if (!success || errorFromAPI) { + // Even in error cases, calculate token limits from the default/fallback model + if (success && session) { + const fallbackModel = models.find(model => model.id === DEFAULT_MODEL_ID) + const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(fallbackModel) + const tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens) + session.setTokenLimits(tokenLimits) + this.#log(`Token limits calculated for fallback model (error case): ${JSON.stringify(tokenLimits)}`) + } return { tabId: params.tabId, models: models, @@ -818,6 +831,15 @@ export class AgenticChatController implements ChatHandlers { // Store the selected model in the session session.modelId = selectedModelId + // Extract maxInputTokens from the selected model and calculate token limits + const selectedModel = models.find(model => model.id === selectedModelId) + const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(selectedModel) + const tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens) + session.setTokenLimits(tokenLimits) + this.#log( + `Token limits calculated for initial model selection (${selectedModelId}): ${JSON.stringify(tokenLimits)}` + ) + return { tabId: params.tabId, models: models, @@ -1195,8 +1217,8 @@ export class AgenticChatController implements ChatHandlers { /** * Runs the compaction, making requests and processing tool uses until completion */ - #shouldCompact(currentRequestCount: number): boolean { - if (currentRequestCount > COMPACTION_CHARACTER_THRESHOLD) { + #shouldCompact(currentRequestCount: number, compactionThreshold: number): boolean { + if (currentRequestCount > compactionThreshold) { this.#debug(`Current request total character count is: ${currentRequestCount}, prompting user to compact`) return true } else { @@ -1396,7 +1418,7 @@ export class AgenticChatController implements ChatHandlers { throw new CancellationError('user') } - this.truncateRequest(currentRequestInput, additionalContext) + this.truncateRequest(currentRequestInput, additionalContext, session.tokenLimits.inputLimit) const currentMessage = currentRequestInput.conversationState?.currentMessage const conversationId = conversationIdentifier ?? '' if (!currentMessage || !conversationId) { @@ -1672,13 +1694,17 @@ export class AgenticChatController implements ChatHandlers { currentRequestInput = this.#updateRequestInputWithToolResults(currentRequestInput, toolResults, content) } - if (this.#shouldCompact(currentRequestCount)) { + if (this.#shouldCompact(currentRequestCount, session.tokenLimits.compactionThreshold)) { this.#telemetryController.emitCompactNudge( currentRequestCount, this.#features.runtime.serverInfo.version ?? 
'' ) const messageId = this.#getMessageIdForCompact(uuid()) - const confirmationResult = this.#processCompactConfirmation(messageId, currentRequestCount) + const confirmationResult = this.#processCompactConfirmation( + messageId, + currentRequestCount, + session.tokenLimits.maxOverallCharacters + ) const cachedButtonBlockId = await chatResultStream.writeResultBlock(confirmationResult) await this.waitForCompactApproval(messageId, chatResultStream, cachedButtonBlockId, session) // Get the compaction request input @@ -1731,10 +1757,17 @@ export class AgenticChatController implements ChatHandlers { * performs truncation of request before sending to backend service. * Returns the remaining character budget for chat history. * @param request + * @param additionalContext + * @param inputLimit - The dynamic input limit from the session's token limits */ - truncateRequest(request: ChatCommandInput, additionalContext?: AdditionalContentEntryAddition[]): number { - // TODO: Confirm if this limit applies to SendMessage and rename this constant - let remainingCharacterBudget = GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT + truncateRequest( + request: ChatCommandInput, + additionalContext?: AdditionalContentEntryAddition[], + inputLimit?: number + ): number { + // Use dynamic inputLimit from session, or fall back to default calculated value + const effectiveInputLimit = inputLimit ?? TokenLimitsCalculator.calculate().inputLimit + let remainingCharacterBudget = effectiveInputLimit if (!request?.conversationState?.currentMessage?.userInputMessage) { return remainingCharacterBudget } @@ -1743,9 +1776,9 @@ export class AgenticChatController implements ChatHandlers { // 1. prioritize user input message let truncatedUserInputMessage = '' if (message) { - if (message.length > GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT) { - this.#debug(`Truncating userInputMessage to ${GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT} characters}`) - truncatedUserInputMessage = message.substring(0, GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT) + if (message.length > effectiveInputLimit) { + this.#debug(`Truncating userInputMessage to ${effectiveInputLimit} characters}`) + truncatedUserInputMessage = message.substring(0, effectiveInputLimit) remainingCharacterBudget = remainingCharacterBudget - truncatedUserInputMessage.length request.conversationState.currentMessage.userInputMessage.content = truncatedUserInputMessage } else { @@ -2826,7 +2859,7 @@ export class AgenticChatController implements ChatHandlers { }) } - #processCompactConfirmation(messageId: string, characterCount: number): ChatResult { + #processCompactConfirmation(messageId: string, characterCount: number, maxOverallCharacters: number): ChatResult { const buttons = [{ id: 'allow-tools', text: 'Allow', icon: 'ok', status: 'clear' }] const header = { icon: 'warning', @@ -2834,7 +2867,7 @@ export class AgenticChatController implements ChatHandlers { body: COMPACTION_HEADER_BODY, buttons, } as any - const body = COMPACTION_BODY(Math.round((characterCount / MAX_OVERALL_CHARACTERS) * 100)) + const body = COMPACTION_BODY(Math.round((characterCount / maxOverallCharacters) * 100)) return { type: 'tool', messageId, @@ -4643,8 +4676,19 @@ export class AgenticChatController implements ChatHandlers { } session.pairProgrammingMode = params.optionsValues['pair-programmer-mode'] === 'true' - session.modelId = params.optionsValues['model-selection'] + const newModelId = params.optionsValues['model-selection'] + + // Recalculate token limits when model changes + if (newModelId && newModelId !== 
session.modelId) { + const cachedData = this.#chatHistoryDb.getCachedModels() + const selectedModel = cachedData?.models?.find(model => model.id === newModelId) + const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(selectedModel) + const tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens) + session.setTokenLimits(tokenLimits) + this.#log(`Token limits calculated for model switch (${newModelId}): ${JSON.stringify(tokenLimits)}`) + } + session.modelId = newModelId this.#chatHistoryDb.setModelId(session.modelId) this.#chatHistoryDb.setPairProgrammingMode(session.pairProgrammingMode) } diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/constants.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/constants.ts index fb710eae9b..901e08964b 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/constants.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/constants.ts @@ -10,14 +10,7 @@ export const RESPONSE_TIMEOUT_MS = 240_000 export const SERVICE_MANAGER_TIMEOUT_MS = 10_000 //10 seconds export const SERVICE_MANAGER_POLL_INTERVAL_MS = 100 -// LLM Constants -export const GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT = 500_000 - // Compaction -// Maximum number of characters per request used for compaction prompt -// 200K tokens * 3.5 = 700K characters, intentionally overestimating with 3.5:1 ratio -export const MAX_OVERALL_CHARACTERS = 700_000 -export const COMPACTION_CHARACTER_THRESHOLD = 0.7 * MAX_OVERALL_CHARACTERS export const COMPACTION_BODY = (threshold: number) => `The context window is almost full (${threshold}%) and exceeding it will clear your history. Amazon Q can compact your history instead.` export const COMPACTION_HEADER_BODY = 'Compact chat history?' diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/modelSelection.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/modelSelection.ts index 9e9927b10c..0bf0ea0693 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/modelSelection.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/modelSelection.ts @@ -10,12 +10,14 @@ export enum BedrockModel { type ModelDetails = { label: string description: string + maxInputTokens: number } export const FALLBACK_MODEL_RECORD: Record = { [BedrockModel.CLAUDE_SONNET_4_20250514_V1_0]: { label: 'Claude Sonnet 4', description: 'Hybrid reasoning and coding for regular use', + maxInputTokens: 200_000, }, } @@ -24,9 +26,12 @@ export const BEDROCK_MODEL_TO_MODEL_ID: Record = { } export const FALLBACK_MODEL_OPTIONS: ListAvailableModelsResult['models'] = Object.entries(FALLBACK_MODEL_RECORD).map( - ([value, { label, description }]) => ({ + ([value, { label, description, maxInputTokens }]) => ({ id: value, name: label, description: description, + tokenLimits: { + maxInputTokens: maxInputTokens, + }, }) ) diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.test.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.test.ts new file mode 100644 index 0000000000..abb0184986 --- /dev/null +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.test.ts @@ -0,0 +1,150 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +import * as assert from 'assert' +import { + TokenLimitsCalculator, + TOKENS_TO_CHARACTERS_RATIO, + INPUT_LIMIT_RATIO, + COMPACTION_THRESHOLD_RATIO, + DEFAULT_MAX_INPUT_TOKENS, +} from './tokenLimitsCalculator' +import { FALLBACK_MODEL_OPTIONS } from '../constants/modelSelection' + +describe('TokenLimitsCalculator', () => { + describe('calculate()', () => { + /** + * **Feature: dynamic-token-limits, Property 1: Character calculation consistency** + * **Validates: Requirements 1.2** + */ + it('should calculate maxOverallCharacters as Math.floor(maxInputTokens * 3.5)', () => { + const testCases = [1, 100, 1000, 200_000, 500_000, 1_000_000] + + for (const maxInputTokens of testCases) { + const result = TokenLimitsCalculator.calculate(maxInputTokens) + const expected = Math.floor(maxInputTokens * TOKENS_TO_CHARACTERS_RATIO) + + assert.strictEqual( + result.maxOverallCharacters, + expected, + `For maxInputTokens=${maxInputTokens}, expected maxOverallCharacters=${expected} but got ${result.maxOverallCharacters}` + ) + } + }) + + /** + * **Feature: dynamic-token-limits, Property 2: Input limit calculation consistency** + * **Validates: Requirements 1.3, 1.4** + */ + it('should calculate inputLimit and compactionThreshold as Math.floor(0.7 * maxOverallCharacters)', () => { + const testCases = [1, 100, 1000, 200_000, 500_000, 1_000_000] + + for (const maxInputTokens of testCases) { + const result = TokenLimitsCalculator.calculate(maxInputTokens) + const expectedInputLimit = Math.floor(INPUT_LIMIT_RATIO * result.maxOverallCharacters) + const expectedCompactionThreshold = Math.floor(COMPACTION_THRESHOLD_RATIO * result.maxOverallCharacters) + + assert.strictEqual( + result.inputLimit, + expectedInputLimit, + `For maxInputTokens=${maxInputTokens}, expected inputLimit=${expectedInputLimit} but got ${result.inputLimit}` + ) + + assert.strictEqual( + result.compactionThreshold, + expectedCompactionThreshold, + `For maxInputTokens=${maxInputTokens}, expected compactionThreshold=${expectedCompactionThreshold} but got ${result.compactionThreshold}` + ) + } + }) + + it('should use DEFAULT_MAX_INPUT_TOKENS when no argument is provided', () => { + const result = TokenLimitsCalculator.calculate() + + assert.strictEqual(result.maxInputTokens, DEFAULT_MAX_INPUT_TOKENS) + assert.strictEqual( + result.maxOverallCharacters, + Math.floor(DEFAULT_MAX_INPUT_TOKENS * TOKENS_TO_CHARACTERS_RATIO) + ) + }) + + it('should return correct default values for 200K tokens', () => { + const result = TokenLimitsCalculator.calculate(200_000) + const expectedMaxOverallCharacters = Math.floor(200_000 * TOKENS_TO_CHARACTERS_RATIO) + const expectedInputLimit = Math.floor(INPUT_LIMIT_RATIO * expectedMaxOverallCharacters) + const expectedCompactionThreshold = Math.floor(COMPACTION_THRESHOLD_RATIO * expectedMaxOverallCharacters) + + assert.strictEqual(result.maxInputTokens, 200_000) + assert.strictEqual(result.maxOverallCharacters, expectedMaxOverallCharacters) + assert.strictEqual(result.inputLimit, expectedInputLimit) + assert.strictEqual(result.compactionThreshold, expectedCompactionThreshold) + }) + }) + + describe('extractMaxInputTokens()', () => { + /** + * **Feature: dynamic-token-limits, Property 3: Default fallback consistency** + * **Validates: Requirements 2.1, 2.3** + */ + it('should return DEFAULT_MAX_INPUT_TOKENS for undefined model', () => { + const result = TokenLimitsCalculator.extractMaxInputTokens(undefined) + assert.strictEqual(result, DEFAULT_MAX_INPUT_TOKENS) + }) + + 
it('should return DEFAULT_MAX_INPUT_TOKENS for model without tokenLimits', () => { + const result = TokenLimitsCalculator.extractMaxInputTokens({}) + assert.strictEqual(result, DEFAULT_MAX_INPUT_TOKENS) + }) + + it('should return DEFAULT_MAX_INPUT_TOKENS for model with undefined tokenLimits', () => { + const result = TokenLimitsCalculator.extractMaxInputTokens({ tokenLimits: undefined }) + assert.strictEqual(result, DEFAULT_MAX_INPUT_TOKENS) + }) + + it('should return DEFAULT_MAX_INPUT_TOKENS for model with tokenLimits but undefined maxInputTokens', () => { + const result = TokenLimitsCalculator.extractMaxInputTokens({ tokenLimits: {} }) + assert.strictEqual(result, DEFAULT_MAX_INPUT_TOKENS) + }) + + it('should return DEFAULT_MAX_INPUT_TOKENS for model with tokenLimits but null maxInputTokens', () => { + const result = TokenLimitsCalculator.extractMaxInputTokens({ + tokenLimits: { maxInputTokens: null as unknown as undefined }, + }) + assert.strictEqual(result, DEFAULT_MAX_INPUT_TOKENS) + }) + + it('should return the actual maxInputTokens when provided', () => { + const result = TokenLimitsCalculator.extractMaxInputTokens({ + tokenLimits: { maxInputTokens: 500_000 }, + }) + assert.strictEqual(result, 500_000) + }) + }) + + describe('FALLBACK_MODEL_OPTIONS', () => { + /** + * Verify FALLBACK_MODEL_OPTIONS includes tokenLimits.maxInputTokens of 200,000 + * **Validates: Requirements 2.4** + */ + it('should include tokenLimits.maxInputTokens of 200,000 for all fallback models', () => { + assert.ok(FALLBACK_MODEL_OPTIONS.length > 0, 'FALLBACK_MODEL_OPTIONS should contain at least one model') + + for (const model of FALLBACK_MODEL_OPTIONS) { + const modelWithTokenLimits = model as typeof model & { + tokenLimits?: { maxInputTokens?: number } + } + assert.ok( + modelWithTokenLimits.tokenLimits !== undefined, + `Model ${model.id} should have tokenLimits defined` + ) + assert.strictEqual( + modelWithTokenLimits.tokenLimits?.maxInputTokens, + DEFAULT_MAX_INPUT_TOKENS, + `Model ${model.id} should have tokenLimits.maxInputTokens of ${DEFAULT_MAX_INPUT_TOKENS}` + ) + } + }) + }) +}) diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.ts new file mode 100644 index 0000000000..ff53e2ee40 --- /dev/null +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.ts @@ -0,0 +1,64 @@ +/** + * Token limits calculator for dynamic LLM context window management. + * + * This utility calculates character limits based on the maxInputTokens value + * returned from the listAvailableModels API, replacing hardcoded constants. + */ + +/** + * Interface representing calculated token and character limits for a model. 
+ */ +export interface TokenLimits { + /** Raw token limit from API (default: 200,000) */ + maxInputTokens: number + /** Maximum character count for overall context window: maxInputTokens * 3.5 */ + maxOverallCharacters: number + /** Input character limit for assistant responses: 0.7 * maxOverallCharacters */ + inputLimit: number + /** Threshold at which compaction is triggered: 0.7 * maxOverallCharacters */ + compactionThreshold: number +} + +/** Default maximum input tokens when API doesn't provide a value */ +export const DEFAULT_MAX_INPUT_TOKENS = 200_000 + +/** Ratio for converting tokens to characters (approximately 3.5 characters per token) */ +export const TOKENS_TO_CHARACTERS_RATIO = 3.5 + +/** Ratio of max overall characters used for input limit */ +export const INPUT_LIMIT_RATIO = 0.7 + +/** Ratio of max overall characters used for compaction threshold */ +export const COMPACTION_THRESHOLD_RATIO = 0.7 + +/** + * Utility class for calculating token and character limits based on model capabilities. + */ +export class TokenLimitsCalculator { + /** + * Calculate character limits from maxInputTokens + * @param maxInputTokens - The maximum input tokens from the model, defaults to 200K + * @returns TokenLimits object with all calculated values + */ + static calculate(maxInputTokens: number = DEFAULT_MAX_INPUT_TOKENS): TokenLimits { + const maxOverallCharacters = Math.floor(maxInputTokens * TOKENS_TO_CHARACTERS_RATIO) + const inputLimit = Math.floor(INPUT_LIMIT_RATIO * maxOverallCharacters) + const compactionThreshold = Math.floor(COMPACTION_THRESHOLD_RATIO * maxOverallCharacters) + + return { + maxInputTokens, + maxOverallCharacters, + inputLimit, + compactionThreshold, + } + } + + /** + * Extract maxInputTokens from API response with fallback + * @param model - Model object from listAvailableModels response + * @returns maxInputTokens value or default (200,000) + */ + static extractMaxInputTokens(model?: { tokenLimits?: { maxInputTokens?: number } }): number { + return model?.tokenLimits?.maxInputTokens ?? 
DEFAULT_MAX_INPUT_TOKENS + } +} diff --git a/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts b/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts index bb67a8aed0..4d74756320 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts @@ -17,6 +17,7 @@ import { enabledModelSelection } from '../../shared/utils' import { QErrorTransformer } from '../agenticChat/retry/errorTransformer' import { DelayNotification } from '../agenticChat/retry/delayInterceptor' import { MAX_REQUEST_ATTEMPTS } from '../agenticChat/constants/constants' +import { TokenLimits, TokenLimitsCalculator } from '../agenticChat/utils/tokenLimitsCalculator' export type ChatSessionServiceConfig = CodeWhispererStreamingClientConfig type FileChange = { before?: string; after?: string } @@ -47,6 +48,7 @@ export class ChatSessionService { #logging?: Logging #origin?: Origin #errorTransformer: QErrorTransformer + #tokenLimits: TokenLimits public getConversationType(): string { return this.#conversationType @@ -138,6 +140,24 @@ export class ChatSessionService { // Initialize Q-specific error transformation this.#errorTransformer = new QErrorTransformer(logging, () => this.isModelSelectionEnabled()) + + // Initialize token limits with default values + this.#tokenLimits = TokenLimitsCalculator.calculate() + } + + /** + * Gets the token limits for this session + */ + public get tokenLimits(): TokenLimits { + return this.#tokenLimits + } + + /** + * Sets the token limits for this session + * @param limits The token limits to set + */ + public setTokenLimits(limits: TokenLimits): void { + this.#tokenLimits = limits } public async sendMessage(request: SendMessageCommandInput): Promise { From 8874dbdaac4c1a5be082a6084e51d819f00f8010 Mon Sep 17 00:00:00 2001 From: Jacob Chung Date: Tue, 2 Dec 2025 11:30:37 -0800 Subject: [PATCH 2/3] fix: dependency issues --- app/aws-lsp-partiql-runtimes/package.json | 2 +- chat-client/package.json | 2 +- package-lock.json | 67 ++----------------- .../agenticChat/tools/mcp/mcpOauthClient.ts | 1 - 4 files changed, 8 insertions(+), 64 deletions(-) diff --git a/app/aws-lsp-partiql-runtimes/package.json b/app/aws-lsp-partiql-runtimes/package.json index 2c958f1945..36841189f7 100644 --- a/app/aws-lsp-partiql-runtimes/package.json +++ b/app/aws-lsp-partiql-runtimes/package.json @@ -11,7 +11,7 @@ "package": "npm run compile && npm run compile:webpack" }, "dependencies": { - "@aws/language-server-runtimes": "0.3.8", + "@aws/language-server-runtimes": "^0.3.8", "@aws/lsp-partiql": "0.0.20" }, "devDependencies": { diff --git a/chat-client/package.json b/chat-client/package.json index a125bea5fd..7ca2bc9c87 100644 --- a/chat-client/package.json +++ b/chat-client/package.json @@ -26,7 +26,7 @@ "dependencies": { "@aws/chat-client-ui-types": "0.1.68", "@aws/language-server-runtimes": "^0.3.8", - "@aws/language-server-runtimes-types": "0.1.62", + "@aws/language-server-runtimes-types": "^0.1.63", "@aws/mynah-ui": "^4.38.0" }, "devDependencies": { diff --git a/package-lock.json b/package-lock.json index abe58d3538..f55eb416fc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -157,7 +157,7 @@ "name": "@aws/lsp-partiql-runtimes", "version": "0.0.1", "dependencies": { - "@aws/language-server-runtimes": "0.3.8", + "@aws/language-server-runtimes": "^0.3.8", "@aws/lsp-partiql": "0.0.20" }, "devDependencies": { @@ -278,7 +278,7 @@ "dependencies": { 
"@aws/chat-client-ui-types": "0.1.68", "@aws/language-server-runtimes": "^0.3.8", - "@aws/language-server-runtimes-types": "0.1.62", + "@aws/language-server-runtimes-types": "^0.1.63", "@aws/mynah-ui": "^4.38.0" }, "devDependencies": { @@ -5627,9 +5627,9 @@ } }, "node_modules/@aws/language-server-runtimes-types": { - "version": "0.1.62", - "resolved": "https://registry.npmjs.org/@aws/language-server-runtimes-types/-/language-server-runtimes-types-0.1.62.tgz", - "integrity": "sha512-d/RSCZZzniaNeME+iM47l9Xx66vFvlQqGyLaWA5vFyKU0FkhN8/6CjPV4C4lxh3s8H4qOGsHm1w0y7t+zTgu4g==", + "version": "0.1.63", + "resolved": "https://registry.npmjs.org/@aws/language-server-runtimes-types/-/language-server-runtimes-types-0.1.63.tgz", + "integrity": "sha512-0Aeh0rQF4nOWXB0IlvroBoldlDaXsMvrZ4Ec3zgaU8wqlnh+WSDJiVPTgB1zCqPbDNybZxh7Z8nGh133hxk+FA==", "license": "Apache-2.0", "dependencies": { "vscode-languageserver-textdocument": "^1.0.12", @@ -5823,7 +5823,6 @@ "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -6704,7 +6703,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=18" }, @@ -6728,7 +6726,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=18" } @@ -9103,7 +9100,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "license": "Apache-2.0", - "peer": true, "engines": { "node": ">=8.0.0" } @@ -11436,7 +11432,6 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.1.tgz", "integrity": "sha512-LCCV0HdSZZZb34qifBsyWlUmok6W7ouER+oQIGBScS8EsZsQbrtFTUrDX4hOl+CS6p7cnNC4td+qrSVGSCTUfQ==", "license": "MIT", - "peer": true, "dependencies": { "undici-types": "~6.21.0" } @@ -11667,7 +11662,6 @@ "integrity": "sha512-fe0rz9WJQ5t2iaLfdbDc9T80GJy0AeO453q8C3YCilnGozvOyCG5t+EZtg7j7D88+c3FipfP/x+wzGnh1xp8ZA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/regexpp": "^4.10.0", "@typescript-eslint/scope-manager": "8.47.0", @@ -11698,7 +11692,6 @@ "integrity": "sha512-lJi3PfxVmo0AkEY93ecfN+r8SofEqZNGByvHAI3GBLrvt1Cw6H5k1IM02nSzu0RfUafr2EvFSw0wAsZgubNplQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.47.0", "@typescript-eslint/types": "8.47.0", @@ -12188,7 +12181,6 @@ "integrity": "sha512-i38o7wlipLllNrk2hzdDfAmk6nrqm3lR2MtAgWgtHbwznZAKkB84KpkNFfmUXw5Kg3iP1zKlSjwZpKqenuLc+Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=18.20.0" }, @@ -12243,7 +12235,6 @@ "integrity": "sha512-HdzDrRs+ywAqbXGKqe1i/bLtCv47plz4TvsHFH3j729OooT5VH38ctFn5aLXgECmiAKDkmH/A6kOq2Zh5DIxww==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "chalk": "^5.1.2", "loglevel": "^1.6.0", @@ -13110,7 +13101,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "devOptional": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -13177,7 +13167,6 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -14098,7 +14087,6 @@ "resolved": 
"https://registry.npmjs.org/bare-events/-/bare-events-2.8.2.tgz", "integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==", "license": "Apache-2.0", - "peer": true, "peerDependencies": { "bare-abort-controller": "*" }, @@ -14630,7 +14618,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.25", "caniuse-lite": "^1.0.30001754", @@ -14969,7 +14956,6 @@ "integrity": "sha512-RITGBfijLkBddZvnn8jdqoTypxvqbOLYQkGGxXzeFjVHvudaPw0HNFD9x928/eUwYWd2dPCugVqspGALTZZQKw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "assertion-error": "^1.1.0", "check-error": "^1.0.3", @@ -15894,7 +15880,6 @@ "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "env-paths": "^2.2.1", "import-fresh": "^3.3.0", @@ -17781,7 +17766,6 @@ "deprecated": "This version is no longer supported. Please see https://eslint.org/version-support for other options.", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -18443,7 +18427,6 @@ "integrity": "sha512-/XxRRR90gNSuNf++w1jOQjhC5LE9Ixf/iAQctVb/miEI3dwzPZTuG27/omoh5REfSLDoPXofM84vAH/ULtz35g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@vitest/snapshot": "^3.2.4", "deep-eql": "^5.0.2", @@ -18750,7 +18733,6 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.1.0.tgz", "integrity": "sha512-DT9ck5YIRU+8GYzzU5kT3eHGA5iL+1Zd0EutOmTE9Dtk+Tvuzd23VBU+ec7HPNSTxXYO55gPV/hq4pSBJDjFpA==", "license": "MIT", - "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.0", @@ -21619,7 +21601,6 @@ "integrity": "sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@jest/core": "^29.7.0", "@jest/types": "^29.6.3", @@ -24041,7 +24022,6 @@ "integrity": "sha512-mTT6RgopEYABzXWFx+GcJ+ZQ32kp4fMf0xvpZIIfSq9Z8lC/++MtcCnQ9t5FP2veYEP95FIYSvW+U9fV4xrlig==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "browser-stdout": "^1.3.1", "chokidar": "^4.0.1", @@ -26177,7 +26157,6 @@ "integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==", "dev": true, "license": "MIT", - "peer": true, "bin": { "prettier": "bin/prettier.cjs" }, @@ -29838,7 +29817,6 @@ "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@cspotcode/source-map-support": "^0.8.0", "@tsconfig/node10": "^1.0.7", @@ -30027,39 +30005,11 @@ "node": ">=4" } }, - "node_modules/tsconfig-paths": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-4.2.0.tgz", - "integrity": "sha512-NoZ4roiN7LnbKn9QqE1amc9DJfzvZXxF4xDavcOWt1BPkdx+m+0gJuPM+S0vCe7zTJMYUP0R8pO2XMr+Y8oLIg==", - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "json5": "^2.2.2", - "minimist": "^1.2.6", - "strip-bom": "^3.0.0" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/tsconfig-paths/node_modules/strip-bom": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", - "integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==", - "dev": 
true, - "license": "MIT", - "optional": true, - "engines": { - "node": ">=4" - } - }, "node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - "peer": true + "license": "0BSD" }, "node_modules/tsx": { "version": "4.20.6", @@ -30225,7 +30175,6 @@ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -30929,7 +30878,6 @@ "integrity": "sha512-QVM/asb5sDESz37ow/BAOA0z2HtUJsuAjPKHdw+Vx92PaQP3EfHwTgxK2T5rgwa0WRNh+c+n/0nEqIvqBl01sA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@types/node": "^20.11.30", "@types/sinonjs__fake-timers": "^8.1.5", @@ -31015,7 +30963,6 @@ "integrity": "sha512-HU1JOuV1OavsZ+mfigY0j8d1TgQgbZ6M+J75zDkpEAwYeXjWSqrGJtgnPblJjd/mAyTNQ7ygw0MiKOn6etz8yw==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.8", @@ -31065,7 +31012,6 @@ "integrity": "sha512-MfwFQ6SfwinsUVi0rNJm7rHZ31GyTcpVE5pgVA3hwFRb7COD4TzjUUwhGWKfO50+xdc2MQPuEBBJoqIMGt3JDw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@discoveryjs/json-ext": "^0.6.1", "@webpack-cli/configtest": "^3.0.1", @@ -32529,7 +32475,6 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/tools/mcp/mcpOauthClient.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/tools/mcp/mcpOauthClient.ts index 2e207c449e..82851a2fb9 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/tools/mcp/mcpOauthClient.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/tools/mcp/mcpOauthClient.ts @@ -3,7 +3,6 @@ * All Rights Reserved. 
SPDX-License-Identifier: Apache-2.0 */ -import type { RequestInit } from 'node-fetch' import * as crypto from 'crypto' import * as path from 'path' import { spawn } from 'child_process' From 29b84f002b6499e9be77020cccecb2a4d539ac42 Mon Sep 17 00:00:00 2001 From: Jacob Chung Date: Fri, 5 Dec 2025 13:05:23 -0800 Subject: [PATCH 3/3] refactor: encapsulate model ID and token limits in session --- .../agenticChat/agenticChatController.test.ts | 8 ++-- .../agenticChat/agenticChatController.ts | 35 ++++++----------- .../chat/chatSessionService.test.ts | 39 +++++++++++++++++++ .../chat/chatSessionService.ts | 22 ++++++++--- 4 files changed, 72 insertions(+), 32 deletions(-) diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts index 6004826534..749e889aad 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts @@ -3059,7 +3059,7 @@ ${' '.repeat(8)}} // Create a session and set initial model chatController.onTabAdd({ tabId: mockTabId }) const session = chatSessionManagementService.getSession(mockTabId).data! - session.modelId = initialModelId + session.setModel(initialModelId, cachedModels) // Get initial token limits (default 200K) const initialLimits = session.tokenLimits @@ -3137,7 +3137,7 @@ ${' '.repeat(8)}} getCachedModelsStub.returns(cachedData) const session = chatSessionManagementService.getSession(mockTabId).data! - session.modelId = 'model1' + session.setModel('model1', cachedData.models) const result = await chatController.onListAvailableModels({ tabId: mockTabId }) @@ -3324,7 +3324,7 @@ ${' '.repeat(8)}} it('should use defaultModelId from cache when session has no modelId', async () => { const session = chatSessionManagementService.getSession(mockTabId).data! - session.modelId = undefined + session.setModel(undefined, undefined) const result = await chatController.onListAvailableModels({ tabId: mockTabId }) @@ -3341,7 +3341,7 @@ ${' '.repeat(8)}} }) const session = chatSessionManagementService.getSession(mockTabId).data! 
- session.modelId = undefined + session.setModel(undefined, undefined) const result = await chatController.onListAvailableModels({ tabId: mockTabId }) diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts index a9543d336a..543a640378 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts @@ -780,13 +780,12 @@ export class AgenticChatController implements ChatHandlers { // Handle error cases by returning default model if (!success || errorFromAPI) { - // Even in error cases, calculate token limits from the default/fallback model + // Even in error cases, set the model with token limits if (success && session) { - const fallbackModel = models.find(model => model.id === DEFAULT_MODEL_ID) - const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(fallbackModel) - const tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens) - session.setTokenLimits(tokenLimits) - this.#log(`Token limits calculated for fallback model (error case): ${JSON.stringify(tokenLimits)}`) + session.setModel(DEFAULT_MODEL_ID, models) + this.#log( + `Model set for fallback (error case): ${DEFAULT_MODEL_ID}, tokenLimits: ${JSON.stringify(session.tokenLimits)}` + ) } return { tabId: params.tabId, @@ -828,16 +827,10 @@ export class AgenticChatController implements ChatHandlers { selectedModelId = defaultModelId || getMappedModelId(DEFAULT_MODEL_ID) } - // Store the selected model in the session - session.modelId = selectedModelId - - // Extract maxInputTokens from the selected model and calculate token limits - const selectedModel = models.find(model => model.id === selectedModelId) - const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(selectedModel) - const tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens) - session.setTokenLimits(tokenLimits) + // Store the selected model in the session (automatically calculates token limits) + session.setModel(selectedModelId, models) this.#log( - `Token limits calculated for initial model selection (${selectedModelId}): ${JSON.stringify(tokenLimits)}` + `Model set for initial selection: ${selectedModelId}, tokenLimits: ${JSON.stringify(session.tokenLimits)}` ) return { @@ -4678,17 +4671,13 @@ export class AgenticChatController implements ChatHandlers { session.pairProgrammingMode = params.optionsValues['pair-programmer-mode'] === 'true' const newModelId = params.optionsValues['model-selection'] - // Recalculate token limits when model changes - if (newModelId && newModelId !== session.modelId) { + // Set model (automatically recalculates token limits) + if (newModelId !== session.modelId) { const cachedData = this.#chatHistoryDb.getCachedModels() - const selectedModel = cachedData?.models?.find(model => model.id === newModelId) - const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(selectedModel) - const tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens) - session.setTokenLimits(tokenLimits) - this.#log(`Token limits calculated for model switch (${newModelId}): ${JSON.stringify(tokenLimits)}`) + session.setModel(newModelId, cachedData?.models) + this.#log(`Model set for model switch: ${newModelId}, tokenLimits: ${JSON.stringify(session.tokenLimits)}`) } - session.modelId = newModelId this.#chatHistoryDb.setModelId(session.modelId) 
this.#chatHistoryDb.setPairProgrammingMode(session.pairProgrammingMode) } diff --git a/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.test.ts b/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.test.ts index bc776c2f85..6cc86b31c6 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.test.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.test.ts @@ -329,6 +329,45 @@ describe('Chat Session Service', () => { }) }) + describe('setModel encapsulation', () => { + let chatSessionService: ChatSessionService + + beforeEach(() => { + chatSessionService = new ChatSessionService() + }) + + it('should initialize with undefined modelId and default token limits', () => { + assert.strictEqual(chatSessionService.modelId, undefined) + assert.strictEqual(chatSessionService.tokenLimits.maxInputTokens, 200_000) + }) + + it('should set modelId and calculate token limits together', () => { + const models = [ + { id: 'model-1', name: 'Model 1', description: 'Test', tokenLimits: { maxInputTokens: 300_000 } }, + ] + + chatSessionService.setModel('model-1', models) + + assert.strictEqual(chatSessionService.modelId, 'model-1') + assert.strictEqual(chatSessionService.tokenLimits.maxInputTokens, 300_000) + assert.strictEqual(chatSessionService.tokenLimits.maxOverallCharacters, Math.floor(300_000 * 3.5)) + }) + + it('should use default token limits when model not found in list', () => { + chatSessionService.setModel('unknown-model', []) + + assert.strictEqual(chatSessionService.modelId, 'unknown-model') + assert.strictEqual(chatSessionService.tokenLimits.maxInputTokens, 200_000) + }) + + it('should use default token limits when models list is undefined', () => { + chatSessionService.setModel('some-model', undefined) + + assert.strictEqual(chatSessionService.modelId, 'some-model') + assert.strictEqual(chatSessionService.tokenLimits.maxInputTokens, 200_000) + }) + }) + describe('IAM client source property', () => { it('sets source to Origin.IDE when using StreamingClientServiceIAM', async () => { const codeWhispererStreamingClientIAM = stubInterface() diff --git a/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts b/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts index 4d74756320..8f1db91187 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts @@ -18,6 +18,7 @@ import { QErrorTransformer } from '../agenticChat/retry/errorTransformer' import { DelayNotification } from '../agenticChat/retry/delayInterceptor' import { MAX_REQUEST_ATTEMPTS } from '../agenticChat/constants/constants' import { TokenLimits, TokenLimitsCalculator } from '../agenticChat/utils/tokenLimitsCalculator' +import { Model } from '@aws/language-server-runtimes/protocol' export type ChatSessionServiceConfig = CodeWhispererStreamingClientConfig type FileChange = { before?: string; after?: string } @@ -29,8 +30,8 @@ type DeferredHandler = { export class ChatSessionService { public pairProgrammingMode: boolean = true public contextListSent: boolean = false - public modelId: string | undefined public isMemoryBankGeneration: boolean = false + #modelId: string | undefined #lsp?: Features['lsp'] #abortController?: AbortController #currentPromptId?: string @@ -145,6 +146,13 @@ export class ChatSessionService { this.#tokenLimits = TokenLimitsCalculator.calculate() } + /** + * Gets the 
model ID for this session
+     */
+    public get modelId(): string | undefined {
+        return this.#modelId
+    }
+
     /**
      * Gets the token limits for this session
      */
@@ -153,11 +161,15 @@ export class ChatSessionService {
     }
 
     /**
-     * Sets the token limits for this session
-     * @param limits The token limits to set
+     * Sets the model for this session, automatically calculating token limits.
+     * This encapsulates model ID and token limits as a single entity.
+     * @param modelId The model ID to set
+     * @param models Optional list of available models to look up token limits from
      */
-    public setTokenLimits(limits: TokenLimits): void {
-        this.#tokenLimits = limits
+    public setModel(modelId: string | undefined, models?: Model[]): void {
+        this.#modelId = modelId
+        const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(models?.find(m => m.id === modelId))
+        this.#tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens)
     }
 
     public async sendMessage(request: SendMessageCommandInput): Promise<SendMessageCommandOutput> {
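Note (illustrative only, not part of the patch series): a minimal TypeScript sketch of how the limits introduced above are derived, using only the API added in tokenLimitsCalculator.ts and the model shape returned by listAvailableModels. The import path and the model literal are assumptions for the example, not values from the patches.

import { TokenLimitsCalculator } from './utils/tokenLimitsCalculator'

// Hypothetical entry shaped like a listAvailableModels result with tokenLimits populated.
const model = { id: 'model-2', name: 'Model 2', description: 'Test', tokenLimits: { maxInputTokens: 300_000 } }

// Falls back to DEFAULT_MAX_INPUT_TOKENS (200_000) when the model or its tokenLimits is absent.
const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(model) // 300_000

// maxOverallCharacters = floor(300_000 * 3.5) = 1_050_000
// inputLimit = compactionThreshold = floor(0.7 * 1_050_000) = 735_000
const limits = TokenLimitsCalculator.calculate(maxInputTokens)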