From 9738d78f4f7be2e56e486ef10e3e52910d5fff61 Mon Sep 17 00:00:00 2001 From: Jacob Chung Date: Wed, 26 Nov 2025 12:29:29 -0800 Subject: [PATCH 1/3] feat: use dynamic token limits from listAvailableModels API --- .../agenticChat/agenticChatController.test.ts | 152 ++++++++++++++---- .../agenticChat/agenticChatController.ts | 88 +++++++--- .../agenticChat/constants/constants.ts | 7 - .../agenticChat/constants/modelSelection.ts | 7 +- .../utils/tokenLimitsCalculator.test.ts | 150 +++++++++++++++++ .../utils/tokenLimitsCalculator.ts | 64 ++++++++ .../chat/chatSessionService.ts | 20 +++ 7 files changed, 428 insertions(+), 60 deletions(-) create mode 100644 server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.test.ts create mode 100644 server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.ts diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts index f136047a7c..6004826534 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts @@ -35,6 +35,7 @@ import { ChatUpdateParams, ConnectionMetadata, } from '@aws/language-server-runtimes/server-interface' +import { Model } from '@aws/language-server-runtimes/protocol' import { TestFeatures } from '@aws/language-server-runtimes/testing' import * as assert from 'assert' import { createIterableResponse, setCredentialsForAmazonQTokenServiceManagerFactory } from '../../shared/testUtils' @@ -58,7 +59,8 @@ import { LocalProjectContextController } from '../../shared/localProjectContextC import { CancellationError } from '@aws/lsp-core' import { ToolApprovalException } from './tools/toolShared' import * as constants from './constants/constants' -import { GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT, GENERIC_ERROR_MS } from './constants/constants' +import { GENERIC_ERROR_MS } from './constants/constants' +import { TokenLimitsCalculator } from './utils/tokenLimitsCalculator' import { MISSING_BEARER_TOKEN_ERROR } from '../../shared/constants' import { AmazonQError, @@ -1194,8 +1196,9 @@ describe('AgenticChatController', () => { assert.strictEqual(typedChatResult.body, errorMsg) }) - it('truncate input to 500k character ', async function () { - const input = 'X'.repeat(GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT + 10) + it('truncate input to dynamic input limit', async function () { + const defaultLimits = TokenLimitsCalculator.calculate() + const input = 'X'.repeat(defaultLimits.inputLimit + 10) generateAssistantResponseStub.restore() generateAssistantResponseStub = sinon.stub(CodeWhispererStreaming.prototype, 'generateAssistantResponse') generateAssistantResponseStub.callsFake(() => {}) @@ -1205,7 +1208,7 @@ describe('AgenticChatController', () => { generateAssistantResponseStub.firstCall.firstArg assert.deepStrictEqual( calledRequestInput.conversationState?.currentMessage?.userInputMessage?.content?.length, - GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT + defaultLimits.inputLimit ) }) it('shows generic errorMsg on internal errors', async function () { @@ -1576,23 +1579,27 @@ describe('AgenticChatController', () => { }) }) describe('truncateRequest', () => { + // Use dynamic input limit from TokenLimitsCalculator for all truncation tests + const defaultLimits = TokenLimitsCalculator.calculate() + const inputLimit = 
defaultLimits.inputLimit // 490_000 for default 200K tokens + it('should truncate user input message if exceeds limit', () => { const request: GenerateAssistantResponseCommandInput = { conversationState: { currentMessage: { userInputMessage: { - content: 'a'.repeat(590_000), + content: 'a'.repeat(inputLimit + 100_000), userInputMessageContext: { editorState: { relevantDocuments: [ { relativeFilePath: '', - text: 'a'.repeat(490_000), + text: 'a'.repeat(inputLimit - 10_000), }, ], document: { relativeFilePath: '', - text: 'a'.repeat(490_000), + text: 'a'.repeat(inputLimit - 10_000), }, }, }, @@ -1601,7 +1608,7 @@ describe('AgenticChatController', () => { history: [ { userInputMessage: { - content: 'a'.repeat(490_000), + content: 'a'.repeat(inputLimit - 10_000), }, }, ], @@ -1609,7 +1616,7 @@ describe('AgenticChatController', () => { }, } const result = chatController.truncateRequest(request) - assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.content?.length, 500_000) + assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.content?.length, inputLimit) assert.strictEqual( request.conversationState?.currentMessage?.userInputMessage?.userInputMessageContext?.editorState ?.document?.text?.length || 0, @@ -1641,11 +1648,13 @@ describe('AgenticChatController', () => { }) it('should truncate relevant documents if combined length exceeds remaining budget', () => { + // Use content that leaves room for some docs but not all + const contentLength = 400_000 const request: GenerateAssistantResponseCommandInput = { conversationState: { currentMessage: { userInputMessage: { - content: 'a'.repeat(400_000), + content: 'a'.repeat(contentLength), userInputMessageContext: { editorState: { relevantDocuments: [ @@ -1664,7 +1673,7 @@ describe('AgenticChatController', () => { ], document: { relativeFilePath: '', - text: 'a'.repeat(490_000), + text: 'a'.repeat(inputLimit - 10_000), }, }, }, @@ -1673,7 +1682,7 @@ describe('AgenticChatController', () => { history: [ { userInputMessage: { - content: 'a'.repeat(490_000), + content: 'a'.repeat(inputLimit - 10_000), }, }, ], @@ -1681,7 +1690,10 @@ describe('AgenticChatController', () => { }, } const result = chatController.truncateRequest(request) - assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.content?.length, 400_000) + assert.strictEqual( + request.conversationState?.currentMessage?.userInputMessage?.content?.length, + contentLength + ) assert.strictEqual( request.conversationState?.currentMessage?.userInputMessage?.userInputMessageContext?.editorState ?.document?.text?.length || 0, @@ -1693,7 +1705,8 @@ describe('AgenticChatController', () => { 2 ) assert.strictEqual(request.conversationState?.history?.length || 0, 1) - assert.strictEqual(result, 99700) + // Remaining budget = inputLimit - contentLength - 100 - 200 = 490_000 - 400_000 - 300 = 89_700 + assert.strictEqual(result, inputLimit - contentLength - 100 - 200) }) it('should truncate current editor if combined length exceeds remaining budget', () => { const request: GenerateAssistantResponseCommandInput = { @@ -1756,26 +1769,30 @@ describe('AgenticChatController', () => { assert.strictEqual(request.conversationState?.history?.length || 0, 3) }) it('should return remaining budget for history', () => { + const contentLength = 100_000 + const docLength = 100_000 + const relevantDoc1Length = 1000 + const relevantDoc2Length = 1000 const request: GenerateAssistantResponseCommandInput = { conversationState: { currentMessage: { 
userInputMessage: { - content: 'a'.repeat(100_000), + content: 'a'.repeat(contentLength), userInputMessageContext: { editorState: { relevantDocuments: [ { relativeFilePath: '', - text: 'a'.repeat(1000), + text: 'a'.repeat(relevantDoc1Length), }, { relativeFilePath: '', - text: 'a'.repeat(1000), + text: 'a'.repeat(relevantDoc2Length), }, ], document: { relativeFilePath: '', - text: 'a'.repeat(100_000), + text: 'a'.repeat(docLength), }, }, }, @@ -1802,11 +1819,14 @@ describe('AgenticChatController', () => { }, } const result = chatController.truncateRequest(request) - assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.content?.length, 100_000) + assert.strictEqual( + request.conversationState?.currentMessage?.userInputMessage?.content?.length, + contentLength + ) assert.strictEqual( request.conversationState?.currentMessage?.userInputMessage?.userInputMessageContext?.editorState ?.document?.text?.length || 0, - 100_000 + docLength ) assert.strictEqual( request.conversationState?.currentMessage?.userInputMessage?.userInputMessageContext?.editorState @@ -1814,15 +1834,20 @@ describe('AgenticChatController', () => { 2 ) assert.strictEqual(request.conversationState?.history?.length || 0, 3) - assert.strictEqual(result, 298000) + // Remaining budget = inputLimit - contentLength - relevantDoc1Length - relevantDoc2Length - docLength + // = 490_000 - 100_000 - 1000 - 1000 - 100_000 = 288_000 + assert.strictEqual(result, inputLimit - contentLength - relevantDoc1Length - relevantDoc2Length - docLength) }) it('should truncate images when they exceed budget', () => { + // Content that leaves small room for images + const contentLength = inputLimit - 6_600 // Leave room for small images but not large one + const smallImageChars = 3.3 // 1000 bytes * 3.3 / 1000 const request: GenerateAssistantResponseCommandInput = { conversationState: { currentMessage: { userInputMessage: { - content: 'a'.repeat(493_400), + content: 'a'.repeat(contentLength), images: [ { format: 'png', @@ -1852,15 +1877,17 @@ describe('AgenticChatController', () => { // Should only keep the first and third images (small ones) assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.images?.length, 2) - assert.strictEqual(result, 500000 - 493400 - 3.3 - 3.3) // remaining budget after content and images + assert.strictEqual(result, inputLimit - contentLength - smallImageChars - smallImageChars) // remaining budget after content and images }) it('should handle images without bytes', () => { + const contentLength = 400_000 + const smallImageChars = 3.3 // 1000 bytes * 3.3 / 1000 const request: GenerateAssistantResponseCommandInput = { conversationState: { currentMessage: { userInputMessage: { - content: 'a'.repeat(400_000), + content: 'a'.repeat(contentLength), images: [ { format: 'png', @@ -1884,7 +1911,7 @@ describe('AgenticChatController', () => { // Should keep both images since the first one has 0 chars assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.images?.length, 2) - assert.strictEqual(result, 500000 - 400000 - 3.3) // remaining budget after content and second image + assert.strictEqual(result, inputLimit - contentLength - smallImageChars) // remaining budget after content and second image }) it('should truncate relevantDocuments and images together with equal priority', () => { @@ -1925,7 +1952,7 @@ describe('AgenticChatController', () => { 1 ) assert.strictEqual(request.conversationState?.currentMessage?.userInputMessage?.images?.length, 1) - 
assert.strictEqual(result, 500000 - 400000 - 100 - 3.3) + assert.strictEqual(result, inputLimit - 400000 - 100 - 3.3) }) it('should respect additionalContext order for mixed file and image truncation', () => { @@ -2048,8 +2075,8 @@ describe('AgenticChatController', () => { assert.strictEqual(keptDoc?.relativeFilePath, 'file1.ts') // docs[0] assert.strictEqual(keptDoc?.text, 'a'.repeat(30_000)) - // Remaining budget should be 20.5k (100k - 33k - 30k - 16.5k) - assert.strictEqual(result, 500000 - 400000 - 33000 - 30000 - 16500) + // Remaining budget = inputLimit - 400000 - 33000 - 30000 - 16500 + assert.strictEqual(result, inputLimit - 400000 - 33000 - 30000 - 16500) }) }) @@ -3001,6 +3028,59 @@ ${' '.repeat(8)}} setModelIdStub.restore() }) + + it('should recalculate token limits when model changes', () => { + const mockTabId = 'tab-1' + const initialModelId = 'model-1' + const newModelId = 'model-2' + const setModelIdStub = sinon.stub(ChatDatabase.prototype, 'setModelId') + + // Mock getCachedModels to return models with different token limits + const cachedModels: Model[] = [ + { + id: 'model-1', + name: 'Model 1', + description: 'Test', + tokenLimits: { maxInputTokens: 200000 }, + }, + { + id: 'model-2', + name: 'Model 2', + description: 'Test', + tokenLimits: { maxInputTokens: 300000 }, + }, + ] + const getCachedModelsStub = sinon.stub(ChatDatabase.prototype, 'getCachedModels').returns({ + models: cachedModels, + defaultModelId: 'model-1', + timestamp: Date.now(), + }) + + // Create a session and set initial model + chatController.onTabAdd({ tabId: mockTabId }) + const session = chatSessionManagementService.getSession(mockTabId).data! + session.modelId = initialModelId + + // Get initial token limits (default 200K) + const initialLimits = session.tokenLimits + assert.strictEqual(initialLimits.maxInputTokens, 200000) + + // Switch to a model with different token limits + chatController.onPromptInputOptionChange({ + tabId: mockTabId, + optionsValues: { 'model-selection': newModelId }, + }) + + // Verify token limits were recalculated based on new model's maxInputTokens (300K) + const newLimits = session.tokenLimits + assert.strictEqual(newLimits.maxInputTokens, 300000) + assert.strictEqual(newLimits.maxOverallCharacters, Math.floor(300000 * 3.5)) + assert.strictEqual(newLimits.inputLimit, Math.floor(0.7 * newLimits.maxOverallCharacters)) + assert.strictEqual(newLimits.compactionThreshold, Math.floor(0.7 * newLimits.maxOverallCharacters)) + + setModelIdStub.restore() + getCachedModelsStub.restore() + }) }) describe('onListAvailableModels', () => { @@ -3137,11 +3217,13 @@ ${' '.repeat(8)}} modelId: 'claude-3-sonnet', modelName: 'Claude 3 Sonnet', description: 'Advanced AI model', + tokenLimits: { maxInputTokens: 200000 }, }, 'claude-4-sonnet': { modelId: 'claude-4-sonnet', modelName: 'Claude 4 Sonnet', description: 'Latest AI model', + tokenLimits: { maxInputTokens: 300000 }, }, }, defaultModel: { modelId: 'claude-3-sonnet' }, @@ -3160,8 +3242,18 @@ ${' '.repeat(8)}} assert.strictEqual(result.tabId, mockTabId) assert.strictEqual(result.models.length, 2) assert.deepStrictEqual(result.models, [ - { id: 'claude-3-sonnet', name: 'Claude 3 Sonnet', description: 'Advanced AI model' }, - { id: 'claude-4-sonnet', name: 'Claude 4 Sonnet', description: 'Latest AI model' }, + { + id: 'claude-3-sonnet', + name: 'Claude 3 Sonnet', + description: 'Advanced AI model', + tokenLimits: { maxInputTokens: 200000 }, + }, + { + id: 'claude-4-sonnet', + name: 'Claude 4 Sonnet', + description: 'Latest AI model', + 
tokenLimits: { maxInputTokens: 300000 }, + }, ]) // Verify cache was updated diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts index f9471c153f..a9543d336a 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts @@ -167,6 +167,7 @@ import { ExecuteBash, ExecuteBashParams } from './tools/executeBash' import { ExplanatoryParams, InvokeOutput, ToolApprovalException } from './tools/toolShared' import { validatePathBasic, validatePathExists, validatePaths as validatePathsSync } from './utils/pathValidation' import { calculateModifiedLines } from './utils/fileModificationMetrics' +import { TokenLimitsCalculator } from './utils/tokenLimitsCalculator' import { GrepSearch, SanitizedRipgrepOutput } from './tools/grepSearch' import { FileSearch, FileSearchParams, isFileSearchParams } from './tools/fileSearch' import { FsReplace, FsReplaceParams } from './tools/fsReplace' @@ -175,7 +176,6 @@ import { diffLines } from 'diff' import { GENERIC_ERROR_MS, LOADING_THRESHOLD_MS, - GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT, OUTPUT_LIMIT_EXCEEDS_PARTIAL_MSG, RESPONSE_TIMEOUT_MS, RESPONSE_TIMEOUT_PARTIAL_MSG, @@ -187,8 +187,6 @@ import { DEFAULT_WINDOW_REJECT_SHORTCUT, DEFAULT_MACOS_STOP_SHORTCUT, DEFAULT_WINDOW_STOP_SHORTCUT, - COMPACTION_CHARACTER_THRESHOLD, - MAX_OVERALL_CHARACTERS, FSREAD_MEMORY_BANK_MAX_PER_FILE, FSREAD_MEMORY_BANK_MAX_TOTAL, } from './constants/constants' @@ -726,11 +724,18 @@ export class AgenticChatController implements ChatHandlers { // Wait for the response to be completed before proceeding this.#log('Model Response: ', JSON.stringify(responseResult, null, 2)) if (responseResult.models) { - models = Object.values(responseResult.models).map(({ modelId, modelName, description }) => ({ - id: modelId ?? 'unknown', - name: modelName ?? modelId ?? 'unknown', - description: description ?? '', - })) + models = Object.values(responseResult.models).map( + ({ modelId, modelName, description, tokenLimits }) => ({ + id: modelId ?? 'unknown', + name: modelName ?? modelId ?? 'unknown', + description: description ?? '', + tokenLimits: tokenLimits + ? 
{ + maxInputTokens: tokenLimits.maxInputTokens, + } + : undefined, + }) + ) } defaultModelId = responseResult.defaultModel?.modelId @@ -775,6 +780,14 @@ export class AgenticChatController implements ChatHandlers { // Handle error cases by returning default model if (!success || errorFromAPI) { + // Even in error cases, calculate token limits from the default/fallback model + if (success && session) { + const fallbackModel = models.find(model => model.id === DEFAULT_MODEL_ID) + const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(fallbackModel) + const tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens) + session.setTokenLimits(tokenLimits) + this.#log(`Token limits calculated for fallback model (error case): ${JSON.stringify(tokenLimits)}`) + } return { tabId: params.tabId, models: models, @@ -818,6 +831,15 @@ export class AgenticChatController implements ChatHandlers { // Store the selected model in the session session.modelId = selectedModelId + // Extract maxInputTokens from the selected model and calculate token limits + const selectedModel = models.find(model => model.id === selectedModelId) + const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(selectedModel) + const tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens) + session.setTokenLimits(tokenLimits) + this.#log( + `Token limits calculated for initial model selection (${selectedModelId}): ${JSON.stringify(tokenLimits)}` + ) + return { tabId: params.tabId, models: models, @@ -1195,8 +1217,8 @@ export class AgenticChatController implements ChatHandlers { /** * Runs the compaction, making requests and processing tool uses until completion */ - #shouldCompact(currentRequestCount: number): boolean { - if (currentRequestCount > COMPACTION_CHARACTER_THRESHOLD) { + #shouldCompact(currentRequestCount: number, compactionThreshold: number): boolean { + if (currentRequestCount > compactionThreshold) { this.#debug(`Current request total character count is: ${currentRequestCount}, prompting user to compact`) return true } else { @@ -1396,7 +1418,7 @@ export class AgenticChatController implements ChatHandlers { throw new CancellationError('user') } - this.truncateRequest(currentRequestInput, additionalContext) + this.truncateRequest(currentRequestInput, additionalContext, session.tokenLimits.inputLimit) const currentMessage = currentRequestInput.conversationState?.currentMessage const conversationId = conversationIdentifier ?? '' if (!currentMessage || !conversationId) { @@ -1672,13 +1694,17 @@ export class AgenticChatController implements ChatHandlers { currentRequestInput = this.#updateRequestInputWithToolResults(currentRequestInput, toolResults, content) } - if (this.#shouldCompact(currentRequestCount)) { + if (this.#shouldCompact(currentRequestCount, session.tokenLimits.compactionThreshold)) { this.#telemetryController.emitCompactNudge( currentRequestCount, this.#features.runtime.serverInfo.version ?? 
'' ) const messageId = this.#getMessageIdForCompact(uuid()) - const confirmationResult = this.#processCompactConfirmation(messageId, currentRequestCount) + const confirmationResult = this.#processCompactConfirmation( + messageId, + currentRequestCount, + session.tokenLimits.maxOverallCharacters + ) const cachedButtonBlockId = await chatResultStream.writeResultBlock(confirmationResult) await this.waitForCompactApproval(messageId, chatResultStream, cachedButtonBlockId, session) // Get the compaction request input @@ -1731,10 +1757,17 @@ export class AgenticChatController implements ChatHandlers { * performs truncation of request before sending to backend service. * Returns the remaining character budget for chat history. * @param request + * @param additionalContext + * @param inputLimit - The dynamic input limit from the session's token limits */ - truncateRequest(request: ChatCommandInput, additionalContext?: AdditionalContentEntryAddition[]): number { - // TODO: Confirm if this limit applies to SendMessage and rename this constant - let remainingCharacterBudget = GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT + truncateRequest( + request: ChatCommandInput, + additionalContext?: AdditionalContentEntryAddition[], + inputLimit?: number + ): number { + // Use dynamic inputLimit from session, or fall back to default calculated value + const effectiveInputLimit = inputLimit ?? TokenLimitsCalculator.calculate().inputLimit + let remainingCharacterBudget = effectiveInputLimit if (!request?.conversationState?.currentMessage?.userInputMessage) { return remainingCharacterBudget } @@ -1743,9 +1776,9 @@ export class AgenticChatController implements ChatHandlers { // 1. prioritize user input message let truncatedUserInputMessage = '' if (message) { - if (message.length > GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT) { - this.#debug(`Truncating userInputMessage to ${GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT} characters}`) - truncatedUserInputMessage = message.substring(0, GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT) + if (message.length > effectiveInputLimit) { + this.#debug(`Truncating userInputMessage to ${effectiveInputLimit} characters}`) + truncatedUserInputMessage = message.substring(0, effectiveInputLimit) remainingCharacterBudget = remainingCharacterBudget - truncatedUserInputMessage.length request.conversationState.currentMessage.userInputMessage.content = truncatedUserInputMessage } else { @@ -2826,7 +2859,7 @@ export class AgenticChatController implements ChatHandlers { }) } - #processCompactConfirmation(messageId: string, characterCount: number): ChatResult { + #processCompactConfirmation(messageId: string, characterCount: number, maxOverallCharacters: number): ChatResult { const buttons = [{ id: 'allow-tools', text: 'Allow', icon: 'ok', status: 'clear' }] const header = { icon: 'warning', @@ -2834,7 +2867,7 @@ export class AgenticChatController implements ChatHandlers { body: COMPACTION_HEADER_BODY, buttons, } as any - const body = COMPACTION_BODY(Math.round((characterCount / MAX_OVERALL_CHARACTERS) * 100)) + const body = COMPACTION_BODY(Math.round((characterCount / maxOverallCharacters) * 100)) return { type: 'tool', messageId, @@ -4643,8 +4676,19 @@ export class AgenticChatController implements ChatHandlers { } session.pairProgrammingMode = params.optionsValues['pair-programmer-mode'] === 'true' - session.modelId = params.optionsValues['model-selection'] + const newModelId = params.optionsValues['model-selection'] + + // Recalculate token limits when model changes + if (newModelId && newModelId !== 
session.modelId) { + const cachedData = this.#chatHistoryDb.getCachedModels() + const selectedModel = cachedData?.models?.find(model => model.id === newModelId) + const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(selectedModel) + const tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens) + session.setTokenLimits(tokenLimits) + this.#log(`Token limits calculated for model switch (${newModelId}): ${JSON.stringify(tokenLimits)}`) + } + session.modelId = newModelId this.#chatHistoryDb.setModelId(session.modelId) this.#chatHistoryDb.setPairProgrammingMode(session.pairProgrammingMode) } diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/constants.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/constants.ts index fb710eae9b..901e08964b 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/constants.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/constants.ts @@ -10,14 +10,7 @@ export const RESPONSE_TIMEOUT_MS = 240_000 export const SERVICE_MANAGER_TIMEOUT_MS = 10_000 //10 seconds export const SERVICE_MANAGER_POLL_INTERVAL_MS = 100 -// LLM Constants -export const GENERATE_ASSISTANT_RESPONSE_INPUT_LIMIT = 500_000 - // Compaction -// Maximum number of characters per request used for compaction prompt -// 200K tokens * 3.5 = 700K characters, intentionally overestimating with 3.5:1 ratio -export const MAX_OVERALL_CHARACTERS = 700_000 -export const COMPACTION_CHARACTER_THRESHOLD = 0.7 * MAX_OVERALL_CHARACTERS export const COMPACTION_BODY = (threshold: number) => `The context window is almost full (${threshold}%) and exceeding it will clear your history. Amazon Q can compact your history instead.` export const COMPACTION_HEADER_BODY = 'Compact chat history?' diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/modelSelection.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/modelSelection.ts index 9e9927b10c..0bf0ea0693 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/modelSelection.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/constants/modelSelection.ts @@ -10,12 +10,14 @@ export enum BedrockModel { type ModelDetails = { label: string description: string + maxInputTokens: number } export const FALLBACK_MODEL_RECORD: Record = { [BedrockModel.CLAUDE_SONNET_4_20250514_V1_0]: { label: 'Claude Sonnet 4', description: 'Hybrid reasoning and coding for regular use', + maxInputTokens: 200_000, }, } @@ -24,9 +26,12 @@ export const BEDROCK_MODEL_TO_MODEL_ID: Record = { } export const FALLBACK_MODEL_OPTIONS: ListAvailableModelsResult['models'] = Object.entries(FALLBACK_MODEL_RECORD).map( - ([value, { label, description }]) => ({ + ([value, { label, description, maxInputTokens }]) => ({ id: value, name: label, description: description, + tokenLimits: { + maxInputTokens: maxInputTokens, + }, }) ) diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.test.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.test.ts new file mode 100644 index 0000000000..abb0184986 --- /dev/null +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.test.ts @@ -0,0 +1,150 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +import * as assert from 'assert' +import { + TokenLimitsCalculator, + TOKENS_TO_CHARACTERS_RATIO, + INPUT_LIMIT_RATIO, + COMPACTION_THRESHOLD_RATIO, + DEFAULT_MAX_INPUT_TOKENS, +} from './tokenLimitsCalculator' +import { FALLBACK_MODEL_OPTIONS } from '../constants/modelSelection' + +describe('TokenLimitsCalculator', () => { + describe('calculate()', () => { + /** + * **Feature: dynamic-token-limits, Property 1: Character calculation consistency** + * **Validates: Requirements 1.2** + */ + it('should calculate maxOverallCharacters as Math.floor(maxInputTokens * 3.5)', () => { + const testCases = [1, 100, 1000, 200_000, 500_000, 1_000_000] + + for (const maxInputTokens of testCases) { + const result = TokenLimitsCalculator.calculate(maxInputTokens) + const expected = Math.floor(maxInputTokens * TOKENS_TO_CHARACTERS_RATIO) + + assert.strictEqual( + result.maxOverallCharacters, + expected, + `For maxInputTokens=${maxInputTokens}, expected maxOverallCharacters=${expected} but got ${result.maxOverallCharacters}` + ) + } + }) + + /** + * **Feature: dynamic-token-limits, Property 2: Input limit calculation consistency** + * **Validates: Requirements 1.3, 1.4** + */ + it('should calculate inputLimit and compactionThreshold as Math.floor(0.7 * maxOverallCharacters)', () => { + const testCases = [1, 100, 1000, 200_000, 500_000, 1_000_000] + + for (const maxInputTokens of testCases) { + const result = TokenLimitsCalculator.calculate(maxInputTokens) + const expectedInputLimit = Math.floor(INPUT_LIMIT_RATIO * result.maxOverallCharacters) + const expectedCompactionThreshold = Math.floor(COMPACTION_THRESHOLD_RATIO * result.maxOverallCharacters) + + assert.strictEqual( + result.inputLimit, + expectedInputLimit, + `For maxInputTokens=${maxInputTokens}, expected inputLimit=${expectedInputLimit} but got ${result.inputLimit}` + ) + + assert.strictEqual( + result.compactionThreshold, + expectedCompactionThreshold, + `For maxInputTokens=${maxInputTokens}, expected compactionThreshold=${expectedCompactionThreshold} but got ${result.compactionThreshold}` + ) + } + }) + + it('should use DEFAULT_MAX_INPUT_TOKENS when no argument is provided', () => { + const result = TokenLimitsCalculator.calculate() + + assert.strictEqual(result.maxInputTokens, DEFAULT_MAX_INPUT_TOKENS) + assert.strictEqual( + result.maxOverallCharacters, + Math.floor(DEFAULT_MAX_INPUT_TOKENS * TOKENS_TO_CHARACTERS_RATIO) + ) + }) + + it('should return correct default values for 200K tokens', () => { + const result = TokenLimitsCalculator.calculate(200_000) + const expectedMaxOverallCharacters = Math.floor(200_000 * TOKENS_TO_CHARACTERS_RATIO) + const expectedInputLimit = Math.floor(INPUT_LIMIT_RATIO * expectedMaxOverallCharacters) + const expectedCompactionThreshold = Math.floor(COMPACTION_THRESHOLD_RATIO * expectedMaxOverallCharacters) + + assert.strictEqual(result.maxInputTokens, 200_000) + assert.strictEqual(result.maxOverallCharacters, expectedMaxOverallCharacters) + assert.strictEqual(result.inputLimit, expectedInputLimit) + assert.strictEqual(result.compactionThreshold, expectedCompactionThreshold) + }) + }) + + describe('extractMaxInputTokens()', () => { + /** + * **Feature: dynamic-token-limits, Property 3: Default fallback consistency** + * **Validates: Requirements 2.1, 2.3** + */ + it('should return DEFAULT_MAX_INPUT_TOKENS for undefined model', () => { + const result = TokenLimitsCalculator.extractMaxInputTokens(undefined) + assert.strictEqual(result, DEFAULT_MAX_INPUT_TOKENS) + }) + + 
it('should return DEFAULT_MAX_INPUT_TOKENS for model without tokenLimits', () => { + const result = TokenLimitsCalculator.extractMaxInputTokens({}) + assert.strictEqual(result, DEFAULT_MAX_INPUT_TOKENS) + }) + + it('should return DEFAULT_MAX_INPUT_TOKENS for model with undefined tokenLimits', () => { + const result = TokenLimitsCalculator.extractMaxInputTokens({ tokenLimits: undefined }) + assert.strictEqual(result, DEFAULT_MAX_INPUT_TOKENS) + }) + + it('should return DEFAULT_MAX_INPUT_TOKENS for model with tokenLimits but undefined maxInputTokens', () => { + const result = TokenLimitsCalculator.extractMaxInputTokens({ tokenLimits: {} }) + assert.strictEqual(result, DEFAULT_MAX_INPUT_TOKENS) + }) + + it('should return DEFAULT_MAX_INPUT_TOKENS for model with tokenLimits but null maxInputTokens', () => { + const result = TokenLimitsCalculator.extractMaxInputTokens({ + tokenLimits: { maxInputTokens: null as unknown as undefined }, + }) + assert.strictEqual(result, DEFAULT_MAX_INPUT_TOKENS) + }) + + it('should return the actual maxInputTokens when provided', () => { + const result = TokenLimitsCalculator.extractMaxInputTokens({ + tokenLimits: { maxInputTokens: 500_000 }, + }) + assert.strictEqual(result, 500_000) + }) + }) + + describe('FALLBACK_MODEL_OPTIONS', () => { + /** + * Verify FALLBACK_MODEL_OPTIONS includes tokenLimits.maxInputTokens of 200,000 + * **Validates: Requirements 2.4** + */ + it('should include tokenLimits.maxInputTokens of 200,000 for all fallback models', () => { + assert.ok(FALLBACK_MODEL_OPTIONS.length > 0, 'FALLBACK_MODEL_OPTIONS should contain at least one model') + + for (const model of FALLBACK_MODEL_OPTIONS) { + const modelWithTokenLimits = model as typeof model & { + tokenLimits?: { maxInputTokens?: number } + } + assert.ok( + modelWithTokenLimits.tokenLimits !== undefined, + `Model ${model.id} should have tokenLimits defined` + ) + assert.strictEqual( + modelWithTokenLimits.tokenLimits?.maxInputTokens, + DEFAULT_MAX_INPUT_TOKENS, + `Model ${model.id} should have tokenLimits.maxInputTokens of ${DEFAULT_MAX_INPUT_TOKENS}` + ) + } + }) + }) +}) diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.ts new file mode 100644 index 0000000000..ff53e2ee40 --- /dev/null +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/utils/tokenLimitsCalculator.ts @@ -0,0 +1,64 @@ +/** + * Token limits calculator for dynamic LLM context window management. + * + * This utility calculates character limits based on the maxInputTokens value + * returned from the listAvailableModels API, replacing hardcoded constants. + */ + +/** + * Interface representing calculated token and character limits for a model. 
+ */ +export interface TokenLimits { + /** Raw token limit from API (default: 200,000) */ + maxInputTokens: number + /** Maximum character count for overall context window: maxInputTokens * 3.5 */ + maxOverallCharacters: number + /** Input character limit for assistant responses: 0.7 * maxOverallCharacters */ + inputLimit: number + /** Threshold at which compaction is triggered: 0.7 * maxOverallCharacters */ + compactionThreshold: number +} + +/** Default maximum input tokens when API doesn't provide a value */ +export const DEFAULT_MAX_INPUT_TOKENS = 200_000 + +/** Ratio for converting tokens to characters (approximately 3.5 characters per token) */ +export const TOKENS_TO_CHARACTERS_RATIO = 3.5 + +/** Ratio of max overall characters used for input limit */ +export const INPUT_LIMIT_RATIO = 0.7 + +/** Ratio of max overall characters used for compaction threshold */ +export const COMPACTION_THRESHOLD_RATIO = 0.7 + +/** + * Utility class for calculating token and character limits based on model capabilities. + */ +export class TokenLimitsCalculator { + /** + * Calculate character limits from maxInputTokens + * @param maxInputTokens - The maximum input tokens from the model, defaults to 200K + * @returns TokenLimits object with all calculated values + */ + static calculate(maxInputTokens: number = DEFAULT_MAX_INPUT_TOKENS): TokenLimits { + const maxOverallCharacters = Math.floor(maxInputTokens * TOKENS_TO_CHARACTERS_RATIO) + const inputLimit = Math.floor(INPUT_LIMIT_RATIO * maxOverallCharacters) + const compactionThreshold = Math.floor(COMPACTION_THRESHOLD_RATIO * maxOverallCharacters) + + return { + maxInputTokens, + maxOverallCharacters, + inputLimit, + compactionThreshold, + } + } + + /** + * Extract maxInputTokens from API response with fallback + * @param model - Model object from listAvailableModels response + * @returns maxInputTokens value or default (200,000) + */ + static extractMaxInputTokens(model?: { tokenLimits?: { maxInputTokens?: number } }): number { + return model?.tokenLimits?.maxInputTokens ?? 
DEFAULT_MAX_INPUT_TOKENS + } +} diff --git a/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts b/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts index bb67a8aed0..4d74756320 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts @@ -17,6 +17,7 @@ import { enabledModelSelection } from '../../shared/utils' import { QErrorTransformer } from '../agenticChat/retry/errorTransformer' import { DelayNotification } from '../agenticChat/retry/delayInterceptor' import { MAX_REQUEST_ATTEMPTS } from '../agenticChat/constants/constants' +import { TokenLimits, TokenLimitsCalculator } from '../agenticChat/utils/tokenLimitsCalculator' export type ChatSessionServiceConfig = CodeWhispererStreamingClientConfig type FileChange = { before?: string; after?: string } @@ -47,6 +48,7 @@ export class ChatSessionService { #logging?: Logging #origin?: Origin #errorTransformer: QErrorTransformer + #tokenLimits: TokenLimits public getConversationType(): string { return this.#conversationType @@ -138,6 +140,24 @@ export class ChatSessionService { // Initialize Q-specific error transformation this.#errorTransformer = new QErrorTransformer(logging, () => this.isModelSelectionEnabled()) + + // Initialize token limits with default values + this.#tokenLimits = TokenLimitsCalculator.calculate() + } + + /** + * Gets the token limits for this session + */ + public get tokenLimits(): TokenLimits { + return this.#tokenLimits + } + + /** + * Sets the token limits for this session + * @param limits The token limits to set + */ + public setTokenLimits(limits: TokenLimits): void { + this.#tokenLimits = limits } public async sendMessage(request: SendMessageCommandInput): Promise { From 8874dbdaac4c1a5be082a6084e51d819f00f8010 Mon Sep 17 00:00:00 2001 From: Jacob Chung Date: Tue, 2 Dec 2025 11:30:37 -0800 Subject: [PATCH 2/3] fix: dependency issues --- app/aws-lsp-partiql-runtimes/package.json | 2 +- chat-client/package.json | 2 +- package-lock.json | 67 ++----------------- .../agenticChat/tools/mcp/mcpOauthClient.ts | 1 - 4 files changed, 8 insertions(+), 64 deletions(-) diff --git a/app/aws-lsp-partiql-runtimes/package.json b/app/aws-lsp-partiql-runtimes/package.json index 2c958f1945..36841189f7 100644 --- a/app/aws-lsp-partiql-runtimes/package.json +++ b/app/aws-lsp-partiql-runtimes/package.json @@ -11,7 +11,7 @@ "package": "npm run compile && npm run compile:webpack" }, "dependencies": { - "@aws/language-server-runtimes": "0.3.8", + "@aws/language-server-runtimes": "^0.3.8", "@aws/lsp-partiql": "0.0.20" }, "devDependencies": { diff --git a/chat-client/package.json b/chat-client/package.json index a125bea5fd..7ca2bc9c87 100644 --- a/chat-client/package.json +++ b/chat-client/package.json @@ -26,7 +26,7 @@ "dependencies": { "@aws/chat-client-ui-types": "0.1.68", "@aws/language-server-runtimes": "^0.3.8", - "@aws/language-server-runtimes-types": "0.1.62", + "@aws/language-server-runtimes-types": "^0.1.63", "@aws/mynah-ui": "^4.38.0" }, "devDependencies": { diff --git a/package-lock.json b/package-lock.json index abe58d3538..f55eb416fc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -157,7 +157,7 @@ "name": "@aws/lsp-partiql-runtimes", "version": "0.0.1", "dependencies": { - "@aws/language-server-runtimes": "0.3.8", + "@aws/language-server-runtimes": "^0.3.8", "@aws/lsp-partiql": "0.0.20" }, "devDependencies": { @@ -278,7 +278,7 @@ "dependencies": { 
"@aws/chat-client-ui-types": "0.1.68", "@aws/language-server-runtimes": "^0.3.8", - "@aws/language-server-runtimes-types": "0.1.62", + "@aws/language-server-runtimes-types": "^0.1.63", "@aws/mynah-ui": "^4.38.0" }, "devDependencies": { @@ -5627,9 +5627,9 @@ } }, "node_modules/@aws/language-server-runtimes-types": { - "version": "0.1.62", - "resolved": "https://registry.npmjs.org/@aws/language-server-runtimes-types/-/language-server-runtimes-types-0.1.62.tgz", - "integrity": "sha512-d/RSCZZzniaNeME+iM47l9Xx66vFvlQqGyLaWA5vFyKU0FkhN8/6CjPV4C4lxh3s8H4qOGsHm1w0y7t+zTgu4g==", + "version": "0.1.63", + "resolved": "https://registry.npmjs.org/@aws/language-server-runtimes-types/-/language-server-runtimes-types-0.1.63.tgz", + "integrity": "sha512-0Aeh0rQF4nOWXB0IlvroBoldlDaXsMvrZ4Ec3zgaU8wqlnh+WSDJiVPTgB1zCqPbDNybZxh7Z8nGh133hxk+FA==", "license": "Apache-2.0", "dependencies": { "vscode-languageserver-textdocument": "^1.0.12", @@ -5823,7 +5823,6 @@ "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -6704,7 +6703,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=18" }, @@ -6728,7 +6726,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=18" } @@ -9103,7 +9100,6 @@ "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "license": "Apache-2.0", - "peer": true, "engines": { "node": ">=8.0.0" } @@ -11436,7 +11432,6 @@ "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.1.tgz", "integrity": "sha512-LCCV0HdSZZZb34qifBsyWlUmok6W7ouER+oQIGBScS8EsZsQbrtFTUrDX4hOl+CS6p7cnNC4td+qrSVGSCTUfQ==", "license": "MIT", - "peer": true, "dependencies": { "undici-types": "~6.21.0" } @@ -11667,7 +11662,6 @@ "integrity": "sha512-fe0rz9WJQ5t2iaLfdbDc9T80GJy0AeO453q8C3YCilnGozvOyCG5t+EZtg7j7D88+c3FipfP/x+wzGnh1xp8ZA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/regexpp": "^4.10.0", "@typescript-eslint/scope-manager": "8.47.0", @@ -11698,7 +11692,6 @@ "integrity": "sha512-lJi3PfxVmo0AkEY93ecfN+r8SofEqZNGByvHAI3GBLrvt1Cw6H5k1IM02nSzu0RfUafr2EvFSw0wAsZgubNplQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "8.47.0", "@typescript-eslint/types": "8.47.0", @@ -12188,7 +12181,6 @@ "integrity": "sha512-i38o7wlipLllNrk2hzdDfAmk6nrqm3lR2MtAgWgtHbwznZAKkB84KpkNFfmUXw5Kg3iP1zKlSjwZpKqenuLc+Q==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=18.20.0" }, @@ -12243,7 +12235,6 @@ "integrity": "sha512-HdzDrRs+ywAqbXGKqe1i/bLtCv47plz4TvsHFH3j729OooT5VH38ctFn5aLXgECmiAKDkmH/A6kOq2Zh5DIxww==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "chalk": "^5.1.2", "loglevel": "^1.6.0", @@ -13110,7 +13101,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "devOptional": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -13177,7 +13167,6 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -14098,7 +14087,6 @@ "resolved": 
"https://registry.npmjs.org/bare-events/-/bare-events-2.8.2.tgz", "integrity": "sha512-riJjyv1/mHLIPX4RwiK+oW9/4c3TEUeORHKefKAKnZ5kyslbN+HXowtbaVEqt4IMUB7OXlfixcs6gsFeo/jhiQ==", "license": "Apache-2.0", - "peer": true, "peerDependencies": { "bare-abort-controller": "*" }, @@ -14630,7 +14618,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.25", "caniuse-lite": "^1.0.30001754", @@ -14969,7 +14956,6 @@ "integrity": "sha512-RITGBfijLkBddZvnn8jdqoTypxvqbOLYQkGGxXzeFjVHvudaPw0HNFD9x928/eUwYWd2dPCugVqspGALTZZQKw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "assertion-error": "^1.1.0", "check-error": "^1.0.3", @@ -15894,7 +15880,6 @@ "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "env-paths": "^2.2.1", "import-fresh": "^3.3.0", @@ -17781,7 +17766,6 @@ "deprecated": "This version is no longer supported. Please see https://eslint.org/version-support for other options.", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -18443,7 +18427,6 @@ "integrity": "sha512-/XxRRR90gNSuNf++w1jOQjhC5LE9Ixf/iAQctVb/miEI3dwzPZTuG27/omoh5REfSLDoPXofM84vAH/ULtz35g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@vitest/snapshot": "^3.2.4", "deep-eql": "^5.0.2", @@ -18750,7 +18733,6 @@ "resolved": "https://registry.npmjs.org/express/-/express-5.1.0.tgz", "integrity": "sha512-DT9ck5YIRU+8GYzzU5kT3eHGA5iL+1Zd0EutOmTE9Dtk+Tvuzd23VBU+ec7HPNSTxXYO55gPV/hq4pSBJDjFpA==", "license": "MIT", - "peer": true, "dependencies": { "accepts": "^2.0.0", "body-parser": "^2.2.0", @@ -21619,7 +21601,6 @@ "integrity": "sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@jest/core": "^29.7.0", "@jest/types": "^29.6.3", @@ -24041,7 +24022,6 @@ "integrity": "sha512-mTT6RgopEYABzXWFx+GcJ+ZQ32kp4fMf0xvpZIIfSq9Z8lC/++MtcCnQ9t5FP2veYEP95FIYSvW+U9fV4xrlig==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "browser-stdout": "^1.3.1", "chokidar": "^4.0.1", @@ -26177,7 +26157,6 @@ "integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==", "dev": true, "license": "MIT", - "peer": true, "bin": { "prettier": "bin/prettier.cjs" }, @@ -29838,7 +29817,6 @@ "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@cspotcode/source-map-support": "^0.8.0", "@tsconfig/node10": "^1.0.7", @@ -30027,39 +30005,11 @@ "node": ">=4" } }, - "node_modules/tsconfig-paths": { - "version": "4.2.0", - "resolved": "https://registry.npmjs.org/tsconfig-paths/-/tsconfig-paths-4.2.0.tgz", - "integrity": "sha512-NoZ4roiN7LnbKn9QqE1amc9DJfzvZXxF4xDavcOWt1BPkdx+m+0gJuPM+S0vCe7zTJMYUP0R8pO2XMr+Y8oLIg==", - "dev": true, - "license": "MIT", - "optional": true, - "dependencies": { - "json5": "^2.2.2", - "minimist": "^1.2.6", - "strip-bom": "^3.0.0" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/tsconfig-paths/node_modules/strip-bom": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", - "integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==", - "dev": 
true, - "license": "MIT", - "optional": true, - "engines": { - "node": ">=4" - } - }, "node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "license": "0BSD", - "peer": true + "license": "0BSD" }, "node_modules/tsx": { "version": "4.20.6", @@ -30225,7 +30175,6 @@ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" @@ -30929,7 +30878,6 @@ "integrity": "sha512-QVM/asb5sDESz37ow/BAOA0z2HtUJsuAjPKHdw+Vx92PaQP3EfHwTgxK2T5rgwa0WRNh+c+n/0nEqIvqBl01sA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@types/node": "^20.11.30", "@types/sinonjs__fake-timers": "^8.1.5", @@ -31015,7 +30963,6 @@ "integrity": "sha512-HU1JOuV1OavsZ+mfigY0j8d1TgQgbZ6M+J75zDkpEAwYeXjWSqrGJtgnPblJjd/mAyTNQ7ygw0MiKOn6etz8yw==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.8", @@ -31065,7 +31012,6 @@ "integrity": "sha512-MfwFQ6SfwinsUVi0rNJm7rHZ31GyTcpVE5pgVA3hwFRb7COD4TzjUUwhGWKfO50+xdc2MQPuEBBJoqIMGt3JDw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@discoveryjs/json-ext": "^0.6.1", "@webpack-cli/configtest": "^3.0.1", @@ -32529,7 +32475,6 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/tools/mcp/mcpOauthClient.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/tools/mcp/mcpOauthClient.ts index 2e207c449e..82851a2fb9 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/tools/mcp/mcpOauthClient.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/tools/mcp/mcpOauthClient.ts @@ -3,7 +3,6 @@ * All Rights Reserved. 
SPDX-License-Identifier: Apache-2.0 */ -import type { RequestInit } from 'node-fetch' import * as crypto from 'crypto' import * as path from 'path' import { spawn } from 'child_process' From 29b84f002b6499e9be77020cccecb2a4d539ac42 Mon Sep 17 00:00:00 2001 From: Jacob Chung Date: Fri, 5 Dec 2025 13:05:23 -0800 Subject: [PATCH 3/3] refactor: encapsulate model ID and token limits in session --- .../agenticChat/agenticChatController.test.ts | 8 ++-- .../agenticChat/agenticChatController.ts | 35 ++++++----------- .../chat/chatSessionService.test.ts | 39 +++++++++++++++++++ .../chat/chatSessionService.ts | 22 ++++++++--- 4 files changed, 72 insertions(+), 32 deletions(-) diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts index 6004826534..749e889aad 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.test.ts @@ -3059,7 +3059,7 @@ ${' '.repeat(8)}} // Create a session and set initial model chatController.onTabAdd({ tabId: mockTabId }) const session = chatSessionManagementService.getSession(mockTabId).data! - session.modelId = initialModelId + session.setModel(initialModelId, cachedModels) // Get initial token limits (default 200K) const initialLimits = session.tokenLimits @@ -3137,7 +3137,7 @@ ${' '.repeat(8)}} getCachedModelsStub.returns(cachedData) const session = chatSessionManagementService.getSession(mockTabId).data! - session.modelId = 'model1' + session.setModel('model1', cachedData.models) const result = await chatController.onListAvailableModels({ tabId: mockTabId }) @@ -3324,7 +3324,7 @@ ${' '.repeat(8)}} it('should use defaultModelId from cache when session has no modelId', async () => { const session = chatSessionManagementService.getSession(mockTabId).data! - session.modelId = undefined + session.setModel(undefined, undefined) const result = await chatController.onListAvailableModels({ tabId: mockTabId }) @@ -3341,7 +3341,7 @@ ${' '.repeat(8)}} }) const session = chatSessionManagementService.getSession(mockTabId).data! 
- session.modelId = undefined + session.setModel(undefined, undefined) const result = await chatController.onListAvailableModels({ tabId: mockTabId }) diff --git a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts index a9543d336a..543a640378 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/agenticChat/agenticChatController.ts @@ -780,13 +780,12 @@ export class AgenticChatController implements ChatHandlers { // Handle error cases by returning default model if (!success || errorFromAPI) { - // Even in error cases, calculate token limits from the default/fallback model + // Even in error cases, set the model with token limits if (success && session) { - const fallbackModel = models.find(model => model.id === DEFAULT_MODEL_ID) - const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(fallbackModel) - const tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens) - session.setTokenLimits(tokenLimits) - this.#log(`Token limits calculated for fallback model (error case): ${JSON.stringify(tokenLimits)}`) + session.setModel(DEFAULT_MODEL_ID, models) + this.#log( + `Model set for fallback (error case): ${DEFAULT_MODEL_ID}, tokenLimits: ${JSON.stringify(session.tokenLimits)}` + ) } return { tabId: params.tabId, @@ -828,16 +827,10 @@ export class AgenticChatController implements ChatHandlers { selectedModelId = defaultModelId || getMappedModelId(DEFAULT_MODEL_ID) } - // Store the selected model in the session - session.modelId = selectedModelId - - // Extract maxInputTokens from the selected model and calculate token limits - const selectedModel = models.find(model => model.id === selectedModelId) - const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(selectedModel) - const tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens) - session.setTokenLimits(tokenLimits) + // Store the selected model in the session (automatically calculates token limits) + session.setModel(selectedModelId, models) this.#log( - `Token limits calculated for initial model selection (${selectedModelId}): ${JSON.stringify(tokenLimits)}` + `Model set for initial selection: ${selectedModelId}, tokenLimits: ${JSON.stringify(session.tokenLimits)}` ) return { @@ -4678,17 +4671,13 @@ export class AgenticChatController implements ChatHandlers { session.pairProgrammingMode = params.optionsValues['pair-programmer-mode'] === 'true' const newModelId = params.optionsValues['model-selection'] - // Recalculate token limits when model changes - if (newModelId && newModelId !== session.modelId) { + // Set model (automatically recalculates token limits) + if (newModelId !== session.modelId) { const cachedData = this.#chatHistoryDb.getCachedModels() - const selectedModel = cachedData?.models?.find(model => model.id === newModelId) - const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(selectedModel) - const tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens) - session.setTokenLimits(tokenLimits) - this.#log(`Token limits calculated for model switch (${newModelId}): ${JSON.stringify(tokenLimits)}`) + session.setModel(newModelId, cachedData?.models) + this.#log(`Model set for model switch: ${newModelId}, tokenLimits: ${JSON.stringify(session.tokenLimits)}`) } - session.modelId = newModelId this.#chatHistoryDb.setModelId(session.modelId) 
this.#chatHistoryDb.setPairProgrammingMode(session.pairProgrammingMode) } diff --git a/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.test.ts b/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.test.ts index bc776c2f85..6cc86b31c6 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.test.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.test.ts @@ -329,6 +329,45 @@ describe('Chat Session Service', () => { }) }) + describe('setModel encapsulation', () => { + let chatSessionService: ChatSessionService + + beforeEach(() => { + chatSessionService = new ChatSessionService() + }) + + it('should initialize with undefined modelId and default token limits', () => { + assert.strictEqual(chatSessionService.modelId, undefined) + assert.strictEqual(chatSessionService.tokenLimits.maxInputTokens, 200_000) + }) + + it('should set modelId and calculate token limits together', () => { + const models = [ + { id: 'model-1', name: 'Model 1', description: 'Test', tokenLimits: { maxInputTokens: 300_000 } }, + ] + + chatSessionService.setModel('model-1', models) + + assert.strictEqual(chatSessionService.modelId, 'model-1') + assert.strictEqual(chatSessionService.tokenLimits.maxInputTokens, 300_000) + assert.strictEqual(chatSessionService.tokenLimits.maxOverallCharacters, Math.floor(300_000 * 3.5)) + }) + + it('should use default token limits when model not found in list', () => { + chatSessionService.setModel('unknown-model', []) + + assert.strictEqual(chatSessionService.modelId, 'unknown-model') + assert.strictEqual(chatSessionService.tokenLimits.maxInputTokens, 200_000) + }) + + it('should use default token limits when models list is undefined', () => { + chatSessionService.setModel('some-model', undefined) + + assert.strictEqual(chatSessionService.modelId, 'some-model') + assert.strictEqual(chatSessionService.tokenLimits.maxInputTokens, 200_000) + }) + }) + describe('IAM client source property', () => { it('sets source to Origin.IDE when using StreamingClientServiceIAM', async () => { const codeWhispererStreamingClientIAM = stubInterface() diff --git a/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts b/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts index 4d74756320..8f1db91187 100644 --- a/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts +++ b/server/aws-lsp-codewhisperer/src/language-server/chat/chatSessionService.ts @@ -18,6 +18,7 @@ import { QErrorTransformer } from '../agenticChat/retry/errorTransformer' import { DelayNotification } from '../agenticChat/retry/delayInterceptor' import { MAX_REQUEST_ATTEMPTS } from '../agenticChat/constants/constants' import { TokenLimits, TokenLimitsCalculator } from '../agenticChat/utils/tokenLimitsCalculator' +import { Model } from '@aws/language-server-runtimes/protocol' export type ChatSessionServiceConfig = CodeWhispererStreamingClientConfig type FileChange = { before?: string; after?: string } @@ -29,8 +30,8 @@ type DeferredHandler = { export class ChatSessionService { public pairProgrammingMode: boolean = true public contextListSent: boolean = false - public modelId: string | undefined public isMemoryBankGeneration: boolean = false + #modelId: string | undefined #lsp?: Features['lsp'] #abortController?: AbortController #currentPromptId?: string @@ -145,6 +146,13 @@ export class ChatSessionService { this.#tokenLimits = TokenLimitsCalculator.calculate() } + /** + * Gets the 
model ID for this session
+     */
+    public get modelId(): string | undefined {
+        return this.#modelId
+    }
+
     /**
      * Gets the token limits for this session
      */
@@ -153,11 +161,15 @@ export class ChatSessionService {
     }
 
     /**
-     * Sets the token limits for this session
-     * @param limits The token limits to set
+     * Sets the model for this session, automatically calculating token limits.
+     * This encapsulates model ID and token limits as a single entity.
+     * @param modelId The model ID to set
+     * @param models Optional list of available models to look up token limits from
      */
-    public setTokenLimits(limits: TokenLimits): void {
-        this.#tokenLimits = limits
+    public setModel(modelId: string | undefined, models?: Model[]): void {
+        this.#modelId = modelId
+        const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(models?.find(m => m.id === modelId))
+        this.#tokenLimits = TokenLimitsCalculator.calculate(maxInputTokens)
     }
 
     public async sendMessage(request: SendMessageCommandInput): Promise<SendMessageCommandOutput> {
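Note (illustrative only, not part of the patch series): a minimal TypeScript sketch of how the limits introduced above are derived, using only the API added in tokenLimitsCalculator.ts and the model shape returned by listAvailableModels. The import path and the model literal are assumptions for the example, not values from the patches.

import { TokenLimitsCalculator } from './utils/tokenLimitsCalculator'

// Hypothetical entry shaped like a listAvailableModels result with tokenLimits populated.
const model = { id: 'model-2', name: 'Model 2', description: 'Test', tokenLimits: { maxInputTokens: 300_000 } }

// Falls back to DEFAULT_MAX_INPUT_TOKENS (200_000) when the model or its tokenLimits is absent.
const maxInputTokens = TokenLimitsCalculator.extractMaxInputTokens(model) // 300_000

// maxOverallCharacters = floor(300_000 * 3.5) = 1_050_000
// inputLimit = compactionThreshold = floor(0.7 * 1_050_000) = 735_000
const limits = TokenLimitsCalculator.calculate(maxInputTokens)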