 import { createOpenAI } from '@ai-sdk/openai'
+import { Ratelimit } from '@upstash/ratelimit'
+import { kv } from '@vercel/kv'
 import { ToolInvocation, convertToCoreMessages, streamText } from 'ai'
 import { codeBlock } from 'common-tags'
 import { convertToCoreTools, maxMessageContext, maxRowLimit, tools } from '~/lib/tools'
+import { createClient } from '~/utils/supabase/server'

 // Allow streaming responses up to 30 seconds
 export const maxDuration = 30

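+// Each user may consume at most 1,000,000 input (prompt) tokens per 30-minute fixed window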
+const inputTokenRateLimit = new Ratelimit({
+  redis: kv,
+  limiter: Ratelimit.fixedWindow(1000000, '30m'),
+  prefix: 'ratelimit:tokens:input',
+})
+
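+// Each user may consume at most 10,000 output (completion) tokens per 30-minute fixed window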
+const outputTokenRateLimit = new Ratelimit({
+  redis: kv,
+  limiter: Ratelimit.fixedWindow(10000, '30m'),
+  prefix: 'ratelimit:tokens:output',
+})
+
 type Message = {
   role: 'user' | 'assistant'
   content: string
@@ -22,6 +37,24 @@ const openai = createOpenAI({
 })

 export async function POST(req: Request) {
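+  // Resolve the authenticated user so token usage can be tracked per user ID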
+  const supabase = createClient()
+
+  const { data, error } = await supabase.auth.getUser()
+
+  // We have middleware, so this should never happen (used for type narrowing)
+  if (error) {
+    return new Response('Unauthorized', { status: 401 })
+  }
+
+  const { user } = data
+
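+  // Reject the request up front if the user has exhausted either token budget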
+  const { remaining: inputRemaining } = await inputTokenRateLimit.getRemaining(user.id)
+  const { remaining: outputRemaining } = await outputTokenRateLimit.getRemaining(user.id)
+
+  if (inputRemaining <= 0 || outputRemaining <= 0) {
+    return new Response('Rate limited', { status: 429 })
+  }
+
   const { messages }: { messages: Message[] } = await req.json()

   // Trim the message context sent to the LLM to mitigate token abuse
@@ -73,6 +106,14 @@ export async function POST(req: Request) {
     model: openai(chatModel),
     messages: convertToCoreMessages(trimmedMessageContext),
     tools: convertToCoreTools(tools),
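+    // Record actual token usage against both limits once the response finishes streaming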
+    async onFinish({ usage }) {
+      await inputTokenRateLimit.limit(user.id, {
+        rate: usage.promptTokens,
+      })
+      await outputTokenRateLimit.limit(user.id, {
+        rate: usage.completionTokens,
+      })
+    },
   })

   return result.toAIStreamResponse()