diff --git a/lightllm/server/detokenization/manager.py b/lightllm/server/detokenization/manager.py index 389171ba8a..4161f9d5f4 100644 --- a/lightllm/server/detokenization/manager.py +++ b/lightllm/server/detokenization/manager.py @@ -76,7 +76,10 @@ def handle_loop(self): for _ in range(recv_max_count): recv_obj: GroupReqIndexes = self.zmq_recv_socket.recv_pyobj(zmq.NOBLOCK) assert isinstance(recv_obj, GroupReqIndexes) - self._add_new_group_req_index(recv_obj=recv_obj) + try: + self._add_new_group_req_index(recv_obj=recv_obj) + except Exception: + logger.exception("add new group req index has exception") # 当队列中存在较多的请求时,将一次接受的数量上调 recv_max_count = min(int(recv_max_count * 1.3), 256) diff --git a/lightllm/server/httpserver/manager.py b/lightllm/server/httpserver/manager.py index e28e4c93ad..7ab8731a1c 100644 --- a/lightllm/server/httpserver/manager.py +++ b/lightllm/server/httpserver/manager.py @@ -284,6 +284,14 @@ async def generate( # 用于等待 pd_master 下发的交换信息 nixl_pd_event: asyncio.Event = None, ) -> AsyncGenerator[Tuple[int, str, dict, FinishStatus], None]: + if isinstance(prompt, str): + max_prompt_chars = self.max_req_total_len * 8 + if len(prompt) > max_prompt_chars: + raise ValueError( + f"prompt text length {len(prompt)} exceeds the character limit {max_prompt_chars}, " + f"the request is rejected before tokenization." + ) + start_time = time.time() request_headers = request.headers if request is not None else {} group_request_id = self.alloc_req_id(sampling_params, is_health_req)