WEIFENG2333
diff --git a/app/components/WhisperCppSettingWidget.py b/app/components/WhisperCppSettingWidget.py
Lines changed: 1 addition & 3 deletions
diff --git a/app/core/asr/base.py b/app/core/asr/base.py
Lines changed: 13 additions & 11 deletions
diff --git a/app/core/asr/bcut.py b/app/core/asr/bcut.py
Lines changed: 2 additions & 2 deletions
diff --git a/app/core/asr/faster_whisper.py b/app/core/asr/faster_whisper.py
Lines changed: 12 additions & 12 deletions
diff --git a/app/core/asr/jianying.py b/app/core/asr/jianying.py
Lines changed: 6 additions & 6 deletions
diff --git a/app/core/asr/whisper_api.py b/app/core/asr/whisper_api.py
Lines changed: 4 additions & 4 deletions
diff --git a/app/core/asr/whisper_cpp.py b/app/core/asr/whisper_cpp.py
Lines changed: 14 additions & 13 deletions
diff --git a/resource/translations/VideoCaptioner_en_US.ts b/resource/translations/VideoCaptioner_en_US.ts
Lines changed: 1 addition & 1 deletion
diff --git a/resource/translations/VideoCaptioner_zh_CN.ts b/resource/translations/VideoCaptioner_zh_CN.ts
Lines changed: 1 addition & 1 deletion
diff --git a/resource/translations/VideoCaptioner_zh_HK.ts b/resource/translations/VideoCaptioner_zh_HK.ts
Lines changed: 1 addition & 1 deletion
@@ -527,9 +527,7 @@ def setup_ui(self):
         self.container.setStyleSheet("QWidget{background: transparent}")
         self.containerLayout = QVBoxLayout(self.container)
 
-        self.setting_group = SettingCardGroup(
-            self.tr("Whisper CPP 设置（不稳定 🤔）"), self
-        )
+        self.setting_group = SettingCardGroup(self.tr("Whisper CPP 设置"), self)
 
         # 模型选择
         self.model_card = ComboBoxSettingCard(
 
@@ -36,18 +36,18 @@ class BaseASR:
 
     def __init__(
         self,
-        audio_path: Optional[Union[str, bytes]] = None,
+        audio_input: Optional[Union[str, bytes]] = None,
         use_cache: bool = False,
         need_word_time_stamp: bool = False,
     ):
         """Initialize ASR with audio data.
 
         Args:
-            audio_path: Path to audio file or raw audio bytes
+            audio_input: Path to audio file or raw audio bytes
             use_cache: Whether to cache recognition results
             need_word_time_stamp: Whether to return word-level timestamps
         """
-        self.audio_path = audio_path
+        self.audio_input = audio_input
         self.file_binary = None
         self.use_cache = use_cache
         self._set_data()
@@ -56,25 +56,27 @@ def __init__(
 
     def _set_data(self):
         """Load audio data and compute CRC32 hash for cache key."""
-        if isinstance(self.audio_path, bytes):
-            self.file_binary = self.audio_path
-        elif isinstance(self.audio_path, str):
-            ext = self.audio_path.split(".")[-1].lower()
+        if isinstance(self.audio_input, bytes):
+            self.file_binary = self.audio_input
+        elif isinstance(self.audio_input, str):
+            ext = self.audio_input.split(".")[-1].lower()
             assert (
                 ext in self.SUPPORTED_SOUND_FORMAT
             ), f"Unsupported sound format: {ext}"
-            assert os.path.exists(self.audio_path), f"File not found: {self.audio_path}"
-            with open(self.audio_path, "rb") as f:
+            assert os.path.exists(
+                self.audio_input
+            ), f"File not found: {self.audio_input}"
+            with open(self.audio_input, "rb") as f:
                 self.file_binary = f.read()
         else:
-            raise ValueError("audio_path must be provided as string or bytes")
+            raise ValueError("audio_input must be provided as string or bytes")
         crc32_value = zlib.crc32(self.file_binary) & 0xFFFFFFFF
         self.crc32_hex = format(crc32_value, "08x")
 
     def _get_audio_duration(self) -> float:
         """Get audio duration in seconds using pydub."""
         if not self.file_binary:
-            return 0.0
+            return 0.01
         try:
             audio = AudioSegment.from_file(BytesIO(self.file_binary))
             return audio.duration_seconds
 
@@ -30,11 +30,11 @@ class BcutASR(BaseASR):
 
     def __init__(
         self,
-        audio_path: Union[str, bytes],
+        audio_input: Union[str, bytes],
         use_cache: bool = True,
         need_word_time_stamp: bool = False,
     ):
-        super().__init__(audio_path, use_cache=use_cache)
+        super().__init__(audio_input, use_cache=use_cache)
         self.session = requests.Session()
         self.task_id: Optional[str] = None
         self.__etags: List[str] = []
 
@@ -5,7 +5,7 @@
 import subprocess
 import tempfile
 from pathlib import Path
-from typing import Any, Callable, List, Optional
+from typing import Any, Callable, List, Optional, Union
 
 import GPUtil
 
@@ -27,7 +27,7 @@ class FasterWhisperASR(BaseASR):
 
     def __init__(
         self,
-        audio_path: str,
+        audio_input: Union[str, bytes],
         faster_whisper_program: str,
         whisper_model: str,
         model_dir: str,
@@ -52,7 +52,7 @@ def __init__(
         max_comma_cent: int = 50,
         prompt: Optional[str] = None,
     ):
-        super().__init__(audio_path, use_cache)
+        super().__init__(audio_input, use_cache)
 
         # 基本参数
         self.model_path = whisper_model
@@ -112,7 +112,7 @@ def __init__(
                 )
             self.faster_whisper_program = "faster-whisper-xxl"
 
-    def _build_command(self, audio_path: str) -> List[str]:
+    def _build_command(self, audio_input: str) -> List[str]:
         """Build command line arguments for faster-whisper."""
 
         cmd = [
@@ -129,7 +129,7 @@ def _build_command(self, audio_path: str) -> List[str]:
 
         cmd.extend(
             [
-                str(audio_path),
+                str(audio_input),
                 "-l",
                 self.language,
                 "-d",
@@ -204,7 +204,7 @@ def _build_command(self, audio_path: str) -> List[str]:
 
     def _make_segments(self, resp_data: str) -> List[ASRDataSeg]:
         asr_data = ASRData.from_srt(resp_data)
-        
+
         # 幻觉文本关键词列表
         hallucination_keywords = [
             "请不吝点赞 订阅 转发",
@@ -214,17 +214,17 @@ def _make_segments(self, resp_data: str) -> List[ASRDataSeg]:
         filtered_segments = []
         for seg in asr_data.segments:
             text = seg.text.strip()
-            
+
             # 跳过音乐标记
             if text.startswith(("【", "[", "(", "（")):
                 continue
-            
+
             # 跳过包含幻觉关键词的文本
             if any(keyword in text for keyword in hallucination_keywords):
                 continue
-            
+
             filtered_segments.append(seg)
-        
+
         return filtered_segments
 
     def _run(
@@ -241,8 +241,8 @@ def _default_callback(x, y):
             wav_path = temp_dir / "audio.wav"
             output_path = wav_path.with_suffix(".srt")
 
-            if isinstance(self.audio_path, str):
-                shutil.copy2(self.audio_path, wav_path)
+            if isinstance(self.audio_input, str):
+                shutil.copy2(self.audio_input, wav_path)
             else:
                 if self.file_binary:
                     wav_path.write_bytes(self.file_binary)
 
@@ -24,14 +24,14 @@ class JianYingASR(BaseASR):
 
     def __init__(
         self,
-        audio_path: Union[str, bytes],
+        audio_input: Union[str, bytes],
         use_cache: bool = False,
         need_word_time_stamp: bool = False,
         start_time: float = 0,
         end_time: float = 6000,
     ):
-        super().__init__(audio_path, use_cache)
-        self.audio_path = audio_path
+        super().__init__(audio_input, use_cache)
+        self.audio_input = audio_input
         self.end_time = end_time
         self.start_time = start_time
 
@@ -221,10 +221,10 @@ def _upload_sign(self):
 
     def _upload_auth(self):
         """Get upload authorization"""
-        if isinstance(self.audio_path, bytes):
-            file_size = len(self.audio_path)
+        if isinstance(self.audio_input, bytes):
+            file_size = len(self.audio_input)
         else:
-            file_size = os.path.getsize(self.audio_path)
+            file_size = os.path.getsize(self.audio_input)
         request_parameters = f"Action=ApplyUploadInner&FileSize={file_size}&FileType=object&IsInner=1&SpaceName=lv-mac-recognition&Version=2020-11-19&s=5y0udbjapi"
 
         t = datetime.datetime.utcnow()
 
@@ -1,4 +1,4 @@
-from typing import Any, Callable, List, Optional
+from typing import Any, Callable, List, Optional, Union
 
 from openai import OpenAI
 
@@ -19,7 +19,7 @@ class WhisperAPI(BaseASR):
 
     def __init__(
         self,
-        audio_path: str,
+        audio_input: Union[str, bytes],
         whisper_model: str,
         need_word_time_stamp: bool = False,
         language: str = "zh",
@@ -31,7 +31,7 @@ def __init__(
         """Initialize Whisper API.
 
         Args:
-            audio_path: Path to audio file
+            audio_input: Path to audio file or raw audio bytes
             whisper_model: Model name
             need_word_time_stamp: Return word-level timestamps
             language: Language code (default: zh)
@@ -40,7 +40,7 @@ def __init__(
             api_key: API key
             use_cache: Enable caching
         """
-        super().__init__(audio_path, use_cache)
+        super().__init__(audio_input, use_cache)
 
         self.base_url = normalize_base_url(base_url)
         self.api_key = api_key.strip()
 
@@ -5,7 +5,7 @@
 import tempfile
 import time
 from pathlib import Path
-from typing import Any, Callable, List, Optional
+from typing import Any, Callable, List, Optional, Union
 
 from ...config import MODEL_PATH
 from ..utils.logger import setup_logger
@@ -25,16 +25,20 @@ class WhisperCppASR(BaseASR):
 
     def __init__(
         self,
-        audio_path,
+        audio_input: Union[str, bytes],
         language="en",
         whisper_cpp_path=None,
         whisper_model=None,
         use_cache: bool = False,
         need_word_time_stamp: bool = False,
     ):
-        super().__init__(audio_path, use_cache)
-        assert os.path.exists(audio_path), f"Audio file not found: {audio_path}"
-        assert audio_path.endswith(".wav"), f"Audio must be WAV format: {audio_path}"
+        super().__init__(audio_input, use_cache)
+
+        if isinstance(audio_input, str):
+            assert os.path.exists(audio_input), f"Audio file not found: {audio_input}"
+            assert audio_input.endswith(
+                ".wav"
+            ), f"Audio must be WAV format: {audio_input}"
 
         # Auto-detect whisper executable if not provided
         if whisper_cpp_path is None:
@@ -116,13 +120,13 @@ def _default_callback(_progress: int, _message: str) -> None:
 
         with tempfile.TemporaryDirectory() as temp_path:
             temp_dir = Path(temp_path)
-            wav_path = temp_dir / "audio.wav"
+            wav_path = temp_dir / "whisper_cpp_audio.wav"
             output_path = wav_path.with_suffix(".srt")
 
             try:
                 # 复制音频文件
-                if isinstance(self.audio_path, str):
-                    shutil.copy2(self.audio_path, wav_path)
+                if isinstance(self.audio_input, str):
+                    shutil.copy2(self.audio_input, wav_path)
                 else:
                     if self.file_binary:
                         wav_path.write_bytes(self.file_binary)
@@ -136,10 +140,7 @@ def _default_callback(_progress: int, _message: str) -> None:
                 logger.info("Whisper.cpp command: %s", " ".join(whisper_params))
 
                 # Get audio duration
-                if isinstance(self.audio_path, str):
-                    total_duration = self.get_audio_duration(self.audio_path)
-                else:
-                    total_duration = 600
+                total_duration = self.audio_duration
                 logger.info("Audio duration: %d seconds", total_duration)
 
                 # Start process
@@ -272,7 +273,7 @@ def detect_whisper_executable() -> str:
 if __name__ == "__main__":
     # 简短示例
     asr = WhisperCppASR(
-        audio_path="audio.mp3",
+        audio_input="audio.mp3",
         whisper_model="tiny",
         whisper_cpp_path="bin/whisper-cpp.exe",
         language="en",
 
@@ -2499,7 +2499,7 @@ Note: When using small LLM models, it is recommended to keep the script within 1
     <name>WhisperCppSettingWidget</name>
     <message>
         <location filename="../../app/components/WhisperCppSettingWidget.py" line="530" />
-        <source>Whisper CPP 设置（不稳定 🤔）</source>
+        <source>Whisper CPP 设置</source>
         <translation>Whisper CPP Settings (unstable 🤔)</translation>
     </message>
     <message>
 
@@ -2482,7 +2482,7 @@
     <name>WhisperCppSettingWidget</name>
     <message>
         <location filename="../../app/components/WhisperCppSettingWidget.py" line="530"/>
-        <source>Whisper CPP 设置（不稳定 🤔）</source>
+        <source>Whisper CPP 设置</source>
         <translation type="unfinished"></translation>
     </message>
     <message>
 
@@ -2503,7 +2503,7 @@
     <name>WhisperCppSettingWidget</name>
     <message>
         <location filename="../../app/components/WhisperCppSettingWidget.py" line="530"/>
-        <source>Whisper CPP 设置（不稳定 🤔）</source>
+        <source>Whisper CPP 设置</source>
         <translation>Whisper CPP 設置（不穩定 🤔）</translation>
     </message>
     <message>