diff --git a/cecli/args.py b/cecli/args.py index eb05b51231e..83286782492 100644 --- a/cecli/args.py +++ b/cecli/args.py @@ -276,7 +276,10 @@ def get_parser(default_config_files, git_root): group.add_argument( "--retries", metavar="RETRIES_JSON", - help="Specify LLM retry configuration as a JSON string", + help=( + 'Specify LLM retry configuration as a JSON/YAML string (e.g., \'{"retry_on_empty": ' + "true}')" + ), default=None, ) diff --git a/cecli/args_formatter.py b/cecli/args_formatter.py index 01b9bc94094..aaa9463c3b3 100644 --- a/cecli/args_formatter.py +++ b/cecli/args_formatter.py @@ -132,6 +132,16 @@ def _format_action(self, action): break switch = switch.lstrip("-") + if switch == "retries": + parts.append(f"## {action.help}") + parts.append("#retries:") + parts.append("# retry-timeout: 60") + parts.append("# retry-backoff-factor: 2.0") + parts.append("# retry-on-unavailable: true") + parts.append("# retry-on-empty: false") + parts.append("") + return "\n".join(parts) + if isinstance(action, argparse._StoreTrueAction): default = False elif isinstance(action, argparse._StoreConstAction): diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py index c28dc866cc6..7372cca1277 100755 --- a/cecli/coders/base_coder.py +++ b/cecli/coders/base_coder.py @@ -91,6 +91,10 @@ class FinishReasonLength(Exception): pass +class EmptyResponseError(Exception): + pass + + def wrap_fence(name): return f"<{name}>", f"" @@ -2399,9 +2403,43 @@ async def format_in_executor(): try: while True: try: + self.empty_response = False async for chunk in self.send(messages, tools=self.get_tool_list()): yield chunk break + except EmptyResponseError: + self.io.tool_warning(self.empty_llm_tool_warning()) + + retry_on_empty = False + retries_config = self.get_active_model().retries + if isinstance(retries_config, str): + try: + retries_config = json.loads(retries_config) + except json.JSONDecodeError: + self.io.tool_warning( + f"Could not parse retries config: {retries_config}" + ) + retries_config = {} + if isinstance(retries_config, dict): + retry_on_empty = retries_config.get("retry_on_empty", False) + + if not retry_on_empty: + break + + retry_delay *= 2 + if retry_delay > RETRY_TIMEOUT: + self.io.tool_error("Retry timeout exceeded on empty response.") + break + + self.io.tool_output(f"Retrying in {retry_delay:.1f} seconds...") + + _res, interrupted_sleep = await coroutines.interruptible( + asyncio.sleep(retry_delay), self.interrupt_event + ) + if interrupted_sleep: + interrupted = True + break + continue except litellm_ex.exceptions_tuple() as err: ex_info = litellm_ex.get_ex_info(err) @@ -3302,6 +3340,9 @@ async def send(self, messages, model=None, functions=None, tools=None): else: await self.show_send_output(completion) + if self.empty_response: + raise EmptyResponseError + response, func_err, content_err = self.consolidate_chunks() if response: @@ -3382,7 +3423,8 @@ async def show_send_output(self, completion): and not len(self.partial_response_tool_calls) and not len(self.partial_response_reasoning_content) ): - self.io.tool_warning(self.empty_llm_tool_warning()) + self.empty_response = True + return self.io.assistant_output(show_resp, pretty=self.show_pretty()) @@ -3539,7 +3581,8 @@ async def show_send_output_stream(self, completion): return if not received_content and len(self.partial_response_tool_calls) == 0: - self.io.tool_warning(self.empty_llm_tool_warning()) + self.empty_response = True + return def consolidate_chunks(self): if self.partial_response_consolidated: