Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cecli/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,10 @@ def get_parser(default_config_files, git_root):
group.add_argument(
"--retries",
metavar="RETRIES_JSON",
help="Specify LLM retry configuration as a JSON string",
help=(
'Specify LLM retry configuration as a JSON/YAML string (e.g., \'{"retry_on_empty": '
"true}')"
),
default=None,
)

Expand Down
10 changes: 10 additions & 0 deletions cecli/args_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,16 @@ def _format_action(self, action):
break
switch = switch.lstrip("-")

if switch == "retries":
parts.append(f"## {action.help}")
parts.append("#retries:")
parts.append("# retry-timeout: 60")
parts.append("# retry-backoff-factor: 2.0")
parts.append("# retry-on-unavailable: true")
parts.append("# retry-on-empty: false")
parts.append("")
return "\n".join(parts)

if isinstance(action, argparse._StoreTrueAction):
default = False
elif isinstance(action, argparse._StoreConstAction):
Expand Down
47 changes: 45 additions & 2 deletions cecli/coders/base_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ class FinishReasonLength(Exception):
pass


class EmptyResponseError(Exception):
    """Signal that the LLM returned a response with nothing usable in it.

    Raised by the coder's ``send`` step when the ``empty_response`` flag was
    set while displaying the completion, so that the calling retry loop can
    decide whether to warn and stop or to back off and try again.
    """


def wrap_fence(name):
    """Return the ``(opening, closing)`` tag pair for a fence called *name*.

    The opening tag is ``<name>`` and the closing tag is ``</name>``; *name*
    is interpolated as-is, with no escaping or validation.
    """
    opening = f"<{name}>"
    closing = f"</{name}>"
    return opening, closing

Expand Down Expand Up @@ -2399,9 +2403,43 @@ async def format_in_executor():
try:
while True:
try:
self.empty_response = False
async for chunk in self.send(messages, tools=self.get_tool_list()):
yield chunk
break
except EmptyResponseError:
self.io.tool_warning(self.empty_llm_tool_warning())

retry_on_empty = False
retries_config = self.get_active_model().retries
if isinstance(retries_config, str):
try:
retries_config = json.loads(retries_config)
except json.JSONDecodeError:
self.io.tool_warning(
f"Could not parse retries config: {retries_config}"
)
retries_config = {}
if isinstance(retries_config, dict):
retry_on_empty = retries_config.get("retry_on_empty", False)

if not retry_on_empty:
break

retry_delay *= 2
if retry_delay > RETRY_TIMEOUT:
self.io.tool_error("Retry timeout exceeded on empty response.")
break

self.io.tool_output(f"Retrying in {retry_delay:.1f} seconds...")

_res, interrupted_sleep = await coroutines.interruptible(
asyncio.sleep(retry_delay), self.interrupt_event
)
if interrupted_sleep:
interrupted = True
break
continue
except litellm_ex.exceptions_tuple() as err:
ex_info = litellm_ex.get_ex_info(err)

Expand Down Expand Up @@ -3302,6 +3340,9 @@ async def send(self, messages, model=None, functions=None, tools=None):
else:
await self.show_send_output(completion)

if self.empty_response:
raise EmptyResponseError

response, func_err, content_err = self.consolidate_chunks()

if response:
Expand Down Expand Up @@ -3382,7 +3423,8 @@ async def show_send_output(self, completion):
and not len(self.partial_response_tool_calls)
and not len(self.partial_response_reasoning_content)
):
self.io.tool_warning(self.empty_llm_tool_warning())
self.empty_response = True
return

self.io.assistant_output(show_resp, pretty=self.show_pretty())

Expand Down Expand Up @@ -3539,7 +3581,8 @@ async def show_send_output_stream(self, completion):
return

if not received_content and len(self.partial_response_tool_calls) == 0:
self.io.tool_warning(self.empty_llm_tool_warning())
self.empty_response = True
return

def consolidate_chunks(self):
if self.partial_response_consolidated:
Expand Down
Loading