diff --git a/cecli/args.py b/cecli/args.py
index eb05b51231e..83286782492 100644
--- a/cecli/args.py
+++ b/cecli/args.py
@@ -276,7 +276,10 @@ def get_parser(default_config_files, git_root):
     group.add_argument(
         "--retries",
         metavar="RETRIES_JSON",
-        help="Specify LLM retry configuration as a JSON string",
+        help=(
+            'Specify LLM retry configuration as a JSON/YAML string (e.g., \'{"retry_on_empty": '
+            "true}')"
+        ),
         default=None,
     )
 
diff --git a/cecli/args_formatter.py b/cecli/args_formatter.py
index 01b9bc94094..aaa9463c3b3 100644
--- a/cecli/args_formatter.py
+++ b/cecli/args_formatter.py
@@ -132,6 +132,16 @@ def _format_action(self, action):
                 break
         switch = switch.lstrip("-")
 
+        if switch == "retries":
+            parts.append(f"## {action.help}")
+            parts.append("#retries:")
+            parts.append("#  retry-timeout: 60")
+            parts.append("#  retry-backoff-factor: 2.0")
+            parts.append("#  retry-on-unavailable: true")
+            parts.append("#  retry-on-empty: false")
+            parts.append("")
+            return "\n".join(parts)
+
         if isinstance(action, argparse._StoreTrueAction):
             default = False
         elif isinstance(action, argparse._StoreConstAction):
diff --git a/cecli/coders/base_coder.py b/cecli/coders/base_coder.py
index c28dc866cc6..7372cca1277 100755
--- a/cecli/coders/base_coder.py
+++ b/cecli/coders/base_coder.py
@@ -91,6 +91,10 @@ class FinishReasonLength(Exception):
     pass
 
 
+class EmptyResponseError(Exception):
+    """Raised by ``send`` when the LLM response was flagged as empty."""
+
+
 def wrap_fence(name):
     return f"<{name}>", f"</{name}>"
 
@@ -2399,9 +2403,43 @@ async def format_in_executor():
         try:
             while True:
                 try:
+                    self.empty_response = False
                     async for chunk in self.send(messages, tools=self.get_tool_list()):
                         yield chunk
                     break
+                except EmptyResponseError:
+                    self.io.tool_warning(self.empty_llm_tool_warning())
+
+                    # Accept both spellings: the sample config documents "retry-on-empty".
+                    retry_on_empty = False
+                    retries_config = self.get_active_model().retries
+                    if isinstance(retries_config, str):
+                        try:
+                            retries_config = json.loads(retries_config)
+                        except json.JSONDecodeError:
+                            self.io.tool_warning(
+                                f"Could not parse retries config: {retries_config}"
+                            )
+                            retries_config = {}
+                    if isinstance(retries_config, dict):
+                        retry_on_empty = retries_config.get(
+                            "retry_on_empty", retries_config.get("retry-on-empty", False)
+                        )
+                    if not retry_on_empty:
+                        break
+
+                    retry_delay *= 2
+                    if retry_delay > RETRY_TIMEOUT:
+                        self.io.tool_error("Retry timeout exceeded on empty response.")
+                        break
+                    self.io.tool_output(f"Retrying in {retry_delay:.1f} seconds...")
+                    _res, interrupted_sleep = await coroutines.interruptible(
+                        asyncio.sleep(retry_delay), self.interrupt_event
+                    )
+                    if interrupted_sleep:
+                        interrupted = True
+                        break
+                    continue
                 except litellm_ex.exceptions_tuple() as err:
                     ex_info = litellm_ex.get_ex_info(err)
 
@@ -3302,6 +3340,9 @@ async def send(self, messages, model=None, functions=None, tools=None):
             else:
                 await self.show_send_output(completion)
 
+            if self.empty_response:
+                raise EmptyResponseError
+
             response, func_err, content_err = self.consolidate_chunks()
 
             if response:
@@ -3382,7 +3423,8 @@ async def show_send_output(self, completion):
             and not len(self.partial_response_tool_calls)
             and not len(self.partial_response_reasoning_content)
         ):
-            self.io.tool_warning(self.empty_llm_tool_warning())
+            self.empty_response = True
+            return
 
         self.io.assistant_output(show_resp, pretty=self.show_pretty())
 
@@ -3539,7 +3581,8 @@ async def show_send_output_stream(self, completion):
             return
 
         if not received_content and len(self.partial_response_tool_calls) == 0:
-            self.io.tool_warning(self.empty_llm_tool_warning())
+            self.empty_response = True
+            return
 
     def consolidate_chunks(self):
         if self.partial_response_consolidated: