docker · Priyanshu-sde · Jul 2, 2026 · Jul 3, 2026
@@ -184,7 +184,7 @@
       "properties": {
         "provider": {
           "type": "string",
-          "description": "The underlying provider type. Defaults to \"openai\" when not set. Supported values: openai, anthropic, google, amazon-bedrock, dmr, and any built-in alias (requesty, openrouter, azure, xai, ollama, mistral, baseten, ovhcloud, groq, fireworks, deepseek, cerebras, together, huggingface, moonshot, vercel, cloudflare-workers-ai, cloudflare-ai-gateway, etc.).",
+          "description": "The underlying provider type. Defaults to \"openai\" when not set. Supported values: openai, anthropic, google, amazon-bedrock, dmr, and any built-in alias (requesty, openrouter, azure, xai, ollama, mistral, baseten, ovhcloud, groq, fireworks, deepseek, cerebras, together, huggingface, moonshot, vercel, cloudflare-workers-ai, cloudflare-ai-gateway, nvidia, etc.).",
           "examples": [
             "openai",
             "anthropic",

@@ -79,6 +79,7 @@ for details.
 | Mistral             | `mistral`        | Mistral models                       | `MISTRAL_API_KEY`                   |
 | xAI                 | `xai`            | Grok models                          | `XAI_API_KEY`                       |
 | Nebius              | `nebius`         | Open-source and specialised models   | `NEBIUS_API_KEY`                    |
+| NVIDIA NIM          | `nvidia`         | Nemotron, Llama, Qwen, DeepSeek (open models) | `NVIDIA_API_KEY`               |
 | MiniMax             | `minimax`        | MiniMax models                       | `MINIMAX_API_KEY`                   |
 | Baseten             | `baseten`        | DeepSeek, Kimi, GLM, Llama models    | `BASETEN_API_KEY`                   |
 | OVHcloud            | `ovhcloud`       | Qwen, Llama, Mistral, DeepSeek (EU-hosted) | `OVH_AI_ENDPOINTS_ACCESS_TOKEN` |

@@ -17,7 +17,7 @@ models:
     first_available: [list] # Optional: candidate model refs, tried in order by available credentials.
                             # Mutually exclusive with other model settings.
     provider: string # Required unless using first_available. One of: openai, anthropic, google, amazon-bedrock,
-                     # dmr, mistral, xai, nebius, minimax, baseten, ovhcloud, groq, fireworks, deepseek, cerebras, together, huggingface, moonshot, vercel, cloudflare-workers-ai, cloudflare-ai-gateway, requesty, openrouter,
+                     # dmr, mistral, xai, nebius, nvidia, minimax, baseten, ovhcloud, groq, fireworks, deepseek, cerebras, together, huggingface, moonshot, vercel, cloudflare-workers-ai, cloudflare-ai-gateway, requesty, openrouter,
                      # azure, ollama, github-copilot, or a named provider defined
                      # under the top-level `providers:` section.
     model: string # Required: model identifier
@@ -48,7 +48,7 @@ models:
 | Property              | Type       | Required | Description                                                                           |
 | --------------------- | ---------- | -------- | ------------------------------------------------------------------------------------- |
 | `first_available`     | array      | ✗        | Candidate model references tried in order; selects the first whose credentials are configured. Mutually exclusive with other model settings. |
-| `provider`            | string     | ✓/✗      | Required for regular model definitions; omitted for `first_available` selectors. Provider: `openai`, `anthropic`, `google`, `amazon-bedrock`, `dmr`, `mistral`, `xai`, `nebius`, `minimax`, `baseten`, `ovhcloud`, `groq`, `fireworks`, `deepseek`, `cerebras`, `together`, `huggingface`, `moonshot`, `vercel`, `cloudflare-workers-ai`, `cloudflare-ai-gateway`, `requesty`, `openrouter`, `azure`, `ollama`, `github-copilot`, or any [named provider](../../providers/custom/index.md). |
+| `provider`            | string     | ✓/✗      | Required for regular model definitions; omitted for `first_available` selectors. Provider: `openai`, `anthropic`, `google`, `amazon-bedrock`, `dmr`, `mistral`, `xai`, `nebius`, `nvidia`, `minimax`, `baseten`, `ovhcloud`, `groq`, `fireworks`, `deepseek`, `cerebras`, `together`, `huggingface`, `moonshot`, `vercel`, `cloudflare-workers-ai`, `cloudflare-ai-gateway`, `requesty`, `openrouter`, `azure`, `ollama`, `github-copilot`, or any [named provider](../../providers/custom/index.md). |
 | `model`               | string     | ✓/✗      | Required for regular model definitions; omitted for `first_available` selectors. Model name (e.g., `gpt-4o`, `claude-sonnet-4-5`, `gemini-3.5-flash`) |
 | `temperature`         | float      | ✗        | Sampling randomness. Range is provider-dependent — typically `0.0–2.0` (Anthropic caps at `1.0`). `0.0` is deterministic. |
 | `max_tokens`          | int        | ✗        | Maximum response length in tokens                                                     |
@@ -400,7 +400,7 @@ See the [Anthropic provider page](../../providers/anthropic/index.md#thinking-di
 ## Custom HTTP Headers
 
 For OpenAI-compatible providers (`openai`, `github-copilot`, `mistral`, `xai`,
-`nebius`, `minimax`, `baseten`, `ovhcloud`, `groq`, `fireworks`, `deepseek`, `cerebras`, `together`, `huggingface`, `moonshot`, `vercel`, `cloudflare-workers-ai`, `cloudflare-ai-gateway`, `requesty`, `openrouter`, `ollama`, and any custom provider using the OpenAI API),
+`nebius`, `nvidia`, `minimax`, `baseten`, `ovhcloud`, `groq`, `fireworks`, `deepseek`, `cerebras`, `together`, `huggingface`, `moonshot`, `vercel`, `cloudflare-workers-ai`, `cloudflare-ai-gateway`, `requesty`, `openrouter`, `ollama`, and any custom provider using the OpenAI API),
 `provider_opts.http_headers` adds arbitrary HTTP headers to every outgoing
 request:
 

@@ -0,0 +1,114 @@
+---
+title: "NVIDIA NIM"
+description: "Use NVIDIA NIM models with docker-agent."
+keywords: docker agent, ai agents, model providers, llm, nvidia, nim, nemotron
+weight: 290
+---
+
+_Use NVIDIA NIM models with docker-agent._
+
+## Overview
+
+NVIDIA provides access to Nemotron and many other open-weight models through
+[build.nvidia.com](https://build.nvidia.com/) (with a free tier) via an
+OpenAI-compatible API. docker-agent includes built-in support for NVIDIA as an
+alias provider. The same alias also works with a self-hosted
+[NVIDIA NIM](https://docs.nvidia.com/nim/) deployment if you override `base_url`.
+
+## Setup
+
+1. Get an API key from [build.nvidia.com](https://build.nvidia.com/)
+2. Set the environment variable:
+
+   ```bash
+   export NVIDIA_API_KEY=your-api-key
+   ```
+
+## Usage
+
+### Inline Syntax
+
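+The simplest way to use NVIDIA NIM. The first `nvidia/` is the provider alias; the rest (`nvidia/nemotron-3-super-120b-a12b`) is the model ID, which carries its own `nvidia/` publisher prefix: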
+
+```yaml
+agents:
+  root:
+    model: nvidia/nvidia/nemotron-3-super-120b-a12b
+    description: Assistant using NVIDIA NIM
+    instruction: You are a helpful assistant.
+```
+
+### Named Model
+
+For more control over parameters:
+
+```yaml
+models:
+  nemotron:
+    provider: nvidia
+    model: nvidia/nemotron-3-super-120b-a12b
+    temperature: 0.7
+    max_tokens: 8192
+
+agents:
+  root:
+    model: nemotron
+    description: Assistant using NVIDIA NIM
+    instruction: You are a helpful assistant.
+```
+
+## Available Models
+
+NVIDIA NIM hosts Nemotron alongside many other open models (Llama, Qwen,
+DeepSeek, Mistral, ...). Check the [NVIDIA API catalog](https://build.nvidia.com/)
+for the current model list.
+
+| Model                                       | Description                       |
+| -------------------------------------------- | ---------------------------------- |
+| `nvidia/nemotron-3-super-120b-a12b`         | Nemotron 3 Super, reasoning + tool calling |
+| `nvidia/nemotron-3-nano-30b-a3b`            | Nemotron 3 Nano, smaller/faster    |
+| `meta/llama-3.3-70b-instruct`               | Llama 3.3 70B instruction-tuned    |
+| `qwen/qwen3-coder-480b-a35b-instruct`       | Qwen3 Coder, code-focused          |
+
+## On-Prem / Self-Hosted NIM
+
+For self-hosted NIM deployments, point `base_url` at your own endpoint instead
+of the hosted `integrate.api.nvidia.com` API:
+
+```yaml
+models:
+  local_nim:
+    provider: nvidia
+    model: meta/llama-3.3-70b-instruct
+    base_url: http://localhost:8000/v1
+```
+
+## How It Works
+
+NVIDIA is implemented as a built-in alias in docker-agent:
+
+- **API Type:** OpenAI-compatible (`openai_chatcompletions`)
+- **Base URL:** `https://integrate.api.nvidia.com/v1`
+- **Token Variable:** `NVIDIA_API_KEY`
+
+Because NIM fronts open-weight models whose chat templates often accept only
+a single system message, docker-agent coalesces the agent instruction and any
+toolset instructions into one leading system message before sending the
+request.
+
+## Example: Code Assistant
+
+```yaml
+agents:
+  coder:
+    model: nvidia/nvidia/nemotron-3-super-120b-a12b
+    description: Code assistant using Nemotron
+    instruction: |
+      You are an expert programmer using NVIDIA Nemotron.
+      Write clean, well-documented code.
+      Follow best practices for the language being used.
+    toolsets:
+      - type: filesystem
+      - type: shell
+      - type: think
+```
@@ -40,6 +40,7 @@ docker-agent also includes built-in aliases for these providers:
 | Mistral        | `mistral`        | `MISTRAL_API_KEY`                   |
 | xAI (Grok)     | `xai`            | `XAI_API_KEY`                       |
 | Nebius         | `nebius`         | `NEBIUS_API_KEY`                    |
+| NVIDIA NIM     | `nvidia`         | `NVIDIA_API_KEY`                    |
 | MiniMax        | `minimax`        | `MINIMAX_API_KEY`                   |
 | Baseten        | `baseten`        | `BASETEN_API_KEY`                   |
 | OVHcloud       | `ovhcloud`       | `OVH_AI_ENDPOINTS_ACCESS_TOKEN`     |

@@ -0,0 +1,17 @@
+# yaml-language-server: $schema=../agent-schema.json
+
+models:
+  nemotron:
+    provider: nvidia
+    model: nvidia/nemotron-3-super-120b-a12b
+
+agents:
+  root:
+    model: nemotron
+    description: Assistant using NVIDIA NIM
+    instruction: |
+      You are a helpful assistant.
+    toolsets:
+      - type: filesystem
+      - type: shell
+      - type: think
@@ -52,6 +52,11 @@ var Aliases = map[string]Alias{
 		BaseURL:     "https://api.studio.nebius.com/v1",
 		TokenEnvVar: "NEBIUS_API_KEY",
 	},
+	"nvidia": {
+		APIType:     "openai",
+		BaseURL:     "https://integrate.api.nvidia.com/v1",
+		TokenEnvVar: "NVIDIA_API_KEY",
+	},
 	"openrouter": {
 		APIType:     "openai",
 		BaseURL:     "https://openrouter.ai/api/v1",

@@ -46,6 +46,7 @@ func TestCatalogAliases(t *testing.T) {
 		"together":    {APIType: "openai", BaseURL: "https://api.together.xyz/v1", TokenEnvVar: "TOGETHER_API_KEY"},
 		"huggingface": {APIType: "openai", BaseURL: "https://router.huggingface.co/v1", TokenEnvVar: "HF_TOKEN"},
 		"moonshot":    {APIType: "openai", BaseURL: "https://api.moonshot.ai/v1", TokenEnvVar: "MOONSHOT_API_KEY"},
+		"nvidia":      {APIType: "openai", BaseURL: "https://integrate.api.nvidia.com/v1", TokenEnvVar: "NVIDIA_API_KEY"},
 		"vercel":      {APIType: "openai", BaseURL: "https://ai-gateway.vercel.sh/v1", TokenEnvVar: "AI_GATEWAY_API_KEY"},
 	}
 

@@ -212,6 +212,7 @@ var openModelHostProviders = map[string]bool{
 	"ovhcloud":    true,
 	"openrouter":  true,
 	"nebius":      true,
+	"nvidia":      true,
 	"cerebras":    true,
 	"fireworks":   true,
 	"together":    true,

@@ -88,6 +88,17 @@ var openAIAliasProviders = []openAIAliasProvider{
 		greeting:             "Hello from Hugging Face",
 		mergesSystemMessages: true,
 	},
+	{
+		// NVIDIA NIM fronts open-weight models (Nemotron, Llama, Qwen, ...),
+		// so its per-source system messages are coalesced like the other
+		// open-model hosts.
+		provider:             "nvidia",
+		envVar:               "NVIDIA_API_KEY",
+		testKey:              "nvapi-test-nvidia-key",
+		model:                "nvidia/llama-3.1-nemotron-70b-instruct",
+		greeting:             "Hello from NVIDIA NIM",
+		mergesSystemMessages: true,
+	},
 	{
 		// Moonshot AI is a first-party API serving its own Kimi lineup, so its
 		// per-source system messages are left untouched (mergesSystemMessages