diff --git a/agent-schema.json b/agent-schema.json index 1cc25f642..4e2a959f5 100644 --- a/agent-schema.json +++ b/agent-schema.json @@ -184,7 +184,7 @@ "properties": { "provider": { "type": "string", - "description": "The underlying provider type. Defaults to \"openai\" when not set. Supported values: openai, anthropic, google, amazon-bedrock, dmr, and any built-in alias (requesty, openrouter, azure, xai, ollama, mistral, baseten, ovhcloud, groq, fireworks, deepseek, cerebras, together, huggingface, moonshot, vercel, cloudflare-workers-ai, cloudflare-ai-gateway, etc.).", + "description": "The underlying provider type. Defaults to \"openai\" when not set. Supported values: openai, anthropic, google, amazon-bedrock, dmr, and any built-in alias (requesty, openrouter, azure, xai, ollama, mistral, baseten, ovhcloud, groq, fireworks, deepseek, cerebras, together, huggingface, moonshot, vercel, cloudflare-workers-ai, cloudflare-ai-gateway, nvidia, etc.).", "examples": [ "openai", "anthropic", diff --git a/docs/concepts/models/index.md b/docs/concepts/models/index.md index 85fb24725..e948ebf7b 100644 --- a/docs/concepts/models/index.md +++ b/docs/concepts/models/index.md @@ -79,6 +79,7 @@ for details. | Mistral | `mistral` | Mistral models | `MISTRAL_API_KEY` | | xAI | `xai` | Grok models | `XAI_API_KEY` | | Nebius | `nebius` | Open-source and specialised models | `NEBIUS_API_KEY` | +| NVIDIA NIM | `nvidia` | Nemotron, Llama, Qwen, DeepSeek (open models) | `NVIDIA_API_KEY` | | MiniMax | `minimax` | MiniMax models | `MINIMAX_API_KEY` | | Baseten | `baseten` | DeepSeek, Kimi, GLM, Llama models | `BASETEN_API_KEY` | | OVHcloud | `ovhcloud` | Qwen, Llama, Mistral, DeepSeek (EU-hosted) | `OVH_AI_ENDPOINTS_ACCESS_TOKEN` | diff --git a/docs/configuration/models/index.md b/docs/configuration/models/index.md index 086a0562c..1c02dbcc7 100644 --- a/docs/configuration/models/index.md +++ b/docs/configuration/models/index.md @@ -17,7 +17,7 @@ models: first_available: [list] # Optional: candidate model refs, tried in order by available credentials. # Mutually exclusive with other model settings. provider: string # Required unless using first_available. One of: openai, anthropic, google, amazon-bedrock, - # dmr, mistral, xai, nebius, minimax, baseten, ovhcloud, groq, fireworks, deepseek, cerebras, together, huggingface, moonshot, vercel, cloudflare-workers-ai, cloudflare-ai-gateway, requesty, openrouter, + # dmr, mistral, xai, nebius, nvidia, minimax, baseten, ovhcloud, groq, fireworks, deepseek, cerebras, together, huggingface, moonshot, vercel, cloudflare-workers-ai, cloudflare-ai-gateway, requesty, openrouter, # azure, ollama, github-copilot, or a named provider defined # under the top-level `providers:` section. model: string # Required: model identifier @@ -48,7 +48,7 @@ models: | Property | Type | Required | Description | | --------------------- | ---------- | -------- | ------------------------------------------------------------------------------------- | | `first_available` | array | ✗ | Candidate model references tried in order; selects the first whose credentials are configured. Mutually exclusive with other model settings. | -| `provider` | string | ✓/✗ | Required for regular model definitions; omitted for `first_available` selectors. Provider: `openai`, `anthropic`, `google`, `amazon-bedrock`, `dmr`, `mistral`, `xai`, `nebius`, `minimax`, `baseten`, `ovhcloud`, `groq`, `fireworks`, `deepseek`, `cerebras`, `together`, `huggingface`, `moonshot`, `vercel`, `cloudflare-workers-ai`, `cloudflare-ai-gateway`, `requesty`, `openrouter`, `azure`, `ollama`, `github-copilot`, or any [named provider](../../providers/custom/index.md). | +| `provider` | string | ✓/✗ | Required for regular model definitions; omitted for `first_available` selectors. Provider: `openai`, `anthropic`, `google`, `amazon-bedrock`, `dmr`, `mistral`, `xai`, `nebius`, `nvidia`, `minimax`, `baseten`, `ovhcloud`, `groq`, `fireworks`, `deepseek`, `cerebras`, `together`, `huggingface`, `moonshot`, `vercel`, `cloudflare-workers-ai`, `cloudflare-ai-gateway`, `requesty`, `openrouter`, `azure`, `ollama`, `github-copilot`, or any [named provider](../../providers/custom/index.md). | | `model` | string | ✓/✗ | Required for regular model definitions; omitted for `first_available` selectors. Model name (e.g., `gpt-4o`, `claude-sonnet-4-5`, `gemini-3.5-flash`) | | `temperature` | float | ✗ | Sampling randomness. Range is provider-dependent — typically `0.0–2.0` (Anthropic caps at `1.0`). `0.0` is deterministic. | | `max_tokens` | int | ✗ | Maximum response length in tokens | @@ -400,7 +400,7 @@ See the [Anthropic provider page](../../providers/anthropic/index.md#thinking-di ## Custom HTTP Headers For OpenAI-compatible providers (`openai`, `github-copilot`, `mistral`, `xai`, -`nebius`, `minimax`, `baseten`, `ovhcloud`, `groq`, `fireworks`, `deepseek`, `cerebras`, `together`, `huggingface`, `moonshot`, `vercel`, `cloudflare-workers-ai`, `cloudflare-ai-gateway`, `requesty`, `openrouter`, `ollama`, and any custom provider using the OpenAI API), +`nebius`, `nvidia`, `minimax`, `baseten`, `ovhcloud`, `groq`, `fireworks`, `deepseek`, `cerebras`, `together`, `huggingface`, `moonshot`, `vercel`, `cloudflare-workers-ai`, `cloudflare-ai-gateway`, `requesty`, `openrouter`, `ollama`, and any custom provider using the OpenAI API), `provider_opts.http_headers` adds arbitrary HTTP headers to every outgoing request: diff --git a/docs/providers/nvidia/index.md b/docs/providers/nvidia/index.md new file mode 100644 index 000000000..51569fdaf --- /dev/null +++ b/docs/providers/nvidia/index.md @@ -0,0 +1,114 @@ +--- +title: "NVIDIA NIM" +description: "Use NVIDIA NIM models with docker-agent." +keywords: docker agent, ai agents, model providers, llm, nvidia, nim, nemotron +weight: 290 +--- + +_Use NVIDIA NIM models with docker-agent._ + +## Overview + +NVIDIA provides access to Nemotron and many other open-weight models through +[build.nvidia.com](https://build.nvidia.com/) (with a free tier) via an +OpenAI-compatible API. docker-agent includes built-in support for NVIDIA as an +alias provider. The same alias also works against a self-hosted +[NVIDIA NIM](https://docs.nvidia.com/nim/) deployment by overriding `base_url`. + +## Setup + +1. Get an API key from [build.nvidia.com](https://build.nvidia.com/) +2. Set the environment variable: + + ```bash + export NVIDIA_API_KEY=your-api-key + ``` + +## Usage + +### Inline Syntax + +The simplest way to use NVIDIA NIM: + +```yaml +agents: + root: + model: nvidia/nvidia/nemotron-3-super-120b-a12b + description: Assistant using NVIDIA NIM + instruction: You are a helpful assistant. +``` + +### Named Model + +For more control over parameters: + +```yaml +models: + nemotron: + provider: nvidia + model: nvidia/nemotron-3-super-120b-a12b + temperature: 0.7 + max_tokens: 8192 + +agents: + root: + model: nemotron + description: Assistant using NVIDIA NIM + instruction: You are a helpful assistant. +``` + +## Available Models + +NVIDIA NIM hosts Nemotron alongside many other open models (Llama, Qwen, +DeepSeek, Mistral, ...). Check the [NVIDIA API catalog](https://build.nvidia.com/) +for the current model list. + +| Model | Description | +| -------------------------------------------- | ---------------------------------- | +| `nvidia/nemotron-3-super-120b-a12b` | Nemotron 3 Super, reasoning + tool calling | +| `nvidia/nemotron-3-nano-30b-a3b` | Nemotron 3 Nano, smaller/faster | +| `meta/llama-3.3-70b-instruct` | Llama 3.3 70B instruction-tuned | +| `qwen/qwen3-coder-480b-a35b-instruct` | Qwen3 Coder, code-focused | + +## On-Prem / Self-Hosted NIM + +For self-hosted NIM deployments, point `base_url` at your own endpoint instead +of the hosted `integrate.api.nvidia.com` API: + +```yaml +models: + local_nim: + provider: nvidia + model: meta/llama-3.3-70b-instruct + base_url: http://localhost:8000/v1 +``` + +## How It Works + +NVIDIA is implemented as a built-in alias in docker-agent: + +- **API Type:** OpenAI-compatible (`openai_chatcompletions`) +- **Base URL:** `https://integrate.api.nvidia.com/v1` +- **Token Variable:** `NVIDIA_API_KEY` + +Because NIM fronts open-weight models whose chat templates often only accept +a single system message, docker-agent coalesces the agent instruction and any +toolset instructions into one leading system message before sending the +request. + +## Example: Code Assistant + +```yaml +agents: + coder: + model: nvidia/nvidia/nemotron-3-super-120b-a12b + description: Code assistant using Nemotron + instruction: | + You are an expert programmer using NVIDIA Nemotron. + Write clean, well-documented code. + Follow best practices for the language being used. + toolsets: + - type: filesystem + - type: shell + - type: think +``` diff --git a/docs/providers/overview/index.md b/docs/providers/overview/index.md index c3b09ee5d..0df56853d 100644 --- a/docs/providers/overview/index.md +++ b/docs/providers/overview/index.md @@ -40,6 +40,7 @@ docker-agent also includes built-in aliases for these providers: | Mistral | `mistral` | `MISTRAL_API_KEY` | | xAI (Grok) | `xai` | `XAI_API_KEY` | | Nebius | `nebius` | `NEBIUS_API_KEY` | +| NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` | | MiniMax | `minimax` | `MINIMAX_API_KEY` | | Baseten | `baseten` | `BASETEN_API_KEY` | | OVHcloud | `ovhcloud` | `OVH_AI_ENDPOINTS_ACCESS_TOKEN` | diff --git a/examples/nvidia.yaml b/examples/nvidia.yaml new file mode 100644 index 000000000..22bf17b92 --- /dev/null +++ b/examples/nvidia.yaml @@ -0,0 +1,17 @@ +# yaml-language-server: $schema=../agent-schema.json + +models: + nemotron: + provider: nvidia + model: nvidia/nemotron-3-super-120b-a12b + +agents: + root: + model: nemotron + description: Assistant using NVIDIA NIM + instruction: | + You are a helpful assistant. + toolsets: + - type: filesystem + - type: shell + - type: think diff --git a/pkg/model/provider/aliases.go b/pkg/model/provider/aliases.go index 3eddaa476..125515eba 100644 --- a/pkg/model/provider/aliases.go +++ b/pkg/model/provider/aliases.go @@ -52,6 +52,11 @@ var Aliases = map[string]Alias{ BaseURL: "https://api.studio.nebius.com/v1", TokenEnvVar: "NEBIUS_API_KEY", }, + "nvidia": { + APIType: "openai", + BaseURL: "https://integrate.api.nvidia.com/v1", + TokenEnvVar: "NVIDIA_API_KEY", + }, "openrouter": { APIType: "openai", BaseURL: "https://openrouter.ai/api/v1", diff --git a/pkg/model/provider/aliases_test.go b/pkg/model/provider/aliases_test.go index f991c2f9f..e0c48a1b5 100644 --- a/pkg/model/provider/aliases_test.go +++ b/pkg/model/provider/aliases_test.go @@ -46,6 +46,7 @@ func TestCatalogAliases(t *testing.T) { "together": {APIType: "openai", BaseURL: "https://api.together.xyz/v1", TokenEnvVar: "TOGETHER_API_KEY"}, "huggingface": {APIType: "openai", BaseURL: "https://router.huggingface.co/v1", TokenEnvVar: "HF_TOKEN"}, "moonshot": {APIType: "openai", BaseURL: "https://api.moonshot.ai/v1", TokenEnvVar: "MOONSHOT_API_KEY"}, + "nvidia": {APIType: "openai", BaseURL: "https://integrate.api.nvidia.com/v1", TokenEnvVar: "NVIDIA_API_KEY"}, "vercel": {APIType: "openai", BaseURL: "https://ai-gateway.vercel.sh/v1", TokenEnvVar: "AI_GATEWAY_API_KEY"}, } diff --git a/pkg/model/provider/openai/client.go b/pkg/model/provider/openai/client.go index 44956f1d1..96763a163 100644 --- a/pkg/model/provider/openai/client.go +++ b/pkg/model/provider/openai/client.go @@ -212,6 +212,7 @@ var openModelHostProviders = map[string]bool{ "ovhcloud": true, "openrouter": true, "nebius": true, + "nvidia": true, "cerebras": true, "fireworks": true, "together": true, diff --git a/pkg/model/provider/openai_alias_providers_test.go b/pkg/model/provider/openai_alias_providers_test.go index 10c26da35..8aedaab24 100644 --- a/pkg/model/provider/openai_alias_providers_test.go +++ b/pkg/model/provider/openai_alias_providers_test.go @@ -88,6 +88,17 @@ var openAIAliasProviders = []openAIAliasProvider{ greeting: "Hello from Hugging Face", mergesSystemMessages: true, }, + { + // NVIDIA NIM fronts open-weight models (Nemotron, Llama, Qwen, ...), + // so its per-source system messages are coalesced like the other + // open-model hosts. + provider: "nvidia", + envVar: "NVIDIA_API_KEY", + testKey: "nvapi-test-nvidia-key", + model: "nvidia/llama-3.1-nemotron-70b-instruct", + greeting: "Hello from NVIDIA NIM", + mergesSystemMessages: true, + }, { // Moonshot AI is a first-party API serving its own Kimi lineup, so its // per-source system messages are left untouched (mergesSystemMessages