diff --git a/cmd/root/flags.go b/cmd/root/flags.go index 5f126b279..d802a69b2 100644 --- a/cmd/root/flags.go +++ b/cmd/root/flags.go @@ -103,6 +103,13 @@ func addGatewayFlags(cmd *cobra.Command, runConfig *config.RuntimeConfig, loadUs env := runConfig.EnvProvider() + // A missing or malformed --env-from-file must abort the run: the flag + // is the documented way to supply credentials, so silently continuing + // without it leads to confusing "env var must be set" errors later. + if err := runConfig.EnvFilesError(); err != nil { + return fmt.Errorf("--env-from-file: %w", err) + } + // Precedence: CLI flag > environment variable > user config if runConfig.ModelsGateway == "" { if gateway, _ := env.Get(ctx, envModelsGateway); gateway != "" { diff --git a/cmd/root/flags_test.go b/cmd/root/flags_test.go index 0f9de6405..09d83d8f6 100644 --- a/cmd/root/flags_test.go +++ b/cmd/root/flags_test.go @@ -2,6 +2,8 @@ package root import ( "context" + "os" + "path/filepath" "testing" "github.com/spf13/cobra" @@ -327,3 +329,59 @@ func TestDefaultModelLogic(t *testing.T) { }) } } + +// A missing or malformed --env-from-file must abort the run instead of being +// logged and skipped: the flag is the documented way to supply credentials +// (issue #3442). +func TestEnvFromFileErrorsAbortPreRun(t *testing.T) { + t.Parallel() + + loadUserConfig := func() (*userconfig.Config, error) { + return &userconfig.Config{}, nil + } + + t.Run("missing file", func(t *testing.T) { + t.Parallel() + + cmd := &cobra.Command{ + SilenceErrors: true, + SilenceUsage: true, + RunE: func(*cobra.Command, []string) error { return nil }, + } + runConfig := config.RuntimeConfig{ + Config: config.Config{EnvFiles: []string{filepath.Join(t.TempDir(), "missing.env")}}, + } + addGatewayFlags(cmd, &runConfig, loadUserConfig) + + cmd.SetArgs(nil) + err := cmd.Execute() + + require.Error(t, err) + assert.Contains(t, err.Error(), "--env-from-file") + assert.Contains(t, err.Error(), "missing.env") + }) + + t.Run("malformed file", func(t *testing.T) { + t.Parallel() + + bad := filepath.Join(t.TempDir(), "bad.env") + require.NoError(t, os.WriteFile(bad, []byte("NOT_A_PAIR\n"), 0o600)) + + cmd := &cobra.Command{ + SilenceErrors: true, + SilenceUsage: true, + RunE: func(*cobra.Command, []string) error { return nil }, + } + runConfig := config.RuntimeConfig{ + Config: config.Config{EnvFiles: []string{bad}}, + } + addGatewayFlags(cmd, &runConfig, loadUserConfig) + + cmd.SetArgs(nil) + err := cmd.Execute() + + require.Error(t, err) + assert.Contains(t, err.Error(), "--env-from-file") + assert.Contains(t, err.Error(), "bad.env") + }) +} diff --git a/pkg/cli/runner.go b/pkg/cli/runner.go index 6ca9de94c..95ca85de9 100644 --- a/pkg/cli/runner.go +++ b/pkg/cli/runner.go @@ -213,8 +213,10 @@ func Run(ctx context.Context, out *Printer, cfg Config, rt runtime.Runtime, sess if strings.Contains(lowerErr, "context cancel") && ctx.Err() != nil { // treat Ctrl+C cancellations as non-errors lastErr = nil } else { + // Not printed here: the error is returned to the command layer, + // which prints it exactly once. Printing here too showed every + // failure twice in --exec runs. lastErr = fmt.Errorf("%s", e.Error) - out.PrintError(lastErr) } case *runtime.MaxIterationsReachedEvent: switch handleMaxIterationsAutoApprove(cfg.AutoApprove, &autoExtensions, e.MaxIterations) { @@ -274,6 +276,9 @@ func Run(ctx context.Context, out *Printer, cfg Config, rt runtime.Runtime, sess if err != nil { return fmt.Errorf("failed to read from stdin: %w", err) } + if strings.TrimSpace(string(buf)) == "" { + return errors.New(`no message received on stdin: with "-" the prompt is read from stdin, e.g. echo "Hello" | docker agent run -`) + } if err := oneLoop(string(buf), os.Stdin); err != nil { return err @@ -291,6 +296,11 @@ func Run(ctx context.Context, out *Printer, cfg Config, rt runtime.Runtime, sess if err != nil { return fmt.Errorf("failed to read from stdin: %w", err) } + // Exiting 0 with no output on empty input (e.g. bare `docker agent` + // in CI) would be a silent no-op; fail with the next steps instead. + if strings.TrimSpace(string(buf)) == "" { + return errors.New("no message provided and stdin is not a terminal; pass a message (docker agent run \"\"), pipe one on stdin, or run from an interactive terminal to use the chat UI") + } if err := oneLoop(string(buf), os.Stdin); err != nil { return err diff --git a/pkg/cli/runner_test.go b/pkg/cli/runner_test.go index 0c246f025..cebc7c83a 100644 --- a/pkg/cli/runner_test.go +++ b/pkg/cli/runner_test.go @@ -4,6 +4,8 @@ import ( "bytes" "context" "errors" + "os" + "strings" "sync" "testing" @@ -481,3 +483,88 @@ func TestPrepareUserMessage_CommandResolution(t *testing.T) { assert.Equal(t, "Fix the file main.go", msg.Message.Content, "Command should be resolved with args") } + +// swapStdin replaces os.Stdin with a pipe carrying the given content for the +// duration of the test. A pipe is never a terminal, so it also exercises the +// non-TTY stdin paths. Tests using it must not run in parallel: os.Stdin is +// process-global state. +func swapStdin(t *testing.T, content string) { + t.Helper() + + r, w, err := os.Pipe() + if err != nil { + t.Fatalf("creating stdin pipe: %v", err) + } + if _, err := w.WriteString(content); err != nil { + t.Fatalf("writing stdin pipe: %v", err) + } + if err := w.Close(); err != nil { + t.Fatalf("closing stdin pipe: %v", err) + } + + old := os.Stdin + os.Stdin = r + t.Cleanup(func() { + os.Stdin = old + _ = r.Close() + }) +} + +// A run that would silently do nothing (no message, empty non-TTY stdin, e.g. +// bare `docker agent` in CI) must fail with an actionable error instead of +// exiting 0 with no output (issue #3442). +func TestRunEmptyStdinNonTTYFails(t *testing.T) { + swapStdin(t, "") + + rt := &mockRuntime{} + var buf bytes.Buffer + sess := session.New() + + err := Run(t.Context(), NewPrinter(&buf), Config{}, rt, sess, nil) + assert.ErrorContains(t, err, "no message provided and stdin is not a terminal") + assert.ErrorContains(t, err, "interactive terminal") +} + +func TestRunEmptyStdinWithDashFails(t *testing.T) { + swapStdin(t, "") + + rt := &mockRuntime{} + var buf bytes.Buffer + sess := session.New() + + err := Run(t.Context(), NewPrinter(&buf), Config{}, rt, sess, []string{"-"}) + assert.ErrorContains(t, err, "no message received on stdin") +} + +func TestRunPipedStdinStillWorks(t *testing.T) { + swapStdin(t, "hello agent\n") + + rt := &mockRuntime{ + events: []runtime.Event{&runtime.AgentChoiceEvent{Content: "4"}}, + } + var buf bytes.Buffer + sess := session.New() + + err := Run(t.Context(), NewPrinter(&buf), Config{}, rt, sess, nil) + assert.NilError(t, err) + assert.Equal(t, len(sess.GetAllMessages()) > 0, true) +} + +// An ErrorEvent must surface exactly once: returned to the command layer +// (which prints it), never also printed by the runner (issue #3442). +func TestErrorEventReturnedNotPrinted(t *testing.T) { + t.Parallel() + + rt := &mockRuntime{ + events: []runtime.Event{runtime.Error("model failed: HTTP 404")}, + } + var buf bytes.Buffer + sess := session.New() + + err := Run(t.Context(), NewPrinter(&buf), Config{}, rt, sess, []string{"hello"}) + assert.ErrorContains(t, err, "model failed: HTTP 404") + + var runtimeErr RuntimeError + assert.Equal(t, errors.As(err, &runtimeErr), true) + assert.Equal(t, strings.Contains(buf.String(), "model failed"), false) +} diff --git a/pkg/config/config.go b/pkg/config/config.go index d569db4b6..e1d4f62e9 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -146,7 +146,7 @@ func CheckRequiredEnvVars(ctx context.Context, cfg *latest.Config, modelsGateway } } - missing, err := gatherMissingEnvVars(ctx, cfg, modelsGateway, env) + missing, missingModelCreds, err := gatherMissingEnvVars(ctx, cfg, modelsGateway, env) if err != nil { // If there's a tool preflight error, log it but continue slog.WarnContext(ctx, "Failed to preflight toolset environment variables; continuing", "error", err) @@ -155,7 +155,8 @@ func CheckRequiredEnvVars(ctx context.Context, cfg *latest.Config, modelsGateway // Return error if there are missing environment variables if len(missing) > 0 { return &environment.RequiredEnvError{ - Missing: missing, + Missing: missing, + MissingModelCredentials: missingModelCreds, } } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 0e323b4b2..030950698 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -283,6 +283,7 @@ func TestCheckRequiredEnvVars(t *testing.T) { var reqErr *environment.RequiredEnvError require.ErrorAs(t, err, &reqErr) assert.Equal(t, test.expectedMissing, reqErr.Missing) + assert.True(t, reqErr.MissingModelCredentials, "missing vars are model credentials, so the local-model hint must be offered") } }) } diff --git a/pkg/config/first_available.go b/pkg/config/first_available.go index abc70aea6..4c74df612 100644 --- a/pkg/config/first_available.go +++ b/pkg/config/first_available.go @@ -174,10 +174,15 @@ func (e *firstAvailableMissingEnvError) Error() string { fmt.Fprintln(&msg, "No 'first_available' candidate has credentials configured.") fmt.Fprintln(&msg, "Set the environment variables for at least one candidate:") - for _, candidate := range e.Candidates { + example := "OPENAI_API_KEY" + for i, candidate := range e.Candidates { fmt.Fprintf(&msg, " - %s: %s\n", candidate.Ref, strings.Join(candidate.Missing, ", ")) + if i == 0 && len(candidate.Missing) > 0 { + example = candidate.Missing[0] + } } - fmt.Fprintln(&msg, "\nEither:\n - Set one of those groups of environment variables before running docker agent\n - Run docker agent with --env-from-file\n - Store those secrets using one of the built-in environment variable providers.") + msg.WriteString("\n") + msg.WriteString(environment.SecretSourcesHelp(example)) return msg.String() } diff --git a/pkg/config/first_available_test.go b/pkg/config/first_available_test.go index 409ad0794..685406b73 100644 --- a/pkg/config/first_available_test.go +++ b/pkg/config/first_available_test.go @@ -180,8 +180,9 @@ func TestResolveFirstAvailableModels_NoCandidateAvailable(t *testing.T) { assert.Contains(t, err.Error(), "Set the environment variables for at least one candidate:") assert.Contains(t, err.Error(), " - anthropic/claude-sonnet-4-6: ANTHROPIC_API_KEY") assert.Contains(t, err.Error(), " - openai/gpt-5: OPENAI_API_KEY") - assert.Contains(t, err.Error(), "Set one of those groups of environment variables") - assert.Contains(t, err.Error(), "Run docker agent with --env-from-file") + assert.Contains(t, err.Error(), "export ANTHROPIC_API_KEY=") + assert.Contains(t, err.Error(), "--env-from-file") + assert.Contains(t, err.Error(), environment.SecretsDocsURL) } func TestResolveFirstAvailableModels_InvalidCandidate(t *testing.T) { diff --git a/pkg/config/gather.go b/pkg/config/gather.go index c41d0271f..dd39ae0e3 100644 --- a/pkg/config/gather.go +++ b/pkg/config/gather.go @@ -16,24 +16,26 @@ import ( ) // gatherMissingEnvVars finds out which environment variables are required by the models and tools. -// It returns the missing variables and any non-fatal error encountered during tool discovery. -func gatherMissingEnvVars(ctx context.Context, cfg *latest.Config, modelsGateway string, env environment.Provider) (missing []string, toolErr error) { +// It returns the missing variables, whether any of them is a model-provider +// credential (as opposed to a tool secret), and any non-fatal error +// encountered during tool discovery. +func gatherMissingEnvVars(ctx context.Context, cfg *latest.Config, modelsGateway string, env environment.Provider) (missing []string, missingModelCreds bool, toolErr error) { requiredEnv := map[string]bool{} + modelEnv := map[string]bool{} // Models + var modelNames []string if modelsGateway == "" { - names := GatherEnvVarsForModels(ctx, cfg, env) - for _, e := range names { - requiredEnv[e] = true - } + modelNames = GatherEnvVarsForModels(ctx, cfg, env) } else { // A gateway supplies credentials for routed models, but models that // bypass it dial their provider directly and still need their own // credentials present. - names := gatherEnvVarsForModels(ctx, cfg, env, true) - for _, e := range names { - requiredEnv[e] = true - } + modelNames = gatherEnvVarsForModels(ctx, cfg, env, true) + } + for _, e := range modelNames { + requiredEnv[e] = true + modelEnv[e] = true } // Tools @@ -52,10 +54,11 @@ func gatherMissingEnvVars(ctx context.Context, cfg *latest.Config, modelsGateway for _, e := range sortedKeys(requiredEnv) { if v, _ := env.Get(ctx, e); v == "" { missing = append(missing, e) + missingModelCreds = missingModelCreds || modelEnv[e] } } - return missing, toolErr + return missing, missingModelCreds, toolErr } func GatherEnvVarsForModels(ctx context.Context, cfg *latest.Config, env environment.Provider) []string { diff --git a/pkg/config/runtime.go b/pkg/config/runtime.go index 7bf2e0f8b..bbc7f21a0 100644 --- a/pkg/config/runtime.go +++ b/pkg/config/runtime.go @@ -19,6 +19,7 @@ type RuntimeConfig struct { EnvProviderForTests environment.Provider envProviderCached environment.Provider envProviderOnce sync.Once + envFilesErr error ModelsDevStoreOverride *modelsdev.Store modelsDevStore *modelsdev.Store @@ -82,6 +83,7 @@ func (runConfig *RuntimeConfig) Clone() *RuntimeConfig { Config: runConfig.Config, EnvProviderForTests: runConfig.EnvProviderForTests, envProviderCached: env, + envFilesErr: runConfig.envFilesErr, ModelsDevStoreOverride: runConfig.ModelsDevStoreOverride, modelsDevStore: store, modelsDevStoreErr: storeErr, @@ -140,6 +142,15 @@ func (runConfig *RuntimeConfig) EnvProvider() environment.Provider { return runConfig.envProviderCached } +// EnvFilesError reports a failure to resolve, read, or parse the configured +// env files (--env-from-file). The provider chain silently falls back to the +// default sources in that case, so callers that must not run without the +// file's variables (e.g. the CLI) should surface this error and abort. +func (runConfig *RuntimeConfig) EnvFilesError() error { + runConfig.EnvProvider() // materialize the provider chain + return runConfig.envFilesErr +} + func (runConfig *RuntimeConfig) computedEnvProvider() environment.Provider { defaultEnv := environment.NewDefaultProvider() @@ -148,12 +159,14 @@ func (runConfig *RuntimeConfig) computedEnvProvider() environment.Provider { runConfig.EnvFiles, err = environment.AbsolutePaths(runConfig.WorkingDir, runConfig.EnvFiles) if err != nil { slog.Error("Failed to make env file paths absolute", "error", err) + runConfig.envFilesErr = err return defaultEnv } envFilesProviders, err := environment.NewEnvFilesProvider(runConfig.EnvFiles) if err != nil { slog.Error("Failed to read env files", "error", err) + runConfig.envFilesErr = err return defaultEnv } diff --git a/pkg/config/runtime_test.go b/pkg/config/runtime_test.go index b5923c4ff..dcda15a6e 100644 --- a/pkg/config/runtime_test.go +++ b/pkg/config/runtime_test.go @@ -1,9 +1,12 @@ package config import ( + "os" + "path/filepath" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestClone_ChangeWorkingDir(t *testing.T) { @@ -24,3 +27,47 @@ func TestClone_ChangeWorkingDir(t *testing.T) { assert.Equal(t, "/newapp", original.WorkingDir) assert.Equal(t, "/cloneapp", clone.WorkingDir) } + +func TestEnvFilesError(t *testing.T) { + t.Parallel() + + t.Run("missing file", func(t *testing.T) { + t.Parallel() + missing := filepath.Join(t.TempDir(), "missing.env") + rc := &RuntimeConfig{Config: Config{EnvFiles: []string{missing}}} + + err := rc.EnvFilesError() + require.Error(t, err) + assert.Contains(t, err.Error(), "missing.env") + }) + + t.Run("malformed file", func(t *testing.T) { + t.Parallel() + bad := filepath.Join(t.TempDir(), "bad.env") + require.NoError(t, os.WriteFile(bad, []byte("NOT_A_PAIR\n"), 0o600)) + rc := &RuntimeConfig{Config: Config{EnvFiles: []string{bad}}} + + err := rc.EnvFilesError() + require.Error(t, err) + assert.Contains(t, err.Error(), "bad.env") + }) + + t.Run("valid file", func(t *testing.T) { + t.Parallel() + ok := filepath.Join(t.TempDir(), "ok.env") + require.NoError(t, os.WriteFile(ok, []byte("SOME_TEST_ONLY_VAR=some-value\n"), 0o600)) + rc := &RuntimeConfig{Config: Config{EnvFiles: []string{ok}}} + + require.NoError(t, rc.EnvFilesError()) + v, _ := rc.EnvProvider().Get(t.Context(), "SOME_TEST_ONLY_VAR") + assert.Equal(t, "some-value", v) + }) + + t.Run("clone preserves error", func(t *testing.T) { + t.Parallel() + missing := filepath.Join(t.TempDir(), "missing.env") + rc := &RuntimeConfig{Config: Config{EnvFiles: []string{missing}}} + + require.Error(t, rc.Clone().EnvFilesError()) + }) +} diff --git a/pkg/environment/env_files.go b/pkg/environment/env_files.go index 32d58fce0..c625c9eb2 100644 --- a/pkg/environment/env_files.go +++ b/pkg/environment/env_files.go @@ -92,7 +92,7 @@ func ReadEnvFile(absolutePath string) ([]KeyValuePair, error) { k, v, ok := strings.Cut(line, "=") if !ok { - return nil, fmt.Errorf("invalid env file line: %s", line) + return nil, fmt.Errorf("invalid line in env file %s: %q (expected KEY=VALUE)", absolutePath, line) } k = strings.TrimSpace(k) diff --git a/pkg/environment/env_files_test.go b/pkg/environment/env_files_test.go index 3a2ecce6f..d5f2c25c8 100644 --- a/pkg/environment/env_files_test.go +++ b/pkg/environment/env_files_test.go @@ -157,6 +157,9 @@ func TestReadEnvFileInvalid(t *testing.T) { require.Error(t, err) assert.Empty(t, lines) + // The error names the offending file so multi-file runs stay debuggable. + assert.Contains(t, err.Error(), filepath.Join(temp, ".invalid")) + assert.Contains(t, err.Error(), "KEY=VALUE") } func write(t *testing.T, path, content string) { diff --git a/pkg/environment/errors.go b/pkg/environment/errors.go index 3ee7d94ac..c4c05b6e7 100644 --- a/pkg/environment/errors.go +++ b/pkg/environment/errors.go @@ -5,8 +5,17 @@ import ( "strings" ) +// SecretsDocsURL is the documentation page describing every built-in secret +// source and how to configure it. +const SecretsDocsURL = "https://docs.docker.com/ai/docker-agent/guides/secrets/" + type RequiredEnvError struct { Missing []string + + // MissingModelCredentials reports whether at least one missing variable is + // a model-provider credential, in which case the error also suggests + // running a local model instead of configuring an API key. + MissingModelCredentials bool } var _ error = &RequiredEnvError{} @@ -18,7 +27,34 @@ func (e *RequiredEnvError) Error() string { for _, v := range e.Missing { fmt.Fprintf(&msg, " - %s\n", v) } - fmt.Fprintln(&msg, "\nEither:\n - Set those environment variables before running docker agent\n - Run docker agent with --env-from-file\n - Store those secrets using one of the built-in environment variable providers.") + + example := "OPENAI_API_KEY" + if len(e.Missing) > 0 { + example = e.Missing[0] + } + msg.WriteString("\n") + msg.WriteString(SecretSourcesHelp(example)) + + if e.MissingModelCredentials { + msg.WriteString("\nNo API key? Run a local model instead: docker agent run --model dmr/ai/qwen3 ...\nModels already pulled in Docker Model Runner are detected automatically by the default `auto` model.\n(`docker agent models` lists the models available to you)\n") + } return msg.String() } + +// SecretSourcesHelp returns guidance naming every built-in secret source with +// a one-line example for the given variable, plus a link to the docs. It is +// shared by the errors that report missing credentials so the advice never +// drifts between them. +func SecretSourcesHelp(exampleVar string) string { + var b strings.Builder + b.WriteString("Provide them using any of these sources:\n") + fmt.Fprintf(&b, " - Shell environment: export %s=\n", exampleVar) + b.WriteString(" - Env file: docker agent run --env-from-file ...\n") + fmt.Fprintf(&b, " - macOS Keychain: security add-generic-password -a \"$USER\" -s %s -w\n", exampleVar) + fmt.Fprintf(&b, " - pass: pass insert %s\n", exampleVar) + b.WriteString(" - Docker Desktop: keys stored in Docker Desktop are picked up when you are signed in\n") + b.WriteString(" - Credential helper: set `credential_helper` in ~/.config/cagent/config.yaml\n") + fmt.Fprintf(&b, "\nSee %s for details.\n", SecretsDocsURL) + return b.String() +} diff --git a/pkg/environment/errors_test.go b/pkg/environment/errors_test.go new file mode 100644 index 000000000..611503691 --- /dev/null +++ b/pkg/environment/errors_test.go @@ -0,0 +1,45 @@ +package environment + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestRequiredEnvError_NamesEverySecretSource(t *testing.T) { + t.Parallel() + + err := &RequiredEnvError{Missing: []string{"ANTHROPIC_API_KEY", "GITHUB_PERSONAL_ACCESS_TOKEN"}} + msg := err.Error() + + assert.Contains(t, msg, "environment variables must be set") + assert.Contains(t, msg, " - ANTHROPIC_API_KEY") + assert.Contains(t, msg, " - GITHUB_PERSONAL_ACCESS_TOKEN") + + // Every secret source comes with a concrete example using the first + // missing variable. + assert.Contains(t, msg, "export ANTHROPIC_API_KEY=") + assert.Contains(t, msg, "--env-from-file") + assert.Contains(t, msg, `security add-generic-password -a "$USER" -s ANTHROPIC_API_KEY -w`) + assert.Contains(t, msg, "pass insert ANTHROPIC_API_KEY") + assert.Contains(t, msg, "Docker Desktop") + assert.Contains(t, msg, "credential_helper") + assert.Contains(t, msg, SecretsDocsURL) + + // The local-model alternative only applies to model credentials. + assert.NotContains(t, msg, "dmr/ai/qwen3") +} + +func TestRequiredEnvError_SuggestsLocalModelForModelCredentials(t *testing.T) { + t.Parallel() + + err := &RequiredEnvError{ + Missing: []string{"OPENAI_API_KEY"}, + MissingModelCredentials: true, + } + msg := err.Error() + + assert.Contains(t, msg, "--model dmr/ai/qwen3") + assert.Contains(t, msg, "Models already pulled in Docker Model Runner are detected automatically") + assert.Contains(t, msg, "docker agent models") +} diff --git a/pkg/model/provider/dmr/available.go b/pkg/model/provider/dmr/available.go new file mode 100644 index 000000000..16bf0f566 --- /dev/null +++ b/pkg/model/provider/dmr/available.go @@ -0,0 +1,81 @@ +package dmr + +import ( + "context" + "fmt" + "net/http" + "slices" + "strings" +) + +// errIndicatesNotInstalled reports whether a `docker model` invocation failed +// because the Docker installation predates Model Runner (the CLI rejects the +// --json flag). Matching on content rather than the exact message keeps the +// detection stable across docker CLI usage-text changes. +func errIndicatesNotInstalled(err error) bool { + return err != nil && strings.Contains(err.Error(), "unknown flag: --json") +} + +// ModelNotAvailableError is returned at client-creation time when the +// requested model is not pulled in Docker Model Runner and the `docker model` +// CLI could not be used to offer a pull. Failing here replaces the raw HTTP +// 404 the chat endpoint would otherwise return at message time. +type ModelNotAvailableError struct { + Model string +} + +func (e *ModelNotAvailableError) Error() string { + return fmt.Sprintf("model %s is not available in Docker Model Runner\n\nTo resolve this, you can:\n - Pull it first: docker model pull %s\n - Or choose a model that is already available (see `docker model ls` or `docker agent models`)", e.Model, e.Model) +} + +// ModelPullErrorSummary is the concise one-liner used when this error is +// nested as the cause of another error (e.g. config.AutoModelFallbackError), +// so the full multi-line guidance is not duplicated. +func (e *ModelNotAvailableError) ModelPullErrorSummary() string { + return fmt.Sprintf("model %s is not pulled in Docker Model Runner", e.Model) +} + +// checkModelAvailable verifies through the DMR HTTP API that a model is +// pulled locally. It is the fallback used when `docker model status` fails +// (missing or broken CLI plugin) and the CLI-based inspect/pull flow is +// therefore unusable. +func checkModelAvailable(ctx context.Context, httpClient *http.Client, baseURL, model string) error { + available, err := listModelsAt(ctx, httpClient, baseURL) + if err != nil { + return fmt.Errorf("cannot query Docker Model Runner at %s (is it installed and running? https://docs.docker.com/ai/model-runner/get-started/): %w", baseURL, err) + } + if !modelAvailable(available, model) { + return &ModelNotAvailableError{Model: model} + } + return nil +} + +// modelAvailable reports whether model matches one of the locally-available +// IDs. An untagged model (e.g. "ai/qwen3") matches any tag of the same +// repository, mirroring how `docker model` resolves references; a model with +// an explicit tag requires an exact match. +func modelAvailable(available []string, model string) bool { + if slices.Contains(available, model) { + return true + } + if modelRepo(model) != model { + return false + } + for _, id := range available { + if modelRepo(id) == model { + return true + } + } + return false +} + +// modelRepo returns the repository portion of a DMR model ID, dropping a +// trailing ":" suffix. A trailing colon is only treated as a tag +// separator when the suffix has no slash, so a registry host:port like +// "registry:5000/ai/x" is preserved. +func modelRepo(id string) string { + if i := strings.LastIndex(id, ":"); i >= 0 && !strings.Contains(id[i+1:], "/") { + return id[:i] + } + return id +} diff --git a/pkg/model/provider/dmr/available_test.go b/pkg/model/provider/dmr/available_test.go new file mode 100644 index 000000000..72c4094e4 --- /dev/null +++ b/pkg/model/provider/dmr/available_test.go @@ -0,0 +1,98 @@ +package dmr + +import ( + "errors" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestErrIndicatesNotInstalled(t *testing.T) { + t.Parallel() + + assert.False(t, errIndicatesNotInstalled(nil)) + assert.False(t, errIndicatesNotInstalled(errors.New("dial tcp: connection refused"))) + + // The exact message the docker CLI printed when the test suite was + // written, and a variant with different usage text around it: detection + // must not depend on the full message staying byte-identical. + assert.True(t, errIndicatesNotInstalled(errors.New("unknown flag: --json\n\nUsage: docker [OPTIONS] COMMAND [ARG...]\n\nRun 'docker --help' for more information"))) + assert.True(t, errIndicatesNotInstalled(errors.New("some prefix\nunknown flag: --json\nsome other usage text"))) +} + +func TestModelAvailable(t *testing.T) { + t.Parallel() + + available := []string{"ai/embeddinggemma:latest", "ai/qwen3:latest", "registry:5000/ai/foo:latest"} + + tests := []struct { + model string + want bool + }{ + {"ai/qwen3:latest", true}, + {"ai/qwen3", true}, // untagged matches any tag of the repository + {"ai/qwen3:Q4_K_M", false}, + {"ai/other", false}, + {"registry:5000/ai/foo", true}, // host:port colon is not a tag separator + {"", false}, + } + + for _, tt := range tests { + t.Run(tt.model, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, modelAvailable(available, tt.model)) + }) + } + + assert.False(t, modelAvailable(nil, "ai/qwen3")) +} + +func TestCheckModelAvailable(t *testing.T) { + t.Parallel() + + newServer := func(body string) *httptest.Server { + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, "/models", r.URL.Path) + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(body)) + })) + } + + t.Run("model pulled", func(t *testing.T) { + t.Parallel() + server := newServer(`{"data":[{"id":"ai/qwen3:latest"}]}`) + defer server.Close() + + err := checkModelAvailable(t.Context(), server.Client(), server.URL+"/", "ai/qwen3") + require.NoError(t, err) + }) + + t.Run("model not pulled", func(t *testing.T) { + t.Parallel() + server := newServer(`{"data":[{"id":"ai/gemma3:latest"}]}`) + defer server.Close() + + err := checkModelAvailable(t.Context(), server.Client(), server.URL+"/", "ai/qwen3") + require.Error(t, err) + + var notAvailable *ModelNotAvailableError + require.ErrorAs(t, err, ¬Available) + assert.Equal(t, "ai/qwen3", notAvailable.Model) + assert.Contains(t, err.Error(), "not available in Docker Model Runner") + assert.Contains(t, err.Error(), "docker model pull ai/qwen3") + }) + + t.Run("endpoint unreachable", func(t *testing.T) { + t.Parallel() + server := newServer(`{"data":[]}`) + server.Close() // unreachable from the start + + err := checkModelAvailable(t.Context(), &http.Client{}, server.URL+"/", "ai/qwen3") + require.Error(t, err) + assert.Contains(t, err.Error(), "cannot query Docker Model Runner") + assert.Contains(t, err.Error(), "https://docs.docker.com/ai/model-runner/get-started/") + }) +} diff --git a/pkg/model/provider/dmr/client.go b/pkg/model/provider/dmr/client.go index 3dbe1c443..dfdcee95b 100644 --- a/pkg/model/provider/dmr/client.go +++ b/pkg/model/provider/dmr/client.go @@ -82,21 +82,27 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, opts ...options.Opt // Skip docker model status query when BaseURL is explicitly provided. // This avoids unnecessary exec calls and speeds up tests/CI scenarios. var endpoint, engine string + verifyViaAPI := false if cfg.BaseURL == "" && os.Getenv("MODEL_RUNNER_HOST") == "" { var err error endpoint, engine, err = getDockerModelEndpointAndEngine(ctx) - if err != nil { - if err.Error() == "unknown flag: --json\n\nUsage: docker [OPTIONS] COMMAND [ARG...]\n\nRun 'docker --help' for more information" { - slog.DebugContext(ctx, "docker model status query failed", "error", err) - return nil, ErrNotInstalled - } - slog.ErrorContext(ctx, "docker model status query failed", "error", err) - } else { + switch { + case err == nil: // Auto-pull the model if needed if err := pullDockerModelIfNeeded(ctx, cfg.Model); err != nil { slog.DebugContext(ctx, "docker model pull failed", "error", err) return nil, err } + case errIndicatesNotInstalled(err): + slog.DebugContext(ctx, "docker model status query failed", "error", err) + return nil, ErrNotInstalled + default: + // The `docker model` CLI is unusable (broken plugin, docker not on + // PATH, ...) but the DMR endpoint may still be up: check model + // availability through the HTTP API below so a missing model fails + // here instead of as a raw HTTP 404 at message time. + slog.ErrorContext(ctx, "docker model status query failed", "error", err) + verifyViaAPI = true } } @@ -107,6 +113,12 @@ func NewClient(ctx context.Context, cfg *latest.ModelConfig, opts ...options.Opt httpClient = &http.Client{} } + if verifyViaAPI { + if err := checkModelAvailable(ctx, httpClient, baseURL, cfg.Model); err != nil { + return nil, err + } + } + clientOptions = append(clientOptions, option.WithBaseURL(baseURL), option.WithAPIKey("")) // DMR doesn't need auth parsed, err := parseDMRProviderOpts(engine, cfg) diff --git a/pkg/model/provider/dmr/client_test.go b/pkg/model/provider/dmr/client_test.go index f250177e5..fac6e223e 100644 --- a/pkg/model/provider/dmr/client_test.go +++ b/pkg/model/provider/dmr/client_test.go @@ -58,6 +58,34 @@ func TestNewClientReturnsErrNotInstalledWhenDockerModelUnsupported(t *testing.T) require.ErrorIs(t, err, ErrNotInstalled) } +// The "not installed" detection must not depend on the docker CLI usage text +// staying byte-identical: any failure mentioning the unknown --json flag means +// the installation predates Model Runner (issue #3442). +func TestNewClientReturnsErrNotInstalledWithVariantUsageText(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("Skipping docker CLI shim test on Windows") + } + + tempDir := t.TempDir() + dockerPath := filepath.Join(tempDir, "docker") + script := "#!/bin/sh\n" + + "printf 'unknown flag: --json\\n\\nUsage: docker model COMMAND\\n\\nSee docker model --help\\n' >&2\n" + + "exit 1\n" + require.NoError(t, os.WriteFile(dockerPath, []byte(script), 0o755)) + + t.Setenv("PATH", tempDir+string(os.PathListSeparator)+os.Getenv("PATH")) + t.Setenv("MODEL_RUNNER_HOST", "") + + cfg := &latest.ModelConfig{ + Provider: "dmr", + Model: "ai/qwen3", + } + + _, err := NewClient(t.Context(), cfg) + require.Error(t, err) + require.ErrorIs(t, err, ErrNotInstalled) +} + func TestGetDMRFallbackURLs(t *testing.T) { t.Parallel() diff --git a/pkg/model/provider/dmr/list.go b/pkg/model/provider/dmr/list.go index 7ac652f2b..ae2878449 100644 --- a/pkg/model/provider/dmr/list.go +++ b/pkg/model/provider/dmr/list.go @@ -36,7 +36,7 @@ func ListModels(ctx context.Context) ([]string, error) { // Mirror NewClient: the unknown "--json" flag is the signal that // the Docker installation predates Model Runner, i.e. DMR is not // installed at all. - if strings.Contains(err.Error(), "unknown flag: --json") { + if errIndicatesNotInstalled(err) { return nil, ErrNotInstalled } // Otherwise the docker CLI plugin may simply be unavailable while diff --git a/pkg/teamloader/teamloader.go b/pkg/teamloader/teamloader.go index 83e8dd2ea..b3b9a8a6a 100644 --- a/pkg/teamloader/teamloader.go +++ b/pkg/teamloader/teamloader.go @@ -266,11 +266,12 @@ func LoadWithConfig(ctx context.Context, agentSource config.Source, runConfig *c } else { models, err := getModelsForAgent(ctx, cfg, &agentConfig, autoModel, dmrFallbackSelectors, runConfig, loadOpts.providerRegistry) if err != nil { - // Return auto model fallback errors, DMR not installed errors, and - // DMR pull failures directly without wrapping to provide cleaner, - // actionable messages. + // Return auto model fallback errors, DMR not installed errors, + // DMR pull failures, and DMR model-not-available errors directly + // without wrapping to provide cleaner, actionable messages. _, isPull := errors.AsType[*dmr.PullFailedError](err) - if _, ok := errors.AsType[*config.AutoModelFallbackError](err); ok || errors.Is(err, dmr.ErrNotInstalled) || isPull { + _, isNotAvailable := errors.AsType[*dmr.ModelNotAvailableError](err) + if _, ok := errors.AsType[*config.AutoModelFallbackError](err); ok || errors.Is(err, dmr.ErrNotInstalled) || isPull || isNotAvailable { return nil, err } return nil, fmt.Errorf("failed to get models: %w", err)