diff --git a/cmd/root/a2a.go b/cmd/root/a2a.go index f9fa46c2bd..6f7436d5a7 100644 --- a/cmd/root/a2a.go +++ b/cmd/root/a2a.go @@ -1,14 +1,11 @@ package root import ( - "path/filepath" - "github.com/spf13/cobra" "github.com/docker/docker-agent/pkg/a2a" "github.com/docker/docker-agent/pkg/cli" "github.com/docker/docker-agent/pkg/config" - "github.com/docker/docker-agent/pkg/paths" "github.com/docker/docker-agent/pkg/telemetry" ) @@ -34,7 +31,7 @@ func newA2ACmd() *cobra.Command { cmd.PersistentFlags().StringVarP(&flags.agentName, "agent", "a", "", "Name of the agent to run (defaults to the team's first agent)") cmd.PersistentFlags().StringVarP(&flags.listenAddr, "listen", "l", "127.0.0.1:8082", "Address to listen on") - cmd.PersistentFlags().StringVarP(&flags.sessionDB, "session-db", "s", filepath.Join(paths.GetHomeDir(), ".cagent", "session.db"), "Path to the session database") + cmd.PersistentFlags().StringVarP(&flags.sessionDB, "session-db", "s", "", "Path to the session database (default: /session.db)") addRuntimeConfigFlags(cmd, &flags.runConfig) return cmd @@ -57,5 +54,5 @@ func (f *a2aFlags) runA2ACommand(cmd *cobra.Command, args []string) (commandErr defer cleanup() out.Println("Listening on", ln.Addr().String()) - return a2a.Run(ctx, agentFilename, f.agentName, f.sessionDB, &f.runConfig, ln) + return a2a.Run(ctx, agentFilename, f.agentName, defaultSessionDB(f.sessionDB), &f.runConfig, ln) } diff --git a/cmd/root/acp.go b/cmd/root/acp.go index 0044fe1da8..321abd9bfd 100644 --- a/cmd/root/acp.go +++ b/cmd/root/acp.go @@ -1,14 +1,11 @@ package root import ( - "path/filepath" - "github.com/spf13/cobra" "github.com/docker/docker-agent/pkg/acp" "github.com/docker/docker-agent/pkg/config" pathx "github.com/docker/docker-agent/pkg/path" - "github.com/docker/docker-agent/pkg/paths" "github.com/docker/docker-agent/pkg/telemetry" ) @@ -31,7 +28,7 @@ func newACPCmd() *cobra.Command { RunE: flags.runACPCommand, } - cmd.Flags().StringVarP(&flags.sessionDB, "session-db", "s", filepath.Join(paths.GetHomeDir(), ".cagent", "session.db"), "Path to the session database") + cmd.Flags().StringVarP(&flags.sessionDB, "session-db", "s", "", "Path to the session database (default: /session.db)") addRuntimeConfigFlags(cmd, &flags.runConfig) return cmd @@ -47,7 +44,7 @@ func (f *acpFlags) runACPCommand(cmd *cobra.Command, args []string) (commandErr agentFilename := args[0] // Expand tilde in session database path - sessionDB, err := pathx.ExpandHomeDir(f.sessionDB) + sessionDB, err := pathx.ExpandHomeDir(defaultSessionDB(f.sessionDB)) if err != nil { return err } diff --git a/cmd/root/payload.go b/cmd/root/payload.go index d6ca936e06..de2c562dba 100644 --- a/cmd/root/payload.go +++ b/cmd/root/payload.go @@ -22,7 +22,7 @@ func (f *runExecFlags) createSessionRequest(workingDir string) runtime.CreateSes AgentName: f.agentName, ToolsApproved: f.autoApprove, HideToolResults: f.hideToolResults, - SessionDB: f.sessionDB, + SessionDB: defaultSessionDB(f.sessionDB), ResumeSessionID: f.sessionID, SnapshotsEnabled: f.snapshotsEnabled, GlobalPermissions: f.globalPermissions, diff --git a/cmd/root/root.go b/cmd/root/root.go index 7302ca5ce6..ad45870879 100644 --- a/cmd/root/root.go +++ b/cmd/root/root.go @@ -58,6 +58,10 @@ func NewRootCmd() *cobra.Command { paths.SetDataDir(dir) } + // Relocate legacy ~/.cagent and ~/.config/cagent to the + // XDG/native dirs. After the overrides above so they're honoured. + paths.MigrateLegacy() + // Set the version for automatic telemetry initialization telemetry.SetGlobalTelemetryVersion(version.Version) @@ -143,10 +147,10 @@ We collect anonymous usage data to help improve docker agent. To disable: // Add persistent debug flag available to all commands cmd.PersistentFlags().BoolVarP(&flags.debugMode, "debug", "d", false, "Enable debug logging") cmd.PersistentFlags().BoolVarP(&flags.enableOtel, "otel", "o", false, "Enable OpenTelemetry tracing") - cmd.PersistentFlags().StringVar(&flags.logFilePath, "log-file", "", "Path to debug log file (default: ~/.cagent/cagent.debug.log; only used with --debug)") - cmd.PersistentFlags().StringVar(&flags.cacheDir, "cache-dir", "", "Override the cache directory (default: ~/Library/Caches/cagent on macOS)") - cmd.PersistentFlags().StringVar(&flags.configDir, "config-dir", "", "Override the config directory (default: ~/.config/cagent)") - cmd.PersistentFlags().StringVar(&flags.dataDir, "data-dir", "", "Override the data directory (default: ~/.cagent)") + cmd.PersistentFlags().StringVar(&flags.logFilePath, "log-file", "", "Path to debug log file (default: /cagent.debug.log; only used with --debug)") + cmd.PersistentFlags().StringVar(&flags.cacheDir, "cache-dir", "", "Override the cache directory (default: $XDG_CACHE_HOME/cagent, or the OS-native cache dir)") + cmd.PersistentFlags().StringVar(&flags.configDir, "config-dir", "", "Override the config directory (default: $XDG_CONFIG_HOME/cagent, or the OS-native config dir)") + cmd.PersistentFlags().StringVar(&flags.dataDir, "data-dir", "", "Override the data directory (default: $XDG_DATA_HOME/cagent, or the OS-native data dir)") // Define groups cmd.AddGroup( diff --git a/cmd/root/run.go b/cmd/root/run.go index 793cda92cf..c300eb8144 100644 --- a/cmd/root/run.go +++ b/cmd/root/run.go @@ -133,6 +133,16 @@ func newRunCmd() *cobra.Command { return cmd } +// defaultSessionDB returns the explicit --session-db value, or +// /session.db. Resolved at run time (not flag-registration time) so +// --data-dir and [paths.SetRoot] are honoured. +func defaultSessionDB(explicit string) string { + if explicit != "" { + return explicit + } + return filepath.Join(paths.GetDataDir(), "session.db") +} + func addRunOrExecFlags(cmd *cobra.Command, flags *runExecFlags) { cmd.PersistentFlags().StringVarP(&flags.agentName, "agent", "a", "", "Name of the agent to run (defaults to the team's first agent)") cmd.PersistentFlags().BoolVar(&flags.autoApprove, "yolo", false, "Automatically approve all tool calls without prompting") @@ -142,7 +152,7 @@ func addRunOrExecFlags(cmd *cobra.Command, flags *runExecFlags) { cmd.PersistentFlags().StringArrayVar(&flags.modelOverrides, "model", nil, "Override agent model: [agent=]provider/model (repeatable)") cmd.PersistentFlags().BoolVar(&flags.dryRun, "dry-run", false, "Initialize the agent without executing anything") cmd.PersistentFlags().StringVar(&flags.remoteAddress, "remote", "", "Use remote runtime with specified address") - cmd.PersistentFlags().StringVarP(&flags.sessionDB, "session-db", "s", filepath.Join(paths.GetHomeDir(), ".cagent", "session.db"), "Path to the session database") + cmd.PersistentFlags().StringVarP(&flags.sessionDB, "session-db", "s", "", "Path to the session database (default: /session.db)") cmd.PersistentFlags().StringVar(&flags.sessionID, "session", "", "Continue from a previous session by ID or relative offset (e.g., -1 for last session). An explicit ID that does not exist yet is created with that ID.") cmd.PersistentFlags().StringVar(&flags.fakeResponses, "fake", "", "Replay AI responses from cassette file (for testing)") cmd.PersistentFlags().IntVar(&flags.fakeStreamDelay, "fake-stream", 0, "Simulate streaming with delay in ms between chunks (default 15ms if no value given)") diff --git a/cmd/root/sandbox.go b/cmd/root/sandbox.go index 286ee221cd..5688c02842 100644 --- a/cmd/root/sandbox.go +++ b/cmd/root/sandbox.go @@ -260,6 +260,10 @@ func dockerAgentArgs(cmd *cobra.Command, args []string, configDir string) []stri "sbx": true, "config-dir": true, "no-kit": true, + // Only the config dir is bind-mounted into the sandbox. Forwarding a + // host data/cache path would point the inner agent at a missing mount. + "data-dir": true, + "cache-dir": true, } var dockerAgentArgs []string diff --git a/cmd/root/session_db_test.go b/cmd/root/session_db_test.go new file mode 100644 index 0000000000..ae8bc6c0c4 --- /dev/null +++ b/cmd/root/session_db_test.go @@ -0,0 +1,26 @@ +package root + +import ( + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/docker/docker-agent/pkg/paths" +) + +// TestDefaultSessionDB checks the session DB path is resolved lazily from the +// data dir so --data-dir and SetRoot are honoured (regression: the old flag +// default baked ~/.cagent/session.db at registration time, splitting sessions). +func TestDefaultSessionDB(t *testing.T) { + // Mutates the process-global data-dir override; cannot run in parallel. + dataDir := t.TempDir() + paths.SetDataDir(dataDir) + t.Cleanup(func() { paths.SetDataDir("") }) + + assert.Equal(t, filepath.Join(dataDir, "session.db"), defaultSessionDB(""), + "empty value must resolve to /session.db after an override") + + assert.Equal(t, "/explicit/path.db", defaultSessionDB("/explicit/path.db"), + "an explicit --session-db value must be returned unchanged") +} diff --git a/docs/community/opentelemetry/index.md b/docs/community/opentelemetry/index.md index 49eb98d2ff..6f259c9d1f 100644 --- a/docs/community/opentelemetry/index.md +++ b/docs/community/opentelemetry/index.md @@ -98,7 +98,7 @@ docker agent run agent.yaml --otel ## Inspecting traces locally -Use `--debug` to print telemetry activity to the debug log (`~/.cagent/cagent.debug.log` by default) without standing up a backend: +Use `--debug` to print telemetry activity to the debug log (`/cagent.debug.log` by default) without standing up a backend: ```bash docker agent run agent.yaml --otel --debug diff --git a/docs/community/troubleshooting/index.md b/docs/community/troubleshooting/index.md index 680a0c92df..1beebb7a90 100644 --- a/docs/community/troubleshooting/index.md +++ b/docs/community/troubleshooting/index.md @@ -52,7 +52,7 @@ agents: The first step for any issue is enabling debug logging. This provides detailed information about what docker-agent is doing internally. ```bash -# Enable debug logging (writes to ~/.cagent/cagent.debug.log) +# Enable debug logging (writes to /cagent.debug.log) $ docker agent run config.yaml --debug # Write debug logs to a custom file @@ -69,6 +69,10 @@ $ docker agent run config.yaml --otel +### Where docker-agent stores files + +State moved out of `~/.cagent` (data) into the XDG data directory (`~/.local/share/cagent` on Linux; the OS-native data dir on macOS and Windows). On first run docker-agent automatically relocates existing `~/.cagent` and `~/.config/cagent` contents to the new locations. The `--data-dir`, `--config-dir`, and `--cache-dir` flags and the `XDG_*` environment variables override these defaults. + ## Agent Not Responding ### API keys not set diff --git a/docs/configuration/overview/index.md b/docs/configuration/overview/index.md index 788b809132..9aa3d3df70 100644 --- a/docs/configuration/overview/index.md +++ b/docs/configuration/overview/index.md @@ -193,7 +193,7 @@ API keys and secrets are read from environment variables — never stored in con | Variable | Description | | --------------------- | --------------------------------------------------------------- | | `DOCKER_AGENT_AUTO_INSTALL` | Set to `false` to disable automatic tool installation | -| `DOCKER_AGENT_TOOLS_DIR` | Override the base directory for installed tools (default: `~/.cagent/tools/`) | +| `DOCKER_AGENT_TOOLS_DIR` | Override the base directory for installed tools (default: `/tools/`) | **Runtime overrides:** diff --git a/docs/configuration/tools/index.md b/docs/configuration/tools/index.md index c9bf99ee10..dea7dea733 100644 --- a/docs/configuration/tools/index.md +++ b/docs/configuration/tools/index.md @@ -136,7 +136,7 @@ When configuring MCP or LSP tools that require a binary command, docker agent ca ### How It Works 1. When a toolset with a `command` is loaded, docker agent checks if the command is available in your `PATH` -2. If not found, it checks the docker agent tools directory (`~/.cagent/tools/bin/`) +2. If not found, it checks the docker agent tools directory (`~/.local/share/cagent/tools/bin/`) 3. If still not found, it looks up the command in the aqua registry and installs it automatically ### Explicit Package Reference @@ -202,10 +202,10 @@ export DOCKER_AGENT_AUTO_INSTALL=false | Variable | Default | Description | | ---------------------------- | ------------------ | ------------------------------------------------ | | `DOCKER_AGENT_AUTO_INSTALL` | (enabled) | Set to `false` to disable all auto-installation | -| `DOCKER_AGENT_TOOLS_DIR` | `~/.cagent/tools/` | Base directory for installed tools | +| `DOCKER_AGENT_TOOLS_DIR` | `/tools/` | Base directory for installed tools | | `GITHUB_TOKEN` | — | GitHub token to raise API rate limits (optional) | -Installed binaries are placed in `~/.cagent/tools/bin/` and cached so they are only downloaded once. +Installed binaries are placed in `~/.local/share/cagent/tools/bin/` and cached so they are only downloaded once.
Tip diff --git a/docs/features/acp/index.md b/docs/features/acp/index.md index fd80e5aa46..816945ba05 100644 --- a/docs/features/acp/index.md +++ b/docs/features/acp/index.md @@ -69,7 +69,7 @@ docker agent serve acp | [flags] | Flag | Default | Description | | --------------------------------- | ---------------------- | -------------------------------------------------------------------------------------------------------------------- | -| `-s, --session-db ` | `~/.cagent/session.db` | Path to the SQLite session database. | +| `-s, --session-db ` | `/session.db` | Path to the SQLite session database. | | `--working-dir ` | current dir | Working directory the agent runs in. | | `--env-from-file ` | (none) | Load additional environment variables from a `.env` file (repeatable). | | `--models-gateway ` | (none) | Route all provider traffic through a models gateway URL. | diff --git a/docs/features/cli/index.md b/docs/features/cli/index.md index 3eb5386617..1a51a1dbe4 100644 --- a/docs/features/cli/index.md +++ b/docs/features/cli/index.md @@ -31,7 +31,7 @@ $ docker agent run [config] [message...] [flags] | `--yolo` | Auto-approve all tool calls | | `--model ` | Override model(s). Use `provider/model` for all agents, or `agent=provider/model` for specific agents. Comma-separate multiple overrides. | | `--session ` | Resume a previous session. Supports relative refs (`-1` = last, `-2` = second to last). An explicit ID that does not exist yet is created with that ID, so a supervisor can own the session ID upfront and reuse it across runs. | -| `-s, --session-db ` | Path to the SQLite session database (default: `~/.cagent/session.db`) | +| `-s, --session-db ` | Path to the SQLite session database (default: `/session.db`) | | `--session-read-only` | Open the TUI in read-only mode: conversation history is displayed but no new messages can be sent to the LLM. Cannot be used with `--exec`. | | `--prompt-file ` | Include file contents as additional system context (repeatable) | | `--attach ` | Attach an image file to the initial message | @@ -294,7 +294,7 @@ $ docker agent serve acp [flags] | Flag | Default | Description | | ------------------------- | --------------------------- | ------------------------------------------------- | -| `-s, --session-db ` | `~/.cagent/session.db` | Path to the SQLite session database. | +| `-s, --session-db ` | `/session.db` | Path to the SQLite session database. | All [runtime configuration flags](#runtime-configuration-flags) are also accepted. @@ -521,14 +521,16 @@ These flags are available on every `docker agent` command: | Flag | Description | | ------------------------- | -------------------------------------------------------------------------------------- | -| `-d, --debug` | Enable debug logging (default location: `~/.cagent/cagent.debug.log`) | +| `-d, --debug` | Enable debug logging (default location: `/cagent.debug.log`) | | `--log-file ` | Custom debug log location (only used with `--debug`) | | `-o, --otel` | Enable OpenTelemetry observability: traces, metrics, and logs. Requires `OTEL_EXPORTER_OTLP_ENDPOINT` to export to a collector. | | `--cache-dir ` | Override the cache directory (default: `~/Library/Caches/cagent` on macOS) | -| `--config-dir ` | Override the config directory (default: `~/.config/cagent`) | -| `--data-dir ` | Override the data directory (default: `~/.cagent`) | +| `--config-dir ` | Override the config directory (default: `$XDG_CONFIG_HOME/cagent (or OS-native config dir)`) | +| `--data-dir ` | Override the data directory (default: `$XDG_DATA_HOME/cagent (or OS-native data dir)`) | | `--help` | Show help for any command | +docker-agent follows the XDG Base Directory Specification: set `XDG_DATA_HOME`/`XDG_CONFIG_HOME`/`XDG_CACHE_HOME` to relocate state; otherwise the OS-native directories are used. + ### OpenTelemetry environment variables When `--otel` is enabled, the standard [OTel SDK env vars](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/) are honored (`OTEL_EXPORTER_OTLP_ENDPOINT`, `OTEL_RESOURCE_ATTRIBUTES`, etc.). Two additional docker-agent-specific variables control GenAI instrumentation: diff --git a/docs/features/snapshots/index.md b/docs/features/snapshots/index.md index 4562260cdd..459b639445 100644 --- a/docs/features/snapshots/index.md +++ b/docs/features/snapshots/index.md @@ -86,7 +86,7 @@ when snapshots are turned off. - **Shadow repository.** The first time a snapshot is taken for a worktree, docker-agent initializes a separate shadow git directory under the data - directory (`~/.cagent/snapshot/...` by default), keyed by a hash of the + directory (`~/.local/share/cagent/snapshot/...` by default), keyed by a hash of the worktree path. The shadow repo stores tree objects only — it never writes commits and never touches your source repository's `.git`. - **Ignore rules are mirrored.** Before each capture, the source repository's diff --git a/docs/features/tui/index.md b/docs/features/tui/index.md index 3381d74c58..a68966dc95 100644 --- a/docs/features/tui/index.md +++ b/docs/features/tui/index.md @@ -334,10 +334,10 @@ Customize the TUI appearance with built-in or custom themes: ### Custom Themes -Create theme files in `~/.cagent/themes/` as YAML. Theme files are **partial overrides** — you only need to specify the colors you want to change. Any omitted keys fall back to the built-in default theme values. +Create theme files in `~/.local/share/cagent/themes/` as YAML. Theme files are **partial overrides** — you only need to specify the colors you want to change. Any omitted keys fall back to the built-in default theme values. ```yaml -# ~/.cagent/themes/my-theme.yaml +# ~/.local/share/cagent/themes/my-theme.yaml name: "My Custom Theme" colors: @@ -380,7 +380,7 @@ markdown: ```yaml settings: - theme: my-theme # References ~/.cagent/themes/my-theme.yaml + theme: my-theme # References ~/.local/share/cagent/themes/my-theme.yaml ``` **At launch:** Pass `--theme ` to `docker agent run` to preselect a theme for that session. This overrides `settings.theme` in your config but is not saved. Invalid theme names print an error at startup listing the available options. Has no effect in `--exec` mode. @@ -397,7 +397,7 @@ settings:
Partial overrides
-

All user themes are applied on top of the default theme. If you want to customize a built-in theme (e.g., dracula), copy its full YAML from the built-in themes on GitHub into ~/.cagent/themes/ and edit the copy. Otherwise, omitted values will use default colors, not the original theme's colors.

+

All user themes are applied on top of the default theme. If you want to customize a built-in theme (e.g., dracula), copy its full YAML from the built-in themes on GitHub into ~/.local/share/cagent/themes/ and edit the copy. Otherwise, omitted values will use default colors, not the original theme's colors.

diff --git a/docs/guides/tips/index.md b/docs/guides/tips/index.md index 0bc50d3795..5972e584e5 100644 --- a/docs/guides/tips/index.md +++ b/docs/guides/tips/index.md @@ -333,7 +333,7 @@ agents: Use the `--debug` flag to see detailed execution logs: ```bash -# Default log location: ~/.cagent/cagent.debug.log +# Default log location: /cagent.debug.log $ docker agent run agent.yaml --debug # Custom log location diff --git a/docs/tools/memory/index.md b/docs/tools/memory/index.md index b98f5efbda..8e90b596ba 100644 --- a/docs/tools/memory/index.md +++ b/docs/tools/memory/index.md @@ -12,7 +12,7 @@ _Persistent key-value storage backed by SQLite for cross-session recall._ The memory tool provides persistent key-value storage backed by SQLite. Data survives across sessions, allowing agents to remember facts, user preferences, project context, and past decisions. Memories can be organized with categories and searched by keyword. -By default, the database is stored at `~/.cagent/memory//memory.db`, where `` is derived from the loaded configuration (typically the YAML file name) and falls back to `default` when unavailable. When the agent is loaded from an OCI reference (e.g. `docker/my-agent:latest`), characters that are reserved in filesystem paths (such as `:`) are sanitised in the `` segment — the agent's display name elsewhere is unchanged. Agents declared in the same configuration share this database by default; set an explicit `path` per toolset to isolate them. +By default, the database is stored at `~/.local/share/cagent/memory//memory.db`, where `` is derived from the loaded configuration (typically the YAML file name) and falls back to `default` when unavailable. When the agent is loaded from an OCI reference (e.g. `docker/my-agent:latest`), characters that are reserved in filesystem paths (such as `:`) are sanitised in the `` segment — the agent's display name elsewhere is unchanged. Agents declared in the same configuration share this database by default; set an explicit `path` per toolset to isolate them. ## Available Tools @@ -35,7 +35,7 @@ toolsets: | Property | Type | Default | Description | | -------- | ------ | ----------------------------------------- | -------------------------------- | -| `path` | string | `~/.cagent/memory//memory.db` | Path to the SQLite database file | +| `path` | string | `~/.local/share/cagent/memory//memory.db` | Path to the SQLite database file | ### Custom Database Path diff --git a/docs/tools/plan/index.md b/docs/tools/plan/index.md index 32d06feff2..94497b101f 100644 --- a/docs/tools/plan/index.md +++ b/docs/tools/plan/index.md @@ -12,7 +12,7 @@ _Shared persistent scratchpad for multi-agent collaboration._ The plan tool gives agents a shared, persistent scratchpad of named documents. Any agent in a multi-agent config that loads the `plan` toolset can read and write the same plans, and those plans survive across sessions. This makes it straightforward to wire a planner agent that sketches work and one or more executor agents that consume it without any custom tool wiring. -Plans are stored as JSON files in the docker-agent data directory (`~/.cagent/plans/` by default). All agents that share a process serialize on a single mutex so concurrent reads and writes are safe. Writes are atomic (temp file + rename), so a reader never observes partial content. +Plans are stored as JSON files in the docker-agent data directory (`~/.local/share/cagent/plans/` by default). All agents that share a process serialize on a single mutex so concurrent reads and writes are safe. Writes are atomic (temp file + rename), so a reader never observes partial content. ## Configuration diff --git a/docs/tools/session_plan/index.md b/docs/tools/session_plan/index.md index 64f9df6ed6..25bc4f6044 100644 --- a/docs/tools/session_plan/index.md +++ b/docs/tools/session_plan/index.md @@ -17,7 +17,7 @@ Different from the [`plan` toolset](/tools/plan/) — `plan` is for shared, name Plans live as Markdown files under: ``` -~/.cagent/session_plans/.md +~/.local/share/cagent/session_plans/.md ``` The tool surface is three tools: diff --git a/pkg/config/main_test.go b/pkg/config/main_test.go new file mode 100644 index 0000000000..7f9efe2de9 --- /dev/null +++ b/pkg/config/main_test.go @@ -0,0 +1,18 @@ +package config + +import ( + "os" + "testing" + + "github.com/docker/docker-agent/pkg/internal/xdgtest" +) + +// TestMain restores $HOME-based isolation for this package's tests. Several of +// them write user-config aliases (including "default") through userconfig while +// isolating only via t.Setenv("HOME", ...); without clearing the XDG variables +// those writes would leak into a shared config dir and pollute other packages' +// test runs (e.g. resolving "default" to an OCI ref). See pkg/internal/xdgtest. +func TestMain(m *testing.M) { + xdgtest.Clear() + os.Exit(m.Run()) +} diff --git a/pkg/content/store.go b/pkg/content/store.go index bd6bdaa9b2..407ec7c236 100644 --- a/pkg/content/store.go +++ b/pkg/content/store.go @@ -17,6 +17,8 @@ import ( "github.com/google/go-containerregistry/pkg/crane" v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/google/go-containerregistry/pkg/v1/tarball" + + "github.com/docker/docker-agent/pkg/paths" ) // ErrInvalidDigest indicates that an identifier shaped like a digest @@ -73,12 +75,7 @@ func NewStore(opts ...Opt) (*Store, error) { } if store.baseDir == "" { - homeDir, err := os.UserHomeDir() - if err != nil { - return nil, fmt.Errorf("getting home directory: %w", err) - } - - store.baseDir = filepath.Join(homeDir, ".cagent", "store") + store.baseDir = filepath.Join(paths.GetDataDir(), "store") } if err := os.MkdirAll(store.baseDir, 0o700); err != nil { diff --git a/pkg/history/history.go b/pkg/history/history.go index bd4b350742..62d81fb968 100644 --- a/pkg/history/history.go +++ b/pkg/history/history.go @@ -11,6 +11,8 @@ import ( "strings" "github.com/docker/portcullis" + + "github.com/docker/docker-agent/pkg/paths" ) // History is the in-memory view of a persistent message history. The cursor @@ -23,18 +25,15 @@ type History struct { current int } -// New loads the history stored under baseDir/.cagent/history. If baseDir is -// empty, the user's home directory is used. -func New(baseDir string) (*History, error) { - if baseDir == "" { - var err error - if baseDir, err = os.UserHomeDir(); err != nil { - return nil, err - } +// New loads the history stored under dir/history. If dir is empty, the +// docker-agent data directory ([paths.GetDataDir]) is used. +func New(dir string) (*History, error) { + if dir == "" { + dir = paths.GetDataDir() } - h := &History{path: filepath.Join(baseDir, ".cagent", "history")} - if err := h.migrateOldHistory(baseDir); err != nil { + h := &History{path: filepath.Join(dir, "history")} + if err := h.migrateOldHistory(dir); err != nil { return nil, err } if err := h.load(); err != nil && !os.IsNotExist(err) { @@ -44,10 +43,11 @@ func New(baseDir string) (*History, error) { return h, nil } -// NewAtDir loads the history stored at dir/history, without the ".cagent" -// path segment New inserts. It is for embedders that keep the agent's -// state under their own directory layout and must not mix prompt history -// with a docker-agent installation on the same machine. +// NewAtDir loads the history stored at dir/history from an explicit directory, +// skipping both the [paths.GetDataDir] default and the legacy history.json +// import that New performs. It is for embedders that keep the agent's state +// under their own directory layout and must not mix prompt history with a +// docker-agent installation on the same machine. func NewAtDir(dir string) (*History, error) { h := &History{path: filepath.Join(dir, "history")} if err := h.load(); err != nil && !os.IsNotExist(err) { @@ -191,8 +191,8 @@ func (h *History) load() error { // migrateOldHistory imports messages from the legacy history.json file (if it // exists) into the new line-oriented format and removes the old file. -func (h *History) migrateOldHistory(baseDir string) error { - oldPath := filepath.Join(baseDir, ".cagent", "history.json") +func (h *History) migrateOldHistory(dir string) error { + oldPath := filepath.Join(dir, "history.json") data, err := os.ReadFile(oldPath) if os.IsNotExist(err) { diff --git a/pkg/history/history_test.go b/pkg/history/history_test.go index d2fbe71343..474982643b 100644 --- a/pkg/history/history_test.go +++ b/pkg/history/history_test.go @@ -146,10 +146,8 @@ func TestHistory_MultilineMessage(t *testing.T) { func TestHistory_MigrateOldFormat(t *testing.T) { t.Parallel() tmpDir := t.TempDir() - err := os.MkdirAll(filepath.Join(tmpDir, ".cagent"), 0o755) - require.NoError(t, err) - oldHistFile := filepath.Join(tmpDir, ".cagent", "history.json") - newHistFile := filepath.Join(tmpDir, ".cagent", "history") + oldHistFile := filepath.Join(tmpDir, "history.json") + newHistFile := filepath.Join(tmpDir, "history") require.NoError(t, os.WriteFile(oldHistFile, []byte(`{"messages":["old1","old2","old3"]}`), 0o644)) @@ -497,7 +495,7 @@ func TestHistory_RedactsOnAdd(t *testing.T) { assert.Contains(t, stored, portcullis.Marker) // On-disk file must also be redacted. - data, err := os.ReadFile(filepath.Join(tmpDir, ".cagent", "history")) + data, err := os.ReadFile(filepath.Join(tmpDir, "history")) require.NoError(t, err) assert.NotContains(t, string(data), pat, "persisted history must not contain the secret") assert.Contains(t, string(data), portcullis.Marker) @@ -506,8 +504,7 @@ func TestHistory_RedactsOnAdd(t *testing.T) { func TestHistory_RedactsOnLoad(t *testing.T) { t.Parallel() tmpDir := t.TempDir() - require.NoError(t, os.MkdirAll(filepath.Join(tmpDir, ".cagent"), 0o700)) - histFile := filepath.Join(tmpDir, ".cagent", "history") + histFile := filepath.Join(tmpDir, "history") pat := portcullistest.FakeGitHubPAT("cxLeRrvbJfmYdUtr70xnNE3Q7Gvli4") // Simulate a pre-existing history file written before redaction was wired in. @@ -524,8 +521,7 @@ func TestHistory_RedactsOnLoad(t *testing.T) { func TestHistory_RedactsOnMigrate(t *testing.T) { t.Parallel() tmpDir := t.TempDir() - require.NoError(t, os.MkdirAll(filepath.Join(tmpDir, ".cagent"), 0o700)) - oldHistFile := filepath.Join(tmpDir, ".cagent", "history.json") + oldHistFile := filepath.Join(tmpDir, "history.json") pat := portcullistest.FakeGitHubPAT("cxLeRrvbJfmYdUtr70xnNE3Q7Gvli4") require.NoError(t, os.WriteFile(oldHistFile, []byte(`{"messages":["leak `+pat+`"]}`), 0o600)) @@ -537,7 +533,7 @@ func TestHistory_RedactsOnMigrate(t *testing.T) { assert.NotContains(t, h.Messages[0], pat, "migrated history must not expose the secret") assert.Contains(t, h.Messages[0], portcullis.Marker) - data, err := os.ReadFile(filepath.Join(tmpDir, ".cagent", "history")) + data, err := os.ReadFile(filepath.Join(tmpDir, "history")) require.NoError(t, err) assert.NotContains(t, string(data), pat, "migrated on-disk history must not contain the secret") } diff --git a/pkg/internal/xdgtest/xdgtest.go b/pkg/internal/xdgtest/xdgtest.go new file mode 100644 index 0000000000..da8761266a --- /dev/null +++ b/pkg/internal/xdgtest/xdgtest.go @@ -0,0 +1,23 @@ +// Package xdgtest neutralises the XDG base-directory environment variables so +// that tests isolating docker-agent state via a temporary $HOME keep working. +// +// Tests point $HOME at a t.TempDir() and expect paths.Get{Config,Data,Cache}Dir +// to resolve under it. That only holds while those getters derive from $HOME; +// they now also honour $XDG_CONFIG_HOME / $XDG_DATA_HOME / $XDG_CACHE_HOME (via +// os.UserConfigDir and friends). On a machine that exports any of them (GitHub +// runners do), config/data/cache resolve to the real, shared directory instead +// of the test's $HOME, so writes leak out (and reads pick up unrelated state) — +// e.g. an alias written by one package's test surfaces in another package's run. +// +// Call Clear from a package's TestMain to restore $HOME-based isolation for the +// whole test binary. +package xdgtest + +import "os" + +// Clear unsets the XDG base-directory variables for the current process. +func Clear() { + for _, v := range []string{"XDG_CONFIG_HOME", "XDG_DATA_HOME", "XDG_CACHE_HOME"} { + _ = os.Unsetenv(v) + } +} diff --git a/pkg/modelsdev/store.go b/pkg/modelsdev/store.go index 433cdaa3fa..14df66acb1 100644 --- a/pkg/modelsdev/store.go +++ b/pkg/modelsdev/store.go @@ -14,6 +14,7 @@ import ( "sync" "time" + "github.com/docker/docker-agent/pkg/paths" "github.com/docker/docker-agent/pkg/remote" ) @@ -101,8 +102,8 @@ func WithKnownProvider(fn func(string) bool) Opt { } // NewStore creates a new Store backed by an on-disk cache. By default the -// cache lives at ~/.cagent/models_dev.json; use WithCache to override the -// location. +// cache lives in the user cache directory (e.g. ~/.cache/cagent/models_dev.json +// on Linux); use WithCache to override the location. // Callers should create one Store and share it rather than calling NewStore // repeatedly. RuntimeConfig.ModelsDevStore() is the standard way to obtain // a shared instance. @@ -114,11 +115,7 @@ func NewStore(opts ...Opt) (*Store, error) { cacheFile := options.cacheFile if cacheFile == "" { - homeDir, err := os.UserHomeDir() - if err != nil { - return nil, fmt.Errorf("failed to get user home directory: %w", err) - } - cacheFile = filepath.Join(homeDir, ".cagent", CacheFileName) + cacheFile = filepath.Join(paths.GetCacheDir(), CacheFileName) } if err := os.MkdirAll(filepath.Dir(cacheFile), 0o700); err != nil { diff --git a/pkg/paths/migrate.go b/pkg/paths/migrate.go new file mode 100644 index 0000000000..118507488b --- /dev/null +++ b/pkg/paths/migrate.go @@ -0,0 +1,300 @@ +package paths + +import ( + "crypto/sha256" + "encoding/hex" + "errors" + "io" + "log/slog" + "os" + "path/filepath" + "strings" + "sync" + "time" +) + +var migrateOnce sync.Once + +// MigrateLegacy relocates state from the historical layout (~/.cagent for data, +// ~/.config/cagent for config) to the resolved XDG/native directories. It runs +// once per process and is idempotent across runs. Overridden directories +// (via [SetConfigDir]/[SetDataDir]/[SetRoot] or the CLI flags) are skipped, so +// embedders and explicit --data-dir/--config-dir users are untouched. +func MigrateLegacy() { + migrateOnce.Do(func() { + if !configDirOverride.isSet() { + migrateDir(legacyConfigDir(), xdgConfigDir()) + } + if !dataDirOverride.isSet() { + migrateDir(legacyDataDir(), xdgDataDir()) + } + }) +} + +// migrateTmpPrefix is the reserved name prefix for in-progress copies inside +// the destination directory. An entry only appears under its final name once +// fully copied and synced, so a crash can never leave a truncated entry that +// looks migrated. +const migrateTmpPrefix = ".cagent-migrate-" + +// renameEntry is a seam for tests to simulate cross-device rename failures. +var renameEntry = os.Rename + +// migrationLock{Wait,Poll} bound how long a process waits for a concurrent +// migration before skipping this run. Vars so tests can shorten them. +var ( + migrationLockWait = 5 * time.Second + migrationLockPoll = 100 * time.Millisecond +) + +// migrateDir moves src's entries into dst. Entries are moved with os.Rename +// when possible, falling back to a crash-safe copy-then-delete when rename +// fails (typically EXDEV, when src and dst are on different filesystems). +// Existing dst entries are never clobbered (on macOS config and data share one +// dir, so it must merge), and src is removed only once empty. A failed move is +// left in place and dst is removed if it was created but stayed empty, so the +// getters' legacy fallback keeps the data reachable, no loss. +func migrateDir(src, dst string) { + if src == "" || src == dst || !dirExists(src) { + return + } + + unlock, acquired := acquireMigrationLock(dst) + if !acquired { + return + } + defer unlock() + + // Re-check under the lock: a concurrent process may have completed the + // migration while we waited. + if !dirExists(src) { + return + } + + entries, err := os.ReadDir(src) + if err != nil { + slog.Warn("xdg migration: cannot read legacy directory", "dir", src, "error", err) + return + } + + dstExisted := pathExists(dst) + if err := os.MkdirAll(dst, 0o700); err != nil { + slog.Warn("xdg migration: cannot create destination directory", "dir", dst, "error", err) + return + } + removeStaleTmp(dst) + + var moved int + for _, e := range entries { + from := filepath.Join(src, e.Name()) + to := filepath.Join(dst, e.Name()) + if pathExists(to) { + continue + } + if err := renameEntry(from, to); err == nil { + moved++ + continue + } + if err := copyEntry(from, to); err != nil { + if !errors.Is(err, errIrregular) { + slog.Warn("xdg migration: could not move entry, leaving it in place", + "from", from, "to", to, "error", err) + } + continue + } + moved++ + } + + if rem, err := os.ReadDir(src); err == nil && len(rem) == 0 { + _ = os.Remove(src) + } + // If dst was created here but nothing could move into it, remove it (only + // succeeds when empty) so the getters keep resolving the legacy directory + // and existing state stays reachable. + if !dstExisted { + _ = os.Remove(dst) + } + if moved > 0 { + slog.Info("relocated docker-agent state to XDG directory", "from", src, "to", dst, "entries", moved) + } +} + +// acquireMigrationLock serializes concurrent docker-agent processes around the +// migration of dst so entry moves and source deletions cannot interleave. The +// lock file lives in the OS temp dir, keyed by the hashed destination, and is +// deliberately never unlinked: recreating it would let two processes hold +// locks on different inodes of the same path. +// +// The wait is bounded: if another process still holds the lock after +// migrationLockWait, this run skips the migration (acquired=false) and the +// legacy fallback keeps state reachable until the next start retries. If the +// lock file itself cannot be used (e.g. permissions), migration proceeds +// unlocked since skipping would disable it forever. +func acquireMigrationLock(dst string) (unlock func(), acquired bool) { + path := migrationLockPath(dst) + + f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600) + if err != nil { + slog.Warn("xdg migration: proceeding without cross-process lock", "path", path, "error", err) + return func() {}, true + } + + deadline := time.Now().Add(migrationLockWait) + for { + locked, err := tryLockExclusive(f) + if err != nil { + _ = f.Close() + slog.Warn("xdg migration: proceeding without cross-process lock", "path", path, "error", err) + return func() {}, true + } + if locked { + return func() { + _ = unlockFile(f) + _ = f.Close() + }, true + } + if time.Now().After(deadline) { + _ = f.Close() + slog.Info("xdg migration: another process is migrating, skipping this run", "path", path) + return nil, false + } + time.Sleep(migrationLockPoll) + } +} + +// migrationLockPath keys the cross-process lock file on the hashed +// destination so concurrent migrations of the same dir share one lock. +func migrationLockPath(dst string) string { + sum := sha256.Sum256([]byte(dst)) + return filepath.Join(os.TempDir(), "cagent-migrate-"+hex.EncodeToString(sum[:8])+".lock") +} + +// removeStaleTmp deletes leftover in-progress copies from a previous run that +// crashed mid-copy. Only migration creates names with migrateTmpPrefix in dst. +func removeStaleTmp(dst string) { + entries, err := os.ReadDir(dst) + if err != nil { + return + } + for _, e := range entries { + if strings.HasPrefix(e.Name(), migrateTmpPrefix) { + _ = os.RemoveAll(filepath.Join(dst, e.Name())) + } + } +} + +// errIrregular marks a top-level socket/FIFO/device that a cross-filesystem +// copy cannot replicate; the entry is deliberately left in place. +var errIrregular = errors.New("irregular file cannot be copied") + +// copyEntry replicates from into to via a temporary sibling so the final name +// only ever appears complete: copy fully (fsynced), rename atomically, flush +// the parent, and only then delete the source. A crash at any point leaves +// either the source intact or both copies, never a partial destination under +// its final name. +func copyEntry(from, to string) error { + info, err := os.Lstat(from) + if err != nil { + return err + } + // A top-level socket/FIFO/device cannot be copied across filesystems; + // leave it in place rather than delete something we could not replicate. + if !info.IsDir() && info.Mode()&os.ModeSymlink == 0 && !info.Mode().IsRegular() { + slog.Warn("xdg migration: leaving irregular file in place", "path", from, "mode", info.Mode().String()) + return errIrregular + } + + tmp := filepath.Join(filepath.Dir(to), migrateTmpPrefix+filepath.Base(to)) + if err := os.RemoveAll(tmp); err != nil { + return err + } + if err := copyTree(from, tmp); err != nil { + _ = os.RemoveAll(tmp) + return err + } + if err := os.Rename(tmp, to); err != nil { + _ = os.RemoveAll(tmp) + return err + } + // Durability barrier: the rename must hit disk before the source goes + // away, or a power loss could drop both copies. + syncDir(filepath.Dir(to)) + if err := os.RemoveAll(from); err != nil { + slog.Warn("xdg migration: entry copied but source could not be removed", "path", from, "error", err) + } + return nil +} + +// copyTree recursively copies a directory tree, preserving permissions and +// symlinks. Irregular files (sockets, FIFOs, devices) cannot be copied and are +// skipped: they are transient rendezvous points their owners recreate. +func copyTree(from, to string) error { + info, err := os.Lstat(from) + if err != nil { + return err + } + switch { + case info.Mode()&os.ModeSymlink != 0: + target, err := os.Readlink(from) + if err != nil { + return err + } + return os.Symlink(target, to) + case info.IsDir(): + if err := os.Mkdir(to, 0o700); err != nil { + return err + } + entries, err := os.ReadDir(from) + if err != nil { + return err + } + for _, e := range entries { + if err := copyTree(filepath.Join(from, e.Name()), filepath.Join(to, e.Name())); err != nil { + return err + } + } + return os.Chmod(to, info.Mode().Perm()) + case info.Mode().IsRegular(): + return copyFile(from, to, info.Mode().Perm()) + default: + slog.Warn("xdg migration: skipping irregular file", "path", from, "mode", info.Mode().String()) + return nil + } +} + +func copyFile(from, to string, perm os.FileMode) error { + in, err := os.Open(from) + if err != nil { + return err + } + defer in.Close() + + out, err := os.OpenFile(to, os.O_WRONLY|os.O_CREATE|os.O_EXCL, perm) + if err != nil { + return err + } + if _, err := io.Copy(out, in); err != nil { + _ = out.Close() + return err + } + // Flush content before the tmp tree can be renamed to its final name, + // otherwise a crash could surface a migrated-looking but truncated file. + if err := out.Sync(); err != nil { + _ = out.Close() + return err + } + if err := out.Close(); err != nil { + return err + } + // Chmod explicitly: the mode passed to OpenFile is filtered by umask. + return os.Chmod(to, perm) +} + +// syncDir flushes directory metadata so completed renames survive a crash. +// Windows cannot sync directory handles; the error is best-effort ignored. +func syncDir(path string) { + if d, err := os.Open(path); err == nil { + _ = d.Sync() + _ = d.Close() + } +} diff --git a/pkg/paths/migrate_internal_test.go b/pkg/paths/migrate_internal_test.go new file mode 100644 index 0000000000..c95c6eb07a --- /dev/null +++ b/pkg/paths/migrate_internal_test.go @@ -0,0 +1,327 @@ +package paths + +import ( + "errors" + "os" + "path/filepath" + "runtime" + "strings" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestXDGHelpersHonourEnv(t *testing.T) { + base := t.TempDir() + cfg := filepath.Join(base, "cfg") + data := filepath.Join(base, "data") + cache := filepath.Join(base, "cache") + t.Setenv("XDG_CONFIG_HOME", cfg) + t.Setenv("XDG_DATA_HOME", data) + t.Setenv("XDG_CACHE_HOME", cache) + + assert.Equal(t, filepath.Join(cfg, "cagent"), xdgConfigDir()) + assert.Equal(t, filepath.Join(data, "cagent"), xdgDataDir()) + assert.Equal(t, filepath.Join(cache, "cagent"), xdgCacheDir()) +} + +// TestNativeDataDirWithoutXDG exercises the OS-native default branch (the +// runtime.GOOS switch) that the XDG-env tests bypass. It pins the concrete +// expected path on the platform the suite runs on. +func TestNativeDataDirWithoutXDG(t *testing.T) { + home := t.TempDir() + t.Setenv("HOME", home) + t.Setenv("XDG_DATA_HOME", "") // force the native fallback + + var want string + switch runtime.GOOS { + case "darwin": + want = filepath.Join(home, "Library", "Application Support", "cagent") + case "windows": + // On Windows os.UserHomeDir reads USERPROFILE; LocalAppData drives the + // data dir. This branch is documented but only asserted when run there. + t.Skip("native Windows path is environment-specific") + default: + want = filepath.Join(home, ".local", "share", "cagent") + } + assert.Equal(t, want, xdgDataDir()) +} + +func TestMigrateDirMovesAndRemovesEmptiedSource(t *testing.T) { + base := t.TempDir() + src := filepath.Join(base, "src") + dst := filepath.Join(base, "dst") + require.NoError(t, os.MkdirAll(filepath.Join(src, "sub"), 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(src, "a.txt"), []byte("A"), 0o600)) + require.NoError(t, os.WriteFile(filepath.Join(src, "sub", "x.txt"), []byte("X"), 0o600)) + + migrateDir(src, dst) + + assert.FileExists(t, filepath.Join(dst, "a.txt")) + assert.FileExists(t, filepath.Join(dst, "sub", "x.txt")) + assert.False(t, pathExists(src), "fully drained source dir should be removed") +} + +func TestMigrateDirMergesWithoutClobbering(t *testing.T) { + base := t.TempDir() + src := filepath.Join(base, "src") + dst := filepath.Join(base, "dst") + require.NoError(t, os.MkdirAll(src, 0o755)) + require.NoError(t, os.MkdirAll(dst, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(src, "new.txt"), []byte("from-src"), 0o600)) + require.NoError(t, os.WriteFile(filepath.Join(src, "shared.txt"), []byte("src"), 0o600)) + require.NoError(t, os.WriteFile(filepath.Join(dst, "shared.txt"), []byte("dst"), 0o600)) + + migrateDir(src, dst) + + // New entries move over. + got, err := os.ReadFile(filepath.Join(dst, "new.txt")) + require.NoError(t, err) + assert.Equal(t, "from-src", string(got)) + + // Pre-existing destination entries are never overwritten. + got, err = os.ReadFile(filepath.Join(dst, "shared.txt")) + require.NoError(t, err) + assert.Equal(t, "dst", string(got)) + + // The un-moved colliding entry stays in src, so src is kept (not removed). + assert.FileExists(t, filepath.Join(src, "shared.txt")) + assert.True(t, dirExists(src)) +} + +func TestMigrateDirNoops(t *testing.T) { + base := t.TempDir() + dst := filepath.Join(base, "dst") + + migrateDir("", dst) + migrateDir(filepath.Join(base, "missing"), dst) + migrateDir(dst, dst) + + assert.False(t, pathExists(dst), "no-op migrations must not create the destination") +} + +func TestMigrateLegacyMovesDefaultLayout(t *testing.T) { + home := t.TempDir() + t.Setenv("HOME", home) + t.Setenv("XDG_DATA_HOME", filepath.Join(home, "data")) + t.Setenv("XDG_CONFIG_HOME", filepath.Join(home, "config")) + + legacyData := filepath.Join(home, ".cagent") + require.NoError(t, os.MkdirAll(legacyData, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(legacyData, "session.db"), []byte("x"), 0o600)) + legacyConfig := filepath.Join(home, ".config", "cagent") + require.NoError(t, os.MkdirAll(legacyConfig, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(legacyConfig, "config.yaml"), []byte("y"), 0o600)) + + resetMigrationState(t) + MigrateLegacy() + + assert.FileExists(t, filepath.Join(home, "data", "cagent", "session.db")) + assert.FileExists(t, filepath.Join(home, "config", "cagent", "config.yaml")) + assert.False(t, pathExists(legacyData), "legacy data dir should be relocated") + assert.False(t, pathExists(legacyConfig), "legacy config dir should be relocated") +} + +// TestMigrateLegacyMergesSharedDestination reproduces the macOS case where the +// config and data directories resolve to the SAME location +// (~/Library/Application Support/cagent). Both legacy dirs must merge into the +// shared destination without either being dropped. +func TestMigrateLegacyMergesSharedDestination(t *testing.T) { + home := t.TempDir() + t.Setenv("HOME", home) + // Same base for config and data => xdgConfigDir() == xdgDataDir(). + shared := filepath.Join(home, "shared") + t.Setenv("XDG_CONFIG_HOME", shared) + t.Setenv("XDG_DATA_HOME", shared) + require.Equal(t, xdgConfigDir(), xdgDataDir(), "test precondition: dirs must collide") + + legacyData := filepath.Join(home, ".cagent") + require.NoError(t, os.MkdirAll(legacyData, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(legacyData, "session.db"), []byte("S"), 0o600)) + legacyConfig := filepath.Join(home, ".config", "cagent") + require.NoError(t, os.MkdirAll(legacyConfig, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(legacyConfig, "config.yaml"), []byte("C"), 0o600)) + + resetMigrationState(t) + MigrateLegacy() + + dst := filepath.Join(shared, "cagent") + assert.FileExists(t, filepath.Join(dst, "session.db"), "data file must survive the merge") + assert.FileExists(t, filepath.Join(dst, "config.yaml"), "config file must survive the merge") + assert.False(t, pathExists(legacyData)) + assert.False(t, pathExists(legacyConfig)) +} + +func TestMigrateLegacySkipsOverriddenDirs(t *testing.T) { + home := t.TempDir() + t.Setenv("HOME", home) + t.Setenv("XDG_DATA_HOME", filepath.Join(home, "data")) + + legacyData := filepath.Join(home, ".cagent") + require.NoError(t, os.MkdirAll(legacyData, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(legacyData, "session.db"), []byte("x"), 0o600)) + + resetMigrationState(t) + dataDirOverride.Set(filepath.Join(home, "override")) + MigrateLegacy() + + // An explicit --data-dir / SetRoot override must leave the legacy dir alone. + assert.FileExists(t, filepath.Join(legacyData, "session.db")) + assert.False(t, pathExists(filepath.Join(home, "data", "cagent"))) +} + +// resetMigrationState clears the once-guard and any directory overrides so a +// test can exercise MigrateLegacy from a clean slate, restoring them after. +func resetMigrationState(t *testing.T) { + t.Helper() + migrateOnce = sync.Once{} + cacheDirOverride.Set("") + configDirOverride.Set("") + dataDirOverride.Set("") + t.Cleanup(func() { + migrateOnce = sync.Once{} + cacheDirOverride.Set("") + configDirOverride.Set("") + dataDirOverride.Set("") + }) +} + +// forceCrossDevice makes every rename fail as it would across filesystems +// (EXDEV), forcing migrateDir onto its copy fallback. +func forceCrossDevice(t *testing.T) { + t.Helper() + renameEntry = func(from, to string) error { + return &os.LinkError{Op: "rename", Old: from, New: to, Err: errors.New("cross-device link")} + } + t.Cleanup(func() { renameEntry = os.Rename }) +} + +func shortenLockTimings(t *testing.T) { + t.Helper() + oldWait, oldPoll := migrationLockWait, migrationLockPoll + migrationLockWait, migrationLockPoll = 50*time.Millisecond, 5*time.Millisecond + t.Cleanup(func() { migrationLockWait, migrationLockPoll = oldWait, oldPoll }) +} + +// TestMigrateDirCopiesAcrossFilesystems covers the case where the legacy dir +// and the XDG dir live on different disks: rename fails with EXDEV and the +// migration must fall back to a full copy that preserves content, permissions +// and symlinks, then drain the source. +func TestMigrateDirCopiesAcrossFilesystems(t *testing.T) { + forceCrossDevice(t) + + base := t.TempDir() + src := filepath.Join(base, "src") + dst := filepath.Join(base, "dst") + require.NoError(t, os.MkdirAll(filepath.Join(src, "sub"), 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(src, "a.txt"), []byte("A"), 0o640)) + require.NoError(t, os.WriteFile(filepath.Join(src, "sub", "x.txt"), []byte("X"), 0o600)) + if runtime.GOOS != "windows" { + require.NoError(t, os.Symlink("a.txt", filepath.Join(src, "link"))) + } + + migrateDir(src, dst) + + got, err := os.ReadFile(filepath.Join(dst, "a.txt")) + require.NoError(t, err) + assert.Equal(t, "A", string(got)) + assert.FileExists(t, filepath.Join(dst, "sub", "x.txt")) + if runtime.GOOS != "windows" { + target, err := os.Readlink(filepath.Join(dst, "link")) + require.NoError(t, err) + assert.Equal(t, "a.txt", target) + + info, err := os.Stat(filepath.Join(dst, "a.txt")) + require.NoError(t, err) + assert.Equal(t, os.FileMode(0o640), info.Mode().Perm()) + } + assert.False(t, pathExists(src), "fully drained source dir should be removed") + + entries, err := os.ReadDir(dst) + require.NoError(t, err) + for _, e := range entries { + assert.False(t, strings.HasPrefix(e.Name(), migrateTmpPrefix), "no in-progress copy may remain: %s", e.Name()) + } +} + +// TestMigrateDirCleansStaleTmpFromCrashedRun simulates a process killed in the +// middle of a cross-device copy: the leftover partial tmp must be discarded +// and the entry re-copied in full from the still-intact source. +func TestMigrateDirCleansStaleTmpFromCrashedRun(t *testing.T) { + forceCrossDevice(t) + + base := t.TempDir() + src := filepath.Join(base, "src") + dst := filepath.Join(base, "dst") + require.NoError(t, os.MkdirAll(src, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(src, "a.txt"), []byte("full"), 0o600)) + require.NoError(t, os.MkdirAll(dst, 0o700)) + require.NoError(t, os.WriteFile(filepath.Join(dst, migrateTmpPrefix+"a.txt"), []byte("par"), 0o600)) + + migrateDir(src, dst) + + got, err := os.ReadFile(filepath.Join(dst, "a.txt")) + require.NoError(t, err) + assert.Equal(t, "full", string(got)) + assert.False(t, pathExists(filepath.Join(dst, migrateTmpPrefix+"a.txt"))) + assert.False(t, pathExists(src)) +} + +// TestMigrateDirKeepsLegacyReachableWhenNothingMoves is the regression for +// the different-disk trap: rename fails (EXDEV) and the copy fails too. The +// destination created by the migration must not be left behind empty, or the +// getters would resolve to it and the untouched legacy data would become +// invisible. +func TestMigrateDirKeepsLegacyReachableWhenNothingMoves(t *testing.T) { + if runtime.GOOS == "windows" || os.Geteuid() == 0 { + t.Skip("requires non-root unix permission semantics") + } + forceCrossDevice(t) + + base := t.TempDir() + src := filepath.Join(base, "src") + dst := filepath.Join(base, "dst") + require.NoError(t, os.MkdirAll(src, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(src, "a.txt"), []byte("A"), 0o600)) + // Unreadable source file: the copy fallback cannot open it. + require.NoError(t, os.Chmod(filepath.Join(src, "a.txt"), 0o000)) + t.Cleanup(func() { _ = os.Chmod(filepath.Join(src, "a.txt"), 0o600) }) + + migrateDir(src, dst) + + assert.True(t, pathExists(filepath.Join(src, "a.txt")), "failed entry must stay in place") + assert.False(t, pathExists(dst), "empty destination must be removed so the legacy fallback stays active") + assert.Equal(t, src, resolveDefault(dst, src), "getters must keep resolving the legacy dir") +} + +func TestMigrateDirSkipsWhenAnotherProcessHoldsLock(t *testing.T) { + base := t.TempDir() + src := filepath.Join(base, "src") + dst := filepath.Join(base, "dst") + require.NoError(t, os.MkdirAll(src, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(src, "a.txt"), []byte("A"), 0o600)) + + // Hold the lock through a second descriptor, as a concurrent process would. + lockFile, err := os.OpenFile(migrationLockPath(dst), os.O_CREATE|os.O_RDWR, 0o600) + require.NoError(t, err) + locked, err := tryLockExclusive(lockFile) + require.NoError(t, err) + require.True(t, locked) + + shortenLockTimings(t) + migrateDir(src, dst) + + assert.FileExists(t, filepath.Join(src, "a.txt"), "migration must be skipped while the lock is held") + assert.False(t, pathExists(dst)) + + require.NoError(t, unlockFile(lockFile)) + require.NoError(t, lockFile.Close()) + + // The next run picks the migration up. + migrateDir(src, dst) + assert.FileExists(t, filepath.Join(dst, "a.txt")) + assert.False(t, pathExists(src)) +} diff --git a/pkg/paths/migrate_lock_unix.go b/pkg/paths/migrate_lock_unix.go new file mode 100644 index 0000000000..7d2882fdce --- /dev/null +++ b/pkg/paths/migrate_lock_unix.go @@ -0,0 +1,28 @@ +//go:build unix + +package paths + +import ( + "errors" + "os" + + "golang.org/x/sys/unix" +) + +// tryLockExclusive attempts a non-blocking exclusive flock(2) on f. It returns +// false without error when another process already holds the lock. +func tryLockExclusive(f *os.File) (bool, error) { + err := unix.Flock(int(f.Fd()), unix.LOCK_EX|unix.LOCK_NB) + if err == nil { + return true, nil + } + if errors.Is(err, unix.EWOULDBLOCK) { + return false, nil + } + return false, err +} + +// unlockFile releases the lock previously acquired with tryLockExclusive. +func unlockFile(f *os.File) error { + return unix.Flock(int(f.Fd()), unix.LOCK_UN) +} diff --git a/pkg/paths/migrate_lock_windows.go b/pkg/paths/migrate_lock_windows.go new file mode 100644 index 0000000000..9ce9d97d67 --- /dev/null +++ b/pkg/paths/migrate_lock_windows.go @@ -0,0 +1,47 @@ +//go:build windows + +package paths + +import ( + "errors" + "os" + + "golang.org/x/sys/windows" +) + +// maxRange asks LockFileEx / UnlockFileEx to cover the whole file by +// passing 0xFFFFFFFF for both the low and high 32 bits of the range. +const maxRange = ^uint32(0) + +// tryLockExclusive attempts a non-blocking exclusive LockFileEx on f. It +// returns false without error when another process already holds the lock. +func tryLockExclusive(f *os.File) (bool, error) { + var ol windows.Overlapped + err := windows.LockFileEx( + windows.Handle(f.Fd()), + windows.LOCKFILE_EXCLUSIVE_LOCK|windows.LOCKFILE_FAIL_IMMEDIATELY, + 0, + maxRange, + maxRange, + &ol, + ) + if err == nil { + return true, nil + } + if errors.Is(err, windows.ERROR_LOCK_VIOLATION) { + return false, nil + } + return false, err +} + +// unlockFile releases the lock previously acquired with tryLockExclusive. +func unlockFile(f *os.File) error { + var ol windows.Overlapped + return windows.UnlockFileEx( + windows.Handle(f.Fd()), + 0, + maxRange, + maxRange, + &ol, + ) +} diff --git a/pkg/paths/paths.go b/pkg/paths/paths.go index d796d3e96b..896b3e09d8 100644 --- a/pkg/paths/paths.go +++ b/pkg/paths/paths.go @@ -3,6 +3,7 @@ package paths import ( "os" "path/filepath" + "runtime" "sync/atomic" ) @@ -19,6 +20,9 @@ func (o *overridable) Set(dir string) { } } +// isSet reports whether an override is currently in effect. +func (o *overridable) isSet() bool { return o.p.Load() != nil } + // get returns the override if set, or falls back to the result of defaultFn. func (o *overridable) get(defaultFn func() string) string { if p := o.p.Load(); p != nil { @@ -70,52 +74,55 @@ func SetRoot(root string) { // // If an override has been set via [SetCacheDir] it is returned instead. // -// On Linux this follows XDG: $XDG_CACHE_HOME/cagent (default ~/.cache/cagent). -// On macOS this uses ~/Library/Caches/cagent. -// On Windows this uses %LocalAppData%/cagent. -// -// If the cache directory cannot be determined, it falls back to a directory -// under the system temporary directory. +// The default follows the XDG Base Directory Specification, honouring +// $XDG_CACHE_HOME on every platform and otherwise using the OS-native +// location: +// - $XDG_CACHE_HOME/cagent, default ~/.cache/cagent (Linux) +// - ~/Library/Caches/cagent (macOS) +// - %LocalAppData%\cagent (Windows) func GetCacheDir() string { return cacheDirOverride.get(func() string { - cacheDir, err := os.UserCacheDir() - if err != nil { - return filepath.Clean(filepath.Join(os.TempDir(), ".cagent-cache")) - } - return filepath.Clean(filepath.Join(cacheDir, "cagent")) + return filepath.Clean(xdgCacheDir()) }) } -// GetConfigDir returns the user's config directory for docker agent. +// GetConfigDir returns the user's config directory for docker agent +// (config.yaml, aliases, user id, MCP OAuth tokens, sandbox tokens). // // If an override has been set via [SetConfigDir] it is returned instead. // -// If the home directory cannot be determined, it falls back to a directory -// under the system temporary directory. This is a best-effort fallback and -// not intended to be a security boundary. +// The default follows the XDG Base Directory Specification, honouring +// $XDG_CONFIG_HOME on every platform and otherwise using the OS-native +// location: +// - $XDG_CONFIG_HOME/cagent, default ~/.config/cagent (Linux) +// - ~/Library/Application Support/cagent (macOS) +// - %AppData%\cagent (Windows) +// +// Until the new location exists, an existing legacy ~/.config/cagent is used so +// installs keep working until [MigrateLegacy] relocates it. func GetConfigDir() string { return configDirOverride.get(func() string { - homeDir, err := os.UserHomeDir() - if err != nil { - return filepath.Clean(filepath.Join(os.TempDir(), ".cagent-config")) - } - return filepath.Clean(filepath.Join(homeDir, ".config", "cagent")) + return resolveDefault(xdgConfigDir(), legacyConfigDir()) }) } -// GetDataDir returns the user's data directory for docker agent (caches, content, logs). +// GetDataDir returns the user's data directory for docker agent (sessions, +// history, installed tools, OCI store, memory, worktrees, snapshots, ...). // // If an override has been set via [SetDataDir] it is returned instead. // -// If the home directory cannot be determined, it falls back to a directory -// under the system temporary directory. +// The default follows the XDG Base Directory Specification, honouring +// $XDG_DATA_HOME on every platform and otherwise using the OS-native +// location: +// - $XDG_DATA_HOME/cagent, default ~/.local/share/cagent (Linux) +// - ~/Library/Application Support/cagent (macOS) +// - %LocalAppData%\cagent (Windows) +// +// Until the new location exists, an existing legacy ~/.cagent is used so +// installs keep working until [MigrateLegacy] relocates it. func GetDataDir() string { return dataDirOverride.get(func() string { - homeDir, err := os.UserHomeDir() - if err != nil { - return filepath.Clean(filepath.Join(os.TempDir(), ".cagent")) - } - return filepath.Clean(filepath.Join(homeDir, ".cagent")) + return resolveDefault(xdgDataDir(), legacyDataDir()) }) } @@ -129,3 +136,86 @@ func GetHomeDir() string { } return filepath.Clean(homeDir) } + +// --- default directory resolution --- +// +// XDG_* env vars are honoured on every platform (not just Linux); otherwise the +// OS-native dir is used. Mirrors github.com/adrg/xdg. + +func xdgConfigDir() string { + if dir := os.Getenv("XDG_CONFIG_HOME"); dir != "" { + return filepath.Join(dir, "cagent") + } + if dir, err := os.UserConfigDir(); err == nil { + return filepath.Join(dir, "cagent") + } + return filepath.Join(os.TempDir(), ".cagent-config") +} + +func xdgCacheDir() string { + if dir := os.Getenv("XDG_CACHE_HOME"); dir != "" { + return filepath.Join(dir, "cagent") + } + if dir, err := os.UserCacheDir(); err == nil { + return filepath.Join(dir, "cagent") + } + return filepath.Join(os.TempDir(), ".cagent-cache") +} + +// xdgDataDir derives the data dir per platform since Go has no os.UserDataDir. +func xdgDataDir() string { + if dir := os.Getenv("XDG_DATA_HOME"); dir != "" { + return filepath.Join(dir, "cagent") + } + home := GetHomeDir() + if home == "" { + return filepath.Join(os.TempDir(), ".cagent") + } + switch runtime.GOOS { + case "darwin": + return filepath.Join(home, "Library", "Application Support", "cagent") + case "windows": + if dir := os.Getenv("LocalAppData"); dir != "" { + return filepath.Join(dir, "cagent") + } + return filepath.Join(home, "AppData", "Local", "cagent") + default: + return filepath.Join(home, ".local", "share", "cagent") + } +} + +func legacyConfigDir() string { + if home := GetHomeDir(); home != "" { + return filepath.Join(home, ".config", "cagent") + } + return "" +} + +func legacyDataDir() string { + if home := GetHomeDir(); home != "" { + return filepath.Join(home, ".cagent") + } + return "" +} + +// resolveDefault returns dst, except while dst does not yet exist and the +// legacy location does, in which case legacy is returned so existing installs +// keep reading their state until [MigrateLegacy] relocates it. +func resolveDefault(dst, legacy string) string { + if legacy != "" && legacy != dst && !pathExists(dst) && dirExists(legacy) { + return filepath.Clean(legacy) + } + return filepath.Clean(dst) +} + +// dirExists reports whether p exists and is a directory. +func dirExists(p string) bool { + info, err := os.Stat(p) + return err == nil && info.IsDir() +} + +// pathExists reports whether p exists (file, directory or symlink). +func pathExists(p string) bool { + _, err := os.Lstat(p) + return err == nil +} diff --git a/pkg/paths/paths_test.go b/pkg/paths/paths_test.go index c3080f60af..8cbb456e6b 100644 --- a/pkg/paths/paths_test.go +++ b/pkg/paths/paths_test.go @@ -1,14 +1,50 @@ package paths_test import ( + "os" "path/filepath" "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/docker/docker-agent/pkg/paths" ) +// These tests mutate process-global state (HOME/XDG env and the directory +// overrides), so they must not run in parallel. + +func TestDefaultDirsHonourXDGEnv(t *testing.T) { + home := t.TempDir() + t.Setenv("HOME", home) + t.Setenv("XDG_CONFIG_HOME", filepath.Join(home, "cfg")) + t.Setenv("XDG_DATA_HOME", filepath.Join(home, "data")) + t.Setenv("XDG_CACHE_HOME", filepath.Join(home, "cache")) + + paths.SetConfigDir("") + paths.SetDataDir("") + paths.SetCacheDir("") + + assert.Equal(t, filepath.Join(home, "cfg", "cagent"), paths.GetConfigDir()) + assert.Equal(t, filepath.Join(home, "data", "cagent"), paths.GetDataDir()) + assert.Equal(t, filepath.Join(home, "cache", "cagent"), paths.GetCacheDir()) +} + +func TestGetDataDirLegacyFallback(t *testing.T) { + home := t.TempDir() + t.Setenv("HOME", home) + t.Setenv("XDG_DATA_HOME", filepath.Join(home, "data")) + paths.SetDataDir("") + + // While the new location is absent, an existing ~/.cagent is still used. + require.NoError(t, os.MkdirAll(filepath.Join(home, ".cagent"), 0o755)) + assert.Equal(t, filepath.Join(home, ".cagent"), paths.GetDataDir()) + + // Once the new location exists it takes over. + require.NoError(t, os.MkdirAll(filepath.Join(home, "data", "cagent"), 0o755)) + assert.Equal(t, filepath.Join(home, "data", "cagent"), paths.GetDataDir()) +} + func TestOverrides(t *testing.T) { t.Parallel() diff --git a/pkg/runtime/lazy_model_store.go b/pkg/runtime/lazy_model_store.go index 938a268dbb..9c6c14fc74 100644 --- a/pkg/runtime/lazy_model_store.go +++ b/pkg/runtime/lazy_model_store.go @@ -9,8 +9,8 @@ import ( ) // lazyModelStore is the default ModelStore wired in when the caller did not -// pass WithModelStore. It defers constructing the modelsdev store (which calls -// os.UserHomeDir and creates the ~/.cagent cache directory) until the first +// pass WithModelStore. It defers constructing the modelsdev store (which +// resolves and creates the user cache directory) until the first // method invocation. This keeps NewLocalRuntime free of disk I/O — tests // that never touch the catalog can build a runtime without paying the cost // or hitting failure modes that depend on the host filesystem. diff --git a/pkg/sandbox/kit/main_test.go b/pkg/sandbox/kit/main_test.go new file mode 100644 index 0000000000..3d6488aea9 --- /dev/null +++ b/pkg/sandbox/kit/main_test.go @@ -0,0 +1,18 @@ +package kit + +import ( + "os" + "testing" + + "github.com/docker/docker-agent/pkg/internal/xdgtest" +) + +// TestMain restores $HOME-based isolation for this package's tests. They stage +// kits for the "default" agent, which resolves the agent config through +// userconfig; clearing the XDG variables keeps that resolution anchored to the +// test's temporary $HOME instead of a real, shared config dir. See +// pkg/internal/xdgtest. +func TestMain(m *testing.M) { + xdgtest.Clear() + os.Exit(m.Run()) +} diff --git a/pkg/teamloader/main_test.go b/pkg/teamloader/main_test.go index d5e63f3130..b972af6246 100644 --- a/pkg/teamloader/main_test.go +++ b/pkg/teamloader/main_test.go @@ -2,11 +2,22 @@ package teamloader import ( "context" + "os" "testing" "github.com/docker/docker-agent/pkg/gateway" + "github.com/docker/docker-agent/pkg/internal/xdgtest" ) +// TestMain restores $HOME-based isolation for this package's tests. They load +// the "default" agent, whose resolution goes through userconfig; clearing the +// XDG variables keeps it anchored to the test's temporary $HOME instead of a +// real, shared config dir. See pkg/internal/xdgtest. +func TestMain(m *testing.M) { + xdgtest.Clear() + os.Exit(m.Run()) +} + // testCatalog seeds a fake MCP catalog so teamloader tests that load configs // with MCP `ref:` toolsets run without a live network call. var testCatalog = gateway.Catalog{