Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions src/commands/manifest/bazel/bazel-workspace-walk.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/**
* Walk the directory tree rooted at `cwd` and return every directory that
* looks like a Bazel workspace root — i.e. contains `MODULE.bazel`,
* `WORKSPACE`, or `WORKSPACE.bazel`. Real monorepos host multiple roots
* (e.g. `envoy/mobile/MODULE.bazel`, rules_kotlin's per-example
* `examples/<name>/MODULE.bazel`); the per-workspace algorithm in the
* orchestrator runs once per discovered root.
*
* The walker is dependency-injected with the directory-prune policy:
* callers pass the set of basenames and basename prefixes the walk must
* refuse to descend into. This module intentionally hardcodes none of
* the "common" prunes (`.git`, `node_modules`, …) — Bazel callers compose
* the codebase-wide `IGNORED_DIRS` list (`src/utils/glob.mts`) with the
* Bazel-specific bits (`bazel-*` output_base symlinks,
* `.socket-auto-manifest`, build-output `dist*`).
*/

import { readdirSync } from 'node:fs'
import path from 'node:path'

import { logger } from '@socketsecurity/registry/lib/logger'

// Upper bound on how many workspace roots one walk will report. Real
// monorepos stay well under 50, so this only trips on pathological inputs.
const MAX_WORKSPACE_ROOTS = 256
// Upper bound on how many levels below the starting directory the walk
// descends. The deepest workspace marker seen in the surveyed OSS corpus
// sits at depth 9 (bazel-self test fixtures); the deepest in realistic
// application code sits at depth 7 (checkmk's thirdparty layout). 8 leaves
// one level of headroom over the realistic maximum and doubles as a
// backstop against runaway descent that slipped past the prune lists.
const MAX_WALK_DEPTH = 8
// A directory is treated as a workspace root when it directly contains a
// file with one of these names.
const WORKSPACE_MARKER_FILES: ReadonlySet<string> = new Set<string>([
  'MODULE.bazel',
  'WORKSPACE',
  'WORKSPACE.bazel',
])

/** Options accepted by `findWorkspaceRoots`. */
export type FindWorkspaceRootsOptions = {
  /** Directory the walk starts from. */
  cwd: string
  /** When true, the walker logs diagnostic messages. */
  verbose?: boolean
  /**
   * Directory basenames the walk must not descend into (exact match).
   * Callers pass the union of the codebase-wide ignore set (`IGNORED_DIRS`
   * in `src/utils/glob.mts`) and their own additions, e.g.
   * `.socket-auto-manifest`.
   */
  ignoreDirNames?: ReadonlySet<string>
  /**
   * Directory basename prefixes the walk must not descend into. Bazel
   * callers pass `['bazel-', 'dist']` to keep the walk out of Bazel's
   * output_base symlinks and build-output directories.
   */
  ignoreDirPrefixes?: ReadonlyArray<string>
}

// Shared fallbacks used when a caller supplies no prune policy, so the
// walker does not allocate a fresh empty collection on every call.
const EMPTY_SET: ReadonlySet<string> = new Set<string>()
const EMPTY_ARRAY: ReadonlyArray<string> = []

// Lists `dir` with file-type information. Returns `undefined` when the
// directory cannot be listed (missing, not a directory, permission denied,
// …) so the walk stays best-effort; the reason is logged in verbose mode
// instead of being dropped silently.
function readWalkEntries(dir: string, verbose: boolean | undefined) {
  try {
    return readdirSync(dir, { withFileTypes: true })
  } catch (e) {
    if (verbose) {
      const reason = e instanceof Error ? e.message : String(e)
      logger.log(
        `[VERBOSE] workspace walker: cannot read ${dir} (${reason}); skipping`,
      )
    }
    return undefined
  }
}

// True when the caller-supplied prune policy forbids descending into a
// directory with this basename (exact-name match or prefix match).
function isPrunedDirName(
  name: string,
  ignoreDirNames: ReadonlySet<string>,
  ignoreDirPrefixes: readonly string[],
): boolean {
  return (
    ignoreDirNames.has(name) ||
    ignoreDirPrefixes.some(prefix => name.startsWith(prefix))
  )
}

/**
 * Walks the tree rooted at `opts.cwd` and collects every directory that
 * directly contains at least one workspace marker file.
 *
 * The walk is best-effort: a directory that cannot be listed (including
 * `cwd` itself) is skipped rather than raising. It stops early once
 * `MAX_WORKSPACE_ROOTS` roots are collected and never inspects directories
 * more than `MAX_WALK_DEPTH` levels below `cwd`. With `opts.verbose` set,
 * each of those three conditions is logged.
 *
 * @param opts - Start directory, prune policy, and verbosity flag.
 * @returns Paths of the discovered roots, each built by joining `cwd` with
 *   the descended directory names (absolute when `cwd` is absolute), sorted
 *   with the default string ordering for determinism.
 */
export function findWorkspaceRoots(opts: FindWorkspaceRootsOptions): string[] {
  const { cwd, verbose } = opts
  const ignoreDirNames = opts.ignoreDirNames ?? EMPTY_SET
  const ignoreDirPrefixes = opts.ignoreDirPrefixes ?? EMPTY_ARRAY
  const out: string[] = []
  // Explicit stack of [dir, depth from cwd] tuples instead of recursion.
  const stack: Array<[string, number]> = [[cwd, 0]]
  while (stack.length) {
    if (out.length >= MAX_WORKSPACE_ROOTS) {
      if (verbose) {
        logger.log(
          `[VERBOSE] workspace walker: hit MAX_WORKSPACE_ROOTS cap (${MAX_WORKSPACE_ROOTS}); truncating walk`,
        )
      }
      break
    }
    const next = stack.pop()
    if (!next) {
      break
    }
    const { 0: dir, 1: depth } = next
    const entries = readWalkEntries(dir, verbose)
    if (!entries) {
      continue
    }
    // A Dirent describes the entry itself, so a symlink is neither
    // `isFile()` nor `isDirectory()`: symlinked markers do not promote a
    // directory and symlinked directories are never descended into.
    if (
      entries.some(
        entry => entry.isFile() && WORKSPACE_MARKER_FILES.has(entry.name),
      )
    ) {
      out.push(dir)
    }
    // Descend even when `dir` is itself a root: nested workspaces are
    // common in monorepos (root MODULE.bazel + examples/*/MODULE.bazel).
    const childNames = entries
      .filter(
        entry =>
          entry.isDirectory() &&
          !isPrunedDirName(entry.name, ignoreDirNames, ignoreDirPrefixes),
      )
      .map(entry => entry.name)
    if (!childNames.length) {
      continue
    }
    if (depth + 1 > MAX_WALK_DEPTH) {
      // Only reported when there was something left to descend into.
      if (verbose) {
        logger.log(
          `[VERBOSE] workspace walker: hit MAX_WALK_DEPTH cap (${MAX_WALK_DEPTH}) at ${dir}; not descending further`,
        )
      }
      continue
    }
    for (const name of childNames) {
      stack.push([path.join(dir, name), depth + 1])
    }
  }
  return out.sort()
}
154 changes: 154 additions & 0 deletions src/commands/manifest/bazel/bazel-workspace-walk.test.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import {
mkdirSync,
mkdtempSync,
rmSync,
symlinkSync,
writeFileSync,
} from 'node:fs'
import os from 'node:os'
import path from 'node:path'

import { afterEach, beforeEach, describe, expect, it } from 'vitest'

import { findWorkspaceRoots } from './bazel-workspace-walk.mts'

// Writes an empty file at `file`, creating any missing parent directories
// first. An existing file at that path is overwritten with empty content.
function touch(file: string): void {
  const parentDir = path.dirname(file)
  mkdirSync(parentDir, { recursive: true })
  writeFileSync(file, '')
}

// The prune policy Bazel callers hand to the walker: the codebase-wide
// IGNORED_DIRS names (.git, node_modules, etc.), the walker's own output
// dir, and the `bazel-*` output_base symlink / `dist*` build-output
// prefixes. Duplicated here on purpose so this test does not depend on
// `src/utils/glob.mts`.
const BAZEL_IGNORE_NAMES: ReadonlySet<string> = new Set<string>([
  '.git',
  '.hg',
  '.idea',
  '.pnpm-store',
  '.socket-auto-manifest',
  '.svn',
  '.vscode',
  'node_modules',
])
const BAZEL_IGNORE_PREFIXES: ReadonlyArray<string> = ['bazel-', 'dist']

// Suite for the Bazel workspace-root walker. Every test works inside a
// fresh temp directory that is removed afterwards.
describe('bazel-workspace-walk', () => {
  let tmp: string

  beforeEach(() => {
    tmp = mkdtempSync(path.join(os.tmpdir(), 'sock-bazel-walk-'))
  })

  afterEach(() => {
    rmSync(tmp, { recursive: true, force: true })
  })

  describe('findWorkspaceRoots', () => {
    it('returns the root when only the root has MODULE.bazel', () => {
      touch(path.join(tmp, 'MODULE.bazel'))
      expect(findWorkspaceRoots({ cwd: tmp })).toEqual([tmp])
    })

    it('detects WORKSPACE and WORKSPACE.bazel as root markers', () => {
      touch(path.join(tmp, 'WORKSPACE'))
      expect(findWorkspaceRoots({ cwd: tmp })).toEqual([tmp])
      rmSync(path.join(tmp, 'WORKSPACE'))
      touch(path.join(tmp, 'WORKSPACE.bazel'))
      expect(findWorkspaceRoots({ cwd: tmp })).toEqual([tmp])
    })

    it('finds nested workspaces below the root', () => {
      touch(path.join(tmp, 'MODULE.bazel'))
      touch(path.join(tmp, 'examples', 'dagger', 'MODULE.bazel'))
      touch(path.join(tmp, 'examples', 'android', 'nested', 'WORKSPACE.bazel'))
      const found = findWorkspaceRoots({ cwd: tmp }).map(p =>
        path.relative(tmp, p),
      )
      // Build the expectations with path.join so the separators match what
      // path.relative produces on every platform.
      expect(found).toEqual([
        '',
        path.join('examples', 'android', 'nested'),
        path.join('examples', 'dagger'),
      ])
    })

    it('stops descending once the walk-depth cap is reached', () => {
      // Mirrors MAX_WALK_DEPTH (8) in bazel-workspace-walk.mts: a directory
      // 8 levels below cwd is still inspected, one 9 levels below is not.
      const atCap = Array.from({ length: 8 }, (_, i) => `d${i + 1}`)
      touch(path.join(tmp, ...atCap, 'MODULE.bazel'))
      touch(path.join(tmp, ...atCap, 'd9', 'MODULE.bazel'))
      const found = findWorkspaceRoots({ cwd: tmp }).map(p =>
        path.relative(tmp, p),
      )
      expect(found).toEqual([path.join(...atCap)])
    })

    it('returns [] when there is no workspace root', () => {
      writeFileSync(path.join(tmp, 'README.md'), '')
      expect(findWorkspaceRoots({ cwd: tmp })).toEqual([])
    })

    it('does NOT prune by default — pruning policy is caller-supplied', () => {
      // No ignoreDirNames / ignoreDirPrefixes passed in: any sub-directory
      // containing a workspace marker gets surfaced, even node_modules.
      touch(path.join(tmp, 'MODULE.bazel'))
      touch(path.join(tmp, 'node_modules', 'MODULE.bazel'))
      const found = findWorkspaceRoots({ cwd: tmp }).map(p =>
        path.relative(tmp, p),
      )
      expect(found).toEqual(['', 'node_modules'])
    })

    it('prunes injected ignoreDirNames', () => {
      touch(path.join(tmp, 'MODULE.bazel'))
      for (const dir of ['node_modules', '.git', '.socket-auto-manifest']) {
        touch(path.join(tmp, dir, 'sub', 'MODULE.bazel'))
      }
      const found = findWorkspaceRoots({
        cwd: tmp,
        ignoreDirNames: BAZEL_IGNORE_NAMES,
      }).map(p => path.relative(tmp, p))
      expect(found).toEqual([''])
    })

    it('prunes injected ignoreDirPrefixes (bazel-* dirs and symlinks)', () => {
      // `bazel-out` is a symlink to a fake output_base holding a copy of
      // MODULE.bazel; `bazel-bin` is a real directory holding one. The
      // walker only descends into entries whose Dirent reports
      // isDirectory(), which is false for symlinks, so the real directory
      // is what actually exercises the prefix prune.
      const fakeOutputBase = mkdtempSync(
        path.join(os.tmpdir(), 'sock-fake-outbase-'),
      )
      try {
        mkdirSync(path.join(fakeOutputBase, 'external', 'maven'), {
          recursive: true,
        })
        touch(path.join(fakeOutputBase, 'external', 'maven', 'MODULE.bazel'))
        symlinkSync(fakeOutputBase, path.join(tmp, 'bazel-out'))
        touch(path.join(tmp, 'bazel-bin', 'external', 'maven', 'MODULE.bazel'))
        touch(path.join(tmp, 'MODULE.bazel'))
        // Positive control: without the prefix the real dir is surfaced.
        const unpruned = findWorkspaceRoots({ cwd: tmp }).map(p =>
          path.relative(tmp, p),
        )
        expect(unpruned).toContain(path.join('bazel-bin', 'external', 'maven'))
        const found = findWorkspaceRoots({
          cwd: tmp,
          ignoreDirPrefixes: BAZEL_IGNORE_PREFIXES,
        }).map(p => path.relative(tmp, p))
        expect(found).toEqual([''])
      } finally {
        rmSync(fakeOutputBase, { recursive: true, force: true })
      }
    })

    it('prunes injected dist* prefix', () => {
      touch(path.join(tmp, 'MODULE.bazel'))
      touch(path.join(tmp, 'dist', 'MODULE.bazel'))
      touch(path.join(tmp, 'distribution', 'MODULE.bazel'))
      const found = findWorkspaceRoots({
        cwd: tmp,
        ignoreDirPrefixes: BAZEL_IGNORE_PREFIXES,
      }).map(p => path.relative(tmp, p))
      expect(found).toEqual([''])
    })

    it('returns absolute, sorted paths', () => {
      touch(path.join(tmp, 'z', 'MODULE.bazel'))
      touch(path.join(tmp, 'a', 'MODULE.bazel'))
      touch(path.join(tmp, 'm', 'MODULE.bazel'))
      const found = findWorkspaceRoots({ cwd: tmp })
      expect(found).toEqual([
        path.join(tmp, 'a'),
        path.join(tmp, 'm'),
        path.join(tmp, 'z'),
      ])
      for (const p of found) {
        expect(path.isAbsolute(p)).toBe(true)
      }
    })

    it('skips a cwd that cannot be read (no throw)', () => {
      // `nope` does not exist, so listing it fails; the walker must return
      // an empty result rather than propagate the error.
      touch(path.join(tmp, 'MODULE.bazel'))
      expect(findWorkspaceRoots({ cwd: path.join(tmp, 'nope') })).toEqual([])
    })
  })
})
Loading