diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml new file mode 100644 index 00000000..5e5e45bc --- /dev/null +++ b/.github/workflows/e2e.yml @@ -0,0 +1,106 @@ +name: e2e + +on: + push: + branches: [main] + paths: + - 'src/**' + - 'e2e/**' + - 'package.json' + - 'yarn.lock' + - '.github/workflows/e2e.yml' + pull_request: + paths: + - 'src/**' + - 'e2e/**' + - 'package.json' + - 'yarn.lock' + - '.github/workflows/e2e.yml' + workflow_dispatch: + +# One in-flight e2e run per ref; cancel older runs so ephemeral resources don't pile up. +concurrency: + group: e2e-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + id-token: write + +jobs: + check-lambda-changes: + name: Check lambda changes + runs-on: ubuntu-latest + outputs: + should_run: ${{ steps.filter.outputs.lambda }} + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + - uses: dorny/paths-filter@fbd0ab8f3e69293af611ebaee6363fc25e6d187d # v4 + id: filter + with: + filters: | + lambda: + - 'src/**' + - 'e2e/**' + - '.github/workflows/e2e.yml' + + e2e-lambda: + name: Lambda e2e (Node ${{ matrix.node-version }}) + runs-on: ubuntu-latest + needs: [check-lambda-changes] + strategy: + matrix: + node-version: [20] + permissions: + contents: read + id-token: write + env: + FORCE_COLOR: 1 + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + + - name: Set up Node ${{ matrix.node-version }} + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: ${{ matrix.node-version }} + + - name: Set up Yarn + run: corepack enable && corepack prepare yarn@4.10.3 --activate + + - name: Install plugin dependencies + run: yarn install --immutable + + # OIDC federation: GitHub -> AWS. The role is assumable only from this repo's + # workflows and scoped to the e2e sandbox account. 
+ - name: AWS auth (OIDC) + if: needs.check-lambda-changes.outputs.should_run == 'true' + uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2 + with: + role-to-assume: ${{ vars.AWS_ROLE_ARN_E2E }} + aws-region: ${{ vars.AWS_REGION_E2E || 'us-east-1' }} + + - name: Install e2e dependencies + run: npm install + working-directory: e2e + + # Short-lived Datadog API + App keys via OIDC federation (dd-sts), governed by + # the serverless-plugin-datadog-e2e policy. No static Datadog keys in this repo. + - name: Get Datadog credentials (dd-sts) + id: dd-sts + if: needs.check-lambda-changes.outputs.should_run == 'true' + uses: DataDog/dd-sts-action@2e8187910199bd93129520183c093e19aa585c75 # v1.0.0 + with: + policy: serverless-plugin-datadog-e2e + + - name: Run e2e suite + run: npm test + working-directory: e2e + env: + DATADOG_API_KEY: ${{ steps.dd-sts.outputs.api_key }} + DATADOG_APP_KEY: ${{ steps.dd-sts.outputs.app_key }} + DATADOG_SITE: ${{ vars.DD_SITE_E2E || 'datadoghq.com' }} + AWS_REGION: ${{ vars.AWS_REGION_E2E || 'us-east-1' }} + # Skipped only when nothing relevant changed. When relevant files change the + # suite runs for real and the dd-sts / AWS OIDC steps must succeed -- an + # auth/federation failure fails the job loudly rather than skipping green. + SKIP_LAMBDA_TESTS: ${{ needs.check-lambda-changes.outputs.should_run != 'true' }} diff --git a/e2e/.env.local.example b/e2e/.env.local.example new file mode 100644 index 00000000..5f0b76ae --- /dev/null +++ b/e2e/.env.local.example @@ -0,0 +1,17 @@ +# Copy to e2e/.env.local (gitignored) for local runs. Real shell env vars take +# precedence, so this file is purely a convenience. + +# Datadog API + APP keys. The API key is wired into the Lambda extension AND used +# to authenticate the API client; the APP key is needed to poll spans/logs. +DATADOG_API_KEY= +DATADOG_APP_KEY= + +# Datadog site the org lives in (datadoghq.com, datadoghq.eu, us3.datadoghq.com, ...). 
+DATADOG_SITE=datadoghq.com + +# AWS region to deploy the ephemeral Lambda into. Must have pinned layers in +# ../src/layers.json (us-east-1 is the default). +AWS_REGION=us-east-1 + +# Set to skip the suite entirely. +# SKIP_LAMBDA_TESTS=true diff --git a/e2e/.gitignore b/e2e/.gitignore new file mode 100644 index 00000000..3c16a963 --- /dev/null +++ b/e2e/.gitignore @@ -0,0 +1,7 @@ +node_modules +.env.local +fixtures/**/node_modules +fixtures/**/.serverless +fixtures/**/package-lock.json +serverless-plugin-datadog-*.tgz +*.log diff --git a/e2e/README.md b/e2e/README.md new file mode 100644 index 00000000..4c506654 --- /dev/null +++ b/e2e/README.md @@ -0,0 +1,101 @@ +# serverless-plugin-datadog e2e suite + +End-to-end coverage for the AWS Lambda instrumentation this plugin performs. It +deploys a real, ephemeral Lambda with the plugin enabled, verifies the deployed +config and the telemetry it produces in Datadog, proves re-deploy is idempotent, +then tears the stack down and verifies a clean end-state. + +Conforms to the shared contract in `serverless-ci/e2e/spec.md`; mirrors the +`datadog-ci` reference suite (`e2e/cloud-run.test.ts` + `e2e/helpers/*`). + +## What it does + +``` +sls deploy (APPLY: provision + instrument) -> verify CONFIG + -> aws lambda invoke (trigger) -> verify TELEMETRY (traces + logs) + -> sls deploy again -> assert IDEMPOTENT (no diff/dup) + -> sls remove (REMOVE) -> verify CLEAN (function gone) + -> teardown (always, even on failure) +``` + +For this tool the plugin runs as part of `sls deploy`, so provisioning the +uninstrumented workload and APPLY are the same step. REMOVE deletes the whole +CloudFormation stack, so the clean end-state is the function (and all its DD +config) being absent -- asserted explicitly. 
+ +**Config verified** (`helpers/lambda-verifier.ts`): the pinned Datadog Node layer ++ extension layer (versions read from `../src/layers.json`, so drift blames the +plugin), the redirected handler with the original preserved in `DD_LAMBDA_HANDLER`, +the required `DD_*` env vars (including the `DD_SERVICE` / `DD_ENV` / `DD_VERSION` +identity), and the `dd_sls_plugin` marker tag plus the `one_e2e_created` freshness tag. +Identity (run-id service name, env, version) is asserted -- not mere presence. + +**Telemetry verified** (`helpers/lambda-telemetry-checker.ts`): spans and logs are +polled (15s × 20) filtered by the unique service name, and the matched records must +carry the full identity (service + env + run id, plus version on spans), not just exist. + +## Resource hygiene + +Every run uses a unique name `one-e2e-slsplugin-lambda-<run-id>` and stamps a +`one_e2e_created:<timestamp>` tag at creation (`helpers/naming.ts`). The shared +cross-repo sweeper ages out anything older than the grace window. In-test teardown +runs in `afterAll` regardless of outcome. + +## Prerequisites + +- **Node 20** and **npm** (the suite is a standalone npm project, isolated from the + plugin's Yarn Berry setup). +- The plugin is built and the fixture is installed automatically by `pretest` + (`npm test` runs `yarn build` at the repo root, then `npm install` in the fixture). +- **AWS auth** with permission to deploy Lambda / CloudFormation in the target + account. Locally, wrap the run with `aws-vault`: + ``` + aws-vault exec sso-serverless-sandbox-account-admin -- npm test + ``` + In CI, credentials come from GitHub→AWS OIDC (no static keys). +- **Datadog keys**: `DATADOG_API_KEY` (wired into the extension and used for the API + client) and `DATADOG_APP_KEY` (used to poll spans/logs). + +## Run locally + +``` +cd e2e +npm install + +# aws-vault provides AWS creds; dd-auth mints short-lived keys for the org -- no pasted keys. 
+aws-vault exec sso-serverless-sandbox-account-admin -- \ + dd-auth --domain app.datadoghq.com -- bash -c ' + export DATADOG_API_KEY="$DD_API_KEY" DATADOG_APP_KEY="$DD_APP_KEY" + npm test + ' +``` + +`dd-auth` injects `$DD_API_KEY` / `$DD_APP_KEY` into the wrapped subprocess; the +suite reads them as `DATADOG_API_KEY` / `DATADOG_APP_KEY`. AWS credentials still come +from `aws-vault`. Set `SKIP_LAMBDA_TESTS=true` to skip the suite. + +## Configuration + +| Env var | Required | Default | Purpose | +| ------------------ | -------- | ---------------- | ---------------------------------------------- | +| `DATADOG_API_KEY` | yes | -- | Wired into the extension + API-client auth | +| `DATADOG_APP_KEY` | yes | -- | API-client auth for span/log polling | +| `DATADOG_SITE` | no | `datadoghq.com` | Datadog site | +| `AWS_REGION` | no | `us-east-1` | Deploy region (must be pinned in `layers.json`)| +| `SKIP_LAMBDA_TESTS`| no | -- | `true` skips the suite | + +(AWS credentials come from the ambient AWS env / `aws-vault` / OIDC.) + +## CI + +`.github/workflows/e2e.yml` runs the suite behind a `dorny/paths-filter` gate +(`src/**`, `e2e/**`, the workflow file) and the `SKIP_LAMBDA_TESTS` flag, with +GitHub→AWS OIDC (`aws-actions/configure-aws-credentials`). Required repo settings: + +- Datadog auth (dd-sts): short-lived API + App keys minted at runtime via + [`DataDog/dd-sts-action`](https://github.com/DataDog/dd-sts-action) under the + `serverless-plugin-datadog-e2e` policy -- no static Datadog keys in this repo +- Variables: `AWS_ROLE_ARN_E2E` (the OIDC deploy role), `AWS_REGION_E2E` (default + `us-east-1`), optionally `DD_SITE_E2E` + +The OIDC deploy role and the policy backing it are cataloged in +`serverless-ci/e2e/iam-infra.md`. 
diff --git a/e2e/fixtures/lambda-node/handler.js b/e2e/fixtures/lambda-node/handler.js new file mode 100644 index 00000000..ad3214bc --- /dev/null +++ b/e2e/fixtures/lambda-node/handler.js @@ -0,0 +1,16 @@ +// Minimal Node.js workload for the e2e suite. Duplicated from the default +// handler in serverless-self-monitoring (lambda-managed-instances/handlers/default/nodejs), +// with one log line added so a log record is emitted on every invocation. +// +// No tracer setup lives here on purpose: the serverless-plugin-datadog wiring +// (Datadog Node layer + extension + redirected handler) auto-instruments the +// invocation and auto-collects logs. The e2e suite tests that wiring, not the +// runtime, so this handler stays trivial. +exports.handler = async function (_event, _context) { + console.log(`one-e2e serverless-plugin-datadog lambda invocation service=${process.env.DD_SERVICE}`); + + return { + statusCode: 200, + body: "hello, world", + }; +}; diff --git a/e2e/fixtures/lambda-node/package.json b/e2e/fixtures/lambda-node/package.json new file mode 100644 index 00000000..c4817b59 --- /dev/null +++ b/e2e/fixtures/lambda-node/package.json @@ -0,0 +1,9 @@ +{ + "name": "one-e2e-lambda-node-workload", + "version": "0.0.0", + "private": true, + "description": "Ephemeral Lambda workload deployed by the serverless-plugin-datadog e2e suite. The serverless-plugin-datadog dependency is installed from a packed tarball by ../../setup.sh (not listed here, to avoid a recursive file: link).", + "dependencies": { + "serverless": "3.39.0" + } +} diff --git a/e2e/fixtures/lambda-node/serverless.yml b/e2e/fixtures/lambda-node/serverless.yml new file mode 100644 index 00000000..b794e094 --- /dev/null +++ b/e2e/fixtures/lambda-node/serverless.yml @@ -0,0 +1,49 @@ +# Uninstrumented workload + the serverless-plugin-datadog instrumentation it plugs +# into. The plugin runs as part of `sls deploy` (APPLY) and is torn down with the +# whole stack on `sls remove` (REMOVE). 
Everything that has to be unique or stamped +# per run is injected via env so this file stays static (see e2e/helpers/naming.ts). +# +# frameworkVersion is pinned to 3 to avoid the Serverless Framework v4 login/license +# flow in CI. The runtime is pinned to one canonical Node.js version (see RULES in +# the spec: one canonical runtime per platform). +service: ${env:E2E_SERVICE_NAME} + +frameworkVersion: "3" + +plugins: + - serverless-plugin-datadog + +provider: + name: aws + runtime: nodejs20.x + region: ${env:AWS_REGION, "us-east-1"} + stage: e2e + # Hygiene: freshness tag set atomically at creation, on both the CloudFormation + # stack and every resource, so the cross-repo sweeper can age it out. + stackTags: + one_e2e_created: ${env:E2E_CREATED_TS} + tags: + one_e2e_created: ${env:E2E_CREATED_TS} + +custom: + datadog: + # API key wiring + site: required for the extension to ship telemetry. + apiKey: ${env:DD_API_KEY} + site: ${env:DD_SITE, "datadoghq.com"} + # Identity: service carries the unique run id, env + version are fixed markers. + # These flow onto ingested telemetry and let us assert identity, not existence. + service: ${env:E2E_SERVICE_NAME} + env: e2e + version: "1.0.0" + # The mechanism under test: library layer + extension layer + tracing + logs. + addLayers: true + addExtension: true + enableDDTracing: true + enableDDLogs: true + # Keep the deploy hermetic: no git metadata upload (needs app key + git ctx). + enableSourceCodeIntegration: false + uploadGitMetadata: false + +functions: + hello: + handler: handler.handler diff --git a/e2e/helpers/.gitattributes b/e2e/helpers/.gitattributes new file mode 100644 index 00000000..ab59db08 --- /dev/null +++ b/e2e/helpers/.gitattributes @@ -0,0 +1,5 @@ +# Synced from serverless-ci/e2e/shared -- generated by the e2e-shared sync. Collapsed in diffs. 
+exec.ts linguist-generated=true +lambda-telemetry-checker.ts linguist-generated=true +lambda-verifier.ts linguist-generated=true +naming.ts linguist-generated=true diff --git a/e2e/helpers/e2e.config.ts b/e2e/helpers/e2e.config.ts new file mode 100644 index 00000000..0f28095c --- /dev/null +++ b/e2e/helpers/e2e.config.ts @@ -0,0 +1,86 @@ +/* + * Unless explicitly stated otherwise all files in this repository are licensed + * under the Apache License Version 2.0. + * + * This product includes software developed at Datadog (https://www.datadoghq.com/). + * Copyright 2026 Datadog, Inc. + */ + +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import {fileURLToPath} from 'node:url'; + +import {type E2ENaming} from './naming'; +import {type ExpectedLayers, type LambdaVerifierConfig} from './lambda-verifier'; + +// Repo-local config feeding the shared e2e helpers. This file is NOT synced -- it holds +// everything specific to serverless-plugin-datadog that the shared, parameterized helpers +// read through their config arguments. + +export const NAMING: E2ENaming = {tool: 'slsplugin', platform: 'lambda'}; + +export const ENV_NAME = 'e2e'; +export const ENV_VERSION = '1.0.0'; +const RUNTIME = 'nodejs20.x'; + +// Transient cloud-provider errors safe to retry, passed as ExecOptions.retryPatterns. +export const RETRY_PATTERNS = [ + // Generic / cross-cloud + 'GatewayTimeout', + 'Operation was canceled', + 'ETIMEDOUT', + 'ECONNRESET', + 'temporarily unavailable', + // AWS Lambda / CloudFormation / STS + 'ThrottlingException', + 'TooManyRequestsException', + 'Rate exceeded', + 'RequestLimitExceeded', + 'ResourceConflictException', + 'ServiceException', + 'InternalFailure', + 'ServiceUnavailable', + 'is in progress', // CloudFormation stack op already running + 'ProvisionedConcurrencyConfig', // eventual-consistency churn on update +]; + +// `sls deploy` names functions `<service>-<stage>-<function>`; stage is pinned to `e2e` and the +// only function is `hello`. 
/**
 * Maps the run-unique service name to the deployed Lambda function name.
 *
 * The name has the form `<service>-<stage>-<function>`; this mapping hardcodes the
 * stage segment to `e2e` and the function segment to `hello`.
 */
export const functionName = (serviceName: string): string => `${serviceName}-e2e-hello`;

/**
 * Resolves the pinned Node + extension layer ARNs for `region`.
 *
 * Pinned artifact versions come from the plugin's own src/layers.json, so a version
 * mismatch blames the plugin/registry, not upstream drift.
 *
 * @param region - key looked up under `regions` in `../../src/layers.json`.
 * @returns the values stored under the `RUNTIME` key and the `extension` key for that region.
 * @throws AssertionError when the region, the runtime layer, or the extension layer is missing.
 */
const expectedLayerArns = (region: string): ExpectedLayers => {
  const layersPath = fileURLToPath(new URL('../../src/layers.json', import.meta.url));
  // layers.json is read as: region -> (runtime name | "extension") -> layer ARN string.
  const layers = JSON.parse(fs.readFileSync(layersPath, 'utf-8')) as {
    regions: Record<string, Record<string, string>>;
  };
  const regionLayers = layers.regions[region];
  assert.ok(regionLayers, `region ${region} not present in src/layers.json`);
  const node = regionLayers[RUNTIME];
  const extension = regionLayers.extension;
  assert.ok(node, `no ${RUNTIME} layer pinned for ${region} in src/layers.json`);
  assert.ok(extension, `no extension layer pinned for ${region} in src/layers.json`);

  return {node, extension};
};

/** Repo-specific inputs handed to the shared Lambda verifier. */
export const VERIFIER: LambdaVerifierConfig = {
  functionName,
  expectedLayerArns,
  redirectHandler: '/opt/nodejs/node_modules/datadog-lambda-js/handler.handler',
  originalHandler: 'handler.handler',
  // With the extension enabled the plugin tags the function with its own version marker.
  toolTag: {key: 'dd_sls_plugin', pattern: /^v\d+\.\d+\.\d+/},
  env: {
    // At least one of these must be set on the function (checked by the verifier).
    apiKeyVars: ['DD_API_KEY', 'DD_API_KEY_SECRET_ARN', 'DD_KMS_API_KEY', 'DD_API_KEY_SSM_ARN'],
    // Must be set, any value.
    present: ['DD_SITE'],
    // Must match exactly.
    values: (serviceName) => ({
      DD_TRACE_ENABLED: 'true',
      DD_SERVERLESS_LOGS_ENABLED: 'true',
      DD_SERVICE: serviceName,
      DD_ENV: ENV_NAME,
      DD_VERSION: ENV_VERSION,
    }),
  },
};

// diff --git a/e2e/helpers/exec.ts b/e2e/helpers/exec.ts
// new file mode 100644, index 00000000..58941747, --- /dev/null +++ b/e2e/helpers/exec.ts, @@ -0,0 +1,90 @@
// Code generated by serverless-ci e2e-shared sync. DO NOT EDIT.
// Source of truth lives in serverless-ci/e2e/shared -- edit there; local changes are overwritten.
/*
 * Unless explicitly stated otherwise all files in this repository are licensed
 * under the Apache License Version 2.0.
 *
 * This product includes software developed at Datadog (https://www.datadoghq.com/).
 * Copyright 2026 Datadog, Inc.
 */

import child_process from 'node:child_process';

// Runner-agnostic exec + bounded-retry helper: no jest/vitest imports, so it backs any
// test runner. "Retry the cloud, not the assertions": only transient provider errors are
// retried, on a bounded budget; a real failure surfaces immediately.

/** Outcome of one command run; stdout/stderr are trimmed. */
export interface ExecResult {
  exitCode: number;
  stdout: string;
  stderr: string;
}

export interface ExecOptions {
  // Extra environment variables, merged over process.env for the child process.
  env?: Record<string, string>;
  cwd?: string;
  // Serverless / AWS / CFN calls can emit large output; default generous but bounded.
  // CFN deploys need ~16MB; sls/cdk need more, so callers raise it as needed.
  maxBuffer?: number;
  // Transient cloud-provider error substrings safe to retry. Supplied by the caller so
  // each cloud/tool contributes its own patterns; empty means never retry.
  retryPatterns?: string[];
  maxAttempts?: number;
  delaySeconds?: number;
}

const DEFAULT_MAX_BUFFER = 50 * 1024 * 1024;
const DEFAULT_MAX_ATTEMPTS = 3;
const DEFAULT_DELAY_SECONDS = 10;

/**
 * Runs `command` through `child_process.exec` once and always resolves (never rejects).
 *
 * @param command - shell command line to execute.
 * @param options - `env` (merged over `process.env`), `cwd`, and `maxBuffer`.
 * @returns exit code plus trimmed stdout/stderr. The exit code is 0 when exec reports no
 *   error, the numeric `error.code` when there is one, and 1 for any other error.
 */
export const execPromise = async (command: string, options: ExecOptions = {}): Promise<ExecResult> => {
  const {env, cwd, maxBuffer = DEFAULT_MAX_BUFFER} = options;

  return new Promise<ExecResult>((resolve) => {
    child_process.exec(command, {env: {...process.env, ...env}, cwd, maxBuffer}, (error, stdout, stderr) => {
      resolve({
        exitCode: error ? (typeof error.code === 'number' ? error.code : 1) : 0,
        stdout: stdout.trim(),
        stderr: stderr.trim(),
      });
    });
  });
};

/** True when stdout or stderr contains any of the given substrings. */
const isRetryable = (result: ExecResult, patterns: string[]): boolean => {
  const output = `${result.stdout} ${result.stderr}`;

  return patterns.some((pattern) => output.includes(pattern));
};

const waitFor = (seconds: number): Promise<void> =>
  new Promise<void>((resolve) => setTimeout(resolve, seconds * 1000));

/**
 * Runs `command` up to `maxAttempts` times, retrying only when the failed output contains
 * one of `retryPatterns`.
 *
 * @param command - shell command line to execute.
 * @param options - forwarded to `execPromise`; also supplies `retryPatterns` (default none),
 *   `maxAttempts` (default 3) and `delaySeconds` between attempts (default 10).
 * @returns the first successful result, or the first failure that is not retryable, or the
 *   last failure once the attempt budget is spent. With `maxAttempts` below 1 the command is
 *   never run and a placeholder failure (`stderr: 'not run'`) is returned.
 */
export const execPromiseWithRetries = async (command: string, options: ExecOptions = {}): Promise<ExecResult> => {
  const {
    retryPatterns = [],
    maxAttempts = DEFAULT_MAX_ATTEMPTS,
    delaySeconds = DEFAULT_DELAY_SECONDS,
  } = options;

  let result: ExecResult = {exitCode: 1, stdout: '', stderr: 'not run'};
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    result = await execPromise(command, options);
    if (result.exitCode === 0) {
      return result;
    }
    if (attempt < maxAttempts && isRetryable(result, retryPatterns)) {
      // eslint-disable-next-line no-console
      console.log(`Command failed with retryable error (attempt ${attempt}/${maxAttempts}), retrying in ${delaySeconds}s...`);
      // eslint-disable-next-line no-console
      console.log(`stdout: ${result.stdout}`);
      // eslint-disable-next-line no-console
      console.log(`stderr: ${result.stderr}`);
      await waitFor(delaySeconds);
    } else {
      return result;
    }
  }

  return result;
};

// diff --git a/e2e/helpers/lambda-telemetry-checker.ts b/e2e/helpers/lambda-telemetry-checker.ts
// new file mode 100644, index 00000000..b20e976d, --- /dev/null +++ b/e2e/helpers/lambda-telemetry-checker.ts, @@ -0,0 +1,202 @@
// Code generated by serverless-ci e2e-shared sync. DO NOT EDIT.
// Source of truth lives in serverless-ci/e2e/shared -- edit there; local changes are overwritten.
//
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
//
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2026 Datadog, Inc.

import {client, v2} from '@datadog/datadog-api-client';

import {RUN_ID_TAG_KEY} from './naming';

// Runner-agnostic telemetry poller. Mirrors the datadog-ci reference
// (cloud-run-telemetry-checker.ts): poll spans + logs on a bounded budget, then assert
// *identity* on the matched records, not mere existence.

const POLL_INTERVAL_SECONDS = 15;
const MAX_ATTEMPTS = 20;

// A single ingested span or log, flattened to the fields we assert on -- mirrors the Go
// shared Event: top-level string attributes plus "key:value" tag strings.
interface ParsedEvent {
  attrs: Record<string, string>;
  tags: string[];
}

// One key/value pair a record must carry to count as a match.
interface IdentityTag {
  key: string;
  value: string;
}

/**
 * Flattens a raw span/log record into attrs + tags.
 *
 * Reserved fields (service/env/version) sit at the top of `attributes`; logs nest their
 * structured attributes one level deeper; tags arrive as a "key:value" string array.
 * Only string values are kept, and nested attributes overwrite reserved fields that share
 * a key. A record without an object-valued `attributes` yields empty attrs and tags.
 */
const parseEvent = (record: unknown): ParsedEvent => {
  const attrs: Record<string, string> = {};
  const tags: string[] = [];
  const attributes = (record as {attributes?: Record<string, unknown>})?.attributes;
  if (attributes && typeof attributes === 'object') {
    for (const key of ['service', 'env', 'version']) {
      const value = attributes[key];
      if (typeof value === 'string' && value !== '') {
        attrs[key] = value;
      }
    }
    const nested = attributes.attributes;
    if (nested && typeof nested === 'object') {
      for (const [key, value] of Object.entries(nested)) {
        if (typeof value === 'string') {
          attrs[key] = value;
        }
      }
    }
    if (Array.isArray(attributes.tags)) {
      for (const tag of attributes.tags) {
        if (typeof tag === 'string') {
          tags.push(tag);
        }
      }
    }
  }

  return {attrs, tags};
};

// Assert key=value as a structured attribute or a "key:value" tag -- identity, not a
// substring match against the serialized blob.
const has = (event: ParsedEvent, key: string, value: string): boolean =>
  event.attrs[key] === value || event.tags.includes(`${key}:${value}`);

// Human-readable "key:value, key:value" rendering used in log and error messages.
const identityLabel = (identity: IdentityTag[]): string => identity.map(({key, value}) => `${key}:${value}`).join(', ');

const waitFor = (seconds: number): Promise<void> =>
  new Promise<void>((resolve) => setTimeout(resolve, seconds * 1000));

/**
 * Builds the Datadog API client configuration from the environment.
 *
 * Keys come from DATADOG_API_KEY / DATADOG_APP_KEY, falling back to DD_API_KEY /
 * DD_APP_KEY; the site comes from DATADOG_SITE, falling back to DD_SITE, and is only
 * applied when non-empty.
 */
const buildConfiguration = (): client.Configuration => {
  // `||` rather than `??`: a variable that is set but empty (e.g. `DATADOG_API_KEY=` left
  // blank in an env file) must not block the fallback to the DD_* name.
  const configuration = client.createConfiguration({
    authMethods: {
      apiKeyAuth: process.env.DATADOG_API_KEY || process.env.DD_API_KEY,
      appKeyAuth: process.env.DATADOG_APP_KEY || process.env.DD_APP_KEY,
    },
  });
  const site = process.env.DATADOG_SITE || process.env.DD_SITE;
  if (site) {
    configuration.setServerVariables({site});
  }

  return configuration;
};

// Poll until at least one returned record carries every identity marker. We filter
// in-process (rather than trusting the query alone) so a stray record that merely
// matches the service filter can't pass for one stamped with the full identity.
/**
 * Runs `query` up to MAX_ATTEMPTS times, POLL_INTERVAL_SECONDS apart, until at least one
 * returned record carries every pair in `identity`.
 *
 * A query that throws is logged and counted as an attempt with no match.
 *
 * @param label - prefix for log and error messages (e.g. "spans").
 * @param query - fetches the current batch of raw records.
 * @param identity - key/value pairs a single record must all carry.
 * @throws Error when no matching record is seen within the attempt budget.
 */
const pollUntilIdentity = async (
  label: string,
  query: () => Promise<unknown[]>,
  identity: IdentityTag[],
): Promise<void> => {
  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    // eslint-disable-next-line no-console
    console.log(`[${label}] attempt ${attempt}/${MAX_ATTEMPTS}`);
    try {
      const results = await query();
      const matching = results.filter((record) => {
        const event = parseEvent(record);

        return identity.every(({key, value}) => has(event, key, value));
      });
      if (matching.length > 0) {
        // eslint-disable-next-line no-console
        console.log(`[${label}] found ${matching.length} record(s) with identity [${identityLabel(identity)}]`);

        return;
      }
    } catch (error) {
      // eslint-disable-next-line no-console
      console.error(`[${label}] query error:`, error);
    }

    // No sleep after the final attempt; fall through to the timeout error instead.
    if (attempt < MAX_ATTEMPTS) {
      await waitFor(POLL_INTERVAL_SECONDS);
    }
  }
  throw new Error(
    `[${label}] timed out after ${MAX_ATTEMPTS} attempts (${MAX_ATTEMPTS * POLL_INTERVAL_SECONDS}s) ` +
      `waiting for telemetry with identity [${identityLabel(identity)}]`,
  );
};

// Search window: the last 15 minutes up to now, as ISO-8601 strings.
const recentWindow = (): {from: string; to: string} => {
  const now = new Date();
  const from = new Date(now.getTime() - 15 * 60 * 1000);

  return {from: from.toISOString(), to: now.toISOString()};
};

// Up to 25 spans from the recent window, filtered by `@service:<serviceName>`.
const querySpans = async (configuration: client.Configuration, serviceName: string): Promise<unknown[]> => {
  const api = new v2.SpansApi(configuration);
  const {from, to} = recentWindow();
  const response = await api.listSpans({
    body: {
      data: {
        attributes: {
          filter: {query: `@service:${serviceName}`, from, to},
          page: {limit: 25},
        },
        type: 'search_request',
      },
    },
  });

  return response.data ?? [];
};

// Up to 25 logs from the recent window, filtered by `service:<serviceName>`.
const queryLogs = async (configuration: client.Configuration, serviceName: string): Promise<unknown[]> => {
  const api = new v2.LogsApi(configuration);
  const {from, to} = recentWindow();
  const response = await api.listLogs({
    body: {
      filter: {query: `service:${serviceName}`, from, to},
      page: {limit: 25},
    },
  });

  return response.data ?? [];
};

export interface TelemetryIdentity {
  serviceName: string;
  env: string;
  version: string;
  runId: string;
  // Run-id tag key; defaults to the shared convention. Override only if a repo diverges.
  runIdTagKey?: string;
}

/**
 * Polls spans and logs concurrently and resolves once each stream has produced a record
 * carrying the expected identity.
 *
 * @throws Error (from the poller) when either stream times out.
 */
export const checkTelemetryFlowing = async ({
  serviceName,
  env,
  version,
  runId,
  runIdTagKey = RUN_ID_TAG_KEY,
}: TelemetryIdentity): Promise<void> => {
  const configuration = buildConfiguration();
  await Promise.all([
    // Traces carry service + env + version + run-id identity.
    pollUntilIdentity('spans', () => querySpans(configuration, serviceName), [
      {key: 'service', value: serviceName},
      {key: 'env', value: env},
      {key: 'version', value: version},
      {key: runIdTagKey, value: runId},
    ]),
    // Logs carry service + env + run-id identity.
    pollUntilIdentity('logs', () => queryLogs(configuration, serviceName), [
      {key: 'service', value: serviceName},
      {key: 'env', value: env},
      {key: runIdTagKey, value: runId},
    ]),
  ]);
};

// diff --git a/e2e/helpers/lambda-verifier.ts b/e2e/helpers/lambda-verifier.ts
// new file mode 100644, index 00000000..15692f3d, --- /dev/null +++ b/e2e/helpers/lambda-verifier.ts, @@ -0,0 +1,182 @@
// Code generated by serverless-ci e2e-shared sync. DO NOT EDIT.
// Source of truth lives in serverless-ci/e2e/shared -- edit there; local changes are overwritten.
//
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
//
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2026 Datadog, Inc.
import assert from 'node:assert/strict';

import {execPromise} from './exec';
import {FRESHNESS_TAG_KEY} from './naming';

// Runner-agnostic Lambda config verifier: drives the AWS CLI and asserts the deployed
// function's identity with node:assert. "Config present" for Lambda (per spec) = DD
// layers + extension layer + DD_* env vars + tags. The parts that vary per tool (the
// CDK construct vs CFN macro vs sls plugin) are supplied through LambdaVerifierConfig,
// so this file is identical across the Lambda TS repos.

// Fields read from the `aws lambda get-function-configuration` JSON output.
interface LambdaLayer {
  Arn: string;
}
interface LambdaConfiguration {
  FunctionArn: string;
  Handler: string;
  Runtime: string;
  Layers?: LambdaLayer[];
  Environment?: {Variables?: Record<string, string>};
}

export interface FunctionSnapshot {
  handler: string;
  layerArns: string[];
  ddEnv: Record<string, string>;
}

export interface ExpectedLayers {
  node: string;
  extension: string;
}

export interface LambdaVerifierConfig {
  // functionName maps the run-unique service name to the deployed function name, which
  // differs per tool (e.g. sls deploy => `<service>-e2e-hello`).
  functionName: (serviceName: string) => string;
  // expectedLayerArns resolves the pinned node + extension layer ARNs for a region,
  // read from whatever source the tool pins them in (e.g. the plugin's src/layers.json).
  expectedLayerArns: (region: string) => ExpectedLayers;
  // redirectHandler is the wrapper handler the tool redirects to.
  redirectHandler: string;
  // originalHandler, when set, is the expected DD_LAMBDA_HANDLER value (the preserved
  // user handler). Omit to skip the double-wrap check.
  originalHandler?: string;
  // toolTag is the marker tag the tool applies; pattern asserts its shape (e.g. version).
  toolTag: {key: string; pattern: RegExp};
  // env identity. apiKeyVars: at least one must be set. present: must be set (any value).
  // values: must equal exactly (service/env/version identity).
  env: {
    apiKeyVars: string[];
    present: string[];
    values: (serviceName: string) => Record<string, string>;
  };
}

/**
 * Fetches the function configuration through the AWS CLI.
 *
 * @throws AssertionError when the CLI exits non-zero; SyntaxError if stdout is not JSON.
 */
const getConfiguration = async (fnName: string, region: string): Promise<LambdaConfiguration> => {
  const result = await execPromise(
    `aws lambda get-function-configuration --function-name "${fnName}" --region "${region}" --output json`,
  );
  assert.equal(result.exitCode, 0, `get-function-configuration failed: ${result.stderr}`);

  return JSON.parse(result.stdout) as LambdaConfiguration;
};

/**
 * Fetches the function's tags through the AWS CLI; a response without `Tags` yields `{}`.
 *
 * @throws AssertionError when the CLI exits non-zero.
 */
const getTags = async (functionArn: string, region: string): Promise<Record<string, string>> => {
  const result = await execPromise(
    `aws lambda list-tags --resource "${functionArn}" --region "${region}" --output json`,
  );
  assert.equal(result.exitCode, 0, `list-tags failed: ${result.stderr}`);

  return (JSON.parse(result.stdout).Tags ?? {}) as Record<string, string>;
};

/**
 * A normalized view used to assert idempotency (re-apply produces no diff).
 *
 * @returns the handler, the layer ARNs sorted, and the `DD_`-prefixed env vars inserted
 *   in sorted key order, so two snapshots compare equal regardless of API ordering.
 */
export const functionSnapshot = async (
  config: LambdaVerifierConfig,
  serviceName: string,
  region: string,
): Promise<FunctionSnapshot> => {
  const lambda = await getConfiguration(config.functionName(serviceName), region);
  const vars = lambda.Environment?.Variables ?? {};
  const ddEnv: Record<string, string> = {};
  for (const key of Object.keys(vars).sort()) {
    if (key.startsWith('DD_')) {
      ddEnv[key] = vars[key];
    }
  }

  return {
    handler: lambda.Handler,
    layerArns: (lambda.Layers ?? []).map((l) => l.Arn).sort(),
    ddEnv,
  };
};

/**
 * Asserts the deployed function is instrumented: redirected handler, pinned layers
 * attached exactly once, API-key wiring and identity env vars, and the tool + freshness
 * tags.
 *
 * @throws AssertionError on the first check that fails.
 */
export const verifyInstrumented = async (
  config: LambdaVerifierConfig,
  serviceName: string,
  region: string,
): Promise<void> => {
  const fnName = config.functionName(serviceName);
  // eslint-disable-next-line no-console
  console.log(`Verifying instrumented state of "${fnName}"...`);
  const lambda = await getConfiguration(fnName, region);
  const vars = lambda.Environment?.Variables ?? {};
  const layerArns = (lambda.Layers ?? []).map((l) => l.Arn);
  const expected = config.expectedLayerArns(region);

  // Handler is redirected to the Datadog wrapper; original preserved (no double-wrap).
  assert.equal(lambda.Handler, config.redirectHandler, 'handler not redirected to the Datadog wrapper');
  if (config.originalHandler !== undefined) {
    assert.equal(
      vars.DD_LAMBDA_HANDLER,
      config.originalHandler,
      'DD_LAMBDA_HANDLER should hold the original handler (a different value means a double-wrap)',
    );
  }

  // Layers: library layer + extension layer, each present exactly once, pinned.
  assert.ok(layerArns.includes(expected.node), `missing pinned Node layer ${expected.node}; got ${JSON.stringify(layerArns)}`);
  assert.ok(
    layerArns.includes(expected.extension),
    `missing pinned extension layer ${expected.extension}; got ${JSON.stringify(layerArns)}`,
  );
  assert.equal(layerArns.filter((a) => a === expected.node).length, 1, 'Node layer attached more than once');
  assert.equal(layerArns.filter((a) => a === expected.extension).length, 1, 'extension layer attached more than once');

  // Env: API key wiring + required DD_* vars + identity.
  assert.ok(config.env.apiKeyVars.some((k) => vars[k]), `no API key wiring env var set (one of ${config.env.apiKeyVars.join(', ')})`);
  for (const key of config.env.present) {
    assert.ok(vars[key], `missing required env var ${key}`);
  }
  for (const [key, want] of Object.entries(config.env.values(serviceName))) {
    assert.equal(vars[key], want, `env var ${key} = ${vars[key]}, want ${want}`);
  }

  // Tags: tool marker (proof the tool tagged the function) + freshness tag (set
  // atomically at creation for the sweeper). Service/env/version identity is carried by
  // the DD_* env vars above and on ingested telemetry.
  const tags = await getTags(lambda.FunctionArn, region);
  assert.match(tags[config.toolTag.key] ?? '', config.toolTag.pattern, `${config.toolTag.key} tag has unexpected shape`);
  assert.ok(tags[FRESHNESS_TAG_KEY], `missing freshness tag ${FRESHNESS_TAG_KEY}`);

  // eslint-disable-next-line no-console
  console.log('All instrumented checks passed.');
};

/**
 * After remove the whole stack is torn down -- the function itself is gone, which is the
 * clean end-state for this mechanism (no per-resource un-instrument). Assert absence.
 *
 * @throws AssertionError when the lookup succeeds, or fails for a reason other than the
 *   function not being found.
 */
export const verifyUninstrumented = async (
  config: LambdaVerifierConfig,
  serviceName: string,
  region: string,
): Promise<void> => {
  const fnName = config.functionName(serviceName);
  // eslint-disable-next-line no-console
  console.log(`Verifying clean (removed) state of "${fnName}"...`);
  const result = await execPromise(
    `aws lambda get-function-configuration --function-name "${fnName}" --region "${region}" --output json`,
  );
  assert.notEqual(result.exitCode, 0, 'function still exists after remove');
  // A non-zero exit alone is not proof of absence (it could be an auth or network
  // failure), so the output must also name the not-found condition.
  assert.match(
    `${result.stdout} ${result.stderr}`,
    /ResourceNotFoundException|Function not found/,
    `expected ResourceNotFoundException, got: ${result.stderr || result.stdout}`,
  );

  // eslint-disable-next-line no-console
  console.log('Clean-state check passed (function and its DD config are gone).');
};

// diff --git a/e2e/helpers/naming.ts b/e2e/helpers/naming.ts
// new file mode 100644, index 00000000..ea35fa27, --- /dev/null +++ b/e2e/helpers/naming.ts, @@ -0,0 +1,35 @@
// Code generated by serverless-ci e2e-shared sync. DO NOT EDIT.
// Source of truth lives in serverless-ci/e2e/shared -- edit there; local changes are overwritten.
//
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
//
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2026 Datadog, Inc.
+ */
+
+import crypto from 'node:crypto';
+
+// Resource-hygiene convention shared across the e2e suites (see spec "Resource
+// Hygiene"). The name prefix is the identity + blast-radius guard the sweeper keys on;
+// the freshness tag lets it age resources out safely. The tool + platform vary per
+// repo, so they are supplied through a config object rather than hardcoded.
+
+export interface E2ENaming {
+  tool: string;
+  platform: string;
+}
+
+// `one` = team marker (`dd-` implied). Prefix is set atomically at creation.
+export const namePrefix = (config: E2ENaming, runId: string): string =>
+  `one-e2e-${config.tool}-${config.platform}-${runId}`;
+
+export const newRunId = (): string => crypto.randomBytes(4).toString('hex');
+
+// Freshness tag key. Native creation time isn't usable cross-cloud, so we stamp it
+// ourselves at create time.
+export const FRESHNESS_TAG_KEY = 'one_e2e_created';
+// Run-id marker. Standardized across every repo so telemetry and resources cross-reference.
+export const RUN_ID_TAG_KEY = 'one_e2e_run_id';
+export const freshnessTimestamp = (): string => `${Math.floor(Date.now() / 1000)}`;
diff --git a/e2e/lambda.test.ts b/e2e/lambda.test.ts
new file mode 100644
index 00000000..72a4d8ab
--- /dev/null
+++ b/e2e/lambda.test.ts
@@ -0,0 +1,149 @@
+import assert from 'node:assert/strict';
+import os from 'node:os';
+import path from 'node:path';
+import {fileURLToPath} from 'node:url';
+
+import {afterAll, beforeAll, describe, it} from 'vitest';
+
+import {ENV_NAME, ENV_VERSION, functionName, NAMING, RETRY_PATTERNS, VERIFIER} from './helpers/e2e.config';
+import {execPromise, execPromiseWithRetries} from './helpers/exec';
+import {checkTelemetryFlowing} from './helpers/lambda-telemetry-checker';
+import {functionSnapshot, verifyInstrumented, verifyUninstrumented, type FunctionSnapshot} from './helpers/lambda-verifier';
+import {freshnessTimestamp, namePrefix, newRunId} from './helpers/naming';
+
+// Full lifecycle for the serverless-plugin-datadog AWS Lambda instrumentation:
+//
+//   sls deploy (APPLY: provision + instrument) -> verify CONFIG
+//     -> invoke (trigger)                      -> verify TELEMETRY flows
+//     -> sls deploy again                      -> assert IDEMPOTENT (no diff)
+//     -> sls remove (REMOVE)                   -> verify CLEAN end-state
+//     -> teardown (afterAll, always)
+//
+// For this tool the plugin runs as part of `sls deploy`, so provisioning the
+// uninstrumented workload and APPLY coincide -- there is no separately-deployed
+// uninstrumented state to instrument later. REMOVE tears down the whole stack,
+// so the clean end-state is the function (and all its DD config) being gone.
+
+const e2eDir = path.dirname(fileURLToPath(import.meta.url));
+const fixtureDir = path.join(e2eDir, 'fixtures', 'lambda-node');
+
+const DEPLOY_TIMEOUT_MS = 900_000;
+const TELEMETRY_TIMEOUT_MS = 600_000;
+
+const describeOrSkip = process.env.SKIP_LAMBDA_TESTS === 'true' ? describe.skip : describe;
+
+describeOrSkip('serverless-plugin-datadog lambda e2e', () => {
+  // `||` rather than `??`: an empty value (a blank `KEY=` line loaded from .env.local, or
+  // an empty CI step output) must fall through to the fallback just like an unset variable.
+  const region = process.env.AWS_REGION || 'us-east-1';
+  const runId = newRunId();
+  const serviceName = namePrefix(NAMING, runId);
+  const apiKey = process.env.DATADOG_API_KEY || process.env.DD_API_KEY;
+  const appKey = process.env.DATADOG_APP_KEY || process.env.DD_APP_KEY;
+  const site = process.env.DATADOG_SITE || process.env.DD_SITE || 'datadoghq.com';
+
+  // Injected into `sls deploy` so serverless.yml stays static. Carries the unique
+  // name + freshness stamp (set atomically at creation) and the DD wiring inputs.
+  const deployEnv: Record<string, string | undefined> = {
+    E2E_SERVICE_NAME: serviceName,
+    E2E_CREATED_TS: freshnessTimestamp(),
+    AWS_REGION: region,
+    DD_API_KEY: apiKey,
+    DD_SITE: site,
+  };
+  const slsOptions = {env: deployEnv, cwd: fixtureDir};
+
+  const deploy = () =>
+    execPromiseWithRetries('npx --no-install serverless deploy --stage e2e --conceal', {
+      ...slsOptions,
+      retryPatterns: RETRY_PATTERNS,
+      maxAttempts: 2,
+      delaySeconds: 20,
+    });
+
+  // Baseline captured by the first test; stays undefined when that test fails early.
+  let firstSnapshot: FunctionSnapshot | undefined;
+
+  beforeAll(() => {
+    assert.ok(apiKey, 'DATADOG_API_KEY (or DD_API_KEY) must be set: used to wire the extension and authenticate the API client');
+    assert.ok(appKey, 'DATADOG_APP_KEY (or DD_APP_KEY) must be set: used to poll spans/logs from the Datadog API');
+    // eslint-disable-next-line no-console
+    console.log(`Run id ${runId} -> service "${serviceName}" in ${region} (site ${site})`);
+  });
+
+  afterAll(async () => {
+    // Teardown always runs, even if a test above failed mid-lifecycle.
+    const result = await execPromise('npx --no-install serverless remove --stage e2e', slsOptions);
+    if (result.exitCode !== 0) {
+      // eslint-disable-next-line no-console
+      console.warn(`Teardown remove returned ${result.exitCode} (ok if already removed): ${result.stderr}`);
+    }
+  });
+
+  it(
+    'deploys and instruments the function',
+    async () => {
+      const result = await deploy();
+      assert.equal(result.exitCode, 0, `sls deploy failed: ${result.stderr || result.stdout}`);
+
+      await verifyInstrumented(VERIFIER, serviceName, region);
+      firstSnapshot = await functionSnapshot(VERIFIER, serviceName, region);
+    },
+    DEPLOY_TIMEOUT_MS,
+  );
+
+  it(
+    'flows traces and logs after invocation',
+    async () => {
+      const outFile = path.join(os.tmpdir(), `${serviceName}-invoke.json`);
+      // A few invocations to give the extension something to flush promptly.
+      for (let i = 0; i < 3; i++) {
+        const result = await execPromiseWithRetries(
+          `aws lambda invoke --function-name "${functionName(serviceName)}" --region "${region}"` +
+            ` --payload '{}' --cli-binary-format raw-in-base64-out --output json "${outFile}"`,
+          {retryPatterns: RETRY_PATTERNS},
+        );
+        assert.equal(result.exitCode, 0, `lambda invoke failed: ${result.stderr}`);
+        const meta = JSON.parse(result.stdout) as {StatusCode?: number; FunctionError?: string};
+        assert.equal(meta.StatusCode, 200, `unexpected invoke status: ${result.stdout}`);
+        assert.ok(!meta.FunctionError, `invocation errored: ${meta.FunctionError}`);
+      }
+
+      await checkTelemetryFlowing({serviceName, env: ENV_NAME, version: ENV_VERSION, runId});
+    },
+    TELEMETRY_TIMEOUT_MS,
+  );
+
+  it(
+    're-applies idempotently (no diff, no duplicate)',
+    async () => {
+      // Without a baseline there is nothing to compare against: fail before re-deploying.
+      assert.ok(firstSnapshot, 'no baseline snapshot: the initial deploy test did not complete');
+      const result = await deploy();
+      assert.equal(result.exitCode, 0, `re-deploy failed: ${result.stderr || result.stdout}`);
+
+      // Still instrumented, still no double-wrap / duplicate layers...
+      await verifyInstrumented(VERIFIER, serviceName, region);
+      // ...and byte-for-byte the same instrumentation as the first apply.
+      const secondSnapshot = await functionSnapshot(VERIFIER, serviceName, region);
+      assert.deepEqual(secondSnapshot, firstSnapshot, 're-apply changed the function config');
+    },
+    DEPLOY_TIMEOUT_MS,
+  );
+
+  it(
+    'removes cleanly with no residue',
+    async () => {
+      const result = await execPromiseWithRetries('npx --no-install serverless remove --stage e2e', {
+        ...slsOptions,
+        retryPatterns: RETRY_PATTERNS,
+        maxAttempts: 2,
+        delaySeconds: 20,
+      });
+      assert.equal(result.exitCode, 0, `sls remove failed: ${result.stderr || result.stdout}`);
+
+      await verifyUninstrumented(VERIFIER, serviceName, region);
+    },
+    DEPLOY_TIMEOUT_MS,
+  );
+});
diff --git a/e2e/load-env.ts b/e2e/load-env.ts
new file mode 100644
index 00000000..14185af5
--- /dev/null
+++ b/e2e/load-env.ts
@@ -0,0 +1,28 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import {fileURLToPath} from 'node:url';
+
+// Local convenience: load e2e/.env.local (gitignored) into process.env without a
+// dependency. Real environment variables always win, so this is a no-op in CI.
+const envPath = path.join(path.dirname(fileURLToPath(import.meta.url)), '.env.local');
+
+if (fs.existsSync(envPath)) {
+  for (const line of fs.readFileSync(envPath, 'utf-8').split('\n')) {
+    const trimmed = line.trim();
+    const idx = trimmed.indexOf('=');
+    // Skip blanks, comments, and lines without a `KEY=` part (no `=`, or nothing before it).
+    if (!trimmed || trimmed.startsWith('#') || idx < 1) {
+      continue;
+    }
+    const key = trimmed.slice(0, idx).trim();
+    let value = trimmed.slice(idx + 1).trim();
+    if ((value.startsWith('"') && value.endsWith('"')) || (value.startsWith("'") && value.endsWith("'"))) {
+      value = value.slice(1, -1);
+    }
+    // A blank `KEY=` (as shipped in .env.local.example) means "not set": exporting '' would
+    // shadow the suite's fallback variables (e.g. DD_API_KEY) and built-in defaults.
+    if (value !== '' && !(key in process.env)) {
+      process.env[key] = value;
+    }
+  }
+}
diff --git a/e2e/package.json b/e2e/package.json
new file mode 100644
index 00000000..68a89623
--- /dev/null
+++ b/e2e/package.json
@@ -0,0 +1,19 @@
+{
+  "name": "serverless-plugin-datadog-e2e",
+  "version": "0.0.0",
+  "private": true,
+  "type": "module",
+  "description": "End-to-end suite for the serverless-plugin-datadog AWS Lambda instrumentation.",
+  "scripts": {
+    "setup": "bash setup.sh",
+    "pretest": "npm run setup",
+    "test": "vitest run",
+    "typecheck": "tsc --noEmit"
+  },
+  "devDependencies": {
+    "@datadog/datadog-api-client": "^1.40.0",
+    "@types/node": "^20.19.0",
+    "typescript": "^5.9.3",
+    "vitest": "^2.1.9"
+  }
+}
diff --git a/e2e/setup.sh b/e2e/setup.sh
new file mode 100644
index 00000000..bd8191ca
--- /dev/null
+++ b/e2e/setup.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+# Builds the plugin, packs it into a tarball, and installs it into the workload
+# fixture alongside the Serverless Framework.
+#
+# Why a tarball and not `file:../../..`: npm's file: protocol whole-dir-links the
+# target, and the repo root contains this fixture -- which would link back to the
+# repo, recursing forever. A packed tarball respects .npmignore (dist only) and
+# extracts cleanly, the same approach the datadog-ci e2e suite uses for artifacts.
+set -euo pipefail
+
+cd "$(dirname "$0")"
+E2E_DIR="$PWD"
+
+echo "==> Building plugin"
+(cd .. && COREPACK_ENABLE_DOWNLOAD_PROMPT=0 yarn build)
+
+echo "==> Packing plugin"
+rm -f "$E2E_DIR"/serverless-plugin-datadog-*.tgz
+TARBALL_NAME=$(cd .. && npm pack --silent --pack-destination "$E2E_DIR" | tail -n 1) # filename is the last stdout line
+TARBALL="$E2E_DIR/$TARBALL_NAME"
+echo " packed $TARBALL_NAME"
+
+echo "==> Installing workload fixture"
+cd fixtures/lambda-node
+npm install --no-audit --no-fund
+# --no-save so the committed fixture package.json stays free of a local tarball path.
+npm install --no-audit --no-fund --no-save "$TARBALL"
+
+echo "==> Setup complete"
diff --git a/e2e/tsconfig.json b/e2e/tsconfig.json
new file mode 100644
index 00000000..f7650ff9
--- /dev/null
+++ b/e2e/tsconfig.json
@@ -0,0 +1,16 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ESNext",
+    "moduleResolution": "Bundler",
+    "lib": ["ES2022"],
+    "resolveJsonModule": true,
+    "esModuleInterop": true,
+    "strict": true,
+    "skipLibCheck": true,
+    "noEmit": true,
+    "types": ["node"]
+  },
+  "include": ["**/*.ts"],
+  "exclude": ["node_modules", "fixtures"]
+}
diff --git a/e2e/vitest.config.ts b/e2e/vitest.config.ts
new file mode 100644
index 00000000..c6e7a56e
--- /dev/null
+++ b/e2e/vitest.config.ts
@@ -0,0 +1,15 @@
+import {defineConfig} from 'vitest/config';
+
+export default defineConfig({
+  test: {
+    include: ['*.test.ts'],
+    // Cloud round-trips are slow; defaults are overridden per test/hook too.
+    testTimeout: 600_000,
+    hookTimeout: 900_000,
+    // The lifecycle tests share deployed state and must run in declared order.
+    fileParallelism: false,
+    pool: 'forks',
+    // Loads e2e/.env.local for local runs (no-op in CI, real env wins).
+    setupFiles: ['./load-env.ts'],
+  },
+});