From 58a1317d2081c9853b5e5a99533f8329f5b48a8f Mon Sep 17 00:00:00 2001 From: Erik Arvidsson Date: Mon, 1 Jun 2026 15:58:30 +0200 Subject: [PATCH 1/5] feat: Unicode LIKE/upper()/lower() via statically-linked ICU SQLite's built-in LIKE/upper()/lower() only case-fold ASCII, so non-English text compared case-insensitively (e.g. zqlite's ILIKE) behaved incorrectly. Enable SQLite's bundled ICU extension (SQLITE_ENABLE_ICU), which is already present in the amalgamation guarded by the macro and auto-registers a Unicode-aware LIKE/upper()/lower()/REGEXP on every connection. ICU is linked STATICALLY so the prebuilt binaries stay self-contained: zero-cache installs them via prebuild-install onto runtime images (e.g. Alpine) that have no ICU, where a dynamic `NEEDED libicu*.so.<N>` would fail to load (and would couple each binary to one ICU soname). The new deps/icu.js discovers ICU (pkg-config / Homebrew / system paths) and emits the static archive paths plus the C++/system libs they require. CI installs the static ICU packages (libicu-dev on Debian, icu-dev+icu-static on Alpine, icu4c via Homebrew on macOS). Windows is intentionally excluded for now: static ICU there means building it from source (vcpkg), which is impractically slow in CI. Windows keeps SQLite's ASCII-only LIKE until that is addressed; production (zero-cache) runs on Alpine, and macOS/Linux dev builds get full Unicode. Cost: ICU's data table makes each binary ~30MB larger. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/build.yml | 20 +++--- binding.gyp | 13 ++++ deps/icu.js | 126 ++++++++++++++++++++++++++++++++++++ deps/sqlite3.gyp | 11 +++- 4 files changed, 161 insertions(+), 9 deletions(-) create mode 100644 deps/icu.js diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d240501..1d97872 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -46,18 +46,18 @@ jobs: - if: ${{ startsWith(matrix.os, 'windows') }} run: pip.exe install setuptools - if: ${{ startsWith(matrix.os, 'macos') }} - run: brew install python-setuptools + run: brew install python-setuptools icu4c - if: ${{ !startsWith(matrix.os, 'windows') && !startsWith(matrix.os, 'macos') }} run: python3 -m pip install setuptools - if: ${{ startsWith(matrix.os, 'ubuntu') && matrix.node < 25 }} run: | sudo apt update - sudo apt install -y gcc-10 g++-10 libreadline-dev libncurses5-dev + sudo apt install -y gcc-10 g++-10 libreadline-dev libncurses5-dev libicu-dev sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10 - if: ${{ startsWith(matrix.os, 'ubuntu') && matrix.node >= 25 }} run: | sudo apt update - sudo apt install -y gcc-11 g++-11 libreadline-dev libncurses5-dev + sudo apt install -y gcc-11 g++-11 libreadline-dev libncurses5-dev libicu-dev sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 100 --slave /usr/bin/g++ g++ /usr/bin/g++-11 --slave /usr/bin/gcov gcov /usr/bin/gcov-11 - run: npm install --ignore-scripts - run: npm run build-debug @@ -93,13 +93,13 @@ jobs: with: bun-version: ${{ matrix.bun }} - if: ${{ startsWith(matrix.os, 'macos') }} - run: brew install python-setuptools + run: brew install python-setuptools icu4c - if: ${{ !startsWith(matrix.os, 'macos') }} run: python3 -m pip install setuptools - if: ${{ startsWith(matrix.os, 'ubuntu') }} run: | sudo apt update - sudo apt 
install -y gcc-10 g++-10 libreadline-dev libncurses5-dev + sudo apt install -y gcc-10 g++-10 libreadline-dev libncurses5-dev libicu-dev sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10 - run: bun install --ignore-scripts - run: bun run build-debug @@ -163,7 +163,7 @@ jobs: - if: ${{ startsWith(matrix.os, 'windows') }} run: pip.exe install setuptools - if: ${{ startsWith(matrix.os, 'macos') }} - run: brew install python-setuptools + run: brew install python-setuptools icu4c - run: npm install --ignore-scripts - run: ${{ env.NODE_BUILD_CMD_LEGACY }} -u ${{ secrets.GITHUB_TOKEN }} - run: ${{ env.NODE_BUILD_CMD_MODERN }} -u ${{ secrets.GITHUB_TOKEN }} @@ -182,6 +182,7 @@ jobs: needs: test steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - run: apt-get update && apt-get install -y libicu-dev - run: npm install --ignore-scripts - run: ${{ env.NODE_BUILD_CMD_LEGACY }} -u ${{ secrets.GITHUB_TOKEN }} @@ -193,6 +194,7 @@ jobs: needs: test steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - run: apt-get update && apt-get install -y libicu-dev - run: npm install --ignore-scripts - run: ${{ env.NODE_BUILD_CMD_MODERN }} -u ${{ secrets.GITHUB_TOKEN }} @@ -204,7 +206,7 @@ jobs: needs: test steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - - run: apk add build-base git python3 py3-setuptools libstdc++ readline-dev ncurses-dev --update-cache + - run: apk add build-base git python3 py3-setuptools libstdc++ readline-dev ncurses-dev icu-dev icu-static --update-cache - run: npm install --ignore-scripts - run: ${{ env.NODE_BUILD_CMD_LEGACY }} -u ${{ secrets.GITHUB_TOKEN }} - run: ${{ env.NODE_BUILD_CMD_MODERN }} -u ${{ secrets.GITHUB_TOKEN }} @@ -225,7 +227,7 @@ jobs: - uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3 - run: | docker run --rm -v $(pwd):/tmp/project 
--entrypoint /bin/sh --platform linux/${{ matrix.arch }} node:20-alpine -c "\ - apk add build-base git python3 py3-setuptools libstdc++ readline-dev ncurses-dev --update-cache && \ + apk add build-base git python3 py3-setuptools libstdc++ readline-dev ncurses-dev icu-dev icu-static --update-cache && \ cd /tmp/project && \ npm install --ignore-scripts && \ ${{ env.NODE_BUILD_CMD_LEGACY }} -u ${{ secrets.GITHUB_TOKEN }} && \ @@ -247,6 +249,7 @@ jobs: - uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3 - run: | docker run --rm -v $(pwd):/tmp/project --entrypoint /bin/sh --platform linux/${{ matrix.arch }} node:20-bullseye -c "\ + apt-get update && apt-get install -y libicu-dev && \ cd /tmp/project && \ npm install --ignore-scripts && \ ${{ env.NODE_BUILD_CMD_LEGACY }} -u ${{ secrets.GITHUB_TOKEN }}" @@ -267,6 +270,7 @@ jobs: - uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3 - run: | docker run --rm -v $(pwd):/tmp/project --entrypoint /bin/sh --platform linux/${{ matrix.arch }} node:20-bookworm -c "\ + apt-get update && apt-get install -y libicu-dev && \ cd /tmp/project && \ npm install --ignore-scripts && \ ${{ env.NODE_BUILD_CMD_MODERN }} -u ${{ secrets.GITHUB_TOKEN }}" diff --git a/binding.gyp b/binding.gyp index d5582af..f1fff11 100644 --- a/binding.gyp +++ b/binding.gyp @@ -24,6 +24,12 @@ }, }, 'conditions': [ + # ICU is statically linked into the SQLite static library on + # non-Windows; the final .node must resolve its ICU symbols. (See + # deps/sqlite3.gyp for why Windows is excluded.) + ['OS != "win"', { + 'libraries': [' would +// fail to load there (and couples the binary to one ICU soname). Static +// linking embeds ICU into the binary instead. +// +// Usage: +// node icu.js include -> the ICU include directory (for #include ) +// node icu.js libs -> newline-separated linker inputs: full paths to the +// ICU static archives, then the C++ runtime / system +// libraries those archives require. 
+// +// Discovery order: pkg-config (Linux/Alpine) -> Homebrew icu4c (macOS) -> +// common system locations. Set ICU_ROOT to override (expects ICU_ROOT/lib and +// ICU_ROOT/include). +// === + +const {execSync} = require('child_process'); +const fs = require('fs'); +const path = require('path'); + +const isWin = process.platform === 'win32'; +const isMac = process.platform === 'darwin'; + +function run(cmd) { + try { + return execSync(cmd, {stdio: ['ignore', 'pipe', 'ignore']}).toString().trim(); + } catch { + return ''; + } +} + +function firstDir(candidates) { + return candidates.find(p => p && fs.existsSync(p)) || ''; +} + +// Locate the ICU lib and include directories. Returns {libDir, includeDir}. +function locate() { + if (process.env.ICU_ROOT) { + const root = process.env.ICU_ROOT; + return {libDir: path.join(root, 'lib'), includeDir: path.join(root, 'include')}; + } + + // pkg-config (Debian's libicu-dev and Alpine's icu-dev ship icu-i18n.pc). + const pcLibDir = run('pkg-config --variable=libdir icu-i18n'); + const pcIncDir = run('pkg-config --variable=includedir icu-i18n'); + if (pcLibDir && fs.existsSync(pcLibDir)) { + return {libDir: pcLibDir, includeDir: pcIncDir}; + } + + // Homebrew icu4c (macOS, keg-only so not on default search paths). + if (isMac) { + let prefix = run('brew --prefix icu4c'); + if (!prefix || !fs.existsSync(prefix)) { + prefix = firstDir(['/opt/homebrew/opt/icu4c', '/usr/local/opt/icu4c']); + } + if (prefix) { + return {libDir: path.join(prefix, 'lib'), includeDir: path.join(prefix, 'include')}; + } + } + + // Common system locations (Debian multiarch, Alpine, manual installs). + const libDir = firstDir([ + '/usr/lib/x86_64-linux-gnu', + '/usr/lib/aarch64-linux-gnu', + '/usr/lib/arm-linux-gnueabihf', + '/usr/lib', + '/usr/local/lib', + ]); + const includeDir = firstDir(['/usr/include', '/usr/local/include']); + return {libDir, includeDir}; +} + +// ICU static archives in dependency order (i18n -> uc -> data). 
Windows ICU +// uses different archive names. +const ARCHIVE_NAMES = isWin + ? ['icuin', 'icuuc', 'icudt'] + : ['libicui18n', 'libicuuc', 'libicudata']; + +function libsOutput(loc) { + const ext = isWin ? '.lib' : '.a'; + const out = []; + for (const name of ARCHIVE_NAMES) { + const full = loc.libDir && path.join(loc.libDir, name + ext); + if (full && fs.existsSync(full)) { + out.push(full); + } else { + // Fall back to a normal library reference so local dev without the + // static archives still builds (dynamically). Prebuild CI installs the + // static libs, so this path is not taken for shipped binaries. + process.stderr.write( + `deps/icu.js: static archive ${name}${ext} not found in ${loc.libDir || '(unknown)'}; ` + + `falling back to dynamic linking for ${name}\n`, + ); + out.push(isWin ? name + '.lib' : '-l' + name.replace(/^lib/, '')); + } + } + // C++ runtime + system libraries required by ICU's (C++) static archives. + if (isWin) { + out.push('advapi32.lib'); + } else if (isMac) { + out.push('-lc++'); + } else { + out.push('-lstdc++', '-lm', '-lpthread', '-ldl'); + } + return out; +} + +const mode = process.argv[2]; +const loc = locate(); + +if (mode === 'include') { + process.stdout.write(loc.includeDir || ''); +} else { + process.stdout.write(libsOutput(loc).join('\n')); +} diff --git a/deps/sqlite3.gyp b/deps/sqlite3.gyp index 3e777c4..cad0a81 100755 --- a/deps/sqlite3.gyp +++ b/deps/sqlite3.gyp @@ -57,7 +57,16 @@ # This is currently required by better-sqlite3. 'SQLITE_ENABLE_COLUMN_METADATA', ], - }] + }], + # Unicode-aware LIKE/upper()/lower() via SQLite's bundled ICU extension, + # statically linked so the prebuilt binaries stay self-contained. + # Not enabled on Windows yet: static ICU there means building it from + # source (vcpkg), which is impractically slow in CI. Windows therefore + # keeps SQLite's ASCII-only LIKE for now. 
+ ['OS != "win"', { + 'defines': ['SQLITE_ENABLE_ICU'], + 'include_dirs': [' Date: Mon, 1 Jun 2026 16:18:02 +0200 Subject: [PATCH 2/5] deps/icu.js: drop dead Windows code path ICU is gated `OS != "win"` in the gyp files, so icu.js is never invoked on Windows. Remove the unused Windows branches (.lib archive names, advapi32, the isWin handling) and note the macOS/Linux-only scope. Co-Authored-By: Claude Opus 4.8 (1M context) --- deps/icu.js | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/deps/icu.js b/deps/icu.js index e2e5a2a..e29d6e2 100644 --- a/deps/icu.js +++ b/deps/icu.js @@ -23,13 +23,15 @@ // Discovery order: pkg-config (Linux/Alpine) -> Homebrew icu4c (macOS) -> // common system locations. Set ICU_ROOT to override (expects ICU_ROOT/lib and // ICU_ROOT/include). +// +// ICU is not enabled on Windows (see deps/sqlite3.gyp), so this script only +// ever runs on macOS and Linux. // === const {execSync} = require('child_process'); const fs = require('fs'); const path = require('path'); -const isWin = process.platform === 'win32'; const isMac = process.platform === 'darwin'; function run(cmd) { @@ -81,17 +83,13 @@ function locate() { return {libDir, includeDir}; } -// ICU static archives in dependency order (i18n -> uc -> data). Windows ICU -// uses different archive names. -const ARCHIVE_NAMES = isWin - ? ['icuin', 'icuuc', 'icudt'] - : ['libicui18n', 'libicuuc', 'libicudata']; +// ICU static archives, in dependency order (i18n -> uc -> data). +const ARCHIVE_NAMES = ['libicui18n', 'libicuuc', 'libicudata']; function libsOutput(loc) { - const ext = isWin ? '.lib' : '.a'; const out = []; for (const name of ARCHIVE_NAMES) { - const full = loc.libDir && path.join(loc.libDir, name + ext); + const full = loc.libDir && path.join(loc.libDir, name + '.a'); if (full && fs.existsSync(full)) { out.push(full); } else { @@ -99,16 +97,14 @@ function libsOutput(loc) { // static archives still builds (dynamically). 
Prebuild CI installs the // static libs, so this path is not taken for shipped binaries. process.stderr.write( - `deps/icu.js: static archive ${name}${ext} not found in ${loc.libDir || '(unknown)'}; ` + + `deps/icu.js: static archive ${name}.a not found in ${loc.libDir || '(unknown)'}; ` + `falling back to dynamic linking for ${name}\n`, ); - out.push(isWin ? name + '.lib' : '-l' + name.replace(/^lib/, '')); + out.push('-l' + name.replace(/^lib/, '')); } } // C++ runtime + system libraries required by ICU's (C++) static archives. - if (isWin) { - out.push('advapi32.lib'); - } else if (isMac) { + if (isMac) { out.push('-lc++'); } else { out.push('-lstdc++', '-lm', '-lpthread', '-ldl'); From 0b728ecd1a90e23c1924b5a6ed4b5ea45de17a89 Mon Sep 17 00:00:00 2001 From: Erik Arvidsson Date: Mon, 1 Jun 2026 16:31:15 +0200 Subject: [PATCH 3/5] icu.js: validate ICU headers, fail loudly instead of silent dynamic linking; add Unicode tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback on #31: - locate(): the pkg-config path now requires the ICU headers (/unicode/utypes.h) to actually exist before trusting the .pc, and the system-path discovery picks an include dir that has them. The `include` mode fails with a clear message instead of emitting an empty/bogus path that would later blow up with a confusing missing-header error. - libsOutput(): no longer silently falls back to dynamic linking when a static archive is missing. Prebuilt binaries must stay self-contained (zero-cache runs them on ICU-less images like Alpine), so the build now aborts with an actionable message. The dynamic fallback is opt-in via ICU_ALLOW_DYNAMIC=1 for local development. - Add test/52.icu.js asserting Unicode behavior on non-Windows (lower('Ä')='ä', upper('ß')='SS', 'Ä' LIKE 'ä'=1) and ASCII-only behavior on Windows, guarding against future regressions from SQLite updates or build-flag changes. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- deps/icu.js | 47 +++++++++++++++++++++++++++++++++++++++-------- test/52.icu.js | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 8 deletions(-) create mode 100644 test/52.icu.js diff --git a/deps/icu.js b/deps/icu.js index e29d6e2..06dd580 100644 --- a/deps/icu.js +++ b/deps/icu.js @@ -46,6 +46,19 @@ function firstDir(candidates) { return candidates.find(p => p && fs.existsSync(p)) || ''; } +// An include dir is only useful if the ICU headers are actually under it +// (/unicode/utypes.h). Validating this lets us reject a misconfigured +// pkg-config .pc and fall through to another discovery method, instead of +// emitting a bogus path that fails later with a confusing missing-header error. +function hasIcuHeaders(dir) { + return !!dir && fs.existsSync(path.join(dir, 'unicode', 'utypes.h')); +} + +function fail(message) { + process.stderr.write(`deps/icu.js: ${message}\n`); + process.exit(1); +} + // Locate the ICU lib and include directories. Returns {libDir, includeDir}. function locate() { if (process.env.ICU_ROOT) { @@ -54,9 +67,10 @@ function locate() { } // pkg-config (Debian's libicu-dev and Alpine's icu-dev ship icu-i18n.pc). + // Require both the lib dir and the actual ICU headers before trusting it. 
const pcLibDir = run('pkg-config --variable=libdir icu-i18n'); const pcIncDir = run('pkg-config --variable=includedir icu-i18n'); - if (pcLibDir && fs.existsSync(pcLibDir)) { + if (pcLibDir && fs.existsSync(pcLibDir) && hasIcuHeaders(pcIncDir)) { return {libDir: pcLibDir, includeDir: pcIncDir}; } @@ -79,7 +93,7 @@ function locate() { '/usr/lib', '/usr/local/lib', ]); - const includeDir = firstDir(['/usr/include', '/usr/local/include']); + const includeDir = ['/usr/include', '/usr/local/include'].find(hasIcuHeaders) || ''; return {libDir, includeDir}; } @@ -92,15 +106,25 @@ function libsOutput(loc) { const full = loc.libDir && path.join(loc.libDir, name + '.a'); if (full && fs.existsSync(full)) { out.push(full); - } else { - // Fall back to a normal library reference so local dev without the - // static archives still builds (dynamically). Prebuild CI installs the - // static libs, so this path is not taken for shipped binaries. + } else if (process.env.ICU_ALLOW_DYNAMIC === '1') { + // Opt-in dynamic fallback for local dev on machines without the static + // archives. Never used for shipped prebuilds, which must be self-contained. process.stderr.write( `deps/icu.js: static archive ${name}.a not found in ${loc.libDir || '(unknown)'}; ` + - `falling back to dynamic linking for ${name}\n`, + `ICU_ALLOW_DYNAMIC=1 set, falling back to dynamic -l${name.replace(/^lib/, '')}\n`, ); out.push('-l' + name.replace(/^lib/, '')); + } else { + // Refuse to silently produce a dynamically-linked binary: zero-cache ships + // these prebuilds onto runtime images (e.g. Alpine) that have no ICU, where + // a dynamic ICU dependency would only fail at load time. + fail( + `static ICU archive ${name}.a not found in ${loc.libDir || '(unknown library dir)'}.\n` + + ` Prebuilt binaries must statically link ICU to stay self-contained, so the build is\n` + + ` aborting rather than linking ICU dynamically. 
Install the static ICU libraries\n` + + ` (libicu-dev on Debian, icu-dev + icu-static on Alpine, icu4c via Homebrew on macOS),\n` + + ` or set ICU_ALLOW_DYNAMIC=1 to allow a dynamic fallback for local development.`, + ); } } // C++ runtime + system libraries required by ICU's (C++) static archives. @@ -116,7 +140,14 @@ const mode = process.argv[2]; const loc = locate(); if (mode === 'include') { - process.stdout.write(loc.includeDir || ''); + if (!hasIcuHeaders(loc.includeDir)) { + fail( + `could not find the ICU headers (unicode/utypes.h) in ${loc.includeDir || '(unknown include dir)'}.\n` + + ` Install the ICU development package (libicu-dev on Debian, icu-dev on Alpine,\n` + + ` icu4c via Homebrew on macOS), or set ICU_ROOT to an ICU install prefix.`, + ); + } + process.stdout.write(loc.includeDir); } else { process.stdout.write(libsOutput(loc).join('\n')); } diff --git a/test/52.icu.js b/test/52.icu.js new file mode 100644 index 0000000..cd61b7c --- /dev/null +++ b/test/52.icu.js @@ -0,0 +1,40 @@ +'use strict'; +const os = require('os'); +const Database = require('../.'); + +// ICU is statically linked on macOS and Linux (see deps/icu.js and binding.gyp), +// which makes LIKE/lower()/upper() Unicode-aware. It is intentionally NOT linked +// on Windows (static ICU there is impractical to build in CI), so those builds +// keep SQLite's ASCII-only behavior. +const isWindows = os.platform().startsWith('win'); +const itWindows = isWindows ? 
it : it.skip; + +describe('ICU Unicode support', function () { + beforeEach(function () { + this.db = new Database(util.next()); + }); + afterEach(function () { + this.db.close(); + }); + + const evalScalar = function (db, expr) { + return db.prepare(`SELECT ${expr} AS v`).pluck().get(); + }; + + util.itUnix('case-folds non-ASCII characters when ICU is enabled', function () { + expect(evalScalar(this.db, "lower('Ä')")).to.equal('ä'); + expect(evalScalar(this.db, "upper('ß')")).to.equal('SS'); + // SQLite's LIKE is provided by ICU here, so it folds case across Unicode. + expect(evalScalar(this.db, "'Ä' LIKE 'ä'")).to.equal(1); + expect(evalScalar(this.db, "'ПРИВЕТ' LIKE 'привет'")).to.equal(1); + // Distinct characters still do not match. + expect(evalScalar(this.db, "'Ä' LIKE 'å'")).to.equal(0); + }); + + itWindows('leaves non-ASCII characters unchanged when ICU is disabled', function () { + expect(evalScalar(this.db, "lower('Ä')")).to.equal('Ä'); + expect(evalScalar(this.db, "'Ä' LIKE 'ä'")).to.equal(0); + // ASCII case-insensitivity still works without ICU. + expect(evalScalar(this.db, "'ABC' LIKE 'abc'")).to.equal(1); + }); +}); From 64664af101c84872429b206c3d52ec6fa27ca47d Mon Sep 17 00:00:00 2001 From: Erik Arvidsson Date: Mon, 1 Jun 2026 16:50:26 +0200 Subject: [PATCH 4/5] icu.js: link ICU dynamically on glibc Linux (non-PIC static archives) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Debian/Ubuntu ship libicu*.a built without -fPIC, so they cannot be linked into a shared object (the .node) — the build failed with "recompile with -fPIC". Static linking only works where the archives are PIC: macOS (Homebrew) and Alpine (musl). So: * macOS + Alpine -> static link (self-contained, as before). * glibc Linux -> dynamic link against the distro .so; those consumers must have ICU installed at runtime. Production (zero-cache on Alpine) keeps a self-contained, statically-linked binary. 
ICU_ALLOW_DYNAMIC=1 still forces dynamic everywhere for local dev. Co-Authored-By: Claude Opus 4.8 (1M context) --- deps/icu.js | 90 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 31 deletions(-) diff --git a/deps/icu.js b/deps/icu.js index 06dd580..93502c0 100644 --- a/deps/icu.js +++ b/deps/icu.js @@ -1,24 +1,27 @@ 'use strict'; // === -// ICU discovery helper for node-gyp — STATIC linking. +// ICU discovery helper for node-gyp. // // Defining SQLITE_ENABLE_ICU compiles SQLite's bundled ICU extension (already // present in the amalgamation, guarded by #ifdef SQLITE_ENABLE_ICU) and // auto-registers Unicode-aware LIKE/upper()/lower()/REGEXP on every // connection. That code calls into ICU. // -// We link ICU *statically* so the prebuilt .node binaries stay self-contained: +// We prefer STATIC linking so the prebuilt .node binaries stay self-contained: // zero-cache ships them via prebuild-install onto runtime images (e.g. Alpine) // that do not have ICU installed, and a dynamic NEEDED libicu*.so. would -// fail to load there (and couples the binary to one ICU soname). Static -// linking embeds ICU into the binary instead. +// fail to load there. Static linking is only possible where the ICU archives +// are -fPIC, which holds on macOS (Homebrew) and Alpine (musl). Debian/Ubuntu +// (glibc) ship non-PIC static archives, so there we link ICU dynamically +// against the system .so (the consumer must have ICU installed at runtime). +// See `useStatic` below. // // Usage: // node icu.js include -> the ICU include directory (for #include ) -// node icu.js libs -> newline-separated linker inputs: full paths to the -// ICU static archives, then the C++ runtime / system -// libraries those archives require. +// node icu.js libs -> newline-separated linker inputs (static archive +// paths or -L/-l flags), then the C++ runtime / +// system libraries ICU depends on. 
// // Discovery order: pkg-config (Linux/Alpine) -> Homebrew icu4c (macOS) -> // common system locations. Set ICU_ROOT to override (expects ICU_ROOT/lib and @@ -33,6 +36,20 @@ const fs = require('fs'); const path = require('path'); const isMac = process.platform === 'darwin'; +const isLinux = process.platform === 'linux'; +const isAlpine = isLinux && fs.existsSync('/etc/alpine-release'); + +// We static-link ICU only where the static archives are position-independent +// (-fPIC) and can therefore be linked into a shared object (the .node): +// * macOS — Homebrew's icu4c archives are PIC. +// * Alpine — musl builds everything PIC, so icu-static is PIC. +// Debian/Ubuntu (glibc) ship NON-PIC static archives (libicu*.a), which fail to +// link into a shared object ("recompile with -fPIC"), so on glibc Linux we link +// ICU dynamically against the distro .so instead — those consumers must have +// ICU installed at runtime. ICU_ALLOW_DYNAMIC=1 forces dynamic everywhere as a +// local-dev escape hatch. +const useStatic = + (isMac || isAlpine) && process.env.ICU_ALLOW_DYNAMIC !== '1'; function run(cmd) { try { @@ -100,34 +117,45 @@ function locate() { // ICU static archives, in dependency order (i18n -> uc -> data). const ARCHIVE_NAMES = ['libicui18n', 'libicuuc', 'libicudata']; -function libsOutput(loc) { - const out = []; - for (const name of ARCHIVE_NAMES) { +// Full paths to the ICU static archives, so the linker pulls them in +// statically and the resulting binary stays self-contained. +function staticLibInputs(loc) { + return ARCHIVE_NAMES.map(name => { const full = loc.libDir && path.join(loc.libDir, name + '.a'); if (full && fs.existsSync(full)) { - out.push(full); - } else if (process.env.ICU_ALLOW_DYNAMIC === '1') { - // Opt-in dynamic fallback for local dev on machines without the static - // archives. Never used for shipped prebuilds, which must be self-contained. 
- process.stderr.write( - `deps/icu.js: static archive ${name}.a not found in ${loc.libDir || '(unknown)'}; ` + - `ICU_ALLOW_DYNAMIC=1 set, falling back to dynamic -l${name.replace(/^lib/, '')}\n`, - ); - out.push('-l' + name.replace(/^lib/, '')); - } else { - // Refuse to silently produce a dynamically-linked binary: zero-cache ships - // these prebuilds onto runtime images (e.g. Alpine) that have no ICU, where - // a dynamic ICU dependency would only fail at load time. - fail( - `static ICU archive ${name}.a not found in ${loc.libDir || '(unknown library dir)'}.\n` + - ` Prebuilt binaries must statically link ICU to stay self-contained, so the build is\n` + - ` aborting rather than linking ICU dynamically. Install the static ICU libraries\n` + - ` (libicu-dev on Debian, icu-dev + icu-static on Alpine, icu4c via Homebrew on macOS),\n` + - ` or set ICU_ALLOW_DYNAMIC=1 to allow a dynamic fallback for local development.`, - ); + return full; } + // On the static platforms (macOS, Alpine) a missing archive is fatal: we + // must not silently produce a dynamically-linked binary, since zero-cache + // ships these prebuilds onto images (e.g. Alpine) that have no ICU. + fail( + `static ICU archive ${name}.a not found in ${loc.libDir || '(unknown library dir)'}.\n` + + ` This platform links ICU statically to stay self-contained, so the build is aborting\n` + + ` rather than linking ICU dynamically. Install the static ICU libraries (icu-dev +\n` + + ` icu-static on Alpine, icu4c via Homebrew on macOS), or set ICU_ALLOW_DYNAMIC=1 to\n` + + ` allow a dynamic fallback for local development.`, + ); + return null; // unreachable; fail() exits + }); +} + +// Ordinary -l flags, resolved against the system ICU shared libraries. Used on +// glibc Linux (Debian/Ubuntu), whose static archives are not -fPIC and so can't +// be linked into a shared object; the consumer must have ICU at runtime. 
+function dynamicLibInputs(loc) { + const out = []; + if (loc.libDir) { + out.push('-L' + loc.libDir); + } + for (const name of ARCHIVE_NAMES) { + out.push('-l' + name.replace(/^lib/, '')); } - // C++ runtime + system libraries required by ICU's (C++) static archives. + return out; +} + +function libsOutput(loc) { + const out = useStatic ? staticLibInputs(loc) : dynamicLibInputs(loc); + // C++ runtime + system libraries that ICU depends on. if (isMac) { out.push('-lc++'); } else { From 6f8f2440e66166c5234a9dd4a748d3e50cd7881b Mon Sep 17 00:00:00 2001 From: Erik Arvidsson Date: Mon, 1 Jun 2026 17:40:35 +0200 Subject: [PATCH 5/5] download.sh: note why SQLITE_ENABLE_ICU is not in DEFINES Per review feedback: explain that ICU is defined conditionally (non-Windows) in deps/sqlite3.gyp rather than in this unconditional, all-platform DEFINES list. Co-Authored-By: Claude Opus 4.8 (1M context) --- deps/download.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/deps/download.sh b/deps/download.sh index aa962e9..9db7a6b 100755 --- a/deps/download.sh +++ b/deps/download.sh @@ -20,7 +20,15 @@ CHECKIN="0e862bc9ed7aa9ae" -# Defines below are sorted alphabetically +# Defines below are sorted alphabetically. +# +# Note: SQLITE_ENABLE_ICU is intentionally NOT listed here. These defines are +# applied unconditionally on every platform (they become defines.gypi and are +# passed to every compile), but ICU is only available on non-Windows builds. +# It is therefore defined conditionally (OS != "win") in deps/sqlite3.gyp +# instead. The ICU extension code already ships in the amalgamation guarded by +# #ifdef SQLITE_ENABLE_ICU, so it does not need to be set when generating +# sqlite3.c here. DEFINES=" HAVE_INT16_T=1 HAVE_INT32_T=1