diff --git a/.github/workflows/publish-node.yml b/.github/workflows/publish-node.yml new file mode 100644 index 0000000..32f9003 --- /dev/null +++ b/.github/workflows/publish-node.yml @@ -0,0 +1,81 @@ +name: Publish Node + +on: + workflow_dispatch: + +jobs: + build: + strategy: + matrix: + include: + - os: ubuntu-latest + target: x86_64-unknown-linux-gnu + binary: schema_analysis-linux-x64 + - os: ubuntu-latest + target: aarch64-unknown-linux-gnu + binary: schema_analysis-linux-arm64 + - os: macos-latest + target: x86_64-apple-darwin + binary: schema_analysis-darwin-x64 + - os: macos-latest + target: aarch64-apple-darwin + binary: schema_analysis-darwin-arm64 + - os: windows-latest + target: x86_64-pc-windows-msvc + binary: schema_analysis-win32-x64.exe + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v6 + + - uses: dtolnay/rust-toolchain@stable + with: + targets: ${{ matrix.target }} + + - name: Install cross-compilation tools + if: matrix.target == 'aarch64-unknown-linux-gnu' + run: | + sudo apt-get update + sudo apt-get install -y gcc-aarch64-linux-gnu + echo "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc" >> "$GITHUB_ENV" + + - name: Build + run: cargo build --release --features cli --target ${{ matrix.target }} + working-directory: schema_analysis + + - name: Rename binary (unix) + if: runner.os != 'Windows' + run: cp target/${{ matrix.target }}/release/schema_analysis packages/node/binaries/${{ matrix.binary }} + + - name: Rename binary (windows) + if: runner.os == 'Windows' + run: cp target/${{ matrix.target }}/release/schema_analysis.exe packages/node/binaries/${{ matrix.binary }} + + - uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.binary }} + path: packages/node/binaries/${{ matrix.binary }} + + publish: + needs: build + runs-on: ubuntu-latest + environment: npm + permissions: + contents: read # needed by actions/checkout; scopes not listed here are set to none + id-token: write # OIDC token used by `npm publish --provenance` + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-node@v4 + with: + node-version: 22 + 
registry-url: https://registry.npmjs.org + + - uses: actions/download-artifact@v4 + with: + path: packages/node/binaries + merge-multiple: true + + - run: chmod +x packages/node/binaries/* # artifacts do not keep the executable bit + + - run: npx npm@latest publish --provenance + working-directory: packages/node diff --git a/Cargo.lock b/Cargo.lock index a5d1372..1bb18c4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -289,7 +289,7 @@ dependencies = [ "base64 0.13.1", "bitvec", "hex", - "indexmap 2.13.0", + "indexmap 2.14.0", "js-sys", "once_cell", "rand 0.8.5", @@ -710,39 +710,12 @@ dependencies = [ "syn", ] -[[package]] -name = "downcast-rs" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" - [[package]] name = "drain_filter_polyfill" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "669a445ee724c5c69b1b06fe0b63e70a1c84bc9bb7d9696cd4f4e3ec45050408" -[[package]] -name = "dyn-clonable" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a36efbb9bfd58e1723780aa04b61aba95ace6a05d9ffabfdb0b43672552f0805" -dependencies = [ - "dyn-clonable-impl", - "dyn-clone", -] - -[[package]] -name = "dyn-clonable-impl" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8671d54058979a37a26f3511fbf8d198ba1aa35ffb202c42587d918d77213a" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "dyn-clone" version = "1.0.17" @@ -1103,7 +1076,7 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap 2.13.0", + "indexmap 2.14.0", "slab", "tokio", "tokio-util", @@ -1139,9 +1112,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.16.1" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +checksum = 
"4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" [[package]] name = "heck" @@ -1514,12 +1487,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.13.0" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown 0.16.1", + "hashbrown 0.17.0", "serde", "serde_core", ] @@ -1703,7 +1676,7 @@ checksum = "9d2a0f220c8a5ef3c51199dfb9cdd702bc0eb80d52fbe70c7890adfaaae8a4b1" dependencies = [ "anyhow", "camino", - "indexmap 2.13.0", + "indexmap 2.14.0", "or_poisoned", "proc-macro2", "quote", @@ -2028,9 +2001,9 @@ checksum = "8c04f5d74368e4d0dfe06c45c8627c81bd7c317d52762d118fb9b3076f6420fd" [[package]] name = "ordered-float" -version = "3.9.2" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc" +checksum = "b7d950ca161dc355eaf28f82b11345ed76c6e1f6eb1f4f4479e0323b9e2fbd0e" dependencies = [ "num-traits", "rand 0.8.5", @@ -2039,12 +2012,13 @@ dependencies = [ [[package]] name = "ordermap" -version = "0.5.6" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e98f974336ceffd5b1b1f4fcbb89a23c8dcd334adc4b8612f11b7fa99944535" +checksum = "7f7476a5b122ff1fce7208e7ee9dccd0a516e835f5b8b19b8f3c98a34cf757c1" dependencies = [ - "indexmap 2.13.0", + "indexmap 2.14.0", "serde", + "serde_core", ] [[package]] @@ -2463,7 +2437,7 @@ dependencies = [ "futures", "guardian", "hydration_context", - "indexmap 2.13.0", + "indexmap 2.14.0", "or_poisoned", "paste", "pin-project-lite", @@ -2483,7 +2457,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e114642d342893571ff40b4e1da8ccdea907be44c649041eb7d8413b3fd95e8" dependencies = [ "guardian", - "indexmap 2.13.0", 
+ "indexmap 2.14.0", "itertools", "or_poisoned", "paste", @@ -2805,14 +2779,12 @@ dependencies = [ [[package]] name = "schema_analysis" -version = "0.6.0" +version = "0.7.0" dependencies = [ "anyhow", "assert_cmd", "bson", "clap", - "downcast-rs", - "dyn-clonable", "json_typegen_shared", "linked-hash-map", "maplit", @@ -2822,12 +2794,12 @@ dependencies = [ "predicates", "quick-xml 0.26.0", "regex", - "schemars 0.8.21", + "schemars 1.2.1", "serde", "serde_cbor", "serde_json", "serde_yaml", - "toml 0.5.11", + "toml 1.1.1+spec-1.1.0", "version-sync", ] @@ -2853,7 +2825,7 @@ dependencies = [ "serde_json", "serde_yaml", "server_fn", - "toml 0.8.20", + "toml 1.1.1+spec-1.1.0", "utile", "uuid", "wasm-bindgen", @@ -2862,18 +2834,6 @@ dependencies = [ "web_worker", ] -[[package]] -name = "schemars" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09c024468a378b7e36765cd36702b7a90cc3cba11654f6685c8f233408e89e92" -dependencies = [ - "dyn-clone", - "schemars_derive", - "serde", - "serde_json", -] - [[package]] name = "schemars" version = "0.9.0" @@ -2894,15 +2854,16 @@ checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" dependencies = [ "dyn-clone", "ref-cast", + "schemars_derive", "serde", "serde_json", ] [[package]] name = "schemars_derive" -version = "0.8.21" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1eee588578aff73f856ab961cd2f79e36bc45d7ded33a7562adba4667aecc0e" +checksum = "7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f" dependencies = [ "proc-macro2", "quote", @@ -3031,7 +2992,7 @@ version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ - "indexmap 2.13.0", + "indexmap 2.14.0", "itoa", "memchr", "serde", @@ -3101,7 +3062,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.13.0", + 
"indexmap 2.14.0", "schemars 0.9.0", "schemars 1.2.1", "serde_core", @@ -3128,7 +3089,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.13.0", + "indexmap 2.14.0", "itoa", "ryu", "serde", @@ -3350,7 +3311,7 @@ dependencies = [ "erased", "futures", "html-escape", - "indexmap 2.13.0", + "indexmap 2.14.0", "itertools", "js-sys", "next_tuple", @@ -3559,15 +3520,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - [[package]] name = "toml" version = "0.7.8" @@ -3577,19 +3529,7 @@ dependencies = [ "serde", "serde_spanned 0.6.8", "toml_datetime 0.6.8", - "toml_edit 0.19.15", -] - -[[package]] -name = "toml" -version = "0.8.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148" -dependencies = [ - "serde", - "serde_spanned 0.6.8", - "toml_datetime 0.6.8", - "toml_edit 0.22.24", + "toml_edit", ] [[package]] @@ -3598,10 +3538,12 @@ version = "1.1.1+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "994b95d9e7bae62b34bab0e2a4510b801fa466066a6a8b2b57361fa1eba068ee" dependencies = [ + "indexmap 2.14.0", "serde_core", "serde_spanned 1.1.1", "toml_datetime 1.1.1+spec-1.1.0", "toml_parser", + "toml_writer", "winnow 1.0.1", ] @@ -3629,26 +3571,13 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.13.0", + "indexmap 2.14.0", "serde", "serde_spanned 0.6.8", "toml_datetime 0.6.8", "winnow 0.5.40", ] -[[package]] -name = "toml_edit" -version = "0.22.24" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" -dependencies = [ - "indexmap 2.13.0", - "serde", - "serde_spanned 0.6.8", - "toml_datetime 0.6.8", - "winnow 0.7.15", -] - [[package]] name = "toml_parser" version = "1.1.1+spec-1.1.0" @@ -3658,6 +3587,12 @@ dependencies = [ "winnow 1.0.1", ] +[[package]] +name = "toml_writer" +version = "1.1.1+spec-1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db" + [[package]] name = "tower" version = "0.5.3" @@ -4069,7 +4004,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" dependencies = [ "anyhow", - "indexmap 2.13.0", + "indexmap 2.14.0", "wasm-encoder", "wasmparser", ] @@ -4117,7 +4052,7 @@ checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ "bitflags", "hashbrown 0.15.2", - "indexmap 2.13.0", + "indexmap 2.14.0", "semver", ] @@ -4350,15 +4285,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "winnow" -version = "0.7.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" -dependencies = [ - "memchr", -] - [[package]] name = "winnow" version = "1.0.1" @@ -4396,7 +4322,7 @@ checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" dependencies = [ "anyhow", "heck", - "indexmap 2.13.0", + "indexmap 2.14.0", "prettyplease", "syn", "wasm-metadata", @@ -4427,7 +4353,7 @@ checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", "bitflags", - "indexmap 2.13.0", + "indexmap 2.14.0", "log", "serde", "serde_derive", @@ -4446,7 +4372,7 @@ checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" dependencies = [ "anyhow", 
"id-arena", - "indexmap 2.13.0", + "indexmap 2.14.0", "log", "semver", "serde", diff --git a/README.md b/README.md index 8a732c1..adc07bd 100644 --- a/README.md +++ b/README.md @@ -28,11 +28,15 @@ our gymnast friend, serde. ```bash # Run without installing +npx schema_analysis data.json +# or uvx schema_analysis data.json # or pipx run schema_analysis data.json # Install +npm install -g schema_analysis +# or pip install schema_analysis # or uv tool install schema_analysis diff --git a/packages/node/README.md b/packages/node/README.md new file mode 100644 index 0000000..8d2de9f --- /dev/null +++ b/packages/node/README.md @@ -0,0 +1,112 @@ +# schema_analysis + +## Universal-ish Schema Analysis + +Ever wished you could figure out what was in that json file? Or maybe it was xml... Ehr, yaml? +It was definitely toml. + +Alas, many great tools will only work with one of those formats, and the internet is not so +nice a place as to finally understand that no, xml is not an acceptable data format. + +Enter this neat little tool, a single interface to any self-describing format supported by +our gymnast friend, serde. + +### Features + +- Works with any self-describing format with a Serde implementation. +- Suitable for large files. +- Keeps track of some useful info for each type (opt out with --minimal). +- Keeps track of null/missing/duplicate values separately. +- Integrates with [Schemars](https://github.com/GREsau/schemars) and + [json_typegen](https://github.com/evestera/json_typegen) to produce types and a json schema if needed. +- There's a demo website [here](https://schema-analysis.com/). 
+ +### Installation + +```bash +# Run without installing +npx schema_analysis data.json +# or +uvx schema_analysis data.json +# or +pipx run schema_analysis data.json + +# Install +npm install -g schema_analysis +# or +pip install schema_analysis +# or +uv tool install schema_analysis +# or +cargo install schema_analysis --features cli --locked +``` + +### CLI Usage + +`schema_analysis` can infer schemas and generate types from data directly from the command line. + +``` +schema_analysis [OPTIONS] [FILES]... +``` + +It auto-detects the input format from file extensions (`.json`, `.yaml`/`.yml`, `.xml`, `.toml`, `.cbor`, `.bson`) +and reads from stdin if no files are provided. + +**Options:** + +| Option | Description | Default | +| --- | --- | --- | +| `--format <FORMAT>` | Override input format (`json`, `yaml`, `xml`, `toml`, `cbor`, `bson`) | auto-detected | +| `--output <OUTPUT>` | Output mode (`schema`, `rust`, `typescript`, `typescript-alias`, `kotlin`, `kotlin-kotlinx`, `json-schema`, `shape`) | `schema` | +| `--name <NAME>` | Root type name for code generation | `Root` | +| `--compact` | Compact JSON output (no pretty printing) | | +| `--minimal` | Skip analysis info (counts, samples, min/max, etc.), outputting only the schema structure | | + +**Examples:** + +```bash +# Infer a schema from a JSON file +schema_analysis data.json + +# Generate Rust types +schema_analysis data.json --output rust --name MyData + +# Generate TypeScript interfaces +schema_analysis api.json --output typescript --name ApiResponse + +# Generate JSON Schema +schema_analysis data.json --output json-schema + +# Merge multiple files into a single schema +schema_analysis file1.json file2.json file3.json + +# Read from stdin +cat data.json | schema_analysis --format json +``` + +### Library Usage + +For use as a library, see the [Rust crate](https://crates.io/crates/schema_analysis/) or the [repo](https://github.com/QuartzLibrary/schema_analysis). 
+ +### Performance + +> These are not proper benchmarks, but should give a vague idea of the performance on a i7-7700HQ laptop (2017) laptop with the raw data already loaded into memory. + +| Size | wasm (MB/s) | native (MB/s) | Format | File # | +| --------------------- | ------------ | ------------- | ------ | ------ | +| [~180MB] | ~20s (9) | ~5s (36) | json | 1 | +| [~650MB] | ~150s (4.3) | ~50s (13) | json | 1 | +| [~1.7GB] | ~470s (3.6) | ~145s (11.7) | json | 1 | +| [~2.1GB] | a | ~182s (11.5) | json | 1 | +| [~13.3GB]b | | ~810s (16.4) | xml | ~200k | + +a This one seems to go over some kind of browser limit when fetching the data in the Web Worker, I believe I would have to split large files to handle it. + +b ~2.7GB compressed. This one seems like it would be a worst-case scenario because it includes decompression overhead and the files had a section that was formatted text which resulted in crazy schemas. (The json pretty printed schema was almost 0.5GB!) + + +[~180MB]: https://github.com/zemirco/sf-city-lots-json/blob/master/citylots.json +[~650MB]: https://catalog.data.gov/dataset/forestry-planting-spaces +[~1.7GB]: https://catalog.data.gov/dataset/nys-thruway-origin-and-destination-points-for-all-vehicles-15-minute-intervals-2018-q4 +[~2.1GB]: https://catalog.data.gov/dataset/turnstile-usage-data-2016 +[~13.3GB]: https://ftp.ncbi.nlm.nih.gov/pub/pmc/oa_bulk/ diff --git a/packages/node/bin/cli.js b/packages/node/bin/cli.js new file mode 100644 index 0000000..634491d --- /dev/null +++ b/packages/node/bin/cli.js @@ -0,0 +1,25 @@ +#!/usr/bin/env node + +const { spawnSync } = require("child_process"); +const path = require("path"); + +const PLATFORMS = { + "linux-x64": "schema_analysis-linux-x64", + "linux-arm64": "schema_analysis-linux-arm64", + "darwin-x64": "schema_analysis-darwin-x64", + "darwin-arm64": "schema_analysis-darwin-arm64", + "win32-x64": "schema_analysis-win32-x64.exe", +}; + +const key = `${process.platform}-${process.arch}`; +const bin = 
PLATFORMS[key]; +if (!bin) { + console.error( + `schema_analysis: unsupported platform ${process.platform}-${process.arch}` + ); + process.exit(1); +} + +const binPath = path.join(__dirname, "..", "binaries", bin); +const result = spawnSync(binPath, process.argv.slice(2), { stdio: "inherit" }); +process.exit(result.status ?? 1); diff --git a/packages/node/package.json b/packages/node/package.json new file mode 100644 index 0000000..4a9c7b4 --- /dev/null +++ b/packages/node/package.json @@ -0,0 +1,26 @@ +{ + "name": "schema_analysis", + "version": "0.7.0", + "description": "Infer schemas from JSON, YAML, XML, TOML, CBOR, and BSON", + "license": "MIT OR Apache-2.0", + "author": "QuartzLibrary", + "repository": { + "type": "git", + "url": "https://github.com/QuartzLibrary/schema_analysis" + }, + "homepage": "https://schema-analysis.com/", + "keywords": [ + "schema", + "analysis", + "json", + "yaml", + "serde" + ], + "bin": { + "schema_analysis": "bin/cli.js" + }, + "files": [ + "bin/", + "binaries/" + ] +} diff --git a/packages/python/README.md b/packages/python/README.md index 1b49ea5..8d2de9f 100644 --- a/packages/python/README.md +++ b/packages/python/README.md @@ -25,11 +25,15 @@ our gymnast friend, serde. 
```bash # Run without installing +npx schema_analysis data.json +# or uvx schema_analysis data.json # or pipx run schema_analysis data.json # Install +npm install -g schema_analysis +# or pip install schema_analysis # or uv tool install schema_analysis diff --git a/packages/python/pyproject.toml b/packages/python/pyproject.toml index f70173a..294f4d6 100644 --- a/packages/python/pyproject.toml +++ b/packages/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "schema_analysis" -version = "0.6.0" +version = "0.7.0" description = "Infer schemas from JSON, YAML, XML, TOML, CBOR, and BSON" readme = "README.md" license = { text = "MIT OR Apache-2.0" } diff --git a/schema_analysis/Cargo.toml b/schema_analysis/Cargo.toml index 9e52f28..9a117b5 100644 --- a/schema_analysis/Cargo.toml +++ b/schema_analysis/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "schema_analysis" -version = "0.6.0" +version = "0.7.0" authors = ["QuartzLibrary"] license = "MIT OR Apache-2.0" edition = "2024" @@ -35,22 +35,16 @@ schemars_integration = ["dep:schemars", "dep:serde_json"] # Serde is the heart of this libary, it provides the common interfaces that # allows a single Visitor implementation to infer the shape of any format # with a self-describing schema. -serde = { version = "1.0", features = [ "serde_derive" ] } +serde = { version = "1", features = [ "serde_derive" ] } -regex = "1.5" # Used to detect interesting strings -once_cell = "1.8" # For global constants that require allocation -ordered-float = { version = "3.4", features = [ "serde" ] } # To save sets of floats -ordermap = { version = "0.5", features = [ "serde" ] } # To preserve the order of fields in the schema - -# These are used to allow the users of the library to run -# custom analysis on the nodes. Check src/context/aggregators.rs -# and src/traits.rs for more info. 
-downcast-rs = "1.2" -dyn-clonable = "0.9" +regex = "1" # Used to detect interesting strings +once_cell = "1" # For global constants that require allocation +ordered-float = { version = "5", features = [ "serde" ] } # To save sets of floats +ordermap = { version = "1", features = [ "serde" ] } # To preserve the order of fields in the schema # Schemars integration allows the generation of json schemas. -schemars = { version = "0.8", optional = true } -serde_json = { version = "1.0", optional = true } +schemars = { version = "1", optional = true } +serde_json = { version = "1", optional = true } # json_typegen integration allows the generation of types in several languages and json schemas. json_typegen_shared = { version = "0.7", optional = true, default-features = false } @@ -62,20 +56,20 @@ anyhow = { version = "1", optional = true } # Format parsers for CLI (optional, behind "cli" feature) serde_yaml = { version = "0.9", optional = true } serde_cbor = { version = "0.11", optional = true } -toml = { version = "0.5", optional = true } -bson = { version = "2.0", optional = true } +toml = { version = "1", optional = true, features = ["preserve_order"] } +bson = { version = "2", optional = true } quick-xml = { version = "0.26", features = ["serialize"], optional = true } [dev-dependencies] -maplit = "1.0" # Ergonomic macros to write maps. +maplit = "1" # Ergonomic macros to write maps. linked-hash-map = "0.5" # Used by json_typegen for structs. # The following are the formats that have been tested. 
-serde_json = "1.0" +serde_json = "1" serde_yaml = "0.9" serde_cbor = "0.11" -toml = "0.5" -bson = "2.0" +toml = { version = "1", features = ["preserve_order"] } +bson = "2" quick-xml = {version = "0.26", features = ["serialize"]} version-sync = "0.9" # Used to ensure the doc url is up-to-date diff --git a/schema_analysis/src/lib.rs b/schema_analysis/src/lib.rs index d1f2da5..2bb3fd5 100644 --- a/schema_analysis/src/lib.rs +++ b/schema_analysis/src/lib.rs @@ -1,6 +1,6 @@ #![forbid(unsafe_code)] #![warn(missing_docs)] -#![doc(html_root_url = "https://docs.rs/schema_analysis/0.6.0/")] +#![doc(html_root_url = "https://docs.rs/schema_analysis/0.7.0/")] /*! # Universal-ish Schema Analysis diff --git a/schema_analysis/src/main.rs b/schema_analysis/src/main.rs index 70d1a57..afcbada 100644 --- a/schema_analysis/src/main.rs +++ b/schema_analysis/src/main.rs @@ -219,8 +219,8 @@ where } InputFormat::Toml => { let s = read_all_string(reader)?; - let mut de = toml::Deserializer::new(&s); - inferred.deserialize(&mut de)?; + let de = toml::Deserializer::parse(&s)?; + inferred.deserialize(de)?; } InputFormat::Bson => { let doc = bson::Document::from_reader(reader).context("Failed to parse BSON")?; @@ -257,7 +257,7 @@ where } Output::JsonSchema => schema - .to_json_schema_with_schemars_version(&JsonSchemaVersion::Draft2019_09) + .to_json_schema_with_schemars_version(&JsonSchemaVersion::Draft2020_12) .context("Failed to generate JSON Schema"), output => { diff --git a/schema_analysis/src/targets/schemars.rs b/schema_analysis/src/targets/schemars.rs index c1b5fa4..e67647f 100644 --- a/schema_analysis/src/targets/schemars.rs +++ b/schema_analysis/src/targets/schemars.rs @@ -1,7 +1,5 @@ //! 
Integration with [schemars](https://github.com/GREsau/schemars) -use schemars::schema as schemars_types; - use crate::{Schema, context::Context}; impl Schema { @@ -16,71 +14,68 @@ impl Schema { &self, version: &JsonSchemaVersion, ) -> serde_json::Result { - let settings: schemars::r#gen::SchemaSettings = version.to_schemars_settings(); - let mut generator: schemars::r#gen::SchemaGenerator = settings.into(); + let settings: schemars::generate::SchemaSettings = version.to_schemars_settings(); + let generator: schemars::generate::SchemaGenerator = settings.into(); - let root = self.to_schemars_schema(&mut generator); + let root = self.to_schemars_schema(generator); serde_json::to_string_pretty(&root) } /// Convert using a provided generator (which also holds the settings) to a json schema. pub fn to_schemars_schema( &self, - generator: &mut schemars::r#gen::SchemaGenerator, - ) -> schemars_types::RootSchema { - let inner = helpers::inferred_to_schemars(generator, self); - helpers::wrap_in_root(inner, generator.settings()) + mut generator: schemars::generate::SchemaGenerator, + ) -> schemars::Schema { + let mut schema = helpers::inferred_to_schemars(&mut generator, self); + if let Some(meta_schema) = generator.settings().meta_schema.as_deref() { + schema.insert("$schema".to_owned(), meta_schema.into()); + } + for transform in generator.transforms_mut() { + transform.transform(&mut schema); + } + schema } } /// The currently supported json schema versions. 
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)] pub enum JsonSchemaVersion { - /// `schemars::r#gen::SchemaSettings::draft07` + /// [schemars::generate::SchemaSettings::draft07] Draft07, - /// `schemars::r#gen::SchemaSettings::draft2019_09` - #[default] + /// [schemars::generate::SchemaSettings::draft2019_09] Draft2019_09, - /// `schemars::r#gen::SchemaSettings::openapi3` + /// [schemars::generate::SchemaSettings::draft2020_12] + #[default] + Draft2020_12, + /// [schemars::generate::SchemaSettings::openapi3] OpenApi3, } impl JsonSchemaVersion { /// Convert the version to full settings. - pub fn to_schemars_settings(&self) -> schemars::r#gen::SchemaSettings { - use schemars::r#gen::SchemaSettings; + pub fn to_schemars_settings(&self) -> schemars::generate::SchemaSettings { + use schemars::generate::SchemaSettings; match self { JsonSchemaVersion::Draft07 => SchemaSettings::draft07(), JsonSchemaVersion::Draft2019_09 => SchemaSettings::draft2019_09(), + JsonSchemaVersion::Draft2020_12 => SchemaSettings::draft2020_12(), JsonSchemaVersion::OpenApi3 => SchemaSettings::openapi3(), } } } mod helpers { - - use std::collections::BTreeSet; - - use schemars::schema as schemars_types; + use ordermap::{OrderMap, OrderSet}; + use schemars::JsonSchema; + use schemars::json_schema; + use serde_json::Value; use crate::{Field, Schema, context::Context}; - /// Wraps a [Schema](schemars_types::Schema) in a [RootSchema](schemars_types::RootSchema). - pub fn wrap_in_root( - inner: schemars_types::Schema, - settings: &schemars::r#gen::SchemaSettings, - ) -> schemars_types::RootSchema { - schemars_types::RootSchema { - meta_schema: settings.meta_schema.clone(), - definitions: Default::default(), - schema: inner.into_object(), - } - } - - /// Converts an inferred [Schema] to a schemars [Schema](schemars_types::Schema). + /// Converts an inferred [Schema] to a schemars [Schema](schemars::Schema). 
pub fn inferred_to_schemars( - generator: &mut schemars::r#gen::SchemaGenerator, + generator: &mut schemars::generate::SchemaGenerator, inferred: &Schema, - ) -> schemars_types::Schema { + ) -> schemars::Schema { // Note: we can use the generator even if we don't generate the final root schema // using it because simple values will not be referrenced. // Do not use for complex values. @@ -90,38 +85,30 @@ mod helpers { // Using specific integer/float types causes the schema to remember the // specific representation. - Schema::Integer(_) => schemars_types::SchemaObject { - instance_type: Some(schemars_types::InstanceType::Integer.into()), - ..Default::default() - } - .into(), - Schema::Float(_) => schemars_types::SchemaObject { - instance_type: Some(schemars_types::InstanceType::Number.into()), - ..Default::default() - } - .into(), + Schema::Integer(_) => json_schema!({ + "type": "integer" + }), + + Schema::Float(_) => json_schema!({ + "type": "number" + }), Schema::String(_) => generator.subschema_for::(), Schema::Bytes(_) => generator.subschema_for::>(), - Schema::Sequence { field, .. } => schemars_types::SchemaObject { - instance_type: Some(schemars_types::InstanceType::Array.into()), - array: Some(Box::new(schemars_types::ArrayValidation { - items: Some(internal_field_to_schemars_schema(generator, field).into()), - ..Default::default() - })), - ..Default::default() - } - .into(), + Schema::Sequence { field, .. } => schemars::json_schema!({ + "type": "array", + "items": internal_field_to_schemars_schema(generator, field) + }), Schema::Struct { fields, .. } => { - let required: BTreeSet = fields + let required: OrderSet<_> = fields .iter() // Null values are handled in the Field function. 
.filter(|(_, v)| !v.status.may_be_missing) .map(|(k, _)| k.clone()) .collect(); - let properties = fields + let properties: OrderMap<_, _> = fields .iter() .map(|(k, field)| { ( @@ -130,102 +117,138 @@ mod helpers { ) }) .collect(); - schemars_types::SchemaObject { - instance_type: Some(schemars_types::InstanceType::Object.into()), - object: Some(Box::new(schemars_types::ObjectValidation { - required, - properties, - ..Default::default() - })), - ..Default::default() + + let mut schema = json_schema!({ "type": "object" }); + if !properties.is_empty() { + schema.insert( + "properties".to_owned(), + serde_json::to_value(properties).unwrap(), + ); } - .into() + if !required.is_empty() { + schema.insert( + "required".to_owned(), + serde_json::to_value(required).unwrap(), + ); + } + schema } Schema::Union { variants } => { - let json_schemas = variants + let json_schemas: Vec<_> = variants .iter() .map(|s| inferred_to_schemars(generator, s)) .collect(); - schemars_types::SchemaObject { - subschemas: Some(Box::new(schemars_types::SubschemaValidation { - any_of: Some(json_schemas), - ..Default::default() - })), - ..Default::default() - } - .into() + + json_schema!({ + "anyOf": json_schemas, + }) } } } - /// Converts a [Field] into a [Schema](schemars_types::Schema). + /// Converts a [Field] into a [Schema](schemars::Schema). fn internal_field_to_schemars_schema( - generator: &mut schemars::r#gen::SchemaGenerator, + generator: &mut schemars::generate::SchemaGenerator, field: &Field, - ) -> schemars_types::Schema { + ) -> schemars::Schema { // Note: we can use the generator even if we don't generate the final root schema // using it because simple values will not be referrenced. // Do not use for complex values. 
let mut schema = match &field.schema { Some(schema) => inferred_to_schemars(generator, schema), - None => schemars_types::Schema::Bool(true), + None => schemars::Schema::from(true), }; if field.status.may_be_null { - // Taken from: - // https://github.com/GREsau/schemars/blob/master/schemars/src/json_schema_impls/core.rs - if generator.settings().option_add_null_type { - schema = match schema { - schemars_types::Schema::Bool(true) => schemars_types::Schema::Bool(true), - schemars_types::Schema::Bool(false) => generator.subschema_for::<()>(), - schemars_types::Schema::Object(schemars_types::SchemaObject { - instance_type: Some(ref mut instance_type), - .. - }) => { - add_null_type(instance_type); - schema - } - schema => schemars_types::SchemaObject { - // TODO technically the schema already accepts null, so this may be unnecessary - subschemas: Some(Box::new(schemars_types::SubschemaValidation { - any_of: Some(vec![schema, generator.subschema_for::<()>()]), - ..Default::default() - })), - ..Default::default() - } - .into(), - } - } - if generator.settings().option_nullable { - let mut schema_obj = schema.into_object(); - schema_obj - .extensions - .insert("nullable".to_owned(), serde_json::json!(true)); - schema = schemars_types::Schema::Object(schema_obj); - }; + allow_null(generator, &mut schema); } schema } /// Taken from: /// https://github.com/GREsau/schemars/blob/master/schemars/src/json_schema_impls/core.rs - fn add_null_type( - instance_type: &mut schemars_types::SingleOrVec, + /// https://github.com/GREsau/schemars/blob/master/schemars/src/_private/mod.rs + /// Alt hash: e67495be31e784d32f3d3310edb925458b0f2574 + #[expect(clippy::collapsible_if)] // Min diff + fn allow_null( + generator: &mut schemars::generate::SchemaGenerator, + schema: &mut schemars::Schema, ) { - match instance_type { - schemars_types::SingleOrVec::Single(ty) - if **ty != schemars_types::InstanceType::Null => - { - *instance_type = vec![**ty, schemars_types::InstanceType::Null].into() 
+ fn is_null_schema(value: &Value) -> bool { + <&schemars::Schema>::try_from(value).is_ok_and(|s| has_type(s.as_value(), "null")) + } + + match (schema.as_bool(), schema.as_object_mut()) { + (None, Some(obj)) => { + if obj.len() == 1 + && obj + .get("anyOf") + .and_then(Value::as_array) + .is_some_and(|a| a.iter().any(is_null_schema)) + { + return; + } + + if contains_immediate_subschema(obj) { + *schema = json_schema!({ + "anyOf": [ + obj, + <()>::json_schema(generator) + ] + }); + // No need to check `type`/`const`/`enum` because they're trivially not present + return; + } + + if let Some(instance_type) = obj.get_mut("type") { + match instance_type { + Value::Array(array) => { + let null = Value::from("null"); + if !array.contains(&null) { + array.push(null); + } + } + Value::String(string) => { + if string != "null" { + let current_type = core::mem::take(string).into(); + *instance_type = Value::Array(vec![current_type, "null".into()]); + } + } + _ => {} + } + } + + if let Some(c) = obj.remove("const") { + if !c.is_null() { + obj.insert("enum".to_string(), Value::Array(vec![c, Value::Null])); + } + } else if let Some(Value::Array(e)) = obj.get_mut("enum") { + if !e.contains(&Value::Null) { + e.push(Value::Null); + } + } } - schemars_types::SingleOrVec::Vec(ty) - if !ty.contains(&schemars_types::InstanceType::Null) => - { - ty.push(schemars_types::InstanceType::Null) + (Some(true), None) => {} + (Some(false), None) => { + *schema = <()>::json_schema(generator); } - _ => {} - }; + _ => unreachable!(), + } + } + + pub(crate) fn has_type(value: &Value, ty: &str) -> bool { + match value.get("type") { + Some(Value::Array(values)) => values.iter().any(|v| v.as_str() == Some(ty)), + Some(Value::String(s)) => s == ty, + _ => false, + } + } + + fn contains_immediate_subschema(schema_obj: &serde_json::Map) -> bool { + ["if", "allOf", "anyOf", "oneOf", "$ref"] + .into_iter() + .any(|k| schema_obj.contains_key(k)) } } diff --git 
a/schema_analysis/tests/cli_fixtures/expected/json_json_schema.json b/schema_analysis/tests/cli_fixtures/expected/json_json_schema.json index 8cf76d7..425f3b5 100644 --- a/schema_analysis/tests/cli_fixtures/expected/json_json_schema.json +++ b/schema_analysis/tests/cli_fixtures/expected/json_json_schema.json @@ -1,21 +1,15 @@ { - "$schema": "https://json-schema.org/draft/2019-09/schema", + "$schema": "https://json-schema.org/draft/2020-12/schema", "type": "object", - "required": [ - "active", - "age", - "name", - "scores" - ], "properties": { - "active": { - "type": "boolean" + "name": { + "type": "string" }, "age": { "type": "integer" }, - "name": { - "type": "string" + "active": { + "type": "boolean" }, "scores": { "type": "array", @@ -23,5 +17,11 @@ "type": "integer" } } - } + }, + "required": [ + "name", + "age", + "active", + "scores" + ] } diff --git a/schema_analysis/tests/target_json_schema_schemars.rs b/schema_analysis/tests/target_json_schema_schemars.rs index a07e9f9..17a26e5 100644 --- a/schema_analysis/tests/target_json_schema_schemars.rs +++ b/schema_analysis/tests/target_json_schema_schemars.rs @@ -11,7 +11,7 @@ struct JSchema; test_format!(JSchema); -const SCHEMA_TYPE: &str = "https://json-schema.org/draft/2019-09/schema"; +const SCHEMA_TYPE: &str = "https://json-schema.org/draft/2020-12/schema"; impl FormatTests for JSchema { type Value = Value; @@ -167,7 +167,7 @@ impl FormatTests for JSchema { "type": "array", "items": { "type": "object", - "required": [ "hello", "mixed", "world" ], + "required": [ "hello", "world", "mixed" ], "properties": { "hello": { "type": "integer" }, "mixed": { @@ -204,7 +204,7 @@ impl FormatTests for JSchema { json!({ "$schema": SCHEMA_TYPE, "type": "object", - "required": [ "hello", "sequence", "world" ], + "required": [ "hello", "world", "sequence" ], "properties": { "hello": { "type": "integer" }, "world": { "type": "string" }, @@ -220,7 +220,7 @@ impl FormatTests for JSchema { json!({ "$schema": SCHEMA_TYPE, "type": 
"object", - "required": [ "hello", "optional", "sequence", "world" ], + "required": [ "hello", "world", "optional", "sequence" ], "properties": { "hello": { "type": "integer" }, "optional": true, diff --git a/web/Cargo.toml b/web/Cargo.toml index 0869dce..5cc2ba5 100644 --- a/web/Cargo.toml +++ b/web/Cargo.toml @@ -71,7 +71,7 @@ web-sys = { version = "0.3", features = [ serde = { version = "1", features = ["derive"] } serde_json = "1" serde_yaml = "0.9" -toml = "0.8" +toml = { version = "1", features = ["preserve_order"] } bson = "2" quick-xml = { version = "0.37", features = ["serialize"] } serde_cbor = "0.11"