Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
504efb4
Bump submodule
duckdblabs-bot May 12, 2026
ce0fa6d
[duckdb-labs bot] Bump DuckDB submodule (#449)
evertlammerts May 12, 2026
646c569
Remove jemalloc
evertlammerts May 13, 2026
8bfc1da
Bump submodule
duckdblabs-bot May 13, 2026
3b6261a
[duckdb-labs bot] Bump DuckDB submodule (#455)
evertlammerts May 13, 2026
272f75e
Bump submodule
duckdblabs-bot May 15, 2026
5e63f4d
[duckdb-labs bot] Bump DuckDB submodule (#457)
evertlammerts May 15, 2026
4a38278
Bump submodule
duckdblabs-bot May 18, 2026
d7e8ab4
[duckdb-labs bot] Bump DuckDB submodule (#461)
evertlammerts May 18, 2026
fcdac3d
Bump submodule
duckdblabs-bot May 19, 2026
1e93bf4
Exclude gcovr for win arm
evertlammerts May 19, 2026
74fecdb
Use recursive mutex to deal with GIL <-> internal lock deadlocks
evertlammerts May 18, 2026
3d778de
Fix concjunction OR
evertlammerts May 19, 2026
9a86633
Fix concjunction OR (#465)
evertlammerts May 19, 2026
7b77328
[duckdb-labs bot] Bump DuckDB submodule (#464)
evertlammerts May 19, 2026
fd8889e
Use recursive mutex to deal with GIL <-> internal lock deadlocks (#462)
evertlammerts May 19, 2026
97df049
fix .clangd
evertlammerts May 19, 2026
559f6af
Only disable unity builds for editable installs on OSX
evertlammerts May 19, 2026
c88229d
Allow self-joining of Polars lazyframes
evertlammerts May 19, 2026
f87d6d9
Allow self-joining of Polars lazyframes (#466)
evertlammerts May 19, 2026
289bfbd
Bump submodule
duckdblabs-bot May 19, 2026
811b135
DuckDB submodule pinned at v1.5.3
evertlammerts May 20, 2026
e109449
Make rel->query work with a read only connection
evertlammerts May 26, 2026
64f56bc
Make rel->query work with a read only connection (#471)
evertlammerts May 26, 2026
5e5f6af
Bump submodule
duckdblabs-bot May 28, 2026
91308b0
[duckdb-labs bot] Bump DuckDB submodule (#472)
evertlammerts May 28, 2026
0ae99c1
Bump submodule
duckdblabs-bot May 29, 2026
ce0a8f5
[duckdb-labs bot] Bump DuckDB submodule (#473)
evertlammerts May 29, 2026
1e998b2
Bump submodule
duckdblabs-bot Jun 3, 2026
f4892c8
[duckdb-labs bot] Bump DuckDB submodule (#476)
evertlammerts Jun 3, 2026
410f625
Bump submodule
duckdblabs-bot Jun 8, 2026
50d2b28
[duckdb-labs bot] Bump DuckDB submodule (#479)
evertlammerts Jun 8, 2026
b1e25b3
Ignore Polars dynamic predicates
evertlammerts Jun 8, 2026
43cfeb6
Bump submodule
duckdblabs-bot Jun 9, 2026
2faf589
Ignore Polars dynamic predicates (#482)
evertlammerts Jun 9, 2026
74ad29b
[duckdb-labs bot] Bump DuckDB submodule (#485)
evertlammerts Jun 9, 2026
1fe9a55
Bump submodule
duckdblabs-bot Jun 10, 2026
1e41c88
accept pathlib.Path, os.PathLike, bytes, and file-like objects in rea…
evertlammerts Jun 10, 2026
b41a92c
[duckdb-labs bot] Bump DuckDB submodule (#488)
evertlammerts Jun 10, 2026
b936678
Accept pathlib.Path, os.PathLike, bytes, and file-like objects in rea…
evertlammerts Jun 10, 2026
b365cb9
Merge remote-tracking branch 'upstream/v1.5-variegata' into main_prep
evertlammerts Jun 11, 2026
620d84c
main fixes
evertlammerts Jun 11, 2026
6b23153
finish expression filter integration
evertlammerts Jun 12, 2026
aa511e3
add timestamp_tz_ns support
evertlammerts Jun 12, 2026
ecbebcb
add timestamp_tz_ns support to the pyspark module
evertlammerts Jun 12, 2026
55155d7
fix whitespace-only expression bug
evertlammerts Jun 12, 2026
5b9dbbf
Integrated with Identifier and MaxLogicalType, and using SetChildCard…
evertlammerts Jun 16, 2026
849119d
more fixes
evertlammerts Jun 16, 2026
4d7e6ff
added sqlnull support and fixed explain
evertlammerts Jun 16, 2026
0911fa4
fixed some more test failures
evertlammerts Jun 16, 2026
289a9e3
fix pybind type casters for enums
evertlammerts Jun 17, 2026
0ff9762
fix identifier <-> name conversion
evertlammerts Jun 17, 2026
76175ee
fix profiler test
evertlammerts Jun 17, 2026
249f44f
xfail query graph rendering test
evertlammerts Jun 17, 2026
e2aad13
adapt adbc tests to stricter default TransactionInvalidationPolicy
evertlammerts Jun 17, 2026
6d1c749
fix spark tests
evertlammerts Jun 17, 2026
c7f7f9a
fix test errors
evertlammerts Jun 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ Supported: `3.10`, `3.11`, `3.12`, `3.13`, `3.14`. Do **not** use free-threaded

Key `pyproject.toml` settings:

- `BUILD_EXTENSIONS = "core_functions;json;parquet;icu;jemalloc"` — extensions built into the wheel.
- `BUILD_EXTENSIONS = "core_functions;json;parquet;icu"` — extensions built into the wheel. (jemalloc is part of DuckDB core and is auto-enabled on supported platforms — 64-bit Linux, non-musl, non-BSD.)
- Editable overrides: `build-dir = "build/debug/"`, `editable.rebuild = true`, `editable.mode = "redirect"`, `cmake.build-type = "Debug"`, `DISABLE_UNITY = "1"` (unity disabled for better debugging).
- Coverage overrides: `build-dir = "build/coverage/"`, `RelWithDebInfo`, `--coverage` flags. Activate with `COVERAGE=true uv sync ...`.

Expand Down
88 changes: 31 additions & 57 deletions _duckdb-stubs/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -309,22 +309,15 @@ class DuckDBPyConnection:
strict_mode: bool | None = None,
) -> DuckDBPyRelation: ...
def from_df(self, df: pandas.DataFrame) -> DuckDBPyRelation: ...
@typing.overload
def from_parquet(
self,
file_glob: str,
binary_as_string: bool = False,
*,
file_row_number: bool = False,
filename: bool = False,
hive_partitioning: bool = False,
union_by_name: bool = False,
compression: ParquetCompression | None = None,
) -> DuckDBPyRelation: ...
@typing.overload
def from_parquet(
self,
file_globs: Sequence[str],
path_or_buffer: str
| bytes
| os.PathLike[str]
| os.PathLike[bytes]
| typing.IO[bytes]
| typing.IO[str]
| Sequence[str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes] | typing.IO[str]],
binary_as_string: bool = False,
*,
file_row_number: bool = False,
Expand Down Expand Up @@ -433,22 +426,15 @@ class DuckDBPyConnection:
hive_types: HiveTypes | None = None,
hive_types_autocast: bool | None = None,
) -> DuckDBPyRelation: ...
@typing.overload
def read_parquet(
self,
file_glob: str,
binary_as_string: bool = False,
*,
file_row_number: bool = False,
filename: bool = False,
hive_partitioning: bool = False,
union_by_name: bool = False,
compression: ParquetCompression | None = None,
) -> DuckDBPyRelation: ...
@typing.overload
def read_parquet(
self,
file_globs: Sequence[str],
path_or_buffer: str
| bytes
| os.PathLike[str]
| os.PathLike[bytes]
| typing.IO[bytes]
| typing.IO[str]
| Sequence[str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes] | typing.IO[str]],
binary_as_string: bool = False,
*,
file_row_number: bool = False,
Expand Down Expand Up @@ -551,7 +537,9 @@ class DuckDBPyRelation:
def distinct(self) -> DuckDBPyRelation: ...
def except_(self, other_rel: Self) -> DuckDBPyRelation: ...
def execute(self) -> DuckDBPyRelation: ...
def explain(self, type: ExplainType | ExplainTypeLiteral = ExplainType.STANDARD) -> str: ...
def explain(
self, type: ExplainType | ExplainTypeLiteral = ExplainType.STANDARD, format: str | None = None
) -> str: ...
def favg(
self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = ""
) -> DuckDBPyRelation: ...
Expand Down Expand Up @@ -1061,21 +1049,14 @@ def from_csv_auto(
strict_mode: bool | None = None,
) -> DuckDBPyRelation: ...
def from_df(df: pandas.DataFrame, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ...
@typing.overload
def from_parquet(
file_glob: str,
binary_as_string: bool = False,
*,
file_row_number: bool = False,
filename: bool = False,
hive_partitioning: bool = False,
union_by_name: bool = False,
compression: ParquetCompression | None = None,
connection: DuckDBPyConnection | None = None,
) -> DuckDBPyRelation: ...
@typing.overload
def from_parquet(
file_globs: Sequence[str],
path_or_buffer: str
| bytes
| os.PathLike[str]
| os.PathLike[bytes]
| typing.IO[bytes]
| typing.IO[str]
| Sequence[str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes] | typing.IO[str]],
binary_as_string: bool = False,
*,
file_row_number: bool = False,
Expand Down Expand Up @@ -1232,21 +1213,14 @@ def read_json(
hive_types: HiveTypes | None = None,
hive_types_autocast: bool | None = None,
) -> DuckDBPyRelation: ...
@typing.overload
def read_parquet(
file_glob: str,
binary_as_string: bool = False,
*,
file_row_number: bool = False,
filename: bool = False,
hive_partitioning: bool = False,
union_by_name: bool = False,
compression: ParquetCompression | None = None,
connection: DuckDBPyConnection | None = None,
) -> DuckDBPyRelation: ...
@typing.overload
def read_parquet(
file_globs: Sequence[str],
path_or_buffer: str
| bytes
| os.PathLike[str]
| os.PathLike[bytes]
| typing.IO[bytes]
| typing.IO[str]
| Sequence[str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes] | typing.IO[str]],
binary_as_string: bool = False,
*,
file_row_number: bool = False,
Expand Down
40 changes: 1 addition & 39 deletions cmake/duckdb_loader.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
# Simple DuckDB Build Configuration Module
#
# Sets sensible defaults for DuckDB Python extension builds and provides a clean
# interface for adding DuckDB as a library target. Adds jemalloc option for
# debugging but will never allow jemalloc in a release build if not on Linux.
# interface for adding DuckDB as a library target.
#
# Usage: include(cmake/duckdb_loader.cmake) # Optionally load extensions
# set(BUILD_EXTENSIONS "json;parquet;icu")
Expand Down Expand Up @@ -108,37 +107,6 @@ set(DEBUG_STACKTRACE
# Internal Functions
# ════════════════════════════════════════════════════════════════════════════════

# Drop "jemalloc" from BUILD_EXTENSIONS when the target platform does not
# qualify for it.
#
# Does nothing unless BUILD_EXTENSIONS matches the regex "jemalloc" (i.e. the
# text appears anywhere in the list). On a non-qualifying platform a warning is
# emitted and the filtered list is written to BUILD_EXTENSIONS in the caller's
# scope (PARENT_SCOPE); on a qualifying platform the list is left untouched.
function(_duckdb_validate_jemalloc_config)
# Check if jemalloc is in the extension list
if(NOT BUILD_EXTENSIONS MATCHES "jemalloc")
return()
endif()

# jemalloc is only allowed when pointers are 8 bytes (64-bit), the system name
# is "Linux" and BSD is not set. The CPU architecture is not checked here.
if(CMAKE_SIZEOF_VOID_P EQUAL 8
AND CMAKE_SYSTEM_NAME STREQUAL "Linux"
AND NOT BSD)
set(jemalloc_allowed TRUE)
else()
set(jemalloc_allowed FALSE)
endif()

if(NOT jemalloc_allowed)
message(WARNING "jemalloc extension is only supported on Linux.\n"
"Removing jemalloc from extension list.")
# Remove jemalloc from the extension list. This is a plain substring
# replacement, so every occurrence of the text "jemalloc" is removed.
string(REPLACE "jemalloc" "" BUILD_EXTENSIONS_FILTERED
"${BUILD_EXTENSIONS}")
# Collapse runs of ';' left behind by the removal into a single separator.
string(REGEX REPLACE ";+" ";" BUILD_EXTENSIONS_FILTERED
"${BUILD_EXTENSIONS_FILTERED}")
# Trim a separator left at the very start or very end of the list.
string(REGEX REPLACE "^;|;$" "" BUILD_EXTENSIONS_FILTERED
"${BUILD_EXTENSIONS_FILTERED}")
# Publish the filtered list to the calling scope; the local variable in this
# function's own scope is not changed by PARENT_SCOPE.
set(BUILD_EXTENSIONS
"${BUILD_EXTENSIONS_FILTERED}"
PARENT_SCOPE)
endif()
endfunction()

function(_duckdb_validate_source_path)
if(NOT EXISTS "${DUCKDB_SOURCE_PATH}")
message(
Expand Down Expand Up @@ -234,19 +202,13 @@ endfunction()

# Add the DuckDB source tree to the build and wrap it in an interface target.
#
# Arguments:
#   target_name - name passed on to _duckdb_create_interface_target().
#
# The helper calls run in a fixed order: source-path validation, jemalloc
# filtering of BUILD_EXTENSIONS, summary output, add_subdirectory(), interface
# target creation. BUILD_EXTENSIONS is then re-exported to the caller's scope.
function(duckdb_add_library target_name)
_duckdb_validate_source_path()
# May rewrite BUILD_EXTENSIONS in this function's scope (it sets the variable
# with PARENT_SCOPE), so it has to run before add_subdirectory() below.
_duckdb_validate_jemalloc_config()
_duckdb_print_summary()

# Add DuckDB subdirectory - it will use our variables. EXCLUDE_FROM_ALL keeps
# the subdirectory's targets out of the default "all" build unless something
# depends on them.
add_subdirectory("${DUCKDB_SOURCE_PATH}" duckdb EXCLUDE_FROM_ALL)

# Create clean interface target
_duckdb_create_interface_target(${target_name})

# Propagate BUILD_EXTENSIONS back to caller scope in case it was modified
set(BUILD_EXTENSIONS
"${BUILD_EXTENSIONS}"
PARENT_SCOPE)
endfunction()

function(duckdb_link_extensions target_name)
Expand Down
8 changes: 6 additions & 2 deletions duckdb/experimental/spark/sql/type_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
IntegerType,
LongType,
MapType,
NullType,
ShortType,
StringType,
StructField,
Expand All @@ -27,6 +28,7 @@
TimeNTZType,
TimestampMillisecondNTZType,
TimestampNanosecondNTZType,
TimestampNanosecondType,
TimestampNTZType,
TimestampSecondNTZType,
TimestampType,
Expand All @@ -41,6 +43,7 @@
)

_sqltype_to_spark_class = {
"null": NullType,
"boolean": BooleanType,
"utinyint": UnsignedByteType,
"tinyint": ByteType,
Expand All @@ -62,9 +65,10 @@
"time with time zone": TimeType,
"timestamp": TimestampNTZType,
"timestamp with time zone": TimestampType,
"timestamp_ms": TimestampNanosecondNTZType,
"timestamp_ns": TimestampMillisecondNTZType,
"timestamp_ms": TimestampMillisecondNTZType,
"timestamp_ns": TimestampNanosecondNTZType,
"timestamp_s": TimestampSecondNTZType,
"timestamptz_ns": TimestampNanosecondType,
"interval": DayTimeIntervalType,
"list": ArrayType,
"struct": StructType,
Expand Down
21 changes: 21 additions & 0 deletions duckdb/experimental/spark/sql/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"TimestampMillisecondNTZType",
"TimestampNTZType",
"TimestampNanosecondNTZType",
"TimestampNanosecondType",
"TimestampSecondNTZType",
"TimestampType",
"UUIDType",
Expand Down Expand Up @@ -239,6 +240,26 @@ def fromInternal(self, ts: int) -> datetime.datetime: # noqa: D102
return datetime.datetime.fromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)


class TimestampNanosecondType(AtomicType, metaclass=DataTypeSingleton):
    """Timezone-aware timestamp (datetime.datetime) data type with nanosecond precision.

    Wraps the DuckDB ``TIMESTAMPTZ_NS`` type. Converting values to and from the
    internal representation is not implemented; both conversion methods raise
    ``ContributionsAcceptedError``.
    """

    def __init__(self) -> None:
        """Initialise the base type with the DuckDB ``TIMESTAMPTZ_NS`` type."""
        duckdb_type = DuckDBPyType("TIMESTAMPTZ_NS")
        super().__init__(duckdb_type)

    @classmethod
    def typeName(cls) -> str:
        """Return the name of this type, ``"timestamptz_ns"``."""
        return "timestamptz_ns"

    def needConversion(self) -> bool:
        """Return ``True``: this type always reports that it needs conversion."""
        return True

    def fromInternal(self, ts: int) -> datetime.datetime:
        """Not implemented; always raises ``ContributionsAcceptedError``."""
        raise ContributionsAcceptedError

    def toInternal(self, dt: datetime.datetime) -> int:
        """Not implemented; always raises ``ContributionsAcceptedError``."""
        raise ContributionsAcceptedError


class TimestampNTZType(AtomicType, metaclass=DataTypeSingleton):
"""Timestamp (datetime.datetime) data type without timezone information with microsecond precision."""

Expand Down
Loading
Loading