From 6884723737c5b258a42d29ff4c0739175fc8a088 Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Wed, 27 May 2026 15:31:06 -0400 Subject: [PATCH] Handle CUDA checkpoint restore arg layouts --- cuda_bindings/build_hooks.py | 39 +++++++- .../cuda/bindings/_bindings/cydriver.pxd.in | 2 +- .../cuda/bindings/_bindings/cydriver.pyx.in | 2 +- cuda_bindings/cuda/bindings/cydriver.pxd.in | 9 +- cuda_bindings/cuda/bindings/cydriver.pyx.in | 2 +- cuda_bindings/cuda/bindings/driver.pxd.in | 14 ++- cuda_bindings/cuda/bindings/driver.pyx.in | 88 ++++++++++++------- cuda_bindings/docs/source/module/driver.rst | 18 ++-- 8 files changed, 120 insertions(+), 54 deletions(-) diff --git a/cuda_bindings/build_hooks.py b/cuda_bindings/build_hooks.py index eab37ea7266..5155415cf2f 100644 --- a/cuda_bindings/build_hooks.py +++ b/cuda_bindings/build_hooks.py @@ -78,13 +78,32 @@ def __init__(self, name, members): self._name = name self._member_names = [] self._member_types = [] + self._member_declarators = [] for var_name, var_type, _ in members: - var_type = var_type[0] - var_type = var_type.removeprefix("struct ") - var_type = var_type.removeprefix("union ") + base_type = var_type[0] + base_type = base_type.removeprefix("struct ") + base_type = base_type.removeprefix("union ") self._member_names += [var_name] - self._member_types += [var_type] + self._member_types += [base_type] + self._member_declarators += [tuple(var_type[1:])] + + def member_type(self, member_name): + try: + return self._member_types[self._member_names.index(member_name)] + except ValueError: + return None + + def member_array_length(self, member_name): + try: + declarators = self._member_declarators[self._member_names.index(member_name)] + except ValueError: + return None + + for declarator in declarators: + if isinstance(declarator, list) and len(declarator) == 1: + return declarator[0] + return None def discoverMembers(self, memberDict, prefix, seen=None): if seen is None: @@ -161,6 +180,9 @@ def 
_parse_headers(header_dict, include_path_list, parser_caching): # Since we only support 64 bit architectures, we can inline the sizeof(T*) to 8 and then compute the # result in Python. The arithmetic expression is preserved to help with clarity and understanding r"char reserved\[52 - sizeof\(CUcheckpointGpuPair \*\)\];": rf"char reserved[{52 - 8}];", + r"char reserved\[64 - sizeof\(CUcheckpointGpuPair \*\) - sizeof\(unsigned int\)\];": ( + rf"char reserved[{64 - 8 - 4}];" + ), } print(f'Parsing headers in "{include_path_list}" (Caching = {parser_caching})', flush=True) @@ -310,6 +332,13 @@ def _build_cuda_bindings(strip=False): found_types, found_functions, found_values, found_struct, struct_list = _parse_headers( header_dict, include_path_list, parser_caching ) + struct_field_types = {} + struct_field_array_lengths = {} + for struct_name, struct in struct_list.items(): + for member_name in struct._member_names: + key = f"{struct_name}.{member_name}" + struct_field_types[key] = struct.member_type(member_name) + struct_field_array_lengths[key] = struct.member_array_length(member_name) # Generate code from .in templates path_list = [ @@ -332,6 +361,8 @@ def _build_cuda_bindings(strip=False): "found_values": found_values, "found_struct": found_struct, "struct_list": struct_list, + "struct_field_types": struct_field_types, + "struct_field_array_lengths": struct_field_array_lengths, "os": os, "sys": sys, "platform": platform, diff --git a/cuda_bindings/cuda/bindings/_bindings/cydriver.pxd.in b/cuda_bindings/cuda/bindings/_bindings/cydriver.pxd.in index 4d2ab7e04b9..0b85c0042b1 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cydriver.pxd.in +++ b/cuda_bindings/cuda/bindings/_bindings/cydriver.pxd.in @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# This code was automatically generated with version 12.9.0, generator version 49a8141. 
Do not modify it directly. +# This code was automatically generated with version 12.9.0, generator version 0.3.1.dev1711+g875fec45. Do not modify it directly. from cuda.bindings.cydriver cimport * {{if 'cuGetErrorString' in found_functions}} diff --git a/cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in b/cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in index a4213960404..ec28b247fcb 100644 --- a/cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in +++ b/cuda_bindings/cuda/bindings/_bindings/cydriver.pyx.in @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# This code was automatically generated with version 12.9.0, generator version 0.3.1.dev1630+gadce055ea.d20260422. Do not modify it directly. +# This code was automatically generated with version 12.9.0, generator version 0.3.1.dev1711+g875fec45. Do not modify it directly. {{if 'Windows' == platform.system()}} import os cimport cuda.bindings._lib.windll as windll diff --git a/cuda_bindings/cuda/bindings/cydriver.pxd.in b/cuda_bindings/cuda/bindings/cydriver.pxd.in index 66b32154a42..bef1c4537e1 100644 --- a/cuda_bindings/cuda/bindings/cydriver.pxd.in +++ b/cuda_bindings/cuda/bindings/cydriver.pxd.in @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# This code was automatically generated with version 12.9.0, generator version 49a8141. Do not modify it directly. +# This code was automatically generated with version 12.9.0, generator version 0.3.1.dev1711+g875fec45. Do not modify it directly. 
from libc.stdint cimport uint32_t, uint64_t @@ -2311,7 +2311,12 @@ cdef extern from "cuda.h": ctypedef CUcheckpointCheckpointArgs_st CUcheckpointCheckpointArgs cdef struct CUcheckpointRestoreArgs_st: - cuuint64_t reserved[8] + {{if struct_field_types.get('CUcheckpointRestoreArgs_st.reserved') == 'char'}} + char reserved[{{struct_field_array_lengths['CUcheckpointRestoreArgs_st.reserved']}}] + {{endif}} + {{if struct_field_types.get('CUcheckpointRestoreArgs_st.reserved') == 'cuuint64_t'}} + cuuint64_t reserved[{{struct_field_array_lengths['CUcheckpointRestoreArgs_st.reserved']}}] + {{endif}} ctypedef CUcheckpointRestoreArgs_st CUcheckpointRestoreArgs diff --git a/cuda_bindings/cuda/bindings/cydriver.pyx.in b/cuda_bindings/cuda/bindings/cydriver.pyx.in index e64dddf478e..63eacdc7e1e 100644 --- a/cuda_bindings/cuda/bindings/cydriver.pyx.in +++ b/cuda_bindings/cuda/bindings/cydriver.pyx.in @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# This code was automatically generated with version 12.9.0, generator version 49a8141. Do not modify it directly. +# This code was automatically generated with version 12.9.0, generator version 0.3.1.dev1711+g875fec45. Do not modify it directly. cimport cuda.bindings._bindings.cydriver as cydriver {{if 'cuGetErrorString' in found_functions}} diff --git a/cuda_bindings/cuda/bindings/driver.pxd.in b/cuda_bindings/cuda/bindings/driver.pxd.in index f532ee7af4f..cfef650c39c 100644 --- a/cuda_bindings/cuda/bindings/driver.pxd.in +++ b/cuda_bindings/cuda/bindings/driver.pxd.in @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# This code was automatically generated with version 12.9.0, generator version 0.3.1.dev1588+g61faef43a. Do not modify it directly. 
+# This code was automatically generated with version 12.9.0, generator version 0.3.1.dev1711+g875fec45. Do not modify it directly. cimport cuda.bindings.cydriver as cydriver include "_lib/utils.pxd" @@ -5097,7 +5097,11 @@ cdef class CUcheckpointRestoreArgs_st: Attributes ---------- - {{if 'CUcheckpointRestoreArgs_st.reserved' in found_struct}} + {{if struct_field_types.get('CUcheckpointRestoreArgs_st.reserved') == 'char'}} + reserved : bytes + Reserved for future use, must be zeroed + {{endif}} + {{if struct_field_types.get('CUcheckpointRestoreArgs_st.reserved') == 'cuuint64_t'}} reserved : list[cuuint64_t] Reserved for future use, must be zeroed {{endif}} @@ -10560,7 +10564,11 @@ cdef class CUcheckpointRestoreArgs(CUcheckpointRestoreArgs_st): Attributes ---------- - {{if 'CUcheckpointRestoreArgs_st.reserved' in found_struct}} + {{if struct_field_types.get('CUcheckpointRestoreArgs_st.reserved') == 'char'}} + reserved : bytes + Reserved for future use, must be zeroed + {{endif}} + {{if struct_field_types.get('CUcheckpointRestoreArgs_st.reserved') == 'cuuint64_t'}} reserved : list[cuuint64_t] Reserved for future use, must be zeroed {{endif}} diff --git a/cuda_bindings/cuda/bindings/driver.pyx.in b/cuda_bindings/cuda/bindings/driver.pyx.in index a2f01079f23..cf098177720 100644 --- a/cuda_bindings/cuda/bindings/driver.pyx.in +++ b/cuda_bindings/cuda/bindings/driver.pyx.in @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2021-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE -# This code was automatically generated with version 12.9.0, generator version 0.3.1.dev1630+gadce055ea.d20260422. Do not modify it directly. +# This code was automatically generated with version 12.9.0, generator version 0.3.1.dev1711+g875fec45. Do not modify it directly. 
from typing import Any, Optional import cython import ctypes @@ -23250,7 +23250,11 @@ cdef class CUcheckpointRestoreArgs_st: Attributes ---------- - {{if 'CUcheckpointRestoreArgs_st.reserved' in found_struct}} + {{if struct_field_types.get('CUcheckpointRestoreArgs_st.reserved') == 'char'}} + reserved : bytes + Reserved for future use, must be zeroed + {{endif}} + {{if struct_field_types.get('CUcheckpointRestoreArgs_st.reserved') == 'cuuint64_t'}} reserved : list[cuuint64_t] Reserved for future use, must be zeroed {{endif}} @@ -23283,7 +23287,26 @@ cdef class CUcheckpointRestoreArgs_st: return '\n'.join(str_list) else: return '' - {{if 'CUcheckpointRestoreArgs_st.reserved' in found_struct}} + {{if struct_field_types.get('CUcheckpointRestoreArgs_st.reserved') == 'char'}} + @property + def reserved(self): + return PyBytes_FromStringAndSize(self._pvt_ptr[0].reserved, {{struct_field_array_lengths['CUcheckpointRestoreArgs_st.reserved']}}) + @reserved.setter + def reserved(self, reserved): + if len(reserved) != {{struct_field_array_lengths['CUcheckpointRestoreArgs_st.reserved']}}: + raise ValueError("reserved length must be {{struct_field_array_lengths['CUcheckpointRestoreArgs_st.reserved']}}, is " + str(len(reserved))) + if CHAR_MIN == 0: + for i, b in enumerate(reserved): + if b < 0 and b > -129: + b = b + 256 + self._pvt_ptr[0].reserved[i] = b + else: + for i, b in enumerate(reserved): + if b > 127 and b < 256: + b = b - 256 + self._pvt_ptr[0].reserved[i] = b + {{endif}} + {{if struct_field_types.get('CUcheckpointRestoreArgs_st.reserved') == 'cuuint64_t'}} @property def reserved(self): return [cuuint64_t(init_value=_reserved) for _reserved in self._pvt_ptr[0].reserved] @@ -32759,16 +32782,15 @@ def cuMemcpy3DBatchAsync(size_t numOps, opList : Optional[tuple[CUDA_MEMCPY3D_BA :py:obj:`~.CUmemcpy3DOperand.op.ptr.rowLength` field specifies the length of each row in elements and must either be zero or be greater than or equal to the width of the copy specified in - 
:py:obj:`~.CUDA_MEMCPY3D_BATCH_OP`::extent::width. The + :py:obj:`~.CUDA_MEMCPY3D_BATCH_OP.extent.width`. The :py:obj:`~.CUmemcpy3DOperand.op.ptr.layerHeight` field specifies the height of each layer and must either be zero or be greater than or equal to the height of the copy specified in - :py:obj:`~.CUDA_MEMCPY3D_BATCH_OP`::extent::height. When either of - these values is zero, that aspect of the operand is considered to be - tightly packed according to the copy extent. For managed memory - pointers on devices where - :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS` is true or - system-allocated pageable memory on devices where + :py:obj:`~.CUDA_MEMCPY3D_BATCH_OP.extent.height`. When either of these + values is zero, that aspect of the operand is considered to be tightly + packed according to the copy extent. For managed memory pointers on + devices where :py:obj:`~.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS` + is true or system-allocated pageable memory on devices where :py:obj:`~.CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS` is true, the :py:obj:`~.CUmemcpy3DOperand.op.ptr.locHint` field can be used to hint the location of the operand. @@ -34551,9 +34573,9 @@ def cuMemCreate(size_t size, prop : Optional[CUmemAllocationProp], unsigned long allocation targeting a specific host NUMA node, applications must set :py:obj:`~.CUmemAllocationProp.CUmemLocation.type` to :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA` and - :py:obj:`~.CUmemAllocationProp`::CUmemLocation::id must specify the - NUMA ID of the CPU. On systems where NUMA is not available - :py:obj:`~.CUmemAllocationProp`::CUmemLocation::id must be set to 0. + :py:obj:`~.CUmemAllocationProp.CUmemLocation.id` must specify the NUMA + ID of the CPU. On systems where NUMA is not available + :py:obj:`~.CUmemAllocationProp.CUmemLocation.id` must be set to 0. 
Specifying :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT` or :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST` as the :py:obj:`~.CUmemLocation.type` will result in @@ -34757,7 +34779,7 @@ def cuMemMapArrayAsync(mapInfoList : Optional[tuple[CUarrayMapInfo] | list[CUarr where :py:obj:`~.CUarrayMapInfo.resourceType` specifies the type of resource to be operated on. If :py:obj:`~.CUarrayMapInfo.resourceType` - is set to :py:obj:`~.CUresourcetype`::CU_RESOURCE_TYPE_ARRAY then + is set to :py:obj:`~.CUresourcetype.CU_RESOURCE_TYPE_ARRAY` then :py:obj:`~.CUarrayMapInfo.resource.array` must be set to a valid sparse CUDA array handle. The CUDA array must be either a 2D, 2D layered or 3D CUDA array and must have been allocated using :py:obj:`~.cuArrayCreate` @@ -34767,7 +34789,7 @@ def cuMemMapArrayAsync(mapInfoList : Optional[tuple[CUarrayMapInfo] | list[CUarr using :py:obj:`~.cuMipmappedArrayGetLevel`, :py:obj:`~.CUDA_ERROR_INVALID_VALUE` will be returned. If :py:obj:`~.CUarrayMapInfo.resourceType` is set to - :py:obj:`~.CUresourcetype`::CU_RESOURCE_TYPE_MIPMAPPED_ARRAY then + :py:obj:`~.CUresourcetype.CU_RESOURCE_TYPE_MIPMAPPED_ARRAY` then :py:obj:`~.CUarrayMapInfo.resource.mipmap` must be set to a valid sparse CUDA mipmapped array handle. The CUDA mipmapped array must be either a 2D, 2D layered or 3D CUDA mipmapped array and must have been @@ -34782,16 +34804,16 @@ def cuMemMapArrayAsync(mapInfoList : Optional[tuple[CUarrayMapInfo] | list[CUarr **View CUDA Toolkit Documentation for a C++ code example** where - :py:obj:`~.CUarraySparseSubresourceType`::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL + :py:obj:`~.CUarraySparseSubresourceType.CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL` indicates a sparse-miplevel which spans at least one tile in every dimension. 
The remaining miplevels which are too small to span at least one tile in any dimension constitute the mip tail region as indicated by - :py:obj:`~.CUarraySparseSubresourceType`::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL + :py:obj:`~.CUarraySparseSubresourceType.CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL` subresource type. If :py:obj:`~.CUarrayMapInfo.subresourceType` is set to - :py:obj:`~.CUarraySparseSubresourceType`::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL + :py:obj:`~.CUarraySparseSubresourceType.CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL` then :py:obj:`~.CUarrayMapInfo.subresource.sparseLevel` struct must contain valid array subregion offsets and extents. The :py:obj:`~.CUarrayMapInfo.subresource.sparseLevel.offsetX`, @@ -34817,7 +34839,7 @@ def cuMemMapArrayAsync(mapInfoList : Optional[tuple[CUarrayMapInfo] | list[CUarr :py:obj:`~.cuMipmappedArrayGetSparseProperties` If :py:obj:`~.CUarrayMapInfo.subresourceType` is set to - :py:obj:`~.CUarraySparseSubresourceType`::CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL + :py:obj:`~.CUarraySparseSubresourceType.CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL` then :py:obj:`~.CUarrayMapInfo.subresource.miptail` struct must contain valid mip tail offset in :py:obj:`~.CUarrayMapInfo.subresource.miptail.offset` and size in @@ -34842,17 +34864,17 @@ def cuMemMapArrayAsync(mapInfoList : Optional[tuple[CUarrayMapInfo] | list[CUarr **View CUDA Toolkit Documentation for a C++ code example** If :py:obj:`~.CUarrayMapInfo.memOperationType` is set to - :py:obj:`~.CUmemOperationType`::CU_MEM_OPERATION_TYPE_MAP then the + :py:obj:`~.CUmemOperationType.CU_MEM_OPERATION_TYPE_MAP` then the subresource will be mapped onto the tile pool memory specified by :py:obj:`~.CUarrayMapInfo.memHandle` at offset :py:obj:`~.CUarrayMapInfo.offset`. The tile pool allocation has to be created by specifying the :py:obj:`~.CU_MEM_CREATE_USAGE_TILE_POOL` flag when calling :py:obj:`~.cuMemCreate`. 
Also, :py:obj:`~.CUarrayMapInfo.memHandleType` must be set to - :py:obj:`~.CUmemHandleType`::CU_MEM_HANDLE_TYPE_GENERIC. + :py:obj:`~.CUmemHandleType.CU_MEM_HANDLE_TYPE_GENERIC`. If :py:obj:`~.CUarrayMapInfo.memOperationType` is set to - :py:obj:`~.CUmemOperationType`::CU_MEM_OPERATION_TYPE_UNMAP then an + :py:obj:`~.CUmemOperationType.CU_MEM_OPERATION_TYPE_UNMAP` then an unmapping operation is performed. :py:obj:`~.CUarrayMapInfo.memHandle` must be NULL. @@ -34861,7 +34883,7 @@ def cuMemMapArrayAsync(mapInfoList : Optional[tuple[CUarrayMapInfo] | list[CUarr exactly one bit set, and the corresponding device must match the device associated with the stream. If :py:obj:`~.CUarrayMapInfo.memOperationType` is set to - :py:obj:`~.CUmemOperationType`::CU_MEM_OPERATION_TYPE_MAP, the device + :py:obj:`~.CUmemOperationType.CU_MEM_OPERATION_TYPE_MAP`, the device must also match the device associated with the tile pool memory allocation as specified by :py:obj:`~.CUarrayMapInfo.memHandle`. @@ -34976,9 +34998,9 @@ def cuMemSetAccess(ptr, size_t size, desc : Optional[tuple[CUmemAccessDesc] | li :py:obj:`~.cuMemCreate`. Users cannot specify :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA` accessibility for allocations created on with other location types. Note: When - :py:obj:`~.CUmemAccessDesc`::CUmemLocation::type is + :py:obj:`~.CUmemAccessDesc.CUmemLocation.type` is :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA`, - :py:obj:`~.CUmemAccessDesc`::CUmemLocation::id is ignored. When setting + :py:obj:`~.CUmemAccessDesc.CUmemLocation.id` is ignored. When setting the access flags for a virtual address range mapping a multicast object, `ptr` and `size` must be aligned to the value returned by :py:obj:`~.cuMulticastGetGranularity` with the flag @@ -35727,13 +35749,13 @@ def cuMemPoolCreate(poolProps : Optional[CUmemPoolProps]): device and IPC capabilities. 
To create a memory pool targeting a specific host NUMA node, - applications must set :py:obj:`~.CUmemPoolProps`::CUmemLocation::type - to :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA` and - :py:obj:`~.CUmemPoolProps`::CUmemLocation::id must specify the NUMA ID - of the host memory node. Specifying + applications must set :py:obj:`~.CUmemPoolProps.CUmemLocation.type` to + :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA` and + :py:obj:`~.CUmemPoolProps.CUmemLocation.id` must specify the NUMA ID of + the host memory node. Specifying :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT` or :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST` as the - :py:obj:`~.CUmemPoolProps`::CUmemLocation::type will result in + :py:obj:`~.CUmemPoolProps.CUmemLocation.type` will result in :py:obj:`~.CUDA_ERROR_INVALID_VALUE`. By default, the pool's memory will be accessible from the device it is allocated on. In the case of pools created with :py:obj:`~.CU_MEM_LOCATION_TYPE_HOST_NUMA`, their @@ -40088,7 +40110,7 @@ def cuExternalMemoryGetMappedMipmappedArray(extMem, mipmapDesc : Optional[CUDA_E the mipmapped array is bound as a color target in the graphics API, then the flag :py:obj:`~.CUDA_ARRAY3D_COLOR_ATTACHMENT` must be specified in - :py:obj:`~.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC`::arrayDesc::Flags. + :py:obj:`~.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC.arrayDesc.Flags`. :py:obj:`~.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC.numLevels` specifies the total number of levels in the mipmap chain. @@ -40394,7 +40416,7 @@ def cuSignalExternalSemaphoresAsync(extSemArray : Optional[tuple[CUexternalSemap :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX`, :py:obj:`~.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT` then the keyed mutex will be released with the key specified in - :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_PARAMS`::params::keyedmutex::key. + :py:obj:`~.CUDA_EXTERNAL_SEMAPHORE_PARAMS.params.keyedmutex.key`. 
Parameters ---------- @@ -42669,7 +42691,7 @@ def cuLaunchGridAsync(f, int grid_width, int grid_height, hStream): Notes ----- - In certain cases where cubins are created with no ABI (i.e., using `ptxas` `--abi-compile` `no`), this function may serialize kernel launches. The CUDA driver retains asynchronous behavior by growing the per-thread stack as needed per launch and not shrinking it afterwards. + In certain cases where cubins are created with no ABI (i.e., using `ptxas` `--abi-compile` `no`), this function may serialize kernel launches. The CUDA driver retains asynchronous behavior by growing the per-thread stack as needed per launch and not shrinking it afterwards. """ cdef cydriver.CUstream cyhStream if hStream is None: diff --git a/cuda_bindings/docs/source/module/driver.rst b/cuda_bindings/docs/source/module/driver.rst index f0abf24a728..19a31171789 100644 --- a/cuda_bindings/docs/source/module/driver.rst +++ b/cuda_bindings/docs/source/module/driver.rst @@ -3274,7 +3274,7 @@ Data types used by CUDA driver Valid for graph nodes, launches. This attribute is graphs-only, and passing it to a launch in a non-capturing stream will result in an error. - :py:obj:`~.CUlaunchAttributeValue`::deviceUpdatableKernelNode::deviceUpdatable can only be set to 0 or 1. Setting the field to 1 indicates that the corresponding kernel node should be device-updatable. On success, a handle will be returned via :py:obj:`~.CUlaunchAttributeValue`::deviceUpdatableKernelNode::devNode which can be passed to the various device-side update functions to update the node's kernel parameters from within another kernel. For more information on the types of device updates that can be made, as well as the relevant limitations thereof, see :py:obj:`~.cudaGraphKernelNodeUpdatesApply`. + :py:obj:`~.CUlaunchAttributeValue.deviceUpdatableKernelNode.deviceUpdatable` can only be set to 0 or 1. Setting the field to 1 indicates that the corresponding kernel node should be device-updatable.
On success, a handle will be returned via :py:obj:`~.CUlaunchAttributeValue.deviceUpdatableKernelNode.devNode` which can be passed to the various device-side update functions to update the node's kernel parameters from within another kernel. For more information on the types of device updates that can be made, as well as the relevant limitations thereof, see :py:obj:`~.cudaGraphKernelNodeUpdatesApply`. Nodes which are device-updatable have additional restrictions compared to regular kernel nodes. Firstly, device-updatable nodes cannot be removed from their graph via :py:obj:`~.cuGraphDestroyNode`. Additionally, once opted-in to this functionality, a node cannot opt out, and any attempt to set the deviceUpdatable attribute to 0 will result in an error. Device-updatable kernel nodes also cannot have their attributes copied to/from another kernel node via :py:obj:`~.cuGraphKernelNodeCopyAttributes`. Graphs containing one or more device-updatable nodes also do not allow multiple instantiation, and neither the graph nor its instantiated version can be passed to :py:obj:`~.cuGraphExecUpdate`. @@ -7206,6 +7206,14 @@ Checkpoint and restore capabilities are currently restricted to Linux. .. autofunction:: cuda.bindings.driver.cuCheckpointProcessRestore .. autofunction:: cuda.bindings.driver.cuCheckpointProcessUnlock +Profiler Control +---------------- + +This section describes the profiler control functions of the low-level CUDA driver application programming interface. + +.. autofunction:: cuda.bindings.driver.cuProfilerStart +.. autofunction:: cuda.bindings.driver.cuProfilerStop + EGL Interoperability -------------------- @@ -7252,14 +7260,6 @@ This section describes the OpenGL interoperability functions of the low-level CU .. autofunction:: cuda.bindings.driver.cuGraphicsGLRegisterImage .. 
autofunction:: cuda.bindings.driver.cuGLGetDevices -Profiler Control ----------------- - -This section describes the profiler control functions of the low-level CUDA driver application programming interface. - -.. autofunction:: cuda.bindings.driver.cuProfilerStart -.. autofunction:: cuda.bindings.driver.cuProfilerStop - VDPAU Interoperability ----------------------