Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pylibcudf.null_mask.null_count #17711

Merged
merged 7 commits into from
Jan 14, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions python/cudf/cudf/_lib/column.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ from pylibcudf.libcudf.column.column_view cimport (
from pylibcudf.libcudf.types cimport size_type
from rmm.librmm.device_buffer cimport device_buffer

cdef dtype_from_lists_column_view(column_view cv)
cdef dtype_from_column_view(column_view cv)

cdef class Column:
Expand Down Expand Up @@ -42,5 +41,3 @@ cdef class Column:

@staticmethod
cdef Column from_column_view(column_view, object)

cdef size_type compute_null_count(self) except? 0
23 changes: 9 additions & 14 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ from pylibcudf.libcudf.column.column_factories cimport (
)
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
from pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count
from pylibcudf.libcudf.scalar.scalar cimport scalar

from cudf._lib.scalar cimport DeviceScalar
Expand Down Expand Up @@ -346,7 +345,15 @@ cdef class Column:
@property
def null_count(self):
# The count is cached: it is only computed while self._null_count is None,
# and the cached value is what gets returned.
if self._null_count is None:
# NOTE(review): this excerpt is a diff view without +/- markers; the line
# below appears to be the pre-change call that the if/else following it
# replaces -- confirm against the merged file.
self._null_count = self.compute_null_count()
# A non-nullable or zero-length column is recorded as having 0 nulls
# without calling into pylibcudf.
if not self.nullable or self.size == 0:
self._null_count = 0
else:
# presumably the spill lock keeps the mask buffer in place while its raw
# pointer is in use -- TODO confirm against acquire_spill_lock.
with acquire_spill_lock():
self._null_count = pylibcudf.null_mask.null_count(
self.base_mask.get_ptr(mode="read"),
# Count over the bit range [offset, offset + size) of the base mask.
self.offset,
self.offset + self.size
)
return self._null_count

@property
Expand Down Expand Up @@ -410,18 +417,6 @@ cdef class Column:
else:
return other_col

# Count the nulls of this column over the bit range [offset, offset + size)
# of its base mask; returns 0 when the column is not nullable.
# `except? 0`: a return value of 0 makes Cython check for a pending exception.
cdef libcudf_types.size_type compute_null_count(self) except? 0:
# presumably the spill lock keeps the mask buffer in place while its raw
# pointer is in use -- TODO confirm against acquire_spill_lock.
with acquire_spill_lock():
if not self.nullable:
return 0
return cpp_null_count(
# get_ptr's result is cast through uintptr_t to a bitmask_type pointer.
<libcudf_types.bitmask_type*><uintptr_t>(
self.base_mask.get_ptr(mode="read")
),
self.offset,
self.offset + self.size
)

cdef mutable_column_view mutable_view(self) except *:
if isinstance(self.dtype, cudf.CategoricalDtype):
col = self.base_children[0]
Expand Down
4 changes: 3 additions & 1 deletion python/pylibcudf/pylibcudf/null_mask.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

from pylibcudf.libcudf.types cimport mask_state, size_type

Expand All @@ -16,3 +16,5 @@ cpdef DeviceBuffer create_null_mask(size_type size, mask_state state = *)
cpdef tuple bitmask_and(list columns)

cpdef tuple bitmask_or(list columns)

cpdef size_type null_count(Py_ssize_t bitmask, size_type start, size_type stop)
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/null_mask.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ def create_null_mask(
) -> DeviceBuffer: ...
def bitmask_and(columns: list[Column]) -> tuple[DeviceBuffer, int]: ...
def bitmask_or(columns: list[Column]) -> tuple[DeviceBuffer, int]: ...
def null_count(bitmask: int, start: int, stop: int) -> int: ...
29 changes: 28 additions & 1 deletion python/pylibcudf/pylibcudf/null_mask.pyx
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

from libcpp.memory cimport make_unique
from libcpp.pair cimport pair
from libcpp.utility cimport move
from pylibcudf.libcudf cimport null_mask as cpp_null_mask
from pylibcudf.libcudf.types cimport mask_state, size_type
from pylibcudf.utils cimport int_to_bitmask_ptr

from rmm.librmm.device_buffer cimport device_buffer
from rmm.pylibrmm.device_buffer cimport DeviceBuffer
Expand All @@ -20,6 +21,7 @@ __all__ = [
"bitmask_or",
"copy_bitmask",
"create_null_mask",
"null_count",
]

cdef DeviceBuffer buffer_to_python(device_buffer buf):
Expand Down Expand Up @@ -148,3 +150,28 @@ cpdef tuple bitmask_or(list columns):
c_result = cpp_null_mask.bitmask_or(c_table.view())

return buffer_to_python(move(c_result.first)), c_result.second


cpdef size_type null_count(Py_ssize_t bitmask, size_type start, size_type stop):
"""Given a validity bitmask, counts the number of null elements.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you using Py_ssize_t over int because the bitmask is essentially an index into a Python buffer object? According to this Stack Overflow post, Py_ssize_t is preferred over int when indexing into Python objects.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was matching the pointer type of int_to_void_ptr here. Additionally, using int here would result in an OverflowError.


For details, see :cpp:func:`null_count`.

Parameters
----------
bitmask : int
Integer pointer to the bitmask.

start : int
Index of the first bit to count (inclusive).

stop : int
Index of the last bit to count (exclusive).

mroeschke marked this conversation as resolved.
Show resolved Hide resolved
Returns
-------
int
The number of null elements in the specified range.
"""
with nogil:
return cpp_null_mask.null_count(int_to_bitmask_ptr(bitmask), start, stop)
Loading