diff --git a/.devcontainer/cuda11.8-conda/devcontainer.json b/.devcontainer/cuda11.8-conda/devcontainer.json index 8423fe21c29..7a1361e52c5 100644 --- a/.devcontainer/cuda11.8-conda/devcontainer.json +++ b/.devcontainer/cuda11.8-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.08-cpp-cuda11.8-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.10-cpp-cuda11.8-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda11.8-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda11.8-pip/devcontainer.json b/.devcontainer/cuda11.8-pip/devcontainer.json index 4945d6cf753..64d7cd54130 100644 --- a/.devcontainer/cuda11.8-pip/devcontainer.json +++ b/.devcontainer/cuda11.8-pip/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "11.8", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.08-cpp-cuda11.8-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.10-cpp-cuda11.8-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda11.8-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda11.8-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git 
a/.devcontainer/cuda12.5-conda/devcontainer.json b/.devcontainer/cuda12.5-conda/devcontainer.json index fadce01d060..c1924243506 100644 --- a/.devcontainer/cuda12.5-conda/devcontainer.json +++ b/.devcontainer/cuda12.5-conda/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "conda", - "BASE": "rapidsai/devcontainers:24.08-cpp-mambaforge-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.10-cpp-mambaforge-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.5-conda" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda12.5-conda" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.devcontainer/cuda12.5-pip/devcontainer.json b/.devcontainer/cuda12.5-pip/devcontainer.json index 026eb540952..beab2940176 100644 --- a/.devcontainer/cuda12.5-pip/devcontainer.json +++ b/.devcontainer/cuda12.5-pip/devcontainer.json @@ -5,17 +5,17 @@ "args": { "CUDA": "12.5", "PYTHON_PACKAGE_MANAGER": "pip", - "BASE": "rapidsai/devcontainers:24.08-cpp-cuda12.5-ubuntu22.04" + "BASE": "rapidsai/devcontainers:24.10-cpp-cuda12.5-ubuntu22.04" } }, "runArgs": [ "--rm", "--name", - "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.08-cuda12.5-pip" + "${localEnv:USER:anon}-rapids-${localWorkspaceFolderBasename}-24.10-cuda12.5-pip" ], "hostRequirements": {"gpu": "optional"}, "features": { - "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.8": {} + "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils:24.10": {} }, "overrideFeatureInstallOrder": [ "ghcr.io/rapidsai/devcontainers/features/rapids-build-utils" diff --git a/.github/labeler.yml b/.github/labeler.yml index 
48967417af3..8506d38a048 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -12,7 +12,7 @@ cudf.polars: - 'python/cudf_polars/**' pylibcudf: - - 'python/cudf/cudf/_lib/pylibcudf/**' + - 'python/pylibcudf/**' libcudf: - 'cpp/**' diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 2e5959338b0..43fc567cb67 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -28,7 +28,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -37,7 +37,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -46,7 +46,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -57,19 +57,63 @@ jobs: if: github.ref_type == 'branch' needs: python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: arch: "amd64" branch: ${{ inputs.branch }} build_type: ${{ inputs.build_type || 'branch' }} - container_image: "rapidsai/ci-conda:latest" + container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" date: ${{ inputs.date }} node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" sha: ${{ 
inputs.sha }} + wheel-build-libcudf: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + with: + # build for every combination of arch and CUDA version, but only for the latest Python + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_libcudf.sh + wheel-publish-libcudf: + needs: wheel-build-libcudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: libcudf + package-type: cpp + wheel-build-pylibcudf: + needs: [wheel-publish-libcudf] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_pylibcudf.sh + wheel-publish-pylibcudf: + needs: wheel-build-pylibcudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: pylibcudf + package-type: python wheel-build-cudf: + needs: wheel-publish-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -79,17 +123,18 @@ jobs: wheel-publish-cudf: needs: wheel-build-cudf secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: cudf + package-type: python wheel-build-dask-cudf: needs: wheel-publish-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -101,17 +146,18 @@ jobs: wheel-publish-dask-cudf: needs: wheel-build-dask-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: dask_cudf + package-type: python wheel-build-cudf-polars: - needs: wheel-publish-cudf + needs: wheel-publish-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -123,13 +169,14 @@ jobs: wheel-publish-cudf-polars: needs: wheel-build-cudf-polars secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.10 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: cudf_polars + package-type: python trigger-pandas-tests: if: inputs.build_type == 'nightly' needs: wheel-build-cudf diff --git a/.github/workflows/pandas-tests.yaml b/.github/workflows/pandas-tests.yaml index 5a937b2f362..10c803f7921 100644 --- a/.github/workflows/pandas-tests.yaml +++ b/.github/workflows/pandas-tests.yaml @@ -17,9 +17,10 @@ jobs: pandas-tests: # run the Pandas unit tests secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: - matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.9" and (.CUDA_VER | startswith("12.5.")) )) + # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
+ matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 25f11863b0d..766df59594b 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -12,6 +12,7 @@ concurrency: jobs: pr-builder: needs: + - changed-files - checks - conda-cpp-build - conda-cpp-checks @@ -23,6 +24,8 @@ jobs: - static-configure - conda-notebook-tests - docs-build + - wheel-build-libcudf + - wheel-build-pylibcudf - wheel-build-cudf - wheel-tests-cudf - wheel-build-cudf-polars @@ -35,66 +38,148 @@ jobs: - pandas-tests - pandas-tests-diff secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 + if: always() + with: + needs: ${{ toJSON(needs) }} + changed-files: + runs-on: ubuntu-latest + name: "Check changed files" + outputs: + test_cpp: ${{ steps.changed-files.outputs.cpp_any_changed == 'true' }} + test_java: ${{ steps.changed-files.outputs.java_any_changed == 'true' }} + test_notebooks: ${{ steps.changed-files.outputs.notebooks_any_changed == 'true' }} + test_python: ${{ steps.changed-files.outputs.python_any_changed == 'true' }} + test_cudf_pandas: ${{ steps.changed-files.outputs.cudf_pandas_any_changed == 'true' }} + steps: + - name: Get PR info + id: get-pr-info + uses: nv-gha-runners/get-pr-info@main + - name: Checkout code repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + persist-credentials: false + - name: Calculate merge base + id: calculate-merge-base + env: + PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }} + BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }} + run: | + (echo -n "merge-base="; git merge-base "$BASE_SHA" 
"$PR_SHA") > "$GITHUB_OUTPUT" + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v45 + with: + base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }} + sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }} + files_yaml: | + cpp: + - '**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!docs/**' + - '!img/**' + - '!java/**' + - '!notebooks/**' + - '!python/**' + - '!ci/cudf_pandas_scripts/**' + java: + - '**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!docs/**' + - '!img/**' + - '!notebooks/**' + - '!python/**' + - '!ci/cudf_pandas_scripts/**' + notebooks: + - '**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!java/**' + - '!ci/cudf_pandas_scripts/**' + python: + - '**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!docs/**' + - '!img/**' + - '!java/**' + - '!notebooks/**' + - '!ci/cudf_pandas_scripts/**' + cudf_pandas: + - '**' + - 'ci/cudf_pandas_scripts/**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!docs/**' + - '!img/**' + - '!java/**' + - '!notebooks/**' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.10 with: enable_check_generated_files: false conda-cpp-build: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-24.10 with: build_type: pull-request conda-cpp-checks: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 with: build_type: pull-request enable_check_symbols: true conda-cpp-tests: - needs: conda-cpp-build + needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.08 + 
uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 + if: needs.changed-files.outputs.test_cpp == 'true' with: build_type: pull-request conda-python-build: needs: conda-cpp-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10 with: build_type: pull-request conda-python-cudf-tests: - needs: conda-python-build + needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + if: needs.changed-files.outputs.test_python == 'true' with: build_type: pull-request script: "ci/test_python_cudf.sh" conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism - needs: conda-python-build + needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 + if: needs.changed-files.outputs.test_python == 'true' with: build_type: pull-request script: "ci/test_python_other.sh" conda-java-tests: - needs: conda-cpp-build + needs: [conda-cpp-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + if: needs.changed-files.outputs.test_java == 'true' with: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:latest" + container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" run_script: "ci/test_java.sh" static-configure: needs: checks secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: pull-request # Use the wheel container so we can skip conda solves and since our @@ -102,52 +187,71 @@ jobs: container_image: "rapidsai/ci-wheel:latest" run_script: "ci/configure_cpp_static.sh" conda-notebook-tests: - needs: conda-python-build + needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + if: needs.changed-files.outputs.test_notebooks == 'true' with: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:latest" + container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" run_script: "ci/test_notebooks.sh" docs-build: needs: conda-python-build secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:latest" + container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" run_script: "ci/build_docs.sh" - wheel-build-cudf: + wheel-build-libcudf: needs: checks secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + with: + # build for every combination of arch and CUDA version, but only for the latest Python + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + build_type: pull-request + script: "ci/build_wheel_libcudf.sh" + wheel-build-pylibcudf: + needs: [checks, wheel-build-libcudf] + secrets: inherit + uses: 
rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 + with: + build_type: pull-request + script: "ci/build_wheel_pylibcudf.sh" + wheel-build-cudf: + needs: wheel-build-pylibcudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: build_type: pull-request script: "ci/build_wheel_cudf.sh" wheel-tests-cudf: - needs: wheel-build-cudf + needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + if: needs.changed-files.outputs.test_python == 'true' with: build_type: pull-request script: ci/test_wheel_cudf.sh wheel-build-cudf-polars: - needs: wheel-build-cudf + needs: wheel-build-pylibcudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build_type: pull-request script: "ci/build_wheel_cudf_polars.sh" wheel-tests-cudf-polars: - needs: wheel-build-cudf-polars + needs: [wheel-build-cudf-polars, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + if: needs.changed-files.outputs.test_python == 'true' with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -158,7 +262,7 @@ jobs: cudf-polars-polars-tests: needs: wheel-build-cudf-polars secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -169,16 +273,17 @@ jobs: wheel-build-dask-cudf: needs: wheel-build-cudf secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build_type: pull-request script: "ci/build_wheel_dask_cudf.sh" wheel-tests-dask-cudf: - needs: wheel-build-dask-cudf + needs: [wheel-build-dask-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + if: needs.changed-files.outputs.test_python == 'true' with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -186,7 +291,7 @@ jobs: script: ci/test_wheel_dask_cudf.sh devcontainer: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.10 with: arch: '["amd64"]' cuda: '["12.5"]' @@ -195,20 +300,24 @@ jobs: build-all -DBUILD_BENCHMARKS=ON --verbose; sccache -s; unit-tests-cudf-pandas: - needs: wheel-build-cudf + needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + if: needs.changed-files.outputs.test_python == 'true' || needs.changed-files.outputs.test_cudf_pandas == 'true' with: + # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build_type: pull-request script: ci/cudf_pandas_scripts/run_tests.sh pandas-tests: # run the Pandas unit tests using PR branch - needs: wheel-build-cudf + needs: [wheel-build-cudf, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 + if: needs.changed-files.outputs.test_python == 'true' || needs.changed-files.outputs.test_cudf_pandas == 'true' with: - matrix_filter: map(select(.ARCH == "amd64" and .PY_VER == "3.9" and (.CUDA_VER | startswith("12.5.")) )) + # This selects "ARCH=amd64 + the latest supported Python + CUDA". 
+ matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) build_type: pull-request script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr # Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit. @@ -216,7 +325,7 @@ jobs: pandas-tests-diff: # diff the results of running the Pandas unit tests and publish a job summary needs: pandas-tests - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: node_type: cpu4 build_type: pull-request diff --git a/.github/workflows/pr_issue_status_automation.yml b/.github/workflows/pr_issue_status_automation.yml index 8ca971dc28d..45e5191eb54 100644 --- a/.github/workflows/pr_issue_status_automation.yml +++ b/.github/workflows/pr_issue_status_automation.yml @@ -23,7 +23,7 @@ on: jobs: get-project-id: - uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/project-get-item-id.yaml@branch-24.10 if: github.event.pull_request.state == 'open' secrets: inherit permissions: @@ -34,7 +34,7 @@ jobs: update-status: # This job sets the PR and its linked issues to "In Progress" status - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-24.10 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: get-project-id with: @@ -50,7 +50,7 @@ jobs: update-sprint: # This job sets the PR and its linked issues to the current "Weekly Sprint" - uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-24.08 + uses: 
rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-24.10 if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} needs: get-project-id with: diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 36c9088d93c..eb15c2931d2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -16,7 +16,7 @@ on: jobs: conda-cpp-checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-post-build-checks.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -25,7 +25,7 @@ jobs: enable_check_symbols: true conda-cpp-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -33,7 +33,7 @@ jobs: sha: ${{ inputs.sha }} conda-cpp-memcheck-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -41,11 +41,11 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:latest" + container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" run_script: "ci/test_cpp_memcheck.sh" static-configure: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: pull-request # Use the wheel container so we can skip conda solves and since our @@ -54,7 +54,7 @@ jobs: run_script: "ci/configure_cpp_static.sh" conda-python-cudf-tests: secrets: inherit - uses: 
rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -64,7 +64,7 @@ jobs: conda-python-other-tests: # Tests for dask_cudf, custreamz, cudf_kafka are separated for CI parallelism secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -73,7 +73,7 @@ jobs: script: "ci/test_python_other.sh" conda-java-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -81,11 +81,11 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:latest" + container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -93,11 +93,11 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:latest" + container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} @@ -106,7 +106,7 @@ jobs: script: ci/test_wheel_cudf.sh wheel-tests-dask-cudf: secrets: 
inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) @@ -117,10 +117,22 @@ jobs: script: ci/test_wheel_dask_cudf.sh unit-tests-cudf-pandas: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.10 with: build_type: nightly branch: ${{ inputs.branch }} date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/cudf_pandas_scripts/run_tests.sh + third-party-integration-tests-cudf-pandas: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + node_type: "gpu-v100-latest-1" + container_image: "rapidsai/ci-conda:latest" + run_script: | + ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml diff --git a/.gitignore b/.gitignore index c89fb49697a..180a6a286e2 100644 --- a/.gitignore +++ b/.gitignore @@ -79,6 +79,7 @@ Debug build/ cpp/build/ cpp/examples/*/install/ +cpp/examples/*/build/ cpp/include/cudf/ipc_generated/*.h cpp/thirdparty/googletest/ @@ -176,3 +177,7 @@ jupyter_execute # clang tooling compile_commands.json .clangd/ + +# pytest artifacts +rmm_log.txt +python/cudf/cudf_pandas_tests/data/rmm_log.txt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8c0c533cd8e..56469ce011e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -144,7 +144,7 @@ repos: - id: ruff-format files: python/.*$ - repo: 
https://github.com/rapidsai/pre-commit-hooks - rev: v0.2.0 + rev: v0.4.0 hooks: - id: verify-copyright exclude: | diff --git a/CHANGELOG.md b/CHANGELOG.md index cea4e1da28c..b9d37c25dad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,299 @@ +# cudf 24.10.00 (9 Oct 2024) + +## 🚨 Breaking Changes + +- Whitespace normalization of nested column coerced as string column in JSONL inputs ([#16759](https://github.com/rapidsai/cudf/pull/16759)) [@shrshi](https://github.com/shrshi) +- Add libcudf wrappers around current_device_resource functions. ([#16679](https://github.com/rapidsai/cudf/pull/16679)) [@harrism](https://github.com/harrism) +- Fix empty cluster handling in tdigest merge ([#16675](https://github.com/rapidsai/cudf/pull/16675)) [@jihoonson](https://github.com/jihoonson) +- Remove java ColumnView.copyWithBooleanColumnAsValidity ([#16660](https://github.com/rapidsai/cudf/pull/16660)) [@revans2](https://github.com/revans2) +- Support reading multiple PQ sources with mismatching nullability for columns ([#16639](https://github.com/rapidsai/cudf/pull/16639)) [@mhaseeb123](https://github.com/mhaseeb123) +- Remove arrow_io_source ([#16607](https://github.com/rapidsai/cudf/pull/16607)) [@vyasr](https://github.com/vyasr) +- Remove legacy Arrow interop APIs ([#16590](https://github.com/rapidsai/cudf/pull/16590)) [@vyasr](https://github.com/vyasr) +- Remove NativeFile support from cudf Python ([#16589](https://github.com/rapidsai/cudf/pull/16589)) [@vyasr](https://github.com/vyasr) +- Revert "Make proxy NumPy arrays pass isinstance check in `cudf.pandas`" ([#16586](https://github.com/rapidsai/cudf/pull/16586)) [@Matt711](https://github.com/Matt711) +- Align public utility function signatures with pandas 2.x ([#16565](https://github.com/rapidsai/cudf/pull/16565)) [@mroeschke](https://github.com/mroeschke) +- Disallow cudf.Index accepting column in favor of ._from_column ([#16549](https://github.com/rapidsai/cudf/pull/16549)) 
[@mroeschke](https://github.com/mroeschke) +- Refactor dictionary encoding in PQ writer to migrate to the new `cuco::static_map` ([#16541](https://github.com/rapidsai/cudf/pull/16541)) [@mhaseeb123](https://github.com/mhaseeb123) +- Change IPv4 convert APIs to support UINT32 instead of INT64 ([#16489](https://github.com/rapidsai/cudf/pull/16489)) [@davidwendt](https://github.com/davidwendt) +- enable list to be forced as string in JSON reader. ([#16472](https://github.com/rapidsai/cudf/pull/16472)) [@karthikeyann](https://github.com/karthikeyann) +- Disallow cudf.Series to accept column in favor of `._from_column` ([#16454](https://github.com/rapidsai/cudf/pull/16454)) [@mroeschke](https://github.com/mroeschke) +- Align groupby APIs with pandas 2.x ([#16403](https://github.com/rapidsai/cudf/pull/16403)) [@mroeschke](https://github.com/mroeschke) +- Align misc DataFrame and MultiIndex methods with pandas 2.x ([#16402](https://github.com/rapidsai/cudf/pull/16402)) [@mroeschke](https://github.com/mroeschke) +- Align Index APIs with pandas 2.x ([#16361](https://github.com/rapidsai/cudf/pull/16361)) [@mroeschke](https://github.com/mroeschke) +- Add `stream` param to stream compaction APIs ([#16295](https://github.com/rapidsai/cudf/pull/16295)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) + +## 🐛 Bug Fixes + +- Add license to the pylibcudf wheel ([#16976](https://github.com/rapidsai/cudf/pull/16976)) [@raydouglass](https://github.com/raydouglass) +- Parse newline as whitespace character while tokenizing JSONL inputs with non-newline delimiter ([#16950](https://github.com/rapidsai/cudf/pull/16950)) [@shrshi](https://github.com/shrshi) +- Add dask-cudf workaround for missing `rename_axis` support in cudf ([#16899](https://github.com/rapidsai/cudf/pull/16899)) [@rjzamora](https://github.com/rjzamora) +- Update oldest deps for `pyarrow` & `numpy` ([#16883](https://github.com/rapidsai/cudf/pull/16883)) [@galipremsagar](https://github.com/galipremsagar) +- Update 
labeler for pylibcudf ([#16868](https://github.com/rapidsai/cudf/pull/16868)) [@vyasr](https://github.com/vyasr) +- Revert "Refactor mixed_semi_join using cuco::static_set" ([#16855](https://github.com/rapidsai/cudf/pull/16855)) [@mhaseeb123](https://github.com/mhaseeb123) +- Fix metadata after implicit array conversion from Dask cuDF ([#16842](https://github.com/rapidsai/cudf/pull/16842)) [@rjzamora](https://github.com/rjzamora) +- Add cudf.pandas dependencies.yaml to update-version.sh ([#16840](https://github.com/rapidsai/cudf/pull/16840)) [@raydouglass](https://github.com/raydouglass) +- Use cupy 12.2.0 as oldest dependency pinning on CUDA 12 ARM ([#16808](https://github.com/rapidsai/cudf/pull/16808)) [@bdice](https://github.com/bdice) +- Revert "Fix empty cluster handling in tdigest merge (#16675)" ([#16800](https://github.com/rapidsai/cudf/pull/16800)) [@jihoonson](https://github.com/jihoonson) +- Intentionally leak thread_local CUDA resources to avoid crash (part 1) ([#16787](https://github.com/rapidsai/cudf/pull/16787)) [@kingcrimsontianyu](https://github.com/kingcrimsontianyu) +- Fix `cov`/`corr` bug in dask-cudf ([#16786](https://github.com/rapidsai/cudf/pull/16786)) [@rjzamora](https://github.com/rjzamora) +- Fix slice_strings wide strings logic with multi-byte characters ([#16777](https://github.com/rapidsai/cudf/pull/16777)) [@davidwendt](https://github.com/davidwendt) +- Fix nvbench output for sha512 ([#16773](https://github.com/rapidsai/cudf/pull/16773)) [@davidwendt](https://github.com/davidwendt) +- Allow read_csv(header=None) to return int column labels in `mode.pandas_compatible` ([#16769](https://github.com/rapidsai/cudf/pull/16769)) [@mroeschke](https://github.com/mroeschke) +- Whitespace normalization of nested column coerced as string column in JSONL inputs ([#16759](https://github.com/rapidsai/cudf/pull/16759)) [@shrshi](https://github.com/shrshi) +- Fix DataFrame.drop(columns=cudf.Series/Index, axis=1) 
([#16712](https://github.com/rapidsai/cudf/pull/16712)) [@mroeschke](https://github.com/mroeschke) +- Use merge base when calculating changed files ([#16709](https://github.com/rapidsai/cudf/pull/16709)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Ensure we pass the has_nulls tparam to mixed_join kernels ([#16708](https://github.com/rapidsai/cudf/pull/16708)) [@abellina](https://github.com/abellina) +- Add boost-devel to Java CI Docker image ([#16707](https://github.com/rapidsai/cudf/pull/16707)) [@jlowe](https://github.com/jlowe) +- [BUG] Add gpu node type to cudf-pandas 3rd-party integration nightly CI job ([#16704](https://github.com/rapidsai/cudf/pull/16704)) [@Matt711](https://github.com/Matt711) +- Fix typo in column_factories.hpp comment from 'depth 1' to 'depth 2' ([#16700](https://github.com/rapidsai/cudf/pull/16700)) [@a-hirota](https://github.com/a-hirota) +- Fix Series.to_frame(name=None) setting a None name ([#16698](https://github.com/rapidsai/cudf/pull/16698)) [@mroeschke](https://github.com/mroeschke) +- Disable gtests/ERROR_TEST during compute-sanitizer memcheck test ([#16691](https://github.com/rapidsai/cudf/pull/16691)) [@davidwendt](https://github.com/davidwendt) +- Enable batched multi-source reading of JSONL files with large records ([#16687](https://github.com/rapidsai/cudf/pull/16687)) [@shrshi](https://github.com/shrshi) +- Handle `ordered` parameter in `CategoricalIndex.__repr__` ([#16683](https://github.com/rapidsai/cudf/pull/16683)) [@galipremsagar](https://github.com/galipremsagar) +- Fix loc/iloc.__setitem__[:, loc] with non cupy types ([#16677](https://github.com/rapidsai/cudf/pull/16677)) [@mroeschke](https://github.com/mroeschke) +- Fix empty cluster handling in tdigest merge ([#16675](https://github.com/rapidsai/cudf/pull/16675)) [@jihoonson](https://github.com/jihoonson) +- Fix `cudf::rank` not getting enough params ([#16666](https://github.com/rapidsai/cudf/pull/16666)) 
[@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Fix slowdown in `CategoricalIndex.__repr__` ([#16665](https://github.com/rapidsai/cudf/pull/16665)) [@galipremsagar](https://github.com/galipremsagar) +- Remove java ColumnView.copyWithBooleanColumnAsValidity ([#16660](https://github.com/rapidsai/cudf/pull/16660)) [@revans2](https://github.com/revans2) +- Fix slowdown in DataFrame repr in jupyter notebook ([#16656](https://github.com/rapidsai/cudf/pull/16656)) [@galipremsagar](https://github.com/galipremsagar) +- Preserve Series name in duplicated method. ([#16655](https://github.com/rapidsai/cudf/pull/16655)) [@bdice](https://github.com/bdice) +- Fix interval_range right child non-zero offset ([#16651](https://github.com/rapidsai/cudf/pull/16651)) [@mroeschke](https://github.com/mroeschke) +- fix libcudf wheel publishing, make package-type explicit in wheel publishing ([#16650](https://github.com/rapidsai/cudf/pull/16650)) [@jameslamb](https://github.com/jameslamb) +- Revert "Hide all gtest symbols in cudftestutil (#16546)" ([#16644](https://github.com/rapidsai/cudf/pull/16644)) [@robertmaynard](https://github.com/robertmaynard) +- Fix integer overflow in indexalator pointer logic ([#16643](https://github.com/rapidsai/cudf/pull/16643)) [@davidwendt](https://github.com/davidwendt) +- Allow for binops between two differently sized DecimalDtypes ([#16638](https://github.com/rapidsai/cudf/pull/16638)) [@mroeschke](https://github.com/mroeschke) +- Move pragma once in rolling/jit/operation.hpp. 
([#16636](https://github.com/rapidsai/cudf/pull/16636)) [@bdice](https://github.com/bdice) +- Fix overflow bug in low-memory JSON reader ([#16632](https://github.com/rapidsai/cudf/pull/16632)) [@shrshi](https://github.com/shrshi) +- Add the missing `num_aggregations` axis for `groupby_max_cardinality` ([#16630](https://github.com/rapidsai/cudf/pull/16630)) [@PointKernel](https://github.com/PointKernel) +- Fix strings::detail::copy_range when target contains nulls ([#16626](https://github.com/rapidsai/cudf/pull/16626)) [@davidwendt](https://github.com/davidwendt) +- Fix function parameters with common dependency modified during their evaluation ([#16620](https://github.com/rapidsai/cudf/pull/16620)) [@ttnghia](https://github.com/ttnghia) +- bug-fix: Don't enable the CUDA language if testing was requested when finding cudf ([#16615](https://github.com/rapidsai/cudf/pull/16615)) [@cryos](https://github.com/cryos) +- bug-fix: cudf/io/json.hpp use after move ([#16609](https://github.com/rapidsai/cudf/pull/16609)) [@NicolasDenoyelle](https://github.com/NicolasDenoyelle) +- Remove CUDA whole compilation ODR violations ([#16603](https://github.com/rapidsai/cudf/pull/16603)) [@robertmaynard](https://github.com/robertmaynard) +- MAINT: Adapt to numpy hiding flagsobject away ([#16593](https://github.com/rapidsai/cudf/pull/16593)) [@seberg](https://github.com/seberg) +- Revert "Make proxy NumPy arrays pass isinstance check in `cudf.pandas`" ([#16586](https://github.com/rapidsai/cudf/pull/16586)) [@Matt711](https://github.com/Matt711) +- Switch python version to `3.10` in `cudf.pandas` pandas test scripts ([#16559](https://github.com/rapidsai/cudf/pull/16559)) [@galipremsagar](https://github.com/galipremsagar) +- Hide all gtest symbols in cudftestutil ([#16546](https://github.com/rapidsai/cudf/pull/16546)) [@robertmaynard](https://github.com/robertmaynard) +- Update the java code to properly deal with lists being returned as strings 
([#16536](https://github.com/rapidsai/cudf/pull/16536)) [@revans2](https://github.com/revans2) +- Register `read_parquet` and `read_csv` with dask-expr ([#16535](https://github.com/rapidsai/cudf/pull/16535)) [@rjzamora](https://github.com/rjzamora) +- Change cudf::empty_like to not include offsets for empty strings columns ([#16529](https://github.com/rapidsai/cudf/pull/16529)) [@davidwendt](https://github.com/davidwendt) +- Fix DataFrame reductions with median returning scalar instead of Series ([#16527](https://github.com/rapidsai/cudf/pull/16527)) [@mroeschke](https://github.com/mroeschke) +- Allow DataFrame.sort_values(by=) to select an index level ([#16519](https://github.com/rapidsai/cudf/pull/16519)) [@mroeschke](https://github.com/mroeschke) +- Fix `date_range(start, end, freq)` when end-start is divisible by freq ([#16516](https://github.com/rapidsai/cudf/pull/16516)) [@mroeschke](https://github.com/mroeschke) +- Preserve array name in MultiIndex.from_arrays ([#16515](https://github.com/rapidsai/cudf/pull/16515)) [@mroeschke](https://github.com/mroeschke) +- Disallow indexing by selecting duplicate labels ([#16514](https://github.com/rapidsai/cudf/pull/16514)) [@mroeschke](https://github.com/mroeschke) +- Fix `.replace(Index, Index)` raising a TypeError ([#16513](https://github.com/rapidsai/cudf/pull/16513)) [@mroeschke](https://github.com/mroeschke) +- Check index bounds in compact protocol reader. 
([#16493](https://github.com/rapidsai/cudf/pull/16493)) [@bdice](https://github.com/bdice) +- Fix build failures with GCC 13 ([#16488](https://github.com/rapidsai/cudf/pull/16488)) [@PointKernel](https://github.com/PointKernel) +- Fix all-empty input column for strings split APIs ([#16466](https://github.com/rapidsai/cudf/pull/16466)) [@davidwendt](https://github.com/davidwendt) +- Fix segmented-sort overlapped input/output indices ([#16463](https://github.com/rapidsai/cudf/pull/16463)) [@davidwendt](https://github.com/davidwendt) +- Fix merge conflict for auto merge 16447 ([#16449](https://github.com/rapidsai/cudf/pull/16449)) [@davidwendt](https://github.com/davidwendt) + +## 📖 Documentation + +- Fix links in Dask cuDF documentation ([#16929](https://github.com/rapidsai/cudf/pull/16929)) [@rjzamora](https://github.com/rjzamora) +- Improve aggregation documentation ([#16822](https://github.com/rapidsai/cudf/pull/16822)) [@PointKernel](https://github.com/PointKernel) +- Add best practices page to Dask cuDF docs ([#16821](https://github.com/rapidsai/cudf/pull/16821)) [@rjzamora](https://github.com/rjzamora) +- [DOC] Update Pylibcudf doc strings ([#16810](https://github.com/rapidsai/cudf/pull/16810)) [@Matt711](https://github.com/Matt711) +- Recommending `miniforge` for conda install ([#16782](https://github.com/rapidsai/cudf/pull/16782)) [@mmccarty](https://github.com/mmccarty) +- Add labeling pylibcudf doc pages ([#16779](https://github.com/rapidsai/cudf/pull/16779)) [@mroeschke](https://github.com/mroeschke) +- Migrate dask-cudf README improvements to dask-cudf sphinx docs ([#16765](https://github.com/rapidsai/cudf/pull/16765)) [@rjzamora](https://github.com/rjzamora) +- [DOC] Remove out of date section from cudf.pandas docs ([#16697](https://github.com/rapidsai/cudf/pull/16697)) [@Matt711](https://github.com/Matt711) +- Add performance tips to cudf.pandas FAQ. 
([#16693](https://github.com/rapidsai/cudf/pull/16693)) [@bdice](https://github.com/bdice) +- Update documentation for Dask cuDF ([#16671](https://github.com/rapidsai/cudf/pull/16671)) [@rjzamora](https://github.com/rjzamora) +- Add missing pylibcudf strings docs ([#16471](https://github.com/rapidsai/cudf/pull/16471)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- DOC: Refresh pylibcudf guide ([#15856](https://github.com/rapidsai/cudf/pull/15856)) [@lithomas1](https://github.com/lithomas1) + +## 🚀 New Features + +- Build `cudf-polars` with `build.sh` ([#16898](https://github.com/rapidsai/cudf/pull/16898)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add polars to "all" dependency list. ([#16875](https://github.com/rapidsai/cudf/pull/16875)) [@bdice](https://github.com/bdice) +- nvCOMP GZIP integration ([#16770](https://github.com/rapidsai/cudf/pull/16770)) [@vuule](https://github.com/vuule) +- [FEA] Add support for `cudf.NamedAgg` ([#16744](https://github.com/rapidsai/cudf/pull/16744)) [@Matt711](https://github.com/Matt711) +- Add experimental `filesystem="arrow"` support in `dask_cudf.read_parquet` ([#16684](https://github.com/rapidsai/cudf/pull/16684)) [@rjzamora](https://github.com/rjzamora) +- Relax Arrow pin ([#16681](https://github.com/rapidsai/cudf/pull/16681)) [@vyasr](https://github.com/vyasr) +- Add libcudf wrappers around current_device_resource functions. 
([#16679](https://github.com/rapidsai/cudf/pull/16679)) [@harrism](https://github.com/harrism) +- Move NDS-H examples into benchmarks ([#16663](https://github.com/rapidsai/cudf/pull/16663)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- [FEA] Add third-party library integration testing of cudf.pandas to cudf ([#16645](https://github.com/rapidsai/cudf/pull/16645)) [@Matt711](https://github.com/Matt711) +- Make isinstance check pass for proxy ndarrays ([#16601](https://github.com/rapidsai/cudf/pull/16601)) [@Matt711](https://github.com/Matt711) +- [FEA] Add an environment variable to fail on fallback in `cudf.pandas` ([#16562](https://github.com/rapidsai/cudf/pull/16562)) [@Matt711](https://github.com/Matt711) +- [FEA] Add support for `cudf.unique` ([#16554](https://github.com/rapidsai/cudf/pull/16554)) [@Matt711](https://github.com/Matt711) +- [FEA] Support named aggregations in `df.groupby().agg()` ([#16528](https://github.com/rapidsai/cudf/pull/16528)) [@Matt711](https://github.com/Matt711) +- Change IPv4 convert APIs to support UINT32 instead of INT64 ([#16489](https://github.com/rapidsai/cudf/pull/16489)) [@davidwendt](https://github.com/davidwendt) +- enable list to be forced as string in JSON reader. 
([#16472](https://github.com/rapidsai/cudf/pull/16472)) [@karthikeyann](https://github.com/karthikeyann) +- Remove cuDF dependency from pylibcudf column from_device tests ([#16441](https://github.com/rapidsai/cudf/pull/16441)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Enable cudf.pandas REPL and -c command support ([#16428](https://github.com/rapidsai/cudf/pull/16428)) [@bdice](https://github.com/bdice) +- Setup pylibcudf package ([#16299](https://github.com/rapidsai/cudf/pull/16299)) [@lithomas1](https://github.com/lithomas1) +- Add a libcudf/thrust-based TPC-H derived datagen ([#16294](https://github.com/rapidsai/cudf/pull/16294)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Make proxy NumPy arrays pass isinstance check in `cudf.pandas` ([#16286](https://github.com/rapidsai/cudf/pull/16286)) [@Matt711](https://github.com/Matt711) +- Add skiprows and nrows to parquet reader ([#16214](https://github.com/rapidsai/cudf/pull/16214)) [@lithomas1](https://github.com/lithomas1) +- Upgrade to nvcomp 4.0.1 ([#16076](https://github.com/rapidsai/cudf/pull/16076)) [@vuule](https://github.com/vuule) +- Migrate ORC reader to pylibcudf ([#16042](https://github.com/rapidsai/cudf/pull/16042)) [@lithomas1](https://github.com/lithomas1) +- JSON reader validation of values ([#15968](https://github.com/rapidsai/cudf/pull/15968)) [@karthikeyann](https://github.com/karthikeyann) +- Implement exposed null mask APIs in pylibcudf ([#15908](https://github.com/rapidsai/cudf/pull/15908)) [@charlesbluca](https://github.com/charlesbluca) +- Word-based nvtext::minhash function ([#15368](https://github.com/rapidsai/cudf/pull/15368)) [@davidwendt](https://github.com/davidwendt) + +## 🛠️ Improvements + +- Make tests deterministic ([#16910](https://github.com/rapidsai/cudf/pull/16910)) [@galipremsagar](https://github.com/galipremsagar) +- Update update-version.sh to use packaging lib ([#16891](https://github.com/rapidsai/cudf/pull/16891)) 
[@AyodeAwe](https://github.com/AyodeAwe) +- Pin polars for 24.10 and update polars test suite xfail list ([#16886](https://github.com/rapidsai/cudf/pull/16886)) [@wence-](https://github.com/wence-) +- Add in support for setting delim when parsing JSON through java (#16867) ([#16880](https://github.com/rapidsai/cudf/pull/16880)) [@revans2](https://github.com/revans2) +- Remove unnecessary flag from build.sh ([#16879](https://github.com/rapidsai/cudf/pull/16879)) [@vyasr](https://github.com/vyasr) +- Ignore numba warning specific to ARM runners ([#16872](https://github.com/rapidsai/cudf/pull/16872)) [@galipremsagar](https://github.com/galipremsagar) +- Display deltas for `cudf.pandas` test summary ([#16864](https://github.com/rapidsai/cudf/pull/16864)) [@galipremsagar](https://github.com/galipremsagar) +- Switch to using native `traceback` ([#16851](https://github.com/rapidsai/cudf/pull/16851)) [@galipremsagar](https://github.com/galipremsagar) +- JSON tree algorithm code reorg ([#16836](https://github.com/rapidsai/cudf/pull/16836)) [@karthikeyann](https://github.com/karthikeyann) +- Add string.repeats API to pylibcudf ([#16834](https://github.com/rapidsai/cudf/pull/16834)) [@mroeschke](https://github.com/mroeschke) +- Use CI workflow branch 'branch-24.10' again ([#16832](https://github.com/rapidsai/cudf/pull/16832)) [@jameslamb](https://github.com/jameslamb) +- Rename the NDS-H benchmark binaries ([#16831](https://github.com/rapidsai/cudf/pull/16831)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Add string.findall APIs to pylibcudf ([#16825](https://github.com/rapidsai/cudf/pull/16825)) [@mroeschke](https://github.com/mroeschke) +- Add string.extract APIs to pylibcudf ([#16823](https://github.com/rapidsai/cudf/pull/16823)) [@mroeschke](https://github.com/mroeschke) +- use get-pr-info from nv-gha-runners ([#16819](https://github.com/rapidsai/cudf/pull/16819)) [@AyodeAwe](https://github.com/AyodeAwe) +- Add string.contains APIs to pylibcudf 
([#16814](https://github.com/rapidsai/cudf/pull/16814)) [@mroeschke](https://github.com/mroeschke) +- Forward-merge branch-24.08 to branch-24.10 ([#16813](https://github.com/rapidsai/cudf/pull/16813)) [@bdice](https://github.com/bdice) +- Add io_type axis with default `PINNED_BUFFER` to nvbench PQ multithreaded reader ([#16809](https://github.com/rapidsai/cudf/pull/16809)) [@mhaseeb123](https://github.com/mhaseeb123) +- Update fmt (to 11.0.2) and spdlog (to 1.14.1). ([#16806](https://github.com/rapidsai/cudf/pull/16806)) [@jameslamb](https://github.com/jameslamb) +- Add ability to set parquet row group max #rows and #bytes in java ([#16805](https://github.com/rapidsai/cudf/pull/16805)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) +- Add in option for Java JSON APIs to do column pruning in CUDF ([#16796](https://github.com/rapidsai/cudf/pull/16796)) [@revans2](https://github.com/revans2) +- Support drop_first in get_dummies ([#16795](https://github.com/rapidsai/cudf/pull/16795)) [@mroeschke](https://github.com/mroeschke) +- Exposed stream-ordering to join API ([#16793](https://github.com/rapidsai/cudf/pull/16793)) [@lamarrr](https://github.com/lamarrr) +- Add string.attributes APIs to pylibcudf ([#16785](https://github.com/rapidsai/cudf/pull/16785)) [@mroeschke](https://github.com/mroeschke) +- Java: Make ColumnVector.fromViewWithContiguousAllocation public ([#16784](https://github.com/rapidsai/cudf/pull/16784)) [@jlowe](https://github.com/jlowe) +- Add partitioning APIs to pylibcudf ([#16781](https://github.com/rapidsai/cudf/pull/16781)) [@mroeschke](https://github.com/mroeschke) +- Optimization of tdigest merge aggregation. 
([#16780](https://github.com/rapidsai/cudf/pull/16780)) [@nvdbaranec](https://github.com/nvdbaranec) +- use libkvikio wheels in wheel builds ([#16778](https://github.com/rapidsai/cudf/pull/16778)) [@jameslamb](https://github.com/jameslamb) +- Exposed stream-ordering to datetime API ([#16774](https://github.com/rapidsai/cudf/pull/16774)) [@lamarrr](https://github.com/lamarrr) +- Add io/timezone APIs to pylibcudf ([#16771](https://github.com/rapidsai/cudf/pull/16771)) [@mroeschke](https://github.com/mroeschke) +- Remove `MultiIndex._poplevel` inplace implementation. ([#16767](https://github.com/rapidsai/cudf/pull/16767)) [@mroeschke](https://github.com/mroeschke) +- allow pandas patch version to float in cudf-pandas unit tests ([#16763](https://github.com/rapidsai/cudf/pull/16763)) [@jameslamb](https://github.com/jameslamb) +- Simplify the nvCOMP adapter ([#16762](https://github.com/rapidsai/cudf/pull/16762)) [@vuule](https://github.com/vuule) +- Add labeling APIs to pylibcudf ([#16761](https://github.com/rapidsai/cudf/pull/16761)) [@mroeschke](https://github.com/mroeschke) +- Add transform APIs to pylibcudf ([#16760](https://github.com/rapidsai/cudf/pull/16760)) [@mroeschke](https://github.com/mroeschke) +- Add a benchmark to study Parquet reader's performance for wide tables ([#16751](https://github.com/rapidsai/cudf/pull/16751)) [@mhaseeb123](https://github.com/mhaseeb123) +- Change the Parquet writer's `default_row_group_size_bytes` from 128MB to inf ([#16750](https://github.com/rapidsai/cudf/pull/16750)) [@mhaseeb123](https://github.com/mhaseeb123) +- Add transpose API to pylibcudf ([#16749](https://github.com/rapidsai/cudf/pull/16749)) [@mroeschke](https://github.com/mroeschke) +- Add support for Python 3.12, update Kafka dependencies to 2.5.x ([#16745](https://github.com/rapidsai/cudf/pull/16745)) [@jameslamb](https://github.com/jameslamb) +- Generate GPU vs CPU usage metrics per pytest file in pandas testsuite for `cudf.pandas` 
([#16739](https://github.com/rapidsai/cudf/pull/16739)) [@galipremsagar](https://github.com/galipremsagar) +- Refactor cudf pandas integration tests CI ([#16728](https://github.com/rapidsai/cudf/pull/16728)) [@Matt711](https://github.com/Matt711) +- Remove ERROR_TEST gtest from libcudf ([#16722](https://github.com/rapidsai/cudf/pull/16722)) [@davidwendt](https://github.com/davidwendt) +- Use Series._from_column more consistently to avoid validation ([#16716](https://github.com/rapidsai/cudf/pull/16716)) [@mroeschke](https://github.com/mroeschke) +- remove some unnecessary libcudf nightly builds ([#16714](https://github.com/rapidsai/cudf/pull/16714)) [@jameslamb](https://github.com/jameslamb) +- Remove xfail from torch-cudf.pandas integration test ([#16705](https://github.com/rapidsai/cudf/pull/16705)) [@Matt711](https://github.com/Matt711) +- Add return type annotations to MultiIndex ([#16696](https://github.com/rapidsai/cudf/pull/16696)) [@mroeschke](https://github.com/mroeschke) +- Add type annotations to Index classes, utilize _from_column more ([#16695](https://github.com/rapidsai/cudf/pull/16695)) [@mroeschke](https://github.com/mroeschke) +- Have interval_range use IntervalIndex.from_breaks, remove column_empty_same_mask ([#16694](https://github.com/rapidsai/cudf/pull/16694)) [@mroeschke](https://github.com/mroeschke) +- Increase timeouts for couple of tests ([#16692](https://github.com/rapidsai/cudf/pull/16692)) [@galipremsagar](https://github.com/galipremsagar) +- Replace raw device_memory_resource pointer in pylibcudf Cython ([#16674](https://github.com/rapidsai/cudf/pull/16674)) [@harrism](https://github.com/harrism) +- switch from typing.Callable to collections.abc.Callable ([#16670](https://github.com/rapidsai/cudf/pull/16670)) [@jameslamb](https://github.com/jameslamb) +- Update rapidsai/pre-commit-hooks ([#16669](https://github.com/rapidsai/cudf/pull/16669)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Multi-file and Parquet-aware 
prefetching from remote storage ([#16657](https://github.com/rapidsai/cudf/pull/16657)) [@rjzamora](https://github.com/rjzamora) +- Access Frame attributes instead of ColumnAccessor attributes when available ([#16652](https://github.com/rapidsai/cudf/pull/16652)) [@mroeschke](https://github.com/mroeschke) +- Use non-mangled type names in nvbench output ([#16649](https://github.com/rapidsai/cudf/pull/16649)) [@davidwendt](https://github.com/davidwendt) +- Add pylibcudf build dir in build.sh for `clean` ([#16648](https://github.com/rapidsai/cudf/pull/16648)) [@galipremsagar](https://github.com/galipremsagar) +- Prune workflows based on changed files ([#16642](https://github.com/rapidsai/cudf/pull/16642)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Remove arrow dependency ([#16640](https://github.com/rapidsai/cudf/pull/16640)) [@vyasr](https://github.com/vyasr) +- Support reading multiple PQ sources with mismatching nullability for columns ([#16639](https://github.com/rapidsai/cudf/pull/16639)) [@mhaseeb123](https://github.com/mhaseeb123) +- Drop Python 3.9 support ([#16637](https://github.com/rapidsai/cudf/pull/16637)) [@jameslamb](https://github.com/jameslamb) +- Support DecimalDtype meta in dask_cudf ([#16634](https://github.com/rapidsai/cudf/pull/16634)) [@mroeschke](https://github.com/mroeschke) +- Add `num_multiprocessors` utility ([#16628](https://github.com/rapidsai/cudf/pull/16628)) [@PointKernel](https://github.com/PointKernel) +- Annotate `ColumnAccessor._data` labels as `Hashable` ([#16623](https://github.com/rapidsai/cudf/pull/16623)) [@mroeschke](https://github.com/mroeschke) +- Remove build_categorical_column in favor of CategoricalColumn constructor ([#16617](https://github.com/rapidsai/cudf/pull/16617)) [@mroeschke](https://github.com/mroeschke) +- Move apply_boolean_mask benchmark to nvbench ([#16616](https://github.com/rapidsai/cudf/pull/16616)) [@davidwendt](https://github.com/davidwendt) +- Revise `get_reader_filepath_or_buffer` to 
handle a list of data sources ([#16613](https://github.com/rapidsai/cudf/pull/16613)) [@rjzamora](https://github.com/rjzamora) +- do not install cudf in cudf_polars wheel tests ([#16612](https://github.com/rapidsai/cudf/pull/16612)) [@jameslamb](https://github.com/jameslamb) +- remove streamz git dependency, standardize build dependency names, consolidate some dependency lists ([#16611](https://github.com/rapidsai/cudf/pull/16611)) [@jameslamb](https://github.com/jameslamb) +- Fix C++ and Cython io types ([#16610](https://github.com/rapidsai/cudf/pull/16610)) [@vyasr](https://github.com/vyasr) +- Remove arrow_io_source ([#16607](https://github.com/rapidsai/cudf/pull/16607)) [@vyasr](https://github.com/vyasr) +- Remove thrust::optional from expression evaluator ([#16604](https://github.com/rapidsai/cudf/pull/16604)) [@bdice](https://github.com/bdice) +- Add stricter typing and validation to ColumnAccessor ([#16602](https://github.com/rapidsai/cudf/pull/16602)) [@mroeschke](https://github.com/mroeschke) +- make more use of YAML anchors in dependencies.yaml ([#16597](https://github.com/rapidsai/cudf/pull/16597)) [@jameslamb](https://github.com/jameslamb) +- Enable testing `cudf.pandas` unit tests for all minor versions of pandas ([#16595](https://github.com/rapidsai/cudf/pull/16595)) [@galipremsagar](https://github.com/galipremsagar) +- Extend the Parquet writer's dictionary encoding benchmark. 
([#16591](https://github.com/rapidsai/cudf/pull/16591)) [@mhaseeb123](https://github.com/mhaseeb123) +- Remove legacy Arrow interop APIs ([#16590](https://github.com/rapidsai/cudf/pull/16590)) [@vyasr](https://github.com/vyasr) +- Remove NativeFile support from cudf Python ([#16589](https://github.com/rapidsai/cudf/pull/16589)) [@vyasr](https://github.com/vyasr) +- Add build job for pylibcudf ([#16587](https://github.com/rapidsai/cudf/pull/16587)) [@vyasr](https://github.com/vyasr) +- Add `public` qualifier for some member functions in Java class `Schema` ([#16583](https://github.com/rapidsai/cudf/pull/16583)) [@ttnghia](https://github.com/ttnghia) +- Enable gtests previously disabled for compute-sanitizer bug ([#16581](https://github.com/rapidsai/cudf/pull/16581)) [@davidwendt](https://github.com/davidwendt) +- [FEA] Add filesystem argument to `cudf.read_parquet` ([#16577](https://github.com/rapidsai/cudf/pull/16577)) [@rjzamora](https://github.com/rjzamora) +- Ensure size is always passed to NumericalColumn ([#16576](https://github.com/rapidsai/cudf/pull/16576)) [@mroeschke](https://github.com/mroeschke) +- standardize and consolidate wheel installations in testing scripts ([#16575](https://github.com/rapidsai/cudf/pull/16575)) [@jameslamb](https://github.com/jameslamb) +- Performance improvement for strings::slice for wide strings ([#16574](https://github.com/rapidsai/cudf/pull/16574)) [@davidwendt](https://github.com/davidwendt) +- Add `ToCudfBackend` expression to dask-cudf ([#16573](https://github.com/rapidsai/cudf/pull/16573)) [@rjzamora](https://github.com/rjzamora) +- CI: Test against old versions of key dependencies ([#16570](https://github.com/rapidsai/cudf/pull/16570)) [@seberg](https://github.com/seberg) +- Replace `NativeFile` dependency in dask-cudf Parquet reader ([#16569](https://github.com/rapidsai/cudf/pull/16569)) [@rjzamora](https://github.com/rjzamora) +- Align public utility function signatures with pandas 2.x 
([#16565](https://github.com/rapidsai/cudf/pull/16565)) [@mroeschke](https://github.com/mroeschke) +- Move libcudf reduction google-benchmarks to nvbench ([#16564](https://github.com/rapidsai/cudf/pull/16564)) [@davidwendt](https://github.com/davidwendt) +- Rework strings::slice benchmark to use nvbench ([#16563](https://github.com/rapidsai/cudf/pull/16563)) [@davidwendt](https://github.com/davidwendt) +- Reenable arrow tests ([#16556](https://github.com/rapidsai/cudf/pull/16556)) [@vyasr](https://github.com/vyasr) +- Clean up reshaping ops ([#16553](https://github.com/rapidsai/cudf/pull/16553)) [@mroeschke](https://github.com/mroeschke) +- Disallow cudf.Index accepting column in favor of ._from_column ([#16549](https://github.com/rapidsai/cudf/pull/16549)) [@mroeschke](https://github.com/mroeschke) +- Rewrite remaining Python Arrow interop conversions using the C Data Interface ([#16548](https://github.com/rapidsai/cudf/pull/16548)) [@vyasr](https://github.com/vyasr) +- [REVIEW] JSON host tree algorithms ([#16545](https://github.com/rapidsai/cudf/pull/16545)) [@shrshi](https://github.com/shrshi) +- Refactor dictionary encoding in PQ writer to migrate to the new `cuco::static_map` ([#16541](https://github.com/rapidsai/cudf/pull/16541)) [@mhaseeb123](https://github.com/mhaseeb123) +- Remove hardcoded versions from workflows. 
([#16540](https://github.com/rapidsai/cudf/pull/16540)) [@bdice](https://github.com/bdice) +- Ensure comparisons with pyints and integer series always succeed ([#16532](https://github.com/rapidsai/cudf/pull/16532)) [@seberg](https://github.com/seberg) +- Remove unneeded output size parameter from internal count_matches utility ([#16531](https://github.com/rapidsai/cudf/pull/16531)) [@davidwendt](https://github.com/davidwendt) +- Remove invalid column_view usage in string-scalar-to-column function ([#16530](https://github.com/rapidsai/cudf/pull/16530)) [@davidwendt](https://github.com/davidwendt) +- Raise NotImplementedError for Series.rename that's not a scalar ([#16525](https://github.com/rapidsai/cudf/pull/16525)) [@mroeschke](https://github.com/mroeschke) +- Remove deprecated public APIs from libcudf ([#16524](https://github.com/rapidsai/cudf/pull/16524)) [@davidwendt](https://github.com/davidwendt) +- Return Interval object in pandas compat mode for IntervalIndex reductions ([#16523](https://github.com/rapidsai/cudf/pull/16523)) [@mroeschke](https://github.com/mroeschke) +- Update json normalization to take device_buffer ([#16520](https://github.com/rapidsai/cudf/pull/16520)) [@karthikeyann](https://github.com/karthikeyann) +- Rework cudf::io::text::byte_range_info class member functions ([#16518](https://github.com/rapidsai/cudf/pull/16518)) [@davidwendt](https://github.com/davidwendt) +- Remove unneeded pair-iterator benchmark ([#16511](https://github.com/rapidsai/cudf/pull/16511)) [@davidwendt](https://github.com/davidwendt) +- Update pre-commit hooks ([#16510](https://github.com/rapidsai/cudf/pull/16510)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Improve update-version.sh ([#16506](https://github.com/rapidsai/cudf/pull/16506)) [@bdice](https://github.com/bdice) +- Use tool.scikit-build.cmake.version, set scikit-build-core minimum-version ([#16503](https://github.com/rapidsai/cudf/pull/16503)) [@jameslamb](https://github.com/jameslamb) +- Pass 
batch size to JSON reader using environment variable ([#16502](https://github.com/rapidsai/cudf/pull/16502)) [@shrshi](https://github.com/shrshi) +- Remove a deprecated multibyte_split API ([#16501](https://github.com/rapidsai/cudf/pull/16501)) [@davidwendt](https://github.com/davidwendt) +- Add interop example for `arrow::StringViewArray` to `cudf::column` ([#16498](https://github.com/rapidsai/cudf/pull/16498)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Add keep option to distinct nvbench ([#16497](https://github.com/rapidsai/cudf/pull/16497)) [@bdice](https://github.com/bdice) +- Use more idomatic cudf APIs in dask_cudf meta generation ([#16487](https://github.com/rapidsai/cudf/pull/16487)) [@mroeschke](https://github.com/mroeschke) +- Fix typo in dispatch_row_equal. ([#16473](https://github.com/rapidsai/cudf/pull/16473)) [@bdice](https://github.com/bdice) +- Use explicit construction of column subclass instead of `build_column` when type is known ([#16470](https://github.com/rapidsai/cudf/pull/16470)) [@mroeschke](https://github.com/mroeschke) +- Move exception handler into pylibcudf from cudf ([#16468](https://github.com/rapidsai/cudf/pull/16468)) [@lithomas1](https://github.com/lithomas1) +- Make StructColumn.__init__ strict ([#16467](https://github.com/rapidsai/cudf/pull/16467)) [@mroeschke](https://github.com/mroeschke) +- Make ListColumn.__init__ strict ([#16465](https://github.com/rapidsai/cudf/pull/16465)) [@mroeschke](https://github.com/mroeschke) +- Make Timedelta/DatetimeColumn.__init__ strict ([#16464](https://github.com/rapidsai/cudf/pull/16464)) [@mroeschke](https://github.com/mroeschke) +- Make NumericalColumn.__init__ strict ([#16457](https://github.com/rapidsai/cudf/pull/16457)) [@mroeschke](https://github.com/mroeschke) +- Make CategoricalColumn.__init__ strict ([#16456](https://github.com/rapidsai/cudf/pull/16456)) [@mroeschke](https://github.com/mroeschke) +- Disallow cudf.Series to accept column in favor of `._from_column` 
([#16454](https://github.com/rapidsai/cudf/pull/16454)) [@mroeschke](https://github.com/mroeschke) +- Expose `stream` param in transform APIs ([#16452](https://github.com/rapidsai/cudf/pull/16452)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Add upper bound pin for polars ([#16442](https://github.com/rapidsai/cudf/pull/16442)) [@wence-](https://github.com/wence-) +- Make (Indexed)Frame.__init__ require data (and index) ([#16430](https://github.com/rapidsai/cudf/pull/16430)) [@mroeschke](https://github.com/mroeschke) +- Add Java APIs to copy column data to host asynchronously ([#16429](https://github.com/rapidsai/cudf/pull/16429)) [@jlowe](https://github.com/jlowe) +- Update docs of the TPC-H derived examples ([#16423](https://github.com/rapidsai/cudf/pull/16423)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Use RMM adaptor constructors instead of factories. ([#16414](https://github.com/rapidsai/cudf/pull/16414)) [@bdice](https://github.com/bdice) +- Align ewm APIs with pandas 2.x ([#16413](https://github.com/rapidsai/cudf/pull/16413)) [@mroeschke](https://github.com/mroeschke) +- Remove checking for specific tests in memcheck script ([#16412](https://github.com/rapidsai/cudf/pull/16412)) [@davidwendt](https://github.com/davidwendt) +- Add stream parameter to reshape APIs ([#16410](https://github.com/rapidsai/cudf/pull/16410)) [@davidwendt](https://github.com/davidwendt) +- Align groupby APIs with pandas 2.x ([#16403](https://github.com/rapidsai/cudf/pull/16403)) [@mroeschke](https://github.com/mroeschke) +- Align misc DataFrame and MultiIndex methods with pandas 2.x ([#16402](https://github.com/rapidsai/cudf/pull/16402)) [@mroeschke](https://github.com/mroeschke) +- update some branch references in GitHub Actions configs ([#16397](https://github.com/rapidsai/cudf/pull/16397)) [@jameslamb](https://github.com/jameslamb) +- Support reading matching projected and filter cols from Parquet files with otherwise mismatched schemas 
([#16394](https://github.com/rapidsai/cudf/pull/16394)) [@mhaseeb123](https://github.com/mhaseeb123) +- Merge branch-24.08 into branch-24.10 ([#16393](https://github.com/rapidsai/cudf/pull/16393)) [@jameslamb](https://github.com/jameslamb) +- Add query 10 to the TPC-H suite ([#16392](https://github.com/rapidsai/cudf/pull/16392)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Use `make_host_vector` instead of `make_std_vector` to facilitate pinned memory optimizations ([#16386](https://github.com/rapidsai/cudf/pull/16386)) [@vuule](https://github.com/vuule) +- Fix some issues with deprecated / removed cccl facilities ([#16377](https://github.com/rapidsai/cudf/pull/16377)) [@miscco](https://github.com/miscco) +- Align IntervalIndex APIs with pandas 2.x ([#16371](https://github.com/rapidsai/cudf/pull/16371)) [@mroeschke](https://github.com/mroeschke) +- Align CategoricalIndex APIs with pandas 2.x ([#16369](https://github.com/rapidsai/cudf/pull/16369)) [@mroeschke](https://github.com/mroeschke) +- Align TimedeltaIndex APIs with pandas 2.x ([#16368](https://github.com/rapidsai/cudf/pull/16368)) [@mroeschke](https://github.com/mroeschke) +- Align DatetimeIndex APIs with pandas 2.x ([#16367](https://github.com/rapidsai/cudf/pull/16367)) [@mroeschke](https://github.com/mroeschke) +- fix [tool.setuptools] reference in custreamz config ([#16365](https://github.com/rapidsai/cudf/pull/16365)) [@jameslamb](https://github.com/jameslamb) +- Align Index APIs with pandas 2.x ([#16361](https://github.com/rapidsai/cudf/pull/16361)) [@mroeschke](https://github.com/mroeschke) +- Rebuild for & Support NumPy 2 ([#16300](https://github.com/rapidsai/cudf/pull/16300)) [@jakirkham](https://github.com/jakirkham) +- Add `stream` param to stream compaction APIs ([#16295](https://github.com/rapidsai/cudf/pull/16295)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Added batch memset to memset data and validity buffers in parquet reader 
([#16281](https://github.com/rapidsai/cudf/pull/16281)) [@sdrp713](https://github.com/sdrp713) +- Deduplicate decimal32/decimal64 to decimal128 conversion function ([#16236](https://github.com/rapidsai/cudf/pull/16236)) [@mhaseeb123](https://github.com/mhaseeb123) +- Refactor mixed_semi_join using cuco::static_set ([#16230](https://github.com/rapidsai/cudf/pull/16230)) [@srinivasyadav18](https://github.com/srinivasyadav18) +- Improve performance of hash_character_ngrams using warp-per-string kernel ([#16212](https://github.com/rapidsai/cudf/pull/16212)) [@davidwendt](https://github.com/davidwendt) +- Add environment variable to log cudf.pandas fallback calls ([#16161](https://github.com/rapidsai/cudf/pull/16161)) [@mroeschke](https://github.com/mroeschke) +- Add libcudf example with large strings ([#15983](https://github.com/rapidsai/cudf/pull/15983)) [@davidwendt](https://github.com/davidwendt) +- JSON tree algorithms refactor I: CSR data structure for column tree ([#15979](https://github.com/rapidsai/cudf/pull/15979)) [@shrshi](https://github.com/shrshi) +- Support multiple new-line characters in regex APIs ([#15961](https://github.com/rapidsai/cudf/pull/15961)) [@davidwendt](https://github.com/davidwendt) +- adding wheel build for libcudf ([#15483](https://github.com/rapidsai/cudf/pull/15483)) [@msarahan](https://github.com/msarahan) +- Replace usages of `thrust::optional` with `std::optional` ([#15091](https://github.com/rapidsai/cudf/pull/15091)) [@miscco](https://github.com/miscco) + # cudf 24.08.00 (7 Aug 2024) ## 🚨 Breaking Changes diff --git a/README.md b/README.md index 1ab6a2d7457..8f8c2adac2f 100644 --- a/README.md +++ b/README.md @@ -79,17 +79,17 @@ pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12 ### Conda -cuDF can be installed with conda (via [miniconda](https://docs.conda.io/projects/miniconda/en/latest/) or the full [Anaconda distribution](https://www.anaconda.com/download) from the `rapidsai` channel: +cuDF can be installed with 
conda (via [miniforge](https://github.com/conda-forge/miniforge)) from the `rapidsai` channel: ```bash conda install -c rapidsai -c conda-forge -c nvidia \ - cudf=24.08 python=3.11 cuda-version=12.5 + cudf=24.10 python=3.12 cuda-version=12.5 ``` We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD of our latest development branch. -Note: cuDF is supported only on Linux, and with Python versions 3.9 and later. +Note: cuDF is supported only on Linux, and with Python versions 3.10 and later. See the [RAPIDS installation guide](https://docs.rapids.ai/install) for more OS and version info. diff --git a/VERSION b/VERSION index 1a039311a74..7c7ba04436f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -24.08.03 +24.10.00 diff --git a/build.sh b/build.sh index 52bb1e64d16..56359eae235 100755 --- a/build.sh +++ b/build.sh @@ -17,12 +17,14 @@ ARGS=$* # script, and that this script resides in the repo dir! REPODIR=$(cd $(dirname $0); pwd) -VALIDARGS="clean libcudf cudf cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats --disable_large_strings" -HELP="$0 [clean] [libcudf] [cudf] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"\\\"] +VALIDARGS="clean libcudf pylibcudf cudf cudf_polars cudfjar dask_cudf benchmarks tests libcudf_kafka cudf_kafka custreamz -v -g -n --pydevelop -l --allgpuarch --disable_nvtx --opensource_nvcomp --show_depr_warn --ptds -h --build_metrics --incl_cache_stats --disable_large_strings" +HELP="$0 [clean] [libcudf] [pylibcudf] [cudf] [cudf_polars] [cudfjar] [dask_cudf] [benchmarks] [tests] [libcudf_kafka] [cudf_kafka] [custreamz] [-v] [-g] [-n] [-h] [--cmake-args=\\\"\\\"] clean - remove all existing build artifacts and configuration (start over) libcudf - build the cudf C++ code only + 
pylibcudf - build the pylibcudf Python package cudf - build the cudf Python package + cudf_polars - build the cudf_polars Python package cudfjar - build cudf JAR with static libcudf using devtoolset toolchain dask_cudf - build the dask_cudf Python package benchmarks - build benchmarks @@ -53,10 +55,11 @@ KAFKA_LIB_BUILD_DIR=${KAFKA_LIB_BUILD_DIR:=${REPODIR}/cpp/libcudf_kafka/build} CUDF_KAFKA_BUILD_DIR=${REPODIR}/python/cudf_kafka/build CUDF_BUILD_DIR=${REPODIR}/python/cudf/build DASK_CUDF_BUILD_DIR=${REPODIR}/python/dask_cudf/build +PYLIBCUDF_BUILD_DIR=${REPODIR}/python/pylibcudf/build CUSTREAMZ_BUILD_DIR=${REPODIR}/python/custreamz/build CUDF_JAR_JAVA_BUILD_DIR="$REPODIR/java/target" -BUILD_DIRS="${LIB_BUILD_DIR} ${CUDF_BUILD_DIR} ${DASK_CUDF_BUILD_DIR} ${KAFKA_LIB_BUILD_DIR} ${CUDF_KAFKA_BUILD_DIR} ${CUSTREAMZ_BUILD_DIR} ${CUDF_JAR_JAVA_BUILD_DIR}" +BUILD_DIRS="${LIB_BUILD_DIR} ${CUDF_BUILD_DIR} ${DASK_CUDF_BUILD_DIR} ${KAFKA_LIB_BUILD_DIR} ${CUDF_KAFKA_BUILD_DIR} ${CUSTREAMZ_BUILD_DIR} ${CUDF_JAR_JAVA_BUILD_DIR} ${PYLIBCUDF_BUILD_DIR}" # Set defaults for vars modified by flags to this script VERBOSE_FLAG="" @@ -237,11 +240,6 @@ if hasArg --pydevelop; then PYTHON_ARGS_FOR_INSTALL="${PYTHON_ARGS_FOR_INSTALL} -e" fi -# Append `-DFIND_CUDF_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option. 
-if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUDF_CPP"* ]]; then - EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DFIND_CUDF_CPP=ON" -fi - if hasArg --disable_large_strings; then BUILD_DISABLE_LARGE_STRINGS="ON" fi @@ -268,7 +266,7 @@ fi ################################################################################ # Configure, build, and install libcudf -if buildAll || hasArg libcudf || hasArg cudf || hasArg cudfjar; then +if buildAll || hasArg libcudf || hasArg pylibcudf || hasArg cudf || hasArg cudfjar; then if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then CUDF_CMAKE_CUDA_ARCHITECTURES="${CUDF_CMAKE_CUDA_ARCHITECTURES:-NATIVE}" if [[ "$CUDF_CMAKE_CUDA_ARCHITECTURES" == "NATIVE" ]]; then @@ -340,6 +338,14 @@ if buildAll || hasArg libcudf; then fi fi +# Build and install the pylibcudf Python package +if buildAll || hasArg pylibcudf; then + + cd ${REPODIR}/python/pylibcudf + SKBUILD_CMAKE_ARGS="-DCMAKE_PREFIX_PATH=${INSTALL_PREFIX};-DCMAKE_LIBRARY_PATH=${LIBCUDF_BUILD_DIR};-DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES};${EXTRA_CMAKE_ARGS}" \ + python ${PYTHON_ARGS_FOR_INSTALL} . +fi + # Build and install the cudf Python package if buildAll || hasArg cudf; then @@ -348,6 +354,12 @@ if buildAll || hasArg cudf; then python ${PYTHON_ARGS_FOR_INSTALL} . fi +# Build and install the cudf_polars Python package +if buildAll || hasArg cudf_polars; then + + cd ${REPODIR}/python/cudf_polars + python ${PYTHON_ARGS_FOR_INSTALL} . 
+fi # Build and install the dask_cudf Python package if buildAll || hasArg dask_cudf; then diff --git a/ci/build_docs.sh b/ci/build_docs.sh index 14dc7a59048..c67d127e635 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -29,7 +29,7 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - libcudf cudf dask-cudf + libcudf pylibcudf cudf dask-cudf export RAPIDS_DOCS_DIR="$(mktemp -d)" diff --git a/ci/build_python.sh b/ci/build_python.sh index 79e09432779..2e3f70ba767 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -22,9 +22,16 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) # TODO: Remove `--no-test` flag once importing on a CPU # node works correctly # With boa installed conda build forwards to the boa builder + +RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ + --no-test \ + --channel "${CPP_CHANNEL}" \ + conda/recipes/pylibcudf + RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ + --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ conda/recipes/cudf RAPIDS_PACKAGE_VERSION=$(head -1 ./VERSION) rapids-conda-retry mambabuild \ diff --git a/ci/build_wheel_cudf.sh b/ci/build_wheel_cudf.sh index 1b563bc499c..fb93b06dbe2 100755 --- a/ci/build_wheel_cudf.sh +++ b/ci/build_wheel_cudf.sh @@ -5,12 +5,25 @@ set -euo pipefail package_dir="python/cudf" -export SKBUILD_CMAKE_ARGS="-DUSE_LIBARROW_FROM_PYARROW=ON" +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -./ci/build_wheel.sh ${package_dir} +# Downloads libcudf and pylibcudf wheels from this current build, +# then ensures 'cudf' wheel builds always use the 'libcudf' and 'pylibcudf' just built in the same CI run. +# +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# are used when creating the isolated build environment. 
+RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcudf_dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python /tmp/pylibcudf_dist +echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt +echo "pylibcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/pylibcudf_dist/pylibcudf_*.whl)" >> /tmp/constraints.txt +export PIP_CONSTRAINT="/tmp/constraints.txt" -python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* +./ci/build_wheel.sh ${package_dir} +python -m auditwheel repair \ + --exclude libcudf.so \ + --exclude libnvcomp.so \ + -w ${package_dir}/final_dist \ + ${package_dir}/dist/* -RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist diff --git a/ci/build_wheel_libcudf.sh b/ci/build_wheel_libcudf.sh new file mode 100755 index 00000000000..8975381ceba --- /dev/null +++ b/ci/build_wheel_libcudf.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Copyright (c) 2023-2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_dir="python/libcudf" + +./ci/build_wheel.sh ${package_dir} + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +mkdir -p ${package_dir}/final_dist +python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/* + +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist diff --git a/ci/build_wheel_pylibcudf.sh b/ci/build_wheel_pylibcudf.sh new file mode 100755 index 00000000000..5e9f7f8a0c4 --- /dev/null +++ b/ci/build_wheel_pylibcudf.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+ +set -euo pipefail + +package_dir="python/pylibcudf" + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +# Downloads libcudf wheel from this current build, +# then ensures 'pylibcudf' wheel builds always use the 'libcudf' just built in the same CI run. +# +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# are used when creating the isolated build environment. +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libcudf_dist +echo "libcudf-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libcudf_dist/libcudf_*.whl)" > /tmp/constraints.txt +export PIP_CONSTRAINT="/tmp/constraints.txt" + +./ci/build_wheel.sh ${package_dir} + +python -m auditwheel repair \ + --exclude libcudf.so \ + --exclude libnvcomp.so \ + -w ${package_dir}/final_dist \ + ${package_dir}/dist/* + +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 ${package_dir}/final_dist diff --git a/ci/cudf_pandas_scripts/fetch_pandas_versions.py b/ci/cudf_pandas_scripts/fetch_pandas_versions.py new file mode 100644 index 00000000000..b6913f947e8 --- /dev/null +++ b/ci/cudf_pandas_scripts/fetch_pandas_versions.py @@ -0,0 +1,24 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import requests +from packaging.version import Version +from packaging.specifiers import SpecifierSet +import argparse + +def get_pandas_versions(pandas_range): + url = "https://pypi.org/pypi/pandas/json" + response = requests.get(url) + data = response.json() + versions = [Version(v) for v in data['releases']] + specifier = SpecifierSet(pandas_range.lstrip("pandas")) + matching_versions = [v for v in versions if v in specifier] + matching_minors = sorted(set(".".join((str(v.major), str(v.minor))) for v in matching_versions), key=Version) + return matching_minors + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Filter pandas versions by prefix.") + parser.add_argument("pandas_range", type=str, help="The version prefix to filter by.") + args = parser.parse_args() + + versions = get_pandas_versions(args.pandas_range) + print(','.join(versions)) diff --git a/ci/cudf_pandas_scripts/pandas-tests/diff.sh b/ci/cudf_pandas_scripts/pandas-tests/diff.sh index 6cf70a2347f..5dbb4ba991c 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/diff.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/diff.sh @@ -12,7 +12,7 @@ RAPIDS_FULL_VERSION=$(<./VERSION) rapids-logger "Github job name: ${GH_JOB_NAME}" rapids-logger "Rapids version: ${RAPIDS_FULL_VERSION}" -PY_VER="39" +PY_VER="310" MAIN_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py${PY_VER}.main-${RAPIDS_FULL_VERSION}-results.json PR_ARTIFACT=$(rapids-s3-path)cuda12_$(arch)_py${PY_VER}.pr-${RAPIDS_FULL_VERSION}-results.json diff --git a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py index 93a815838b7..485b2ac8a51 100644 --- a/ci/cudf_pandas_scripts/pandas-tests/job-summary.py +++ b/ci/cudf_pandas_scripts/pandas-tests/job-summary.py @@ -67,10 +67,33 @@ def emoji_failed(x): # convert pr_results to a pandas DataFrame and then a markdown table pr_df = pd.DataFrame.from_dict(pr_results, orient="index").sort_index() main_df = 
pd.DataFrame.from_dict(main_results, orient="index").sort_index() +total_usage = main_df["_slow_function_call"] + main_df["_fast_function_call"] +main_df["CPU Usage"] = ((main_df["_slow_function_call"] / total_usage) * 100.0).round(1) +main_df["GPU Usage"] = ((main_df["_fast_function_call"] / total_usage) * 100.0).round(1) + +total_usage = pr_df["_slow_function_call"] + pr_df["_fast_function_call"] +pr_df["CPU Usage"] = ((pr_df["_slow_function_call"] / total_usage) * 100.0).round(1) +pr_df["GPU Usage"] = ((pr_df["_fast_function_call"] / total_usage) * 100.0).round(1) + +cpu_usage_mean = pr_df["CPU Usage"].mean().round(2) +gpu_usage_mean = pr_df["GPU Usage"].mean().round(2) + +gpu_usage_rate_change = abs(pr_df["GPU Usage"].mean() - main_df["GPU Usage"].mean()) +pr_df["CPU Usage"] = pr_df["CPU Usage"].fillna(0) +pr_df["GPU Usage"] = pr_df["GPU Usage"].fillna(0) +main_df["CPU Usage"] = main_df["CPU Usage"].fillna(0) +main_df["GPU Usage"] = main_df["GPU Usage"].fillna(0) + diff_df = pr_df - main_df +diff_df["CPU Usage"] = diff_df["CPU Usage"].round(1).fillna(0) +diff_df["GPU Usage"] = diff_df["GPU Usage"].round(1).fillna(0) + +# Add '%' suffix to "CPU Usage" and "GPU Usage" columns +pr_df["CPU Usage"] = pr_df["CPU Usage"].astype(str) + "%" +pr_df["GPU Usage"] = pr_df["GPU Usage"].astype(str) + "%" -pr_df = pr_df[["total", "passed", "failed", "skipped"]] -diff_df = diff_df[["total", "passed", "failed", "skipped"]] +pr_df = pr_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]] +diff_df = diff_df[["total", "passed", "failed", "skipped", "CPU Usage", "GPU Usage"]] diff_df.columns = diff_df.columns + "_diff" diff_df["passed_diff"] = diff_df["passed_diff"].map(emoji_passed) diff_df["failed_diff"] = diff_df["failed_diff"].map(emoji_failed) @@ -89,12 +112,37 @@ def emoji_failed(x): "passed_diff": "Passed delta", "failed_diff": "Failed delta", "skipped_diff": "Skipped delta", + "CPU Usage_diff": "CPU Usage delta", + "GPU Usage_diff": "GPU Usage delta", } ) 
-df = df.sort_values(by=["Failed tests", "Skipped tests"], ascending=False) - +df = df.sort_values(by=["CPU Usage delta", "Total tests"], ascending=False) +df["CPU Usage delta"] = df["CPU Usage delta"].map(emoji_failed) +df["GPU Usage delta"] = df["GPU Usage delta"].map(emoji_passed) +df = df[ + [ + "Total tests", + "CPU Usage delta", + "GPU Usage delta", + "Passed tests", + "Failed tests", + "Skipped tests", + "CPU Usage", + "GPU Usage", + "Total delta", + "Passed delta", + "Failed delta", + "Skipped delta", + ] +] print(comment) print() +print( + f"Average GPU usage: {gpu_usage_mean}% {'an increase' if gpu_usage_rate_change > 0 else 'a decrease'} by {gpu_usage_rate_change}%" +) +print() +print(f"Average CPU usage: {cpu_usage_mean}%") +print() print("Here are the results of running the Pandas tests against this PR:") print() print(df.to_markdown()) diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh index 48ee4a05628..e5cd4436a3a 100755 --- a/ci/cudf_pandas_scripts/pandas-tests/run.sh +++ b/ci/cudf_pandas_scripts/pandas-tests/run.sh @@ -11,8 +11,17 @@ rapids-logger "Running Pandas tests using $PANDAS_TESTS_BRANCH branch and rapids rapids-logger "PR number: ${RAPIDS_REF_NAME:-"unknown"}" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep -python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,pandas-tests] + +# Download the cudf, libcudf, and pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist + +# echo to expand wildcard before adding `[extra]` requires for pip +python -m pip install \ + "$(echo 
./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test,pandas-tests]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ + "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh index 1c3b99953fb..f6bdc6f9484 100755 --- a/ci/cudf_pandas_scripts/run_tests.sh +++ b/ci/cudf_pandas_scripts/run_tests.sh @@ -9,13 +9,20 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"} RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"} mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}" +DEPENDENCIES_PATH="dependencies.yaml" +package_name="pandas" + +# Use grep to find the line containing the package name and version constraint +pandas_version_constraint=$(grep -oP "pandas>=\d+\.\d+,\<\d+\.\d+\.\d+dev\d+" $DEPENDENCIES_PATH) + # Function to display script usage function display_usage { - echo "Usage: $0 [--no-cudf]" + echo "Usage: $0 [--no-cudf] [pandas-version]" } # Default value for the --no-cudf option no_cudf=false +PANDAS_VERSION="" # Parse command-line arguments while [[ $# -gt 0 ]]; do @@ -25,9 +32,14 @@ while [[ $# -gt 0 ]]; do shift ;; *) - echo "Error: Unknown option $1" - display_usage - exit 1 + if [[ -z "$PANDAS_VERSION" ]]; then + PANDAS_VERSION=$1 + shift + else + echo "Error: Unknown option $1" + display_usage + exit 1 + fi ;; esac done @@ -36,13 +48,55 @@ if [ "$no_cudf" = true ]; then echo "Skipping cudf install" else RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" - RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep - python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,cudf-pandas-tests] + + # Download the cudf, libcudf, and pylibcudf built in the previous step + RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 
python ./dist + RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist + RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist + + echo "" > ./constraints.txt + if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then + # `test_python_cudf_pandas` constraints are for `[test]` not `[cudf-pandas-tests]` + rapids-dependency-file-generator \ + --output requirements \ + --file-key test_python_cudf_pandas \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ + | tee ./constraints.txt + fi + + python -m pip install \ + -v \ + --constraint ./constraints.txt \ + "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test,cudf-pandas-tests]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ + "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" fi +python -m pip install ipykernel +python -m ipykernel install --user --name python3 + +# The third-party integration tests are ignored because they are run nightly in seperate CI job python -m pytest -p cudf.pandas \ + --ignore=./python/cudf/cudf_pandas_tests/third_party_integration_tests/ \ --cov-config=./python/cudf/.coveragerc \ --cov=cudf \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \ --cov-report=term \ ./python/cudf/cudf_pandas_tests/ + +output=$(python ci/cudf_pandas_scripts/fetch_pandas_versions.py $pandas_version_constraint) + +# Convert the comma-separated list into an array +IFS=',' read -r -a versions <<< "$output" + +for version in "${versions[@]}"; do + echo "Installing pandas version: ${version}" + python -m pip install "numpy>=1.23,<2.0a0" "pandas==${version}.*" + python -m pytest -p cudf.pandas \ + --ignore=./python/cudf/cudf_pandas_tests/third_party_integration_tests/ \ + --cov-config=./python/cudf/.coveragerc \ + --cov=cudf \ + --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \ + --cov-report=term \ + 
./python/cudf/cudf_pandas_tests/ +done diff --git a/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh new file mode 100755 index 00000000000..d44d25d658c --- /dev/null +++ b/ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +cleanup() { + rm ${TEST_DIR}/results-*.pickle +} + +trap cleanup EXIT + +runtest() { + local lib=$1 + local mode=$2 + + local plugin="" + if [ "$mode" = "cudf" ]; then + plugin="-p cudf.pandas" + fi + + pytest \ + $plugin \ + -v \ + --continue-on-collection-errors \ + --cache-clear \ + --numprocesses=${NUM_PROCESSES} \ + --dist=worksteal \ + ${TEST_DIR}/test_${lib}*.py +} + +main() { + local lib=$1 + + # generation phase + runtest ${lib} "gold" + runtest ${lib} "cudf" + + # assertion phase + pytest \ + --compare \ + -p cudf.pandas \ + -v \ + --continue-on-collection-errors \ + --cache-clear \ + --numprocesses=${NUM_PROCESSES} \ + --dist=worksteal \ + ${TEST_DIR}/test_${lib}*.py +} + +main $@ diff --git a/ci/cudf_pandas_scripts/third-party-integration/test.sh b/ci/cudf_pandas_scripts/third-party-integration/test.sh new file mode 100755 index 00000000000..f8ddbaba0f3 --- /dev/null +++ b/ci/cudf_pandas_scripts/third-party-integration/test.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+ +# Common setup steps shared by Python test jobs + +set -euo pipefail + +write_output() { + local key="$1" + local value="$2" + echo "$key=$value" | tee --append "${GITHUB_OUTPUT:-/dev/null}" +} + +extract_lib_from_dependencies_yaml() { + local file=$1 + # Parse all keys in dependencies.yaml under the "files" section, + # extract all the keys that start with "test_", and extract the rest + local extracted_libs="$(yq -o json $file | jq -rc '.files | with_entries(select(.key | contains("test_"))) | keys | map(sub("^test_"; ""))')" + echo $extracted_libs +} + +main() { + local dependencies_yaml="$1" + + LIBS=$(extract_lib_from_dependencies_yaml "$dependencies_yaml") + LIBS=${LIBS#[} + LIBS=${LIBS%]} + + for lib in ${LIBS//,/ }; do + lib=$(echo "$lib" | tr -d '""') + echo "Running tests for library $lib" + + CUDA_MAJOR=$(if [ "$lib" = "tensorflow" ]; then echo "11"; else echo "12"; fi) + + . /opt/conda/etc/profile.d/conda.sh + + rapids-logger "Generate Python testing dependencies" + rapids-dependency-file-generator \ + --config "$dependencies_yaml" \ + --output conda \ + --file-key test_${lib} \ + --matrix "cuda=${CUDA_MAJOR};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml + + rapids-mamba-retry env create --yes -f env.yaml -n test + + # Temporarily allow unbound variables for conda activation. 
+ set +u + conda activate test + set -u + + repo_root=$(git rev-parse --show-toplevel) + TEST_DIR=${repo_root}/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests + + rapids-print-env + + rapids-logger "Check GPU usage" + nvidia-smi + + EXITCODE=0 + trap "EXITCODE=1" ERR + set +e + + rapids-logger "pytest ${lib}" + + NUM_PROCESSES=8 + serial_libraries=( + "tensorflow" + ) + for serial_library in "${serial_libraries[@]}"; do + if [ "${lib}" = "${serial_library}" ]; then + NUM_PROCESSES=1 + fi + done + + TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh ${lib} + + rapids-logger "Test script exiting with value: ${EXITCODE}" + done + + exit ${EXITCODE} +} + +main "$@" diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index ad96aff3930..f73e88bc0c8 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -18,18 +18,16 @@ CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}') CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}') CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR} -#Get . for next version +# Get . 
for next version NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}') NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}') NEXT_PATCH=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[3]}') NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} -NEXT_UCX_PY_VERSION="$(curl -sL https://version.gpuci.io/rapids/${NEXT_SHORT_TAG}).*" # Need to distutils-normalize the versions for some use cases -CURRENT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${CURRENT_SHORT_TAG}'))") -NEXT_SHORT_TAG_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_SHORT_TAG}'))") -PATCH_PEP440=$(python -c "from setuptools.extern import packaging; print(packaging.version.Version('${NEXT_PATCH}'))") -echo "current is ${CURRENT_SHORT_TAG_PEP440}, next is ${NEXT_SHORT_TAG_PEP440}" +CURRENT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${CURRENT_SHORT_TAG}'))") +NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))") +PATCH_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_PATCH}'))") echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG" @@ -47,21 +45,25 @@ sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_dask_cudf.sh DEPENDENCIES=( cudf cudf_kafka + cugraph + cuml custreamz dask-cuda dask-cudf kvikio + libcudf libkvikio librmm + pylibcudf rapids-dask-dependency rmm ) for DEP in "${DEPENDENCIES[@]}"; do - for FILE in dependencies.yaml conda/environments/*.yaml; do + for FILE in dependencies.yaml conda/environments/*.yaml python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml; do sed_runner "/-.* ${DEP}\(-cu[[:digit:]]\{2\}\)\{0,1\}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0/g" "${FILE}" done for FILE in python/*/pyproject.toml; do - sed_runner "/\"${DEP}==/ 
s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" ${FILE} + sed_runner "/\"${DEP}==/ s/==.*\"/==${NEXT_SHORT_TAG_PEP440}.*,>=0.0.0a0\"/g" "${FILE}" done done @@ -77,9 +79,9 @@ sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_ # CI files for FILE in .github/workflows/*.yaml .github/workflows/*.yml; do sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}" - sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" ${FILE}; + sed_runner "s/dask-cuda.git@branch-[^\"\s]\+/dask-cuda.git@branch-${NEXT_SHORT_TAG}/g" "${FILE}" done -sed_runner "s/branch-[0-9]+\.[0-9]+/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_cudf_polars.sh +sed_runner "s/branch-[0-9]\+\.[0-9]\+/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_cudf_polars.sh # Java files NEXT_FULL_JAVA_TAG="${NEXT_SHORT_TAG}.${PATCH_PEP440}-SNAPSHOT" diff --git a/ci/run_cudf_memcheck_ctests.sh b/ci/run_cudf_memcheck_ctests.sh index aacd93e3b96..653829db419 100755 --- a/ci/run_cudf_memcheck_ctests.sh +++ b/ci/run_cudf_memcheck_ctests.sh @@ -15,9 +15,6 @@ export LIBCUDF_MEMCHECK_ENABLED=1 for gt in ./*_TEST ; do test_name=$(basename ${gt}) # Run gtests with compute-sanitizer - if [[ "$test_name" == "ERROR_TEST" ]] || [[ "$test_name" == "STREAM_IDENTIFICATION_TEST" ]]; then - continue - fi echo "Running compute-sanitizer on $test_name" compute-sanitizer --tool memcheck ${gt} "$@" done diff --git a/ci/run_cudf_polars_polars_tests.sh b/ci/run_cudf_polars_polars_tests.sh index 52a827af94c..95f78f17f2f 100755 --- a/ci/run_cudf_polars_polars_tests.sh +++ b/ci/run_cudf_polars_polars_tests.sh @@ -21,7 +21,7 @@ python -m pytest \ -m "" \ -p cudf_polars.testing.plugin \ -v \ - --tb=short \ + --tb=native \ ${DESELECTED_TESTS} \ "$@" \ py-polars/tests diff --git a/ci/test_cudf_polars_polars_tests.sh b/ci/test_cudf_polars_polars_tests.sh index 924fc4ef28b..bfc8fd37565 100755 --- a/ci/test_cudf_polars_polars_tests.sh +++ b/ci/test_cudf_polars_polars_tests.sh @@ 
-10,7 +10,7 @@ set -eou pipefail # files in cudf_polars/pylibcudf", rather than "are there changes # between upstream and this branch which touch cudf_polars/pylibcudf" # TODO: is the target branch exposed anywhere in an environment variable? -if [ -n "$(git diff --name-only origin/branch-24.08...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ]; +if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ]; then HAS_CHANGES=1 rapids-logger "PR has changes in cudf-polars/pylibcudf, test fails treated as failure" @@ -24,17 +24,16 @@ rapids-logger "Download wheels" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist -# Download the cudf built in the previous step -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep +# Download the pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibcudf-dep -rapids-logger "Install cudf" -python -m pip install ./local-cudf-dep/cudf*.whl +rapids-logger "Install pylibcudf" +python -m pip install ./local-pylibcudf-dep/pylibcudf*.whl rapids-logger "Install cudf_polars" python -m pip install $(echo ./dist/cudf_polars*.whl) -# TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")') -TAG="py-1.7.0" +TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")') rapids-logger "Clone polars to ${TAG}" git clone https://github.com/pola-rs/polars.git --branch ${TAG} --depth 1 diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh index e8849588aa5..dc70661a17a 100755 --- a/ci/test_python_common.sh +++ b/ci/test_python_common.sh @@ -10,11 +10,12 @@ set -euo pipefail rapids-logger "Generate Python testing dependencies" ENV_YAML_DIR="$(mktemp -d)" - +FILE_KEY=$1 
rapids-dependency-file-generator \ --output conda \ - --file-key test_python \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee "${ENV_YAML_DIR}/env.yaml" + --file-key ${FILE_KEY} \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ + | tee "${ENV_YAML_DIR}/env.yaml" rapids-mamba-retry env create --yes -f "${ENV_YAML_DIR}/env.yaml" -n test diff --git a/ci/test_python_cudf.sh b/ci/test_python_cudf.sh index 217dd2fd9a8..2386414b32e 100755 --- a/ci/test_python_cudf.sh +++ b/ci/test_python_cudf.sh @@ -5,7 +5,7 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../; # Common setup steps shared by Python test jobs -source ./ci/test_python_common.sh +source ./ci/test_python_common.sh test_python_cudf rapids-logger "Check GPU usage" nvidia-smi @@ -15,7 +15,7 @@ trap "EXITCODE=1" ERR set +e rapids-logger "pytest pylibcudf" -pushd python/cudf/cudf/pylibcudf_tests +pushd python/pylibcudf/pylibcudf/tests python -m pytest \ --cache-clear \ --dist=worksteal \ diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh index 06a24773cae..67c97ad29a5 100755 --- a/ci/test_python_other.sh +++ b/ci/test_python_other.sh @@ -5,7 +5,7 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ # Common setup steps shared by Python test jobs -source ./ci/test_python_common.sh +source ./ci/test_python_common.sh test_python_other rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ diff --git a/ci/test_wheel_cudf.sh b/ci/test_wheel_cudf.sh index fdb61278d36..a701bfe15e0 100755 --- a/ci/test_wheel_cudf.sh +++ b/ci/test_wheel_cudf.sh @@ -4,10 +4,31 @@ set -eou pipefail RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist + +# Download the cudf, libcudf, and pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" 
rapids-download-wheels-from-s3 python ./dist +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist + +rapids-logger "Install cudf, pylibcudf, and test requirements" + +# Constrain to minimum dependency versions if job is set up as "oldest" +echo "" > ./constraints.txt +if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then + rapids-dependency-file-generator \ + --output requirements \ + --file-key py_test_cudf \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ + | tee ./constraints.txt +fi # echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install $(echo ./dist/cudf*.whl)[test] +python -m pip install \ + -v \ + --constraint ./constraints.txt \ + "$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ + "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ @@ -15,9 +36,10 @@ mkdir -p "${RAPIDS_TESTS_DIR}" rapids-logger "pytest pylibcudf" -pushd python/cudf/cudf/pylibcudf_tests +pushd python/pylibcudf/pylibcudf/tests python -m pytest \ --cache-clear \ + --numprocesses=8 \ --dist=worksteal \ . popd diff --git a/ci/test_wheel_cudf_polars.sh b/ci/test_wheel_cudf_polars.sh index d25601428a6..3116bd820e9 100755 --- a/ci/test_wheel_cudf_polars.sh +++ b/ci/test_wheel_cudf_polars.sh @@ -10,7 +10,7 @@ set -eou pipefail # files in cudf_polars/pylibcudf", rather than "are there changes # between upstream and this branch which touch cudf_polars/pylibcudf" # TODO: is the target branch exposed anywhere in an environment variable? 
-if [ -n "$(git diff --name-only origin/branch-24.08...HEAD -- python/cudf_polars/ python/cudf/cudf/_lib/pylibcudf/)" ]; +if [ -n "$(git diff --name-only origin/branch-24.10...HEAD -- python/cudf_polars/ python/pylibcudf/)" ]; then HAS_CHANGES=1 rapids-logger "PR has changes in cudf-polars/pylibcudf, test fails treated as failure" @@ -22,19 +22,30 @@ fi rapids-logger "Download wheels" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 python ./dist -# Download the cudf built in the previous step -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep +# Download libcudf and pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist -rapids-logger "Install cudf" -python -m pip install ./local-cudf-dep/cudf*.whl - -rapids-logger "Install cudf_polars" -python -m pip install $(echo ./dist/cudf_polars*.whl)[test] +rapids-logger "Installing cudf_polars and its dependencies" +# Constraint to minimum dependency versions if job is set up as "oldest" +echo "" > ./constraints.txt +if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then + rapids-dependency-file-generator \ + --output requirements \ + --file-key py_test_cudf_polars \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ + | tee ./constraints.txt +fi -rapids-logger "Pin to 1.7.0 Temporarily" -python -m pip install polars==1.7.0 +# echo to expand wildcard before adding `[test]` requires for pip +python -m pip install \ + -v \ + --constraint ./constraints.txt \ + "$(echo 
./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ + "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" rapids-logger "Run cudf_polars tests" diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh index c3800d3cc25..361a42ccda9 100755 --- a/ci/test_wheel_dask_cudf.sh +++ b/ci/test_wheel_dask_cudf.sh @@ -4,14 +4,32 @@ set -eou pipefail RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 python ./dist -# Download the cudf built in the previous step -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep -python -m pip install ./local-cudf-dep/cudf*.whl +# Download the cudf, libcudf, and pylibcudf built in the previous step +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist +RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist + +rapids-logger "Install dask_cudf, cudf, pylibcudf, and test requirements" +# Constraint to minimum dependency versions if job is set up as "oldest" +echo "" > ./constraints.txt +if [[ $RAPIDS_DEPENDENCIES == "oldest" ]]; then + rapids-dependency-file-generator \ + --output requirements \ + --file-key py_test_dask_cudf \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};dependencies=${RAPIDS_DEPENDENCIES}" \ + | tee ./constraints.txt +fi # echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install $(echo ./dist/dask_cudf*.whl)[test] +python -m pip install \ + -v \ + --constraint ./constraints.txt \ + 
"$(echo ./dist/cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ + "$(echo ./dist/dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \ + "$(echo ./dist/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ + "$(echo ./dist/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ @@ -23,6 +41,7 @@ pushd python/dask_cudf/dask_cudf DASK_DATAFRAME__QUERY_PLANNING=True python -m pytest \ --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \ --numprocesses=8 \ + --dist=worksteal \ . popd @@ -32,5 +51,6 @@ pushd python/dask_cudf/dask_cudf DASK_DATAFRAME__QUERY_PLANNING=False python -m pytest \ --junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \ --numprocesses=8 \ + --dist=worksteal \ . popd diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 0d7c2d53aea..5a05dfd0530 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -26,65 +26,66 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.3 -- dask-cuda==24.8.* +- dask-cuda==24.10.*,>=0.0.0a0 - dlpack>=0.8,<1.0 - doxygen=1.9.1 - fastavro>=0.22.9 - flatbuffers==24.3.25 -- fmt>=10.1.1,<11 +- fmt>=11.0.2,<12 - fsspec>=0.6.0 - gcc_linux-64=11.* - hypothesis - identify>=2.5.20 - ipython -- libarrow-acero==16.1.0.* -- libarrow-dataset==16.1.0.* -- libarrow==16.1.0.* +- jupyter_client - libcufile-dev=1.4.0.31 - libcufile=1.4.0.31 - libcurand-dev=10.3.0.86 - libcurand=10.3.0.86 -- libkvikio==24.8.* -- libparquet==16.1.0.* -- librdkafka>=1.9.0,<1.10.0a0 -- librmm==24.8.* +- libkvikio==24.10.*,>=0.0.0a0 +- librdkafka>=2.5.0,<2.6.0a0 +- librmm==24.10.*,>=0.0.0a0 - make - moto>=4.0.8 - msgpack-python - myst-nb +- nbconvert +- nbformat - nbsphinx - ninja - notebook - numba>=0.57 -- numpy>=1.23,<2.0a0 +- numpy>=1.23,<3.0a0 - numpydoc - nvcc_linux-64=11.8 -- nvcomp==3.0.6 +- nvcomp==4.0.1 - nvtx>=0.2.1 +- openpyxl - 
packaging +- pandas - pandas>=2.0,<2.2.3dev0 - pandoc -- pip +- polars>=1.8,<1.9 - pre-commit - ptxcompiler -- pyarrow==16.1.0.* +- pyarrow>=14.0.0,<18.0.0a0 - pydata-sphinx-theme!=0.14.2 - pytest-benchmark - pytest-cases>=3.8.2 - pytest-cov - pytest-xdist - pytest<8 -- python-confluent-kafka>=1.9.0,<1.10.0a0 -- python>=3.9,<3.12 +- python-confluent-kafka>=2.5.0,<2.6.0a0 +- python>=3.10,<3.13 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.8.* +- rapids-dask-dependency==24.10.*,>=0.0.0a0 - rich -- rmm==24.8.* +- rmm==24.10.*,>=0.0.0a0 - s3fs>=2022.3.0 -- scikit-build-core>=0.7.0 +- scikit-build-core>=0.10.0 - scipy -- spdlog>=1.12.0,<1.13 +- spdlog>=1.14.1,<1.15 - sphinx - sphinx-autobuild - sphinx-copybutton @@ -97,6 +98,4 @@ dependencies: - transformers==4.39.3 - typing_extensions>=4.0.0 - zlib>=1.2.13 -- pip: - - git+https://github.com/python-streamz/streamz.git@master name: all_cuda-118_arch-x86_64 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 848483edecb..8490296233d 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -27,62 +27,63 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.3 -- dask-cuda==24.8.* +- dask-cuda==24.10.*,>=0.0.0a0 - dlpack>=0.8,<1.0 - doxygen=1.9.1 - fastavro>=0.22.9 - flatbuffers==24.3.25 -- fmt>=10.1.1,<11 +- fmt>=11.0.2,<12 - fsspec>=0.6.0 - gcc_linux-64=11.* - hypothesis - identify>=2.5.20 - ipython -- libarrow-acero==16.1.0.* -- libarrow-dataset==16.1.0.* -- libarrow==16.1.0.* +- jupyter_client - libcufile-dev - libcurand-dev -- libkvikio==24.8.* -- libparquet==16.1.0.* -- librdkafka>=1.9.0,<1.10.0a0 -- librmm==24.8.* +- libkvikio==24.10.*,>=0.0.0a0 +- librdkafka>=2.5.0,<2.6.0a0 +- librmm==24.10.*,>=0.0.0a0 - make - moto>=4.0.8 - msgpack-python - myst-nb +- nbconvert +- nbformat - nbsphinx - ninja - notebook - numba>=0.57 -- numpy>=1.23,<2.0a0 
+- numpy>=1.23,<3.0a0 - numpydoc -- nvcomp==3.0.6 +- nvcomp==4.0.1 - nvtx>=0.2.1 +- openpyxl - packaging +- pandas - pandas>=2.0,<2.2.3dev0 - pandoc -- pip +- polars>=1.8,<1.9 - pre-commit -- pyarrow==16.1.0.* +- pyarrow>=14.0.0,<18.0.0a0 - pydata-sphinx-theme!=0.14.2 -- pynvjitlink +- pynvjitlink>=0.0.0a0 - pytest-benchmark - pytest-cases>=3.8.2 - pytest-cov - pytest-xdist - pytest<8 -- python-confluent-kafka>=1.9.0,<1.10.0a0 -- python>=3.9,<3.12 +- python-confluent-kafka>=2.5.0,<2.6.0a0 +- python>=3.10,<3.13 - pytorch>=2.1.0 - rapids-build-backend>=0.3.0,<0.4.0.dev0 -- rapids-dask-dependency==24.8.* +- rapids-dask-dependency==24.10.*,>=0.0.0a0 - rich -- rmm==24.8.* +- rmm==24.10.*,>=0.0.0a0 - s3fs>=2022.3.0 -- scikit-build-core>=0.7.0 +- scikit-build-core>=0.10.0 - scipy -- spdlog>=1.12.0,<1.13 +- spdlog>=1.14.1,<1.15 - sphinx - sphinx-autobuild - sphinx-copybutton @@ -95,6 +96,4 @@ dependencies: - transformers==4.39.3 - typing_extensions>=4.0.0 - zlib>=1.2.13 -- pip: - - git+https://github.com/python-streamz/streamz.git@master name: all_cuda-125_arch-x86_64 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 9137f099ad1..e22b4a4eddc 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -62,12 +62,10 @@ requirements: - python - cython >=3.0.3 - rapids-build-backend >=0.3.0,<0.4.0.dev0 - - scikit-build-core >=0.7.0 + - scikit-build-core >=0.10.0 - dlpack >=0.8,<1.0 - # TODO: Change to `2.0` for NumPy 2 - - numpy 1.23 - - pyarrow ==16.1.0.* - libcudf ={{ version }} + - pylibcudf ={{ version }} - rmm ={{ minor_version }} {% if cuda_major == "11" %} - cudatoolkit @@ -83,10 +81,10 @@ requirements: - pandas >=2.0,<2.2.3dev0 - cupy >=12.0.0 - numba >=0.57 - # TODO: Update `numpy` in `host` when dropping `<2.0a0` - - numpy >=1.23,<2.0a0 - - {{ pin_compatible('pyarrow', max_pin='x.x') }} + - numpy >=1.23,<3.0a0 + - pyarrow>=14.0.0,<18.0.0a0 - libcudf ={{ version }} + - pylibcudf ={{ version }} - {{ pin_compatible('rmm', 
max_pin='x.x') }} - fsspec >=0.6.0 {% if cuda_major == "11" %} diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml index 1b0e0e2c236..d04d9b21a46 100644 --- a/conda/recipes/cudf_kafka/meta.yaml +++ b/conda/recipes/cudf_kafka/meta.yaml @@ -58,10 +58,10 @@ requirements: - python - cython >=3.0.3 - cuda-version ={{ cuda_version }} - - cudf ={{ version }} + - pylibcudf ={{ version }} - libcudf_kafka ={{ version }} - rapids-build-backend >=0.3.0,<0.4.0.dev0 - - scikit-build-core >=0.7.0 + - scikit-build-core >=0.10.0 {% if cuda_major != "11" %} - cuda-cudart-dev {% endif %} @@ -69,7 +69,7 @@ requirements: - python - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - libcudf_kafka ={{ version }} - - cudf ={{ version }} + - pylibcudf ={{ version }} {% if cuda_major != "11" %} - cuda-cudart {% endif %} diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index f5ea426e0b1..a031f05a73a 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -39,7 +39,7 @@ requirements: - python - rapids-build-backend >=0.3.0,<0.4.0.dev0 - setuptools - - python-confluent-kafka >=1.9.0,<1.10.0a0 + - python-confluent-kafka >=2.5.0,<2.6.0a0 - cudf_kafka ={{ version }} - cuda-version ={{ cuda_version }} run: @@ -48,7 +48,7 @@ requirements: - cudf ={{ version }} - cudf_kafka ={{ version }} - rapids-dask-dependency ={{ minor_version }} - - python-confluent-kafka >=1.9.0,<1.10.0a0 + - python-confluent-kafka >=2.5.0,<2.6.0a0 - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} test: diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index ff7458caf82..dc75eb4b252 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -19,26 +19,23 @@ c_stdlib_version: cmake_version: - ">=3.26.4,!=3.30.0" -libarrow_version: - - "==16.1.0" - dlpack_version: - ">=0.8,<1.0" 
librdkafka_version: - - ">=1.9.0,<1.10.0a0" + - ">=2.5.0,<2.6.0a0" fmt_version: - - ">=10.1.1,<11" + - ">=11.0.2,<12" flatbuffers_version: - "=24.3.25" spdlog_version: - - ">=1.12.0,<1.13" + - ">=1.14.1,<1.15" nvcomp_version: - - "=3.0.6" + - "=4.0.1" zlib_version: - ">=1.2.13" diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index aa1c94a4bca..1c2e9e8dd98 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -64,7 +64,6 @@ requirements: {% endif %} - cuda-version ={{ cuda_version }} - nvcomp {{ nvcomp_version }} - - libarrow {{ libarrow_version }} - dlpack {{ dlpack_version }} - librdkafka {{ librdkafka_version }} - fmt {{ fmt_version }} @@ -92,7 +91,6 @@ outputs: - cmake {{ cmake_version }} host: - cuda-version ={{ cuda_version }} - - libarrow {{ libarrow_version }} run: - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} {% if cuda_major == "11" %} diff --git a/conda/recipes/pylibcudf/build.sh b/conda/recipes/pylibcudf/build.sh new file mode 100644 index 00000000000..483346504db --- /dev/null +++ b/conda/recipes/pylibcudf/build.sh @@ -0,0 +1,4 @@ +# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
+ +# This assumes the script is executed from the root of the repo directory +./build.sh pylibcudf diff --git a/conda/recipes/pylibcudf/conda_build_config.yaml b/conda/recipes/pylibcudf/conda_build_config.yaml new file mode 100644 index 00000000000..af894cccda0 --- /dev/null +++ b/conda/recipes/pylibcudf/conda_build_config.yaml @@ -0,0 +1,20 @@ +c_compiler_version: + - 11 + +cxx_compiler_version: + - 11 + +c_stdlib: + - sysroot + +c_stdlib_version: + - "2.17" + +cmake_version: + - ">=3.26.4,!=3.30.0" + +cuda_compiler: + - cuda-nvcc + +cuda11_compiler: + - nvcc diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml new file mode 100644 index 00000000000..7c1efa0176c --- /dev/null +++ b/conda/recipes/pylibcudf/meta.yaml @@ -0,0 +1,104 @@ +# Copyright (c) 2018-2024, NVIDIA CORPORATION. + +{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %} +{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} +{% set py_version = environ['CONDA_PY'] %} +{% set cuda_version = '.'.join(environ['RAPIDS_CUDA_VERSION'].split('.')[:2]) %} +{% set cuda_major = cuda_version.split('.')[0] %} +{% set date_string = environ['RAPIDS_DATE_STRING'] %} + +package: + name: pylibcudf + version: {{ version }} + +source: + path: ../../.. 
+ +build: + number: {{ GIT_DESCRIBE_NUMBER }} + string: cuda{{ cuda_major }}_py{{ py_version }}_{{ date_string }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }} + script_env: + - AWS_ACCESS_KEY_ID + - AWS_SECRET_ACCESS_KEY + - AWS_SESSION_TOKEN + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_GENERATOR + - PARALLEL_LEVEL + - SCCACHE_BUCKET + - SCCACHE_IDLE_TIMEOUT + - SCCACHE_REGION + - SCCACHE_S3_KEY_PREFIX=pylibcudf-aarch64 # [aarch64] + - SCCACHE_S3_KEY_PREFIX=pylibcudf-linux64 # [linux64] + - SCCACHE_S3_USE_SSL + - SCCACHE_S3_NO_CREDENTIALS + ignore_run_exports: + # libcudf's run_exports pinning is looser than we would like + - libcudf + ignore_run_exports_from: + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} + {% else %} + - {{ compiler('cuda') }} + - cuda-cudart-dev + - libcufile-dev # [linux64] + {% endif %} + +requirements: + build: + - cmake {{ cmake_version }} + - ninja + - {{ compiler('c') }} + - {{ compiler('cxx') }} + {% if cuda_major == "11" %} + - {{ compiler('cuda11') }} ={{ cuda_version }} + {% else %} + - {{ compiler('cuda') }} + {% endif %} + - cuda-version ={{ cuda_version }} + - {{ stdlib("c") }} + host: + - python + - cython >=3.0.3 + - rapids-build-backend >=0.3.0,<0.4.0.dev0 + - scikit-build-core >=0.10.0 + - dlpack >=0.8,<1.0 + - libcudf ={{ version }} + - rmm ={{ minor_version }} + {% if cuda_major == "11" %} + - cudatoolkit + {% else %} + - cuda-cudart-dev + - cuda-nvrtc + - libcufile-dev # [linux64] + {% endif %} + - cuda-version ={{ cuda_version }} + run: + - python + - typing_extensions >=4.0.0 + - pandas >=2.0,<2.2.3dev0 + - numpy >=1.23,<3.0a0 + - pyarrow>=14.0.0,<18.0.0a0 + - {{ pin_compatible('rmm', max_pin='x.x') }} + - fsspec >=0.6.0 + {% if cuda_major == "11" %} + - cuda-python >=11.7.1,<12.0a0 + {% else %} + - cuda-python >=12.0,<13.0a0 + {% endif %} + - nvtx >=0.2.1 + - packaging + +test: + requires: + - cuda-version ={{ cuda_version }} + imports: + - 
pylibcudf + +about: + home: https://rapids.ai/ + license: Apache-2.0 + license_family: APACHE + license_file: LICENSE + summary: pylibcudf library diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 95c509efc5b..136f43ee706 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -54,11 +54,6 @@ mark_as_advanced(CUDF_BUILD_TESTUTIL) option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON) option(CUDF_LARGE_STRINGS_DISABLED "Build with large string support disabled" OFF) mark_as_advanced(CUDF_LARGE_STRINGS_DISABLED) -option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF) -option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF) -option(CUDF_ENABLE_ARROW_PYTHON "Find (or build) Arrow with Python support" OFF) -option(CUDF_ENABLE_ARROW_PARQUET "Find (or build) Arrow with Parquet support" OFF) -option(CUDF_ENABLE_ARROW_S3 "Build/Enable AWS S3 Arrow filesystem support" OFF) option( CUDF_USE_PER_THREAD_DEFAULT_STREAM "Build cuDF with per-thread default stream, including passing the per-thread default @@ -81,8 +76,6 @@ option(CUDA_ENABLE_LINEINFO option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON) # cudart can be statically linked or dynamically linked. 
The python ecosystem wants dynamic linking option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF) -option(USE_LIBARROW_FROM_PYARROW "Only use the libarrow contained in pyarrow" OFF) -mark_as_advanced(USE_LIBARROW_FROM_PYARROW) set(DEFAULT_CUDF_BUILD_STREAMS_TEST_UTIL ON) if(CUDA_STATIC_RUNTIME OR NOT BUILD_SHARED_LIBS) @@ -100,8 +93,6 @@ message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}") message(VERBOSE "CUDF: Configure CMake to build (google & nvbench) benchmarks: ${BUILD_BENCHMARKS}") message(VERBOSE "CUDF: Build cuDF shared libraries: ${BUILD_SHARED_LIBS}") message(VERBOSE "CUDF: Use a file cache for JIT compiled kernels: ${JITIFY_USE_CACHE}") -message(VERBOSE "CUDF: Build and statically link Arrow libraries: ${CUDF_USE_ARROW_STATIC}") -message(VERBOSE "CUDF: Build and enable S3 filesystem support for Arrow: ${CUDF_ENABLE_ARROW_S3}") message(VERBOSE "CUDF: Build with per-thread default stream: ${CUDF_USE_PER_THREAD_DEFAULT_STREAM}") message( VERBOSE @@ -192,8 +183,6 @@ include(cmake/thirdparty/get_nvcomp.cmake) include(cmake/thirdparty/get_cccl.cmake) # find rmm include(cmake/thirdparty/get_rmm.cmake) -# find arrow -include(cmake/thirdparty/get_arrow.cmake) # find flatbuffers include(cmake/thirdparty/get_flatbuffers.cmake) # find dlpack @@ -363,16 +352,14 @@ add_library( src/hash/sha512_hash.cu src/hash/xxhash_64.cu src/interop/dlpack.cpp - src/interop/from_arrow.cu src/interop/arrow_utilities.cpp - src/interop/to_arrow.cu + src/interop/decimal_conversion_utilities.cu src/interop/to_arrow_device.cu src/interop/to_arrow_host.cu src/interop/from_arrow_device.cu src/interop/from_arrow_host.cu src/interop/from_arrow_stream.cu src/interop/to_arrow_schema.cpp - src/interop/detail/arrow_allocator.cpp src/io/avro/avro.cpp src/io/avro/avro_gpu.cu src/io/avro/reader_impl.cu @@ -391,13 +378,15 @@ add_library( src/io/csv/reader_impl.cu src/io/csv/writer_impl.cu src/io/functions.cpp - src/io/json/byte_range_info.cu + 
src/io/json/host_tree_algorithms.cu src/io/json/json_column.cu + src/io/json/column_tree_construction.cu src/io/json/json_normalization.cu src/io/json/json_tree.cu src/io/json/nested_json_gpu.cu src/io/json/read_json.cu src/io/json/parser_features.cpp + src/io/json/process_tokens.cu src/io/json/write_json.cu src/io/orc/aggregate_orc_metadata.cpp src/io/orc/dict_enc.cu @@ -438,7 +427,6 @@ add_library( src/io/text/bgzip_data_chunk_source.cu src/io/text/bgzip_utils.cpp src/io/text/multibyte_split.cu - src/io/utilities/arrow_io_source.cpp src/io/utilities/base64_utilities.cpp src/io/utilities/column_buffer.cpp src/io/utilities/column_buffer_strings.cu @@ -670,6 +658,7 @@ add_library( src/unary/math_ops.cu src/unary/nan_ops.cu src/unary/null_ops.cu + src/utilities/cuda.cpp src/utilities/cuda_memcpy.cu src/utilities/default_stream.cpp src/utilities/host_memory.cpp @@ -810,7 +799,7 @@ add_dependencies(cudf jitify_preprocess_run) # Specify the target module library dependencies target_link_libraries( cudf - PUBLIC ${ARROW_LIBRARIES} CCCL::CCCL rmm::rmm $ + PUBLIC CCCL::CCCL rmm::rmm $ spdlog::spdlog_header_only PRIVATE $ cuco::cuco ZLIB::ZLIB nvcomp::nvcomp kvikio::kvikio $ nanoarrow ) @@ -1059,37 +1048,12 @@ following IMPORTED GLOBAL targets: ]=] ) -if(CUDF_ENABLE_ARROW_PARQUET) - string( - APPEND - install_code_string - [=[ - if(NOT Parquet_DIR) - set(Parquet_DIR "${Arrow_DIR}") - endif() - set(ArrowDataset_DIR "${Arrow_DIR}") - find_dependency(ArrowDataset) - ]=] - ) -endif() - -string( - APPEND - install_code_string - [=[ -if(testing IN_LIST cudf_FIND_COMPONENTS) - enable_language(CUDA) -endif() -]=] -) - rapids_export( INSTALL cudf EXPORT_SET cudf-exports ${_components_export_string} GLOBAL_TARGETS cudf cudftestutil NAMESPACE cudf:: DOCUMENTATION doc_string - FINAL_CODE_BLOCK install_code_string ) # ################################################################################################## diff --git a/cpp/benchmarks/CMakeLists.txt 
b/cpp/benchmarks/CMakeLists.txt index ff431c7f260..4113e38dcf4 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -35,6 +35,30 @@ target_include_directories( "$" ) +add_library( + ndsh_data_generator STATIC + common/ndsh_data_generator/ndsh_data_generator.cpp common/ndsh_data_generator/table_helpers.cpp + common/ndsh_data_generator/random_column_generator.cu +) +target_compile_features(ndsh_data_generator PUBLIC cxx_std_17 cuda_std_17) + +target_compile_options( + ndsh_data_generator PUBLIC "$<$:${CUDF_CXX_FLAGS}>" + "$<$:${CUDF_CUDA_FLAGS}>" +) + +target_link_libraries( + ndsh_data_generator + PUBLIC cudf cudftestutil nvtx3::nvtx3-cpp + PRIVATE $ +) + +target_include_directories( + ndsh_data_generator + PUBLIC "$" "$" + "$" +) + # ################################################################################################## # * compiler function ----------------------------------------------------------------------------- @@ -103,8 +127,8 @@ function(ConfigureNVBench CMAKE_BENCH_NAME) INSTALL_RPATH "\$ORIGIN/../../../lib" ) target_link_libraries( - ${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen nvbench::nvbench - $ + ${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common ndsh_data_generator cudf_datagen + nvbench::nvbench $ ) install( TARGETS ${CMAKE_BENCH_NAME} @@ -152,16 +176,22 @@ ConfigureBench(COPY_IF_ELSE_BENCH copying/copy_if_else.cpp) ConfigureBench(TRANSPOSE_BENCH transpose/transpose.cpp) # ################################################################################################## -# * apply_boolean_mask benchmark ------------------------------------------------------------------ -ConfigureBench(APPLY_BOOLEAN_MASK_BENCH stream_compaction/apply_boolean_mask.cpp) +# * nds-h benchmark -------------------------------------------------------------------------------- +ConfigureNVBench(NDSH_Q01_NVBENCH ndsh/q01.cpp ndsh/utilities.cpp) +ConfigureNVBench(NDSH_Q05_NVBENCH ndsh/q05.cpp ndsh/utilities.cpp) 
+ConfigureNVBench(NDSH_Q06_NVBENCH ndsh/q06.cpp ndsh/utilities.cpp) +ConfigureNVBench(NDSH_Q09_NVBENCH ndsh/q09.cpp ndsh/utilities.cpp) +ConfigureNVBench(NDSH_Q10_NVBENCH ndsh/q10.cpp ndsh/utilities.cpp) # ################################################################################################## # * stream_compaction benchmark ------------------------------------------------------------------- ConfigureNVBench( STREAM_COMPACTION_NVBENCH + stream_compaction/apply_boolean_mask.cpp stream_compaction/distinct.cpp stream_compaction/distinct_count.cpp stream_compaction/stable_distinct.cpp + stream_compaction/stream_compaction_common.cpp stream_compaction/unique.cpp stream_compaction/unique_count.cpp ) @@ -200,18 +230,27 @@ ConfigureNVBench(STRUCT_CREATION_NVBENCH structs/create_structs.cpp) # -------------------------------------------------------------------------------- ConfigureBench(QUANTILES_BENCH quantiles/quantiles.cpp) +# ################################################################################################## +# * tdigest benchmark +# -------------------------------------------------------------------------------- +ConfigureNVBench(TDIGEST_NVBENCH quantiles/tdigest.cu) + # ################################################################################################## # * type_dispatcher benchmark --------------------------------------------------------------------- ConfigureBench(TYPE_DISPATCHER_BENCH type_dispatcher/type_dispatcher.cu) # ################################################################################################## # * reduction benchmark --------------------------------------------------------------------------- -ConfigureBench( - REDUCTION_BENCH reduction/anyall.cpp reduction/dictionary.cpp reduction/minmax.cpp - reduction/reduce.cpp reduction/scan.cpp -) ConfigureNVBench( - REDUCTION_NVBENCH reduction/rank.cpp reduction/scan_structs.cpp reduction/segmented_reduce.cpp + REDUCTION_NVBENCH + reduction/anyall.cpp + 
reduction/dictionary.cpp + reduction/minmax.cpp + reduction/rank.cpp + reduction/reduce.cpp + reduction/scan.cpp + reduction/scan_structs.cpp + reduction/segmented_reduce.cpp ) # ################################################################################################## @@ -303,7 +342,7 @@ ConfigureBench(TEXT_BENCH text/ngrams.cpp text/subword.cpp) ConfigureNVBench( TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/minhash.cpp - text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp + text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp text/word_minhash.cpp ) # ################################################################################################## @@ -320,7 +359,6 @@ ConfigureBench( string/filter.cpp string/repeat_strings.cpp string/replace.cpp - string/slice.cpp string/translate.cpp string/url_decode.cu ) @@ -341,6 +379,7 @@ ConfigureNVBench( string/like.cpp string/replace_re.cpp string/reverse.cpp + string/slice.cpp string/split.cpp string/split_re.cpp ) @@ -353,6 +392,11 @@ ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader ConfigureNVBench(JSON_READER_OPTION_NVBENCH io/json/json_reader_option.cpp) ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp) +# ################################################################################################## +# * multi buffer memset benchmark +# ---------------------------------------------------------------------- +ConfigureNVBench(BATCHED_MEMSET_BENCH io/utilities/batched_memset_bench.cpp) + # ################################################################################################## # * io benchmark --------------------------------------------------------------------- ConfigureNVBench(MULTIBYTE_SPLIT_NVBENCH io/text/multibyte_split.cpp) diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index 6df2cb44adc..dc258e32dc5 100644 --- 
a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -28,10 +28,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -507,7 +507,7 @@ std::unique_ptr create_random_column(data_profile const& profile, null_mask.end(), thrust::identity{}, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); return std::make_unique( dtype, @@ -591,7 +591,7 @@ std::unique_ptr create_random_utf8_string_column(data_profile cons null_mask.end() - 1, thrust::identity{}, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); return cudf::make_strings_column( num_rows, std::make_unique(std::move(offsets), rmm::device_buffer{}, 0), @@ -626,7 +626,7 @@ std::unique_ptr create_random_column(data_profi cudf::out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); return std::move(str_table->release()[0]); } @@ -688,7 +688,7 @@ std::unique_ptr create_random_column(data_profi valids.end(), thrust::identity{}, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); } return std::pair{}; }(); @@ -718,7 +718,7 @@ std::unique_ptr create_random_column(data_profi } template -struct clamp_down : public thrust::unary_function { +struct clamp_down { T max; clamp_down(T max) : max(max) {} __host__ __device__ T operator()(T x) const { return min(x, max); } @@ -782,7 +782,7 @@ std::unique_ptr create_random_column(data_profile valids.end(), thrust::identity{}, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); list_column = cudf::make_lists_column( current_num_rows, std::move(offsets_column), @@ -933,7 +933,7 @@ std::pair create_random_null_mask( 
thrust::make_counting_iterator(size), bool_generator{seed, 1.0 - *null_probability}, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); } } diff --git a/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.cpp b/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.cpp new file mode 100644 index 00000000000..fa7edd225ba --- /dev/null +++ b/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.cpp @@ -0,0 +1,996 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "ndsh_data_generator.hpp" + +#include "random_column_generator.hpp" +#include "table_helpers.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace cudf::datagen { + +namespace { +constexpr std::array nations{ + "ALGERIA", "ARGENTINA", "BRAZIL", "CANADA", "EGYPT", "ETHIOPIA", "FRANCE", + "GERMANY", "INDIA", "INDONESIA", "IRAN", "IRAQ", "JAPAN", "JORDAN", + "KENYA", "MOROCCO", "MOZAMBIQUE", "PERU", "CHINA", "ROMANIA", "SAUDI ARABIA", + "VIETNAM", "RUSSIA", "UNITED KINGDOM", "UNITED STATES"}; + +constexpr std::array years{"1992", "1993", "1994", "1995", "1996", "1997", "1998"}; +constexpr std::array months{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"}; +constexpr std::array days{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", + "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", + "23", "24", "25", "26", "27", "28", "29", "30", "31"}; + +constexpr std::array vocab_p_name{ + "almond", "antique", "aquamarine", "azure", "beige", "bisque", "black", + "blanched", "blue", "blush", "brown", "burlywood", "burnished", "chartreuse", + "chiffon", "chocolate", "coral", "cornflower", "cornsilk", "cream", "cyan", + "dark", "deep", "dim", "dodger", "drab", "firebrick", "floral", + "forest", "frosted", "gainsboro", "ghost", "goldenrod", "green", "grey", + "honeydew", "hot", "indian", "ivory", "khaki", "lace", "lavender", + "lawn", "lemon", "light", "lime", "linen", "magenta", "maroon", + "medium", "metallic", "midnight", "mint", "misty", "moccasin", "navajo", + "navy", "olive", "orange", "orchid", "pale", "papaya", "peach", + "peru", "pink", "plum", "powder", "puff", "purple", "red", + "rose", "rosy", "royal", "saddle", "salmon", "sandy", "seashell", + "sienna", "sky", "slate", "smoke", "snow", "spring", "steel", + "tan", "thistle", "tomato", "turquoise", 
"violet", "wheat", "white", + "yellow"}; + +constexpr std::array vocab_modes{"REG AIR", "AIR", "RAIL", "SHIP", "TRUCK", "MAIL", "FOB"}; + +constexpr std::array vocab_instructions{ + "DELIVER IN PERSON", "COLLECT COD", "NONE", "TAKE BACK RETURN"}; + +constexpr std::array vocab_priorities{"1-URGENT", "2-HIGH", "3-MEDIUM", "4-NOT SPECIFIED", "5-LOW"}; + +constexpr std::array vocab_segments{ + "AUTOMOBILE", "BUILDING", "FURNITURE", "MACHINERY", "HOUSEHOLD"}; + +constexpr std::array vocab_types{ + "STANDARD ANODIZED TIN", "STANDARD ANODIZED NICKEL", "STANDARD ANODIZED BRASS", + "STANDARD ANODIZED STEEL", "STANDARD ANODIZED COPPER", "STANDARD BURNISHED TIN", + "STANDARD BURNISHED NICKEL", "STANDARD BURNISHED BRASS", "STANDARD BURNISHED STEEL", + "STANDARD BURNISHED COPPER", "STANDARD PLATED TIN", "STANDARD PLATED NICKEL", + "STANDARD PLATED BRASS", "STANDARD PLATED STEEL", "STANDARD PLATED COPPER", + "STANDARD POLISHED TIN", "STANDARD POLISHED NICKEL", "STANDARD POLISHED BRASS", + "STANDARD POLISHED STEEL", "STANDARD POLISHED COPPER", "STANDARD BRUSHED TIN", + "STANDARD BRUSHED NICKEL", "STANDARD BRUSHED BRASS", "STANDARD BRUSHED STEEL", + "STANDARD BRUSHED COPPER", "SMALL ANODIZED TIN", "SMALL ANODIZED NICKEL", + "SMALL ANODIZED BRASS", "SMALL ANODIZED STEEL", "SMALL ANODIZED COPPER", + "SMALL BURNISHED TIN", "SMALL BURNISHED NICKEL", "SMALL BURNISHED BRASS", + "SMALL BURNISHED STEEL", "SMALL BURNISHED COPPER", "SMALL PLATED TIN", + "SMALL PLATED NICKEL", "SMALL PLATED BRASS", "SMALL PLATED STEEL", + "SMALL PLATED COPPER", "SMALL POLISHED TIN", "SMALL POLISHED NICKEL", + "SMALL POLISHED BRASS", "SMALL POLISHED STEEL", "SMALL POLISHED COPPER", + "SMALL BRUSHED TIN", "SMALL BRUSHED NICKEL", "SMALL BRUSHED BRASS", + "SMALL BRUSHED STEEL", "SMALL BRUSHED COPPER", "MEDIUM ANODIZED TIN", + "MEDIUM ANODIZED NICKEL", "MEDIUM ANODIZED BRASS", "MEDIUM ANODIZED STEEL", + "MEDIUM ANODIZED COPPER", "MEDIUM BURNISHED TIN", "MEDIUM BURNISHED NICKEL", + "MEDIUM BURNISHED BRASS", 
"MEDIUM BURNISHED STEEL", "MEDIUM BURNISHED COPPER", + "MEDIUM PLATED TIN", "MEDIUM PLATED NICKEL", "MEDIUM PLATED BRASS", + "MEDIUM PLATED STEEL", "MEDIUM PLATED COPPER", "MEDIUM POLISHED TIN", + "MEDIUM POLISHED NICKEL", "MEDIUM POLISHED BRASS", "MEDIUM POLISHED STEEL", + "MEDIUM POLISHED COPPER", "MEDIUM BRUSHED TIN", "MEDIUM BRUSHED NICKEL", + "MEDIUM BRUSHED BRASS", "MEDIUM BRUSHED STEEL", "MEDIUM BRUSHED COPPER", + "LARGE ANODIZED TIN", "LARGE ANODIZED NICKEL", "LARGE ANODIZED BRASS", + "LARGE ANODIZED STEEL", "LARGE ANODIZED COPPER", "LARGE BURNISHED TIN", + "LARGE BURNISHED NICKEL", "LARGE BURNISHED BRASS", "LARGE BURNISHED STEEL", + "LARGE BURNISHED COPPER", "LARGE PLATED TIN", "LARGE PLATED NICKEL", + "LARGE PLATED BRASS", "LARGE PLATED STEEL", "LARGE PLATED COPPER", + "LARGE POLISHED TIN", "LARGE POLISHED NICKEL", "LARGE POLISHED BRASS", + "LARGE POLISHED STEEL", "LARGE POLISHED COPPER", "LARGE BRUSHED TIN", + "LARGE BRUSHED NICKEL", "LARGE BRUSHED BRASS", "LARGE BRUSHED STEEL", + "LARGE BRUSHED COPPER", "ECONOMY ANODIZED TIN", "ECONOMY ANODIZED NICKEL", + "ECONOMY ANODIZED BRASS", "ECONOMY ANODIZED STEEL", "ECONOMY ANODIZED COPPER", + "ECONOMY BURNISHED TIN", "ECONOMY BURNISHED NICKEL", "ECONOMY BURNISHED BRASS", + "ECONOMY BURNISHED STEEL", "ECONOMY BURNISHED COPPER", "ECONOMY PLATED TIN", + "ECONOMY PLATED NICKEL", "ECONOMY PLATED BRASS", "ECONOMY PLATED STEEL", + "ECONOMY PLATED COPPER", "ECONOMY POLISHED TIN", "ECONOMY POLISHED NICKEL", + "ECONOMY POLISHED BRASS", "ECONOMY POLISHED STEEL", "ECONOMY POLISHED COPPER", + "ECONOMY BRUSHED TIN", "ECONOMY BRUSHED NICKEL", "ECONOMY BRUSHED BRASS", + "ECONOMY BRUSHED STEEL", "ECONOMY BRUSHED COPPER", "PROMO ANODIZED TIN", + "PROMO ANODIZED NICKEL", "PROMO ANODIZED BRASS", "PROMO ANODIZED STEEL", + "PROMO ANODIZED COPPER", "PROMO BURNISHED TIN", "PROMO BURNISHED NICKEL", + "PROMO BURNISHED BRASS", "PROMO BURNISHED STEEL", "PROMO BURNISHED COPPER", + "PROMO PLATED TIN", "PROMO PLATED NICKEL", "PROMO PLATED 
BRASS", + "PROMO PLATED STEEL", "PROMO PLATED COPPER", "PROMO POLISHED TIN", + "PROMO POLISHED NICKEL", "PROMO POLISHED BRASS", "PROMO POLISHED STEEL", + "PROMO POLISHED COPPER", "PROMO BRUSHED TIN", "PROMO BRUSHED NICKEL", + "PROMO BRUSHED BRASS", "PROMO BRUSHED STEEL", "PROMO BRUSHED COPPER"}; + +constexpr std::array vocab_containers{ + "SM CASE", "SM BOX", "SM BAG", "SM JAR", "SM PKG", "SM PACK", "SM CAN", + "SM DRUM", "LG CASE", "LG BOX", "LG BAG", "LG JAR", "LG PKG", "LG PACK", + "LG CAN", "LG DRUM", "MED CASE", "MED BOX", "MED BAG", "MED JAR", "MED PKG", + "MED PACK", "MED CAN", "MED DRUM", "JUMBO CASE", "JUMBO BOX", "JUMBO BAG", "JUMBO JAR", + "JUMBO PKG", "JUMBO PACK", "JUMBO CAN", "JUMBO DRUM", "WRAP CASE", "WRAP BOX", "WRAP BAG", + "WRAP JAR", "WRAP PKG", "WRAP PACK", "WRAP CAN", "WRAP DRUM"}; + +} // namespace + +/** + * @brief Generate a table out of the independent columns of the `orders` table + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_orders_independent(double scale_factor, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + cudf::size_type const o_num_rows = scale_factor * 1'500'000; + + // Generate the `o_orderkey` column + auto o_orderkey = [&]() { + auto const o_orderkey_candidates = generate_primary_key_column( + cudf::numeric_scalar(1), 4 * o_num_rows, stream, mr); + auto const o_orderkey_unsorted = cudf::sample(cudf::table_view({o_orderkey_candidates->view()}), + o_num_rows, + cudf::sample_with_replacement::FALSE, + 0, + stream, + mr); + auto const sort_result = + cudf::sort_by_key(o_orderkey_unsorted->view(), + cudf::table_view({o_orderkey_unsorted->view().column(0)}), + {}, + {}, + stream, + mr); + return std::move(sort_result->release()[0]); + }(); + + // Generate the `o_custkey` 
column + auto o_custkey = [&]() { + auto const col = generate_random_numeric_column( + 1, scale_factor * 49'000, o_num_rows, stream, mr); + auto const col_mul_3 = cudf::binary_operation(col->view(), + cudf::numeric_scalar(3), + cudf::binary_operator::MUL, + cudf::data_type{cudf::type_id::INT32}, + stream, + mr); + return cudf::binary_operation(col_mul_3->view(), + cudf::numeric_scalar(1), + cudf::binary_operator::ADD, + cudf::data_type{cudf::type_id::INT32}, + stream, + mr); + }(); + + // Generate the `o_orderdate` column + auto o_orderdate_ts = [&]() { + auto const o_orderdate_year = generate_random_string_column_from_set( + cudf::host_span(years.data(), years.size()), o_num_rows, stream, mr); + auto const o_orderdate_month = generate_random_string_column_from_set( + cudf::host_span(months.data(), months.size()), o_num_rows, stream, mr); + auto const o_orderdate_day = generate_random_string_column_from_set( + cudf::host_span(days.data(), days.size()), o_num_rows, stream, mr); + auto const o_orderdate_str = cudf::strings::concatenate( + cudf::table_view( + {o_orderdate_year->view(), o_orderdate_month->view(), o_orderdate_day->view()}), + cudf::string_scalar("-"), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO, + stream, + mr); + + return cudf::strings::to_timestamps(o_orderdate_str->view(), + cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}, + std::string("%Y-%m-%d"), + stream, + mr); + }(); + + // Generate the `o_orderpriority` column + auto o_orderpriority = generate_random_string_column_from_set( + cudf::host_span(vocab_priorities.data(), vocab_priorities.size()), + o_num_rows, + stream, + mr); + + // Generate the `o_clerk` column + auto o_clerk = [&]() { + auto const clerk_repeat = generate_repeat_string_column("Clerk#", o_num_rows, stream, mr); + auto const random_c = generate_random_numeric_column( + 1, scale_factor * 1'000, o_num_rows, stream, mr); + auto const random_c_str = cudf::strings::from_integers(random_c->view(), stream, 
mr); + auto const random_c_str_padded = cudf::strings::zfill(random_c_str->view(), 9, stream, mr); + return cudf::strings::concatenate( + cudf::table_view({clerk_repeat->view(), random_c_str_padded->view()}), + cudf::string_scalar(""), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO, + stream, + mr); + }(); + + // Generate the `o_shippriority` column + auto o_shippriority = [&]() { + auto const empty = cudf::make_numeric_column( + cudf::data_type{cudf::type_id::INT8}, o_num_rows, cudf::mask_state::UNALLOCATED, stream); + return cudf::fill(empty->view(), 0, o_num_rows, cudf::numeric_scalar(0), stream, mr); + }(); + + // Generate the `o_comment` column + // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification + auto o_comment = generate_random_string_column(19, 78, o_num_rows, stream, mr); + + // Generate the `orders_independent` table + std::vector> columns; + columns.push_back(std::move(o_orderkey)); + columns.push_back(std::move(o_custkey)); + columns.push_back(std::move(o_orderdate_ts)); + columns.push_back(std::move(o_orderpriority)); + columns.push_back(std::move(o_clerk)); + columns.push_back(std::move(o_shippriority)); + columns.push_back(std::move(o_comment)); + return std::make_unique(std::move(columns)); +} + +/** + * @brief Generate the `lineitem` table partially + * + * @param orders_independent Table with the independent columns of the `orders` table + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_lineitem_partial(cudf::table_view const& orders_independent, + double scale_factor, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const o_num_rows = orders_independent.num_rows(); + // Generate the `lineitem` table. 
For each row in the `orders` table, + // we have a random number (between 1 and 7) of rows in the `lineitem` table + + // For each `o_orderkey`, generate a random number (between 1 and 7), + // which will be the number of rows in the `lineitem` table that will + // have the same `l_orderkey` + auto const o_rep_freqs = generate_random_numeric_column(1, 7, o_num_rows, stream, mr); + + // Sum up the `o_rep_freqs` to get the number of rows in the + // `lineitem` table. This is required to generate the independent columns + // in the `lineitem` table + auto const l_num_rows = calculate_l_cardinality(o_rep_freqs->view(), stream, mr); + + // We create a table out of `o_orderkey` and `o_orderdate_ts` by repeating + // the rows of `orders` according to the frequencies in `o_rep_freqs` + auto const o_orderkey = orders_independent.column(0); + auto const o_orderdate_ts = orders_independent.column(2); + auto const l_base = + cudf::repeat(cudf::table_view({o_orderkey, o_orderdate_ts}), o_rep_freqs->view(), stream, mr); + auto l_base_columns = l_base->release(); + + // Generate the `l_orderkey` column + auto l_orderkey = std::move(l_base_columns[0]); + + // Generate the `l_partkey` column + auto l_partkey = generate_random_numeric_column( + 1, scale_factor * 200'000, l_num_rows, stream, mr); + + // Generate the `l_suppkey` column + auto l_suppkey = calculate_l_suppkey(l_partkey->view(), scale_factor, l_num_rows, stream, mr); + + // Generate the `l_linenumber` column + auto l_linenumber = generate_repeat_sequence_column(7, false, l_num_rows, stream, mr); + + // Generate the `l_quantity` column + auto l_quantity = generate_random_numeric_column(1, 50, l_num_rows, stream, mr); + + // Generate the `l_discount` column + auto l_discount = [&]() { + auto const col = generate_random_numeric_column(0.00, 0.10, l_num_rows, stream, mr); + return cudf::round(col->view(), 2); + }(); + + // Generate the `l_tax` column + auto l_tax = [&]() { + auto const col = 
generate_random_numeric_column(0.00, 0.08, l_num_rows, stream, mr); + return cudf::round(col->view(), 2); + }(); + + // Get the orderdate column from the `l_base` table + auto const ol_orderdate_ts = std::move(l_base_columns[1]); + + // Generate the `l_shipdate` column + auto l_shipdate_ts = [&]() { + auto const l_shipdate_rand_add_days = + generate_random_numeric_column(1, 121, l_num_rows, stream, mr); + return add_calendrical_days( + ol_orderdate_ts->view(), l_shipdate_rand_add_days->view(), stream, mr); + }(); + + // Generate the `l_commitdate` column + auto l_commitdate_ts = [&]() { + auto const l_commitdate_rand_add_days = + generate_random_numeric_column(30, 90, l_num_rows, stream, mr); + return add_calendrical_days( + ol_orderdate_ts->view(), l_commitdate_rand_add_days->view(), stream, mr); + }(); + + // Generate the `l_receiptdate` column + auto l_receiptdate_ts = [&]() { + auto const l_receiptdate_rand_add_days = + generate_random_numeric_column(1, 30, l_num_rows, stream, mr); + return add_calendrical_days( + l_shipdate_ts->view(), l_receiptdate_rand_add_days->view(), stream, mr); + }(); + + // Define the current date as per clause 4.2.2.12 of the TPC-H specification + constexpr cudf::size_type current_date_days_since_epoch = 9'298; + auto current_date = + cudf::timestamp_scalar(current_date_days_since_epoch, true); + auto current_date_literal = cudf::ast::literal(current_date); + + // Generate the `l_returnflag` column + // if `l_receiptdate` <= current_date then "R" or "A" else "N" + auto l_returnflag = [&]() { + auto const col_ref = cudf::ast::column_reference(0); + auto const pred = + cudf::ast::operation(cudf::ast::ast_operator::LESS_EQUAL, col_ref, current_date_literal); + auto const binary_mask = + cudf::compute_column(cudf::table_view({l_receiptdate_ts->view()}), pred, stream, mr); + + auto const multiplier = + generate_repeat_sequence_column(2, false, l_num_rows, stream, mr); + auto const ternary_mask = cudf::binary_operation(binary_mask->view(), 
+ multiplier->view(), + cudf::binary_operator::MUL, + cudf::data_type{cudf::type_id::INT8}, + stream, + mr); + auto const indices = cudf::test::fixed_width_column_wrapper({0, 1, 2}).release(); + auto const keys = cudf::test::strings_column_wrapper({"N", "A", "R"}).release(); + auto const gather_map = cudf::table_view({indices->view(), keys->view()}); + auto const gathered_table = cudf::gather( + gather_map, ternary_mask->view(), cudf::out_of_bounds_policy::DONT_CHECK, stream, mr); + return std::move(gathered_table->release()[1]); + }(); + + // Generate the `l_linestatus` column + // if `l_shipdate` > current_date then "F" else "O" + auto [l_linestatus, l_linestatus_mask] = [&]() { + auto const col_ref = cudf::ast::column_reference(0); + auto const pred = + cudf::ast::operation(cudf::ast::ast_operator::GREATER, col_ref, current_date_literal); + auto mask = cudf::compute_column(cudf::table_view({l_shipdate_ts->view()}), pred, stream, mr); + auto mask_index_type = cudf::cast(mask->view(), cudf::data_type{cudf::type_id::INT8}); + auto const indices = cudf::test::fixed_width_column_wrapper({0, 1}).release(); + auto const keys = cudf::test::strings_column_wrapper({"O", "F"}).release(); + auto const gather_map = cudf::table_view({indices->view(), keys->view()}); + auto const gathered_table = cudf::gather( + gather_map, mask_index_type->view(), cudf::out_of_bounds_policy::DONT_CHECK, stream, mr); + return std::make_tuple(std::move(gathered_table->release()[1]), std::move(mask_index_type)); + }(); + + // Generate the `l_shipinstruct` column + auto l_shipinstruct = generate_random_string_column_from_set( + cudf::host_span(vocab_instructions.data(), vocab_instructions.size()), + l_num_rows, + stream, + mr); + + // Generate the `l_shipmode` column + auto l_shipmode = generate_random_string_column_from_set( + cudf::host_span(vocab_modes.data(), vocab_modes.size()), + l_num_rows, + stream, + mr); + + // Generate the `l_comment` column + // NOTE: This column is not compliant with 
+ // clause 4.2.2.10 of the TPC-H specification + auto l_comment = generate_random_string_column(10, 43, l_num_rows, stream, mr); + + // Generate the `lineitem_partial` table + std::vector> columns; + columns.push_back(std::move(l_linestatus_mask)); + columns.push_back(std::move(l_orderkey)); + columns.push_back(std::move(l_partkey)); + columns.push_back(std::move(l_suppkey)); + columns.push_back(std::move(l_linenumber)); + columns.push_back(std::move(l_quantity)); + columns.push_back(std::move(l_discount)); + columns.push_back(std::move(l_tax)); + columns.push_back(std::move(l_returnflag)); + columns.push_back(std::move(l_linestatus)); + columns.push_back(std::move(l_shipdate_ts)); + columns.push_back(std::move(l_commitdate_ts)); + columns.push_back(std::move(l_receiptdate_ts)); + columns.push_back(std::move(l_shipinstruct)); + columns.push_back(std::move(l_shipmode)); + columns.push_back(std::move(l_comment)); + return std::make_unique(std::move(columns)); +} + +/** + * @brief Generate the part of the `orders` table dependent on the `lineitem` table + * + * @param lineitem_partial The partially generated `lineitem` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_orders_dependent(cudf::table_view const& lineitem_partial, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const l_linestatus_mask = lineitem_partial.column(0); + auto const l_orderkey = lineitem_partial.column(1); + auto const l_extendedprice = lineitem_partial.column(6); + auto const l_discount = lineitem_partial.column(7); + auto const l_tax = lineitem_partial.column(8); + + std::vector> orders_dependent_columns; + + // Generate the `o_orderstatus` column + auto o_orderstatus = [&]() { + auto const keys = cudf::table_view({l_orderkey}); + cudf::groupby::groupby gb(keys); + std::vector 
requests; + + // Perform a `count` aggregation on `l_orderkey` + requests.push_back(cudf::groupby::aggregation_request()); + requests[0].aggregations.push_back(cudf::make_count_aggregation()); + requests[0].values = l_orderkey; + + // Perform a `sum` aggregation on `l_linestatus_mask` + requests.push_back(cudf::groupby::aggregation_request()); + requests[1].aggregations.push_back(cudf::make_sum_aggregation()); + requests[1].values = l_linestatus_mask; + + // Perform the aggregations + auto agg_result = gb.aggregate(requests); + + // Create a `table_view` out of the `l_orderkey`, `count`, and `sum` columns + auto const count = std::move(agg_result.second[0].results[0]); + auto const sum = cudf::cast( + agg_result.second[1].results[0]->view(), cudf::data_type{cudf::type_id::INT32}, stream, mr); + + auto const table = + cudf::table_view({agg_result.first->get_column(0).view(), count->view(), sum->view()}); + + // Now on this table, + // if `sum` == `count` then "O", + // if `sum` == 0, then "F", + // else "P" + + // So, we first evaluate an expression `sum == count` and generate a boolean mask + auto const count_ref = cudf::ast::column_reference(1); + auto const sum_ref = cudf::ast::column_reference(2); + auto const expr_a = cudf::ast::operation(cudf::ast::ast_operator::EQUAL, sum_ref, count_ref); + auto const mask_a = cudf::compute_column(table, expr_a); + auto const o_orderstatus_intermediate = + cudf::copy_if_else(cudf::string_scalar("O"), cudf::string_scalar("F"), mask_a->view()); + + // Then, we evaluate an expression `sum == 0` and generate a boolean mask + auto zero_scalar = cudf::numeric_scalar(0); + auto const zero_literal = cudf::ast::literal(zero_scalar); + auto const expr_b_left = + cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, sum_ref, count_ref); + auto const expr_b_right = + cudf::ast::operation(cudf::ast::ast_operator::NOT_EQUAL, sum_ref, zero_literal); + auto const expr_b = + cudf::ast::operation(cudf::ast::ast_operator::LOGICAL_AND, 
expr_b_left, expr_b_right); + auto const mask_b = cudf::compute_column(table, expr_b); + return cudf::copy_if_else( + cudf::string_scalar("P"), o_orderstatus_intermediate->view(), mask_b->view()); + }(); + orders_dependent_columns.push_back(std::move(o_orderstatus)); + + // Generate the `o_totalprice` column + // We calculate the `charge` column, which is a function of `l_extendedprice`, + // `l_tax`, and `l_discount` and then group by `l_orderkey` and sum the `charge` + auto const l_charge = calculate_charge(l_extendedprice, l_tax, l_discount, stream, mr); + auto o_totalprice = [&]() { + auto const keys = cudf::table_view({l_orderkey}); + cudf::groupby::groupby gb(keys); + std::vector requests; + requests.push_back(cudf::groupby::aggregation_request()); + requests[0].aggregations.push_back(cudf::make_sum_aggregation()); + requests[0].values = l_charge->view(); + auto agg_result = gb.aggregate(requests); + return cudf::round(agg_result.second[0].results[0]->view(), 2); + }(); + orders_dependent_columns.push_back(std::move(o_totalprice)); + return std::make_unique(std::move(orders_dependent_columns)); +} + +/** + * @brief Generate the `partsupp` table + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_partsupp(double scale_factor, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Define the number of rows in the `part` and `partsupp` tables + cudf::size_type const p_num_rows = scale_factor * 200'000; + cudf::size_type const ps_num_rows = scale_factor * 800'000; + + // Generate the `ps_partkey` column + auto ps_partkey = [&]() { + auto const p_partkey = + generate_primary_key_column(cudf::numeric_scalar(1), p_num_rows, stream, mr); + auto const rep_table = cudf::repeat(cudf::table_view({p_partkey->view()}), 4, 
stream, mr); + return std::move(rep_table->release()[0]); + }(); + + // Generate the `ps_suppkey` column + auto ps_suppkey = calculate_ps_suppkey(ps_partkey->view(), scale_factor, ps_num_rows, stream, mr); + + // Generate the `ps_availqty` column + auto ps_availqty = generate_random_numeric_column(1, 9999, ps_num_rows, stream, mr); + + // Generate the `ps_supplycost` column + auto ps_supplycost = [&]() { + auto const col = generate_random_numeric_column(1.00, 1000.00, ps_num_rows, stream, mr); + return cudf::round(col->view(), 2); + }(); + + // Generate the `ps_comment` column + // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification + auto ps_comment = generate_random_string_column(49, 198, ps_num_rows, stream, mr); + + // Create the `partsupp` table + std::vector> columns; + columns.push_back(std::move(ps_partkey)); + columns.push_back(std::move(ps_suppkey)); + columns.push_back(std::move(ps_availqty)); + columns.push_back(std::move(ps_supplycost)); + columns.push_back(std::move(ps_comment)); + return std::make_unique(std::move(columns)); +} + +/** + * @brief Generate the `part` table + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_part(double scale_factor, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + cudf::size_type const num_rows = scale_factor * 200'000; + + // Generate the `p_partkey` column + auto p_partkey = + generate_primary_key_column(cudf::numeric_scalar(1), num_rows, stream, mr); + + // Generate the `p_name` column + auto p_name = [&]() { + auto const p_name_a = generate_random_string_column_from_set( + cudf::host_span(vocab_p_name.data(), vocab_p_name.size()), + num_rows, + stream, + mr); + auto const p_name_b = generate_random_string_column_from_set( + 
cudf::host_span(vocab_p_name.data(), vocab_p_name.size()), + num_rows, + stream, + mr); + auto const p_name_c = generate_random_string_column_from_set( + cudf::host_span(vocab_p_name.data(), vocab_p_name.size()), + num_rows, + stream, + mr); + auto const p_name_d = generate_random_string_column_from_set( + cudf::host_span(vocab_p_name.data(), vocab_p_name.size()), + num_rows, + stream, + mr); + auto const p_name_e = generate_random_string_column_from_set( + cudf::host_span(vocab_p_name.data(), vocab_p_name.size()), + num_rows, + stream, + mr); + return cudf::strings::concatenate( + cudf::table_view( + {p_name_a->view(), p_name_b->view(), p_name_c->view(), p_name_d->view(), p_name_e->view()}), + cudf::string_scalar(" "), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO, + stream, + mr); + }(); + + // Generate the `p_mfgr` and `p_brand` columns + auto const random_values_m = generate_random_numeric_column(1, 5, num_rows, stream, mr); + auto const random_values_m_str = + cudf::strings::from_integers(random_values_m->view(), stream, mr); + + auto const random_values_n = generate_random_numeric_column(1, 5, num_rows, stream, mr); + auto const random_values_n_str = + cudf::strings::from_integers(random_values_n->view(), stream, mr); + + auto p_mfgr = [&]() { + auto const mfgr_repeat = generate_repeat_string_column("Manufacturer#", num_rows, stream, mr); + return cudf::strings::concatenate( + cudf::table_view({mfgr_repeat->view(), random_values_m_str->view()}), + cudf::string_scalar(""), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO, + stream, + mr); + }(); + + auto p_brand = [&]() { + auto const brand_repeat = generate_repeat_string_column("Brand#", num_rows, stream, mr); + return cudf::strings::concatenate( + cudf::table_view( + {brand_repeat->view(), random_values_m_str->view(), random_values_n_str->view()}), + cudf::string_scalar(""), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO, + 
stream, + mr); + }(); + + // Generate the `p_type` column + auto p_type = generate_random_string_column_from_set( + cudf::host_span(vocab_types.data(), vocab_types.size()), + num_rows, + stream, + mr); + + // Generate the `p_size` column + auto p_size = generate_random_numeric_column(1, 50, num_rows, stream, mr); + + // Generate the `p_container` column + auto p_container = generate_random_string_column_from_set( + cudf::host_span(vocab_containers.data(), vocab_containers.size()), + num_rows, + stream, + mr); + + // Generate the `p_retailprice` column + auto p_retailprice = calculate_p_retailprice(p_partkey->view(), stream, mr); + + // Generate the `p_comment` column + // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification + auto p_comment = generate_random_string_column(5, 22, num_rows, stream, mr); + + // Create the `part` table + std::vector> columns; + columns.push_back(std::move(p_partkey)); + columns.push_back(std::move(p_name)); + columns.push_back(std::move(p_mfgr)); + columns.push_back(std::move(p_brand)); + columns.push_back(std::move(p_type)); + columns.push_back(std::move(p_size)); + columns.push_back(std::move(p_container)); + columns.push_back(std::move(p_retailprice)); + columns.push_back(std::move(p_comment)); + return std::make_unique(std::move(columns)); +} + +/** + * @brief Generate the `orders`, `lineitem`, and `part` tables + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::tuple, std::unique_ptr, std::unique_ptr> +generate_orders_lineitem_part(double scale_factor, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Generate a table with the independent columns of the `orders` table + auto orders_independent = generate_orders_independent(scale_factor, stream, mr); + + // Generate 
the `lineitem` table partially + auto lineitem_partial = + generate_lineitem_partial(orders_independent->view(), scale_factor, stream, mr); + + // Generate the `part` table + auto part = generate_part(scale_factor, stream, mr); + + // Join the `part` and partial `lineitem` tables, then calculate the `l_extendedprice` column + auto l_extendedprice = [&]() { + auto const left = cudf::table_view( + {lineitem_partial->get_column(2).view(), lineitem_partial->get_column(5).view()}); + auto const right = cudf::table_view({part->get_column(0).view(), part->get_column(7).view()}); + auto const joined_table = perform_left_join(left, right, {0}, {0}, stream, mr); + auto joined_table_columns = joined_table->release(); + auto const l_quantity = std::move(joined_table_columns[1]); + auto const l_quantity_fp = + cudf::cast(l_quantity->view(), cudf::data_type{cudf::type_id::FLOAT64}); + auto const p_retailprice = std::move(joined_table_columns[3]); + auto const col = cudf::binary_operation(l_quantity_fp->view(), + p_retailprice->view(), + cudf::binary_operator::MUL, + cudf::data_type{cudf::type_id::FLOAT64}, + stream, + mr); + return cudf::round(col->view(), 2); + }(); + + // Insert the `l_extendedprice` column into the partial columns of the `lineitem` table + auto lineitem_partial_columns = lineitem_partial->release(); + lineitem_partial_columns.insert(lineitem_partial_columns.begin() + 6, std::move(l_extendedprice)); + auto lineitem_temp = std::make_unique(std::move(lineitem_partial_columns)); + + // Generate the dependent columns of the `orders` table + // and merge them with the independent columns + auto orders_dependent = generate_orders_dependent(lineitem_temp->view(), stream, mr); + + auto orders_independent_columns = orders_independent->release(); + auto orders_dependent_columns = orders_dependent->release(); + orders_independent_columns.insert(orders_independent_columns.begin() + 2, + std::make_move_iterator(orders_dependent_columns.begin()), + 
std::make_move_iterator(orders_dependent_columns.end())); + + // Create the `orders` table + auto orders = std::make_unique(std::move(orders_independent_columns)); + + // Create the `lineitem` table + auto lineitem_temp_columns = lineitem_temp->release(); + lineitem_temp_columns.erase(lineitem_temp_columns.begin()); + auto lineitem = std::make_unique(std::move(lineitem_temp_columns)); + + return std::make_tuple(std::move(orders), std::move(lineitem), std::move(part)); +} + +/** + * @brief Generate the `supplier` table + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_supplier(double scale_factor, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Calculate the number of rows based on the scale factor + cudf::size_type const num_rows = scale_factor * 10'000; + + // Generate the `s_suppkey` column + auto s_suppkey = + generate_primary_key_column(cudf::numeric_scalar(1), num_rows, stream, mr); + + // Generate the `s_name` column + auto s_name = [&]() { + auto const supplier_repeat = generate_repeat_string_column("Supplier#", num_rows, stream, mr); + auto const s_suppkey_str = cudf::strings::from_integers(s_suppkey->view(), stream, mr); + auto const s_suppkey_str_padded = cudf::strings::zfill(s_suppkey_str->view(), 9, stream, mr); + return cudf::strings::concatenate( + cudf::table_view({supplier_repeat->view(), s_suppkey_str_padded->view()}), + cudf::string_scalar(""), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO, + stream, + mr); + }(); + + // Generate the `s_address` column + auto s_address = generate_address_column(num_rows, stream, mr); + + // Generate the `s_nationkey` column + auto s_nationkey = generate_random_numeric_column(0, 24, num_rows, stream, mr); + + // Generate the 
`s_phone` column + auto s_phone = generate_phone_column(num_rows, stream, mr); + + // Generate the `s_acctbal` column + auto s_acctbal = [&]() { + auto const col = generate_random_numeric_column(-999.99, 9999.99, num_rows, stream, mr); + return cudf::round(col->view(), 2); + }(); + + // Generate the `s_comment` column + // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification + auto s_comment = generate_random_string_column(25, 100, num_rows, stream, mr); + + // Create the `supplier` table + std::vector> columns; + columns.push_back(std::move(s_suppkey)); + columns.push_back(std::move(s_name)); + columns.push_back(std::move(s_address)); + columns.push_back(std::move(s_nationkey)); + columns.push_back(std::move(s_phone)); + columns.push_back(std::move(s_acctbal)); + columns.push_back(std::move(s_comment)); + return std::make_unique(std::move(columns)); +} + +/** + * @brief Generate the `customer` table + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_customer(double scale_factor, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Calculate the number of rows based on the scale factor + cudf::size_type const num_rows = scale_factor * 150'000; + + // Generate the `c_custkey` column + auto c_custkey = + generate_primary_key_column(cudf::numeric_scalar(1), num_rows, stream, mr); + + // Generate the `c_name` column + auto c_name = [&]() { + auto const customer_repeat = generate_repeat_string_column("Customer#", num_rows, stream, mr); + auto const c_custkey_str = cudf::strings::from_integers(c_custkey->view(), stream, mr); + auto const c_custkey_str_padded = cudf::strings::zfill(c_custkey_str->view(), 9, stream, mr); + return cudf::strings::concatenate( + 
cudf::table_view({customer_repeat->view(), c_custkey_str_padded->view()}), + cudf::string_scalar(""), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO, + stream, + mr); + }(); + + // Generate the `c_address` column + auto c_address = generate_address_column(num_rows, stream, mr); + + // Generate the `c_nationkey` column + auto c_nationkey = generate_random_numeric_column(0, 24, num_rows, stream, mr); + + // Generate the `c_phone` column + auto c_phone = generate_phone_column(num_rows, stream, mr); + + // Generate the `c_acctbal` column + auto c_acctbal = [&]() { + auto const col = generate_random_numeric_column(-999.99, 9999.99, num_rows, stream, mr); + return cudf::round(col->view(), 2); + }(); + + // Generate the `c_mktsegment` column + auto c_mktsegment = generate_random_string_column_from_set( + cudf::host_span(vocab_segments.data(), vocab_segments.size()), + num_rows, + stream, + mr); + + // Generate the `c_comment` column + // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification + auto c_comment = generate_random_string_column(29, 116, num_rows, stream, mr); + + // Create the `customer` table + std::vector> columns; + columns.push_back(std::move(c_custkey)); + columns.push_back(std::move(c_name)); + columns.push_back(std::move(c_address)); + columns.push_back(std::move(c_nationkey)); + columns.push_back(std::move(c_phone)); + columns.push_back(std::move(c_acctbal)); + columns.push_back(std::move(c_mktsegment)); + columns.push_back(std::move(c_comment)); + return std::make_unique(std::move(columns)); +} + +/** + * @brief Generate the `nation` table + * + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_nation(rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Define the number of rows + constexpr 
cudf::size_type num_rows = 25; + + // Generate the `n_nationkey` column + auto n_nationkey = + generate_primary_key_column(cudf::numeric_scalar(0), num_rows, stream, mr); + + // Generate the `n_name` column + auto n_name = cudf::test::strings_column_wrapper(nations.begin(), nations.end()).release(); + + // Generate the `n_regionkey` column + std::vector region_keys{0, 1, 1, 1, 4, 0, 3, 3, 2, 2, 4, 4, 2, + 4, 0, 0, 0, 1, 2, 3, 4, 2, 3, 3, 1}; + auto n_regionkey = + cudf::test::fixed_width_column_wrapper(region_keys.begin(), region_keys.end()) + .release(); + + // Generate the `n_comment` column + // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification + auto n_comment = generate_random_string_column(31, 114, num_rows, stream, mr); + + // Create the `nation` table + std::vector> columns; + columns.push_back(std::move(n_nationkey)); + columns.push_back(std::move(n_name)); + columns.push_back(std::move(n_regionkey)); + columns.push_back(std::move(n_comment)); + return std::make_unique(std::move(columns)); +} + +/** + * @brief Generate the `region` table + * + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_region(rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Define the number of rows + constexpr cudf::size_type num_rows = 5; + + // Generate the `r_regionkey` column + auto r_regionkey = + generate_primary_key_column(cudf::numeric_scalar(0), num_rows, stream, mr); + + // Generate the `r_name` column + auto r_name = + cudf::test::strings_column_wrapper({"AFRICA", "AMERICA", "ASIA", "EUROPE", "MIDDLE EAST"}) + .release(); + + // Generate the `r_comment` column + // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification + auto r_comment = generate_random_string_column(31, 115, num_rows, stream, mr); + + // Create 
the `region` table + std::vector> columns; + columns.push_back(std::move(r_regionkey)); + columns.push_back(std::move(r_name)); + columns.push_back(std::move(r_comment)); + return std::make_unique(std::move(columns)); +} + +} // namespace cudf::datagen diff --git a/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.hpp b/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.hpp new file mode 100644 index 00000000000..6e09c1e5708 --- /dev/null +++ b/cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.hpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace CUDF_EXPORT cudf { +namespace datagen { + +/** + * @brief Generate the `orders`, `lineitem`, and `part` tables + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::tuple, std::unique_ptr, std::unique_ptr> +generate_orders_lineitem_part( + double scale_factor, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate the `partsupp` table + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_partsupp( + double scale_factor, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate the `supplier` table + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_supplier( + double scale_factor, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate the `customer` table + * + * @param scale_factor The scale factor to generate + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_customer( + double scale_factor, + rmm::cuda_stream_view stream = 
cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate the `nation` table + * + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_nation( + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate the `region` table + * + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_region( + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +} // namespace datagen +} // namespace CUDF_EXPORT cudf diff --git a/cpp/benchmarks/common/ndsh_data_generator/random_column_generator.cu b/cpp/benchmarks/common/ndsh_data_generator/random_column_generator.cu new file mode 100644 index 00000000000..4246bd1a83b --- /dev/null +++ b/cpp/benchmarks/common/ndsh_data_generator/random_column_generator.cu @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "random_column_generator.hpp" + +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include + +namespace cudf::datagen { + +namespace { + +// Functor for generating random strings +struct random_string_generator { + char* chars; + thrust::default_random_engine engine; + thrust::uniform_int_distribution char_dist; + + CUDF_HOST_DEVICE random_string_generator(char* c) : chars(c), char_dist(44, 122) {} + + __device__ void operator()(thrust::tuple str_begin_end) + { + auto begin = thrust::get<0>(str_begin_end); + auto end = thrust::get<1>(str_begin_end); + engine.discard(begin); + for (auto i = begin; i < end; ++i) { + auto ch = char_dist(engine); + if (i == end - 1 && ch >= '\x7F') ch = ' '; // last element ASCII only. + if (ch >= '\x7F') // x7F is at the top edge of ASCII + chars[i++] = '\xC4'; // these characters are assigned two bytes + chars[i] = static_cast(ch + (ch >= '\x7F')); + } + } +}; + +// Functor for generating random numbers +template +struct random_number_generator { + T lower; + T upper; + + CUDF_HOST_DEVICE random_number_generator(T lower, T upper) : lower(lower), upper(upper) {} + + __device__ T operator()(const int64_t idx) const + { + if constexpr (cudf::is_integral()) { + thrust::default_random_engine engine; + thrust::uniform_int_distribution dist(lower, upper); + engine.discard(idx); + return dist(engine); + } else { + thrust::default_random_engine engine; + thrust::uniform_real_distribution dist(lower, upper); + engine.discard(idx); + return dist(engine); + } + } +}; + +} // namespace + +std::unique_ptr generate_random_string_column(cudf::size_type lower, + cudf::size_type upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto offsets_begin = cudf::detail::make_counting_transform_iterator( + 0, random_number_generator(lower, upper)); + auto [offsets_column, computed_bytes] = 
cudf::strings::detail::make_offsets_child_column( + offsets_begin, offsets_begin + num_rows, stream, mr); + rmm::device_uvector chars(computed_bytes, stream); + + auto const offset_itr = + cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view()); + + // We generate the strings in parallel into the `chars` vector using the + // offsets vector generated above. + thrust::for_each_n(rmm::exec_policy(stream), + thrust::make_zip_iterator(offset_itr, offset_itr + 1), + num_rows, + random_string_generator(chars.data())); + + return cudf::make_strings_column( + num_rows, std::move(offsets_column), chars.release(), 0, rmm::device_buffer{}); +} + +template +std::unique_ptr generate_random_numeric_column(T lower, + T upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto col = cudf::make_numeric_column( + cudf::data_type{cudf::type_to_id()}, num_rows, cudf::mask_state::UNALLOCATED, stream, mr); + cudf::size_type begin = 0; + cudf::size_type end = num_rows; + thrust::transform(rmm::exec_policy(stream), + thrust::make_counting_iterator(begin), + thrust::make_counting_iterator(end), + col->mutable_view().begin(), + random_number_generator(lower, upper)); + return col; +} + +template std::unique_ptr generate_random_numeric_column( + int8_t lower, + int8_t upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template std::unique_ptr generate_random_numeric_column( + int16_t lower, + int16_t upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template std::unique_ptr generate_random_numeric_column( + cudf::size_type lower, + cudf::size_type upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template std::unique_ptr generate_random_numeric_column( + double lower, + double upper, + cudf::size_type num_rows, + 
rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +std::unique_ptr generate_primary_key_column(cudf::scalar const& start, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return cudf::sequence(num_rows, start, stream, mr); +} + +std::unique_ptr generate_repeat_string_column(std::string const& value, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const scalar = cudf::string_scalar(value); + return cudf::make_column_from_scalar(scalar, num_rows, stream, mr); +} + +std::unique_ptr generate_random_string_column_from_set( + cudf::host_span set, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Build a gather map of random strings to choose from + // The size of the string sets always fits within 16-bit integers + auto const indices = + generate_primary_key_column(cudf::numeric_scalar(0), set.size(), stream, mr); + auto const keys = cudf::test::strings_column_wrapper(set.begin(), set.end()).release(); + auto const gather_map = cudf::table_view({indices->view(), keys->view()}); + + // Build a column of random keys to gather from the set + auto const gather_keys = + generate_random_numeric_column(0, set.size() - 1, num_rows, stream, mr); + + // Perform the gather operation + auto const gathered_table = cudf::gather( + gather_map, gather_keys->view(), cudf::out_of_bounds_policy::DONT_CHECK, stream, mr); + auto gathered_table_columns = gathered_table->release(); + return std::move(gathered_table_columns[1]); +} + +template +std::unique_ptr generate_repeat_sequence_column(T seq_length, + bool zero_indexed, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto pkey = + generate_primary_key_column(cudf::numeric_scalar(0), num_rows, stream, mr); + auto 
repeat_seq_zero_indexed = cudf::binary_operation(pkey->view(), + cudf::numeric_scalar(seq_length), + cudf::binary_operator::MOD, + cudf::data_type{cudf::type_to_id()}, + stream, + mr); + if (zero_indexed) { return repeat_seq_zero_indexed; } + return cudf::binary_operation(repeat_seq_zero_indexed->view(), + cudf::numeric_scalar(1), + cudf::binary_operator::ADD, + cudf::data_type{cudf::type_to_id()}, + stream, + mr); +} + +template std::unique_ptr generate_repeat_sequence_column( + int8_t seq_length, + bool zero_indexed, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +template std::unique_ptr generate_repeat_sequence_column( + cudf::size_type seq_length, + bool zero_indexed, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +} // namespace cudf::datagen diff --git a/cpp/benchmarks/common/ndsh_data_generator/random_column_generator.hpp b/cpp/benchmarks/common/ndsh_data_generator/random_column_generator.hpp new file mode 100644 index 00000000000..0bf1eee4e85 --- /dev/null +++ b/cpp/benchmarks/common/ndsh_data_generator/random_column_generator.hpp @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +#include + +namespace cudf::datagen { + +/** + * @brief Generate a column of random strings + * + * @param lower The lower bound of the length of the strings + * @param upper The upper bound of the length of the strings + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_random_string_column( + cudf::size_type lower, + cudf::size_type upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate a column of random numbers + * + * Example: + * + * lower = 10 + * upper = 15 + * num_rows = 10 + * result = [10, 11, 14, 14, 13, 12, 11, 11, 12, 14] + + * + * @param lower The lower bound of the random numbers + * @param upper The upper bound of the random numbers + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +template +std::unique_ptr generate_random_numeric_column( + T lower, + T upper, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate a primary key column + * + * Example: + * + * start = 1 + * num_rows = 10 + * result = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + * + * @param start The starting value of the primary key + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ 
+std::unique_ptr generate_primary_key_column( + cudf::scalar const& start, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate a column where all the rows have the same string value + * + * Example: + * + * value = "abc" + * num_rows = 5 + * result = ["abc", "abc", "abc", "abc", "abc"] + * + * @param value The string value to fill the column with + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_repeat_string_column( + std::string const& value, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate a column by randomly choosing from set of strings + * + * Example: + * + * set = {"s1", "s2", "s3"} + * num_rows = 10 + * result = ["s1", "s2", "s2", "s1", "s3", "s3", "s3", "s2", "s1", "s1"] + * + * @param set The set of strings to choose from + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr generate_random_string_column_from_set( + cudf::host_span set, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate a column consisting of a repeating sequence of integers + * + * Example: + * + * seq_length = 3 + * zero_indexed = false + * num_rows = 10 + * result = [1, 2, 3, 1, 2, 3, 1, 2, 3, 1] + * + * @param seq_length The length of the repeating sequence + * 
@param zero_indexed Whether the sequence is zero or one indexed + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +template +std::unique_ptr generate_repeat_sequence_column( + T seq_length, + bool zero_indexed, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +} // namespace cudf::datagen diff --git a/cpp/benchmarks/common/ndsh_data_generator/table_helpers.cpp b/cpp/benchmarks/common/ndsh_data_generator/table_helpers.cpp new file mode 100644 index 00000000000..54d177df401 --- /dev/null +++ b/cpp/benchmarks/common/ndsh_data_generator/table_helpers.cpp @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "table_helpers.hpp" + +#include "random_column_generator.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +namespace cudf::datagen { + +/** + * @brief Add a column of days to a column of timestamp_days + * + * @param timestamp_days The column of timestamp_days + * @param days The column of days to add + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr add_calendrical_days(cudf::column_view const& timestamp_days, + cudf::column_view const& days, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const days_duration_type = cudf::cast(days, cudf::data_type{cudf::type_id::DURATION_DAYS}); + auto const data_type = cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}; + return cudf::binary_operation( + timestamp_days, days_duration_type->view(), cudf::binary_operator::ADD, data_type, stream, mr); +} + +/** + * @brief Perform a left join operation between two tables + * + * @param left_input The left table + * @param right_input The right table + * @param left_on The indices of the columns to join on in the left table + * @param right_on The indices of the columns to join on in the right table + * @param compare_nulls The null equality comparison + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned table's device memory + */ +std::unique_ptr perform_left_join(cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + constexpr auto oob_policy = 
cudf::out_of_bounds_policy::NULLIFY; + auto const left_selected = left_input.select(left_on); + auto const right_selected = right_input.select(right_on); + auto const [left_join_indices, right_join_indices] = + cudf::left_join(left_selected, right_selected, cudf::null_equality::EQUAL, stream, mr); + + auto const left_indices_span = cudf::device_span{*left_join_indices}; + auto const right_indices_span = cudf::device_span{*right_join_indices}; + + auto const left_indices_col = cudf::column_view{left_indices_span}; + auto const right_indices_col = cudf::column_view{right_indices_span}; + + auto const left_result = cudf::gather(left_input, left_indices_col, oob_policy, stream, mr); + auto const right_result = cudf::gather(right_input, right_indices_col, oob_policy, stream, mr); + + auto joined_cols = left_result->release(); + auto right_cols = right_result->release(); + joined_cols.insert(joined_cols.end(), + std::make_move_iterator(right_cols.begin()), + std::make_move_iterator(right_cols.end())); + return std::make_unique(std::move(joined_cols)); +} + +/** + * @brief Generate the `p_retailprice` column of the `part` table + * + * @param p_partkey The `p_partkey` column of the `part` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_p_retailprice( + cudf::column_view const& p_partkey, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Expression: (90000 + ((p_partkey/10) modulo 20001) + 100 * (p_partkey modulo 1000)) / 100 + auto table = cudf::table_view({p_partkey}); + auto p_partkey_col_ref = cudf::ast::column_reference(0); + + auto scalar_10 = cudf::numeric_scalar(10); + auto scalar_100 = cudf::numeric_scalar(100); + auto scalar_1000 = cudf::numeric_scalar(1000); + auto scalar_20001 = cudf::numeric_scalar(20001); + auto scalar_90000 = 
cudf::numeric_scalar(90000); + + auto literal_10 = cudf::ast::literal(scalar_10); + auto literal_100 = cudf::ast::literal(scalar_100); + auto literal_1000 = cudf::ast::literal(scalar_1000); + auto literal_20001 = cudf::ast::literal(scalar_20001); + auto literal_90000 = cudf::ast::literal(scalar_90000); + + auto expr_a = cudf::ast::operation(cudf::ast::ast_operator::DIV, p_partkey_col_ref, literal_10); + auto expr_b = cudf::ast::operation(cudf::ast::ast_operator::MOD, expr_a, literal_20001); + auto expr_c = cudf::ast::operation(cudf::ast::ast_operator::MOD, p_partkey_col_ref, literal_1000); + auto expr_d = cudf::ast::operation(cudf::ast::ast_operator::MUL, expr_c, literal_100); + auto expr_e = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_b, expr_d); + auto expr_f = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_e, literal_90000); + auto final_expr = cudf::ast::operation(cudf::ast::ast_operator::TRUE_DIV, expr_f, literal_100); + + // Execute the AST expression + return cudf::compute_column(table, final_expr, stream, mr); +} + +/** + * @brief Generate the `l_suppkey` column of the `lineitem` table + * + * @param l_partkey The `l_partkey` column of the `lineitem` table + * @param scale_factor The scale factor to use + * @param num_rows The number of rows in the `lineitem` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_l_suppkey(cudf::column_view const& l_partkey, + cudf::size_type scale_factor, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Expression: (l_partkey + (i * (s/4 + (int)(l_partkey - 1)/s))) % s + 1 + + // Generate the `s` col + auto s_empty = cudf::make_numeric_column( + cudf::data_type{cudf::type_id::INT32}, num_rows, cudf::mask_state::UNALLOCATED, stream); + + auto s = 
cudf::fill(s_empty->view(), + 0, + num_rows, + cudf::numeric_scalar(scale_factor * 10'000), + stream, + mr); + + // Generate the `i` col + auto i = generate_repeat_sequence_column(4, true, num_rows, stream, mr); + + // Create a table view out of `l_partkey`, `s`, and `i` + auto table = cudf::table_view({l_partkey, s->view(), i->view()}); + + // Create the AST expression + auto scalar_1 = cudf::numeric_scalar(1); + auto scalar_4 = cudf::numeric_scalar(4); + auto literal_1 = cudf::ast::literal(scalar_1); + auto literal_4 = cudf::ast::literal(scalar_4); + + auto l_partkey_col_ref = cudf::ast::column_reference(0); + auto s_col_ref = cudf::ast::column_reference(1); + auto i_col_ref = cudf::ast::column_reference(2); + + // (int)(l_partkey - 1)/s + auto expr_a = cudf::ast::operation(cudf::ast::ast_operator::SUB, l_partkey_col_ref, literal_1); + auto expr_b = cudf::ast::operation(cudf::ast::ast_operator::DIV, expr_a, s_col_ref); + + // s/4 + auto expr_c = cudf::ast::operation(cudf::ast::ast_operator::DIV, s_col_ref, literal_4); + + // (s/4 + (int)(l_partkey - 1)/s) + auto expr_d = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_c, expr_b); + + // (i * (s/4 + (int)(l_partkey - 1)/s)) + auto expr_e = cudf::ast::operation(cudf::ast::ast_operator::MUL, i_col_ref, expr_d); + + // (l_partkey + (i * (s/4 + (int)(l_partkey - 1)/s))) + auto expr_f = cudf::ast::operation(cudf::ast::ast_operator::ADD, l_partkey_col_ref, expr_e); + + // (l_partkey + (i * (s/4 + (int)(l_partkey - 1)/s))) % s + auto expr_g = cudf::ast::operation(cudf::ast::ast_operator::MOD, expr_f, s_col_ref); + + // (l_partkey + (i * (s/4 + (int)(l_partkey - 1)/s))) % s + 1 + auto final_expr = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_g, literal_1); + + // Execute the AST expression + return cudf::compute_column(table, final_expr, stream, mr); +} + +/** + * @brief Generate the `ps_suppkey` column of the `partsupp` table + * + * @param ps_partkey The `ps_partkey` column of the `partsupp` table + * 
@param scale_factor The scale factor to use + * @param num_rows The number of rows in the `partsupp` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_ps_suppkey( + cudf::column_view const& ps_partkey, + cudf::size_type scale_factor, + cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + // Expression: ps_suppkey = (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) % s + 1 + + // Generate the `s` col + auto s_empty = cudf::make_numeric_column( + cudf::data_type{cudf::type_id::INT32}, num_rows, cudf::mask_state::UNALLOCATED, stream); + + auto s = cudf::fill(s_empty->view(), + 0, + num_rows, + cudf::numeric_scalar(scale_factor * 10'000), + stream, + mr); + + // Generate the `i` col + auto i = generate_repeat_sequence_column(4, true, num_rows, stream, mr); + + // Create a table view out of `p_partkey`, `s`, and `i` + auto table = cudf::table_view({ps_partkey, s->view(), i->view()}); + + // Create the AST expression + auto scalar_1 = cudf::numeric_scalar(1); + auto scalar_4 = cudf::numeric_scalar(4); + auto literal_1 = cudf::ast::literal(scalar_1); + auto literal_4 = cudf::ast::literal(scalar_4); + + auto ps_partkey_col_ref = cudf::ast::column_reference(0); + auto s_col_ref = cudf::ast::column_reference(1); + auto i_col_ref = cudf::ast::column_reference(2); + + // (int)(ps_partkey - 1)/s + auto expr_a = cudf::ast::operation(cudf::ast::ast_operator::SUB, ps_partkey_col_ref, literal_1); + auto expr_b = cudf::ast::operation(cudf::ast::ast_operator::DIV, expr_a, s_col_ref); + + // s/4 + auto expr_c = cudf::ast::operation(cudf::ast::ast_operator::DIV, s_col_ref, literal_4); + + // (s/4 + (int)(ps_partkey - 1)/s) + auto expr_d = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_c, expr_b); + + // (i * (s/4 + (int)(ps_partkey - 
1)/s)) + auto expr_e = cudf::ast::operation(cudf::ast::ast_operator::MUL, i_col_ref, expr_d); + + // (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) + auto expr_f = cudf::ast::operation(cudf::ast::ast_operator::ADD, ps_partkey_col_ref, expr_e); + + // (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) % s + auto expr_g = cudf::ast::operation(cudf::ast::ast_operator::MOD, expr_f, s_col_ref); + + // (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) % s + 1 + auto final_expr = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_g, literal_1); + + // Execute the AST expression + return cudf::compute_column(table, final_expr, stream, mr); +} + +/** + * @brief Calculate the cardinality of the `lineitem` table + * + * @param o_rep_freqs The frequency of each `o_orderkey` value in the `lineitem` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] cudf::size_type calculate_l_cardinality(cudf::column_view const& o_rep_freqs, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const sum_agg = cudf::make_sum_aggregation(); + auto const l_num_rows_scalar = + cudf::reduce(o_rep_freqs, *sum_agg, cudf::data_type{cudf::type_id::INT32}, stream, mr); + return reinterpret_cast*>(l_num_rows_scalar.get()) + ->value(stream); +} + +/** + * @brief Calculate the charge column for the `lineitem` table + * + * @param extendedprice The `l_extendedprice` column + * @param tax The `l_tax` column + * @param discount The `l_discount` column + * @param stream The CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_charge(cudf::column_view const& extendedprice, + cudf::column_view const& tax, + cudf::column_view const& discount, + 
rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const one = cudf::numeric_scalar(1); + auto const one_minus_discount = cudf::binary_operation( + one, discount, cudf::binary_operator::SUB, cudf::data_type{cudf::type_id::FLOAT64}, stream, mr); + auto disc_price = cudf::binary_operation(extendedprice, + one_minus_discount->view(), + cudf::binary_operator::MUL, + cudf::data_type{cudf::type_id::FLOAT64}, + stream, + mr); + auto const one_plus_tax = + cudf::binary_operation(one, tax, cudf::binary_operator::ADD, tax.type(), stream, mr); + return cudf::binary_operation(disc_price->view(), + one_plus_tax->view(), + cudf::binary_operator::MUL, + cudf::data_type{cudf::type_id::FLOAT64}, + stream, + mr); +} + +/** + * @brief Generate a column of random addresses according to TPC-H specification clause 4.2.2.7 + * + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr generate_address_column( + cudf::size_type num_rows, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return generate_random_string_column(10, 40, num_rows, stream, mr); +} + +/** + * @brief Generate a phone number column according to TPC-H specification clause 4.2.2.9 + * + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr generate_phone_column(cudf::size_type num_rows, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + auto const part_a = cudf::strings::from_integers( + generate_random_numeric_column(10, 34, num_rows, stream, mr)->view()); + auto const part_b = 
cudf::strings::from_integers( + generate_random_numeric_column(100, 999, num_rows, stream, mr)->view()); + auto const part_c = cudf::strings::from_integers( + generate_random_numeric_column(100, 999, num_rows, stream, mr)->view()); + auto const part_d = cudf::strings::from_integers( + generate_random_numeric_column(1000, 9999, num_rows, stream, mr)->view()); + auto const phone_parts_table = + cudf::table_view({part_a->view(), part_b->view(), part_c->view(), part_d->view()}); + return cudf::strings::concatenate(phone_parts_table, + cudf::string_scalar("-"), + cudf::string_scalar("", false), + cudf::strings::separator_on_nulls::NO, + stream, + mr); +} + +} // namespace cudf::datagen diff --git a/cpp/benchmarks/common/ndsh_data_generator/table_helpers.hpp b/cpp/benchmarks/common/ndsh_data_generator/table_helpers.hpp new file mode 100644 index 00000000000..7d862afe755 --- /dev/null +++ b/cpp/benchmarks/common/ndsh_data_generator/table_helpers.hpp @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace cudf::datagen { + +/** + * @brief Add a column of days to a column of timestamp_days + * + * @param timestamp_days The column of timestamp_days + * @param days The column of days to add + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr add_calendrical_days( + cudf::column_view const& timestamp_days, + cudf::column_view const& days, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Perform a left join operation between two tables + * + * @param left_input The left table + * @param right_input The right table + * @param left_on The indices of the columns to join on in the left table + * @param right_on The indices of the columns to join on in the right table + * @param compare_nulls The null equality comparison + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned table's device memory + */ +std::unique_ptr perform_left_join( + cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate the `p_retailprice` column of the `part` table + * + * @param p_partkey The `p_partkey` column of the `part` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_p_retailprice( + cudf::column_view const& p_partkey, + 
rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate the `l_suppkey` column of the `lineitem` table + * + * @param l_partkey The `l_partkey` column of the `lineitem` table + * @param scale_factor The scale factor to use + * @param num_rows The number of rows in the `lineitem` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_l_suppkey( + cudf::column_view const& l_partkey, + cudf::size_type scale_factor, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate the `ps_suppkey` column of the `partsupp` table + * + * @param ps_partkey The `ps_partkey` column of the `partsupp` table + * @param scale_factor The scale factor to use + * @param num_rows The number of rows in the `partsupp` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_ps_suppkey( + cudf::column_view const& ps_partkey, + cudf::size_type scale_factor, + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +/** + * @brief Calculate the cardinality of the `lineitem` table + * + * @param o_rep_freqs The frequency of each `o_orderkey` value in the `lineitem` table + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] cudf::size_type calculate_l_cardinality( + 
cudf::column_view const& o_rep_freqs, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +/** + * @brief Calculate the charge column for the `lineitem` table + * + * @param extendedprice The `l_extendedprice` column + * @param tax The `l_tax` column + * @param discount The `l_discount` column + * @param stream The CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr calculate_charge( + cudf::column_view const& extendedprice, + cudf::column_view const& tax, + cudf::column_view const& discount, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate a column of random addresses according to TPC-H specification clause 4.2.2.7 + * + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr generate_address_column( + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Generate a phone number column according to TPC-H specification clause 4.2.2.9 + * + * @param num_rows The number of rows in the column + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +[[nodiscard]] std::unique_ptr generate_phone_column( + cudf::size_type num_rows, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +} // 
namespace cudf::datagen diff --git a/cpp/benchmarks/copying/contiguous_split.cu b/cpp/benchmarks/copying/contiguous_split.cu index 910fc689c0b..161f67425c1 100644 --- a/cpp/benchmarks/copying/contiguous_split.cu +++ b/cpp/benchmarks/copying/contiguous_split.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include +#include #include @@ -32,7 +33,7 @@ void contiguous_split(cudf::table_view const& src_table, std::vector const&) { - auto const mr = rmm::mr::get_current_device_resource(); + auto const mr = cudf::get_current_device_resource_ref(); auto const stream = cudf::get_default_stream(); auto user_buffer = rmm::device_uvector(100L * 1024 * 1024, stream, mr); auto chunked_pack = cudf::chunked_pack::create(src_table, user_buffer.size(), mr); diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu index efc385cf10b..8f8e17ad4d0 100644 --- a/cpp/benchmarks/copying/shift.cu +++ b/cpp/benchmarks/copying/shift.cu @@ -20,14 +20,13 @@ #include #include #include - -#include +#include template > std::unique_ptr make_scalar( T value = 0, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto s = new ScalarType(value, true, stream, mr); return std::unique_ptr(s); diff --git a/cpp/benchmarks/fixture/benchmark_fixture.hpp b/cpp/benchmarks/fixture/benchmark_fixture.hpp index 8c8d6756b00..2f697ab0459 100644 --- a/cpp/benchmarks/fixture/benchmark_fixture.hpp +++ b/cpp/benchmarks/fixture/benchmark_fixture.hpp @@ -16,10 +16,11 @@ #pragma once +#include + #include #include #include -#include #include #include @@ -83,13 +84,13 @@ class benchmark : public 
::benchmark::Fixture { void SetUp(::benchmark::State const& state) override { mr = make_pool_instance(); - rmm::mr::set_current_device_resource(mr.get()); // set default resource to pool + cudf::set_current_device_resource(mr.get()); // set default resource to pool } void TearDown(::benchmark::State const& state) override { // reset default resource to the initial resource - rmm::mr::set_current_device_resource(nullptr); + cudf::set_current_device_resource(nullptr); mr.reset(); } @@ -106,13 +107,13 @@ class benchmark : public ::benchmark::Fixture { class memory_stats_logger { public: memory_stats_logger() - : existing_mr(rmm::mr::get_current_device_resource()), - statistics_mr(rmm::mr::make_statistics_adaptor(existing_mr)) + : existing_mr(cudf::get_current_device_resource()), + statistics_mr(rmm::mr::statistics_resource_adaptor(existing_mr)) { - rmm::mr::set_current_device_resource(&statistics_mr); + cudf::set_current_device_resource(&statistics_mr); } - ~memory_stats_logger() { rmm::mr::set_current_device_resource(existing_mr); } + ~memory_stats_logger() { cudf::set_current_device_resource(existing_mr); } [[nodiscard]] size_t peak_memory_usage() const noexcept { diff --git a/cpp/benchmarks/fixture/nvbench_fixture.hpp b/cpp/benchmarks/fixture/nvbench_fixture.hpp index df1492690bb..63f09285a26 100644 --- a/cpp/benchmarks/fixture/nvbench_fixture.hpp +++ b/cpp/benchmarks/fixture/nvbench_fixture.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -24,10 +25,8 @@ #include #include #include -#include #include #include -#include #include @@ -110,7 +109,7 @@ struct nvbench_base_fixture { } mr = create_memory_resource(rmm_mode); - rmm::mr::set_current_device_resource(mr.get()); + cudf::set_current_device_resource(mr.get()); std::cout << "RMM memory resource = " << rmm_mode << "\n"; cudf::set_pinned_memory_resource(create_cuio_host_memory_resource(cuio_host_mode)); diff --git a/cpp/benchmarks/groupby/group_max.cpp b/cpp/benchmarks/groupby/group_max.cpp 
index f41285008c4..b9a701a71f4 100644 --- a/cpp/benchmarks/groupby/group_max.cpp +++ b/cpp/benchmarks/groupby/group_max.cpp @@ -101,4 +101,5 @@ NVBENCH_BENCH_TYPES(bench_groupby_max, NVBENCH_BENCH_TYPES(bench_groupby_max_cardinality, NVBENCH_TYPE_AXES(nvbench::type_list)) .set_name("groupby_max_cardinality") + .add_int64_axis("num_aggregations", {1}) .add_int64_axis("cardinality", {10, 20, 50, 100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000}); diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp index 61e79a47a50..e4ff0c8c4a7 100644 --- a/cpp/benchmarks/hashing/hash.cpp +++ b/cpp/benchmarks/hashing/hash.cpp @@ -50,7 +50,7 @@ static void bench_hash(nvbench::state& state) state.add_global_memory_reads(num_rows); // add memory read from bitmaks if (!no_nulls) { - state.add_global_memory_reads(2 * + state.add_global_memory_reads(2L * cudf::bitmask_allocation_size_bytes(num_rows)); } // memory written depends on used hash @@ -63,37 +63,37 @@ static void bench_hash(nvbench::state& state) }); } else if (hash_name == "md5") { // md5 creates a 32-byte string - state.add_global_memory_writes(32 * num_rows); + state.add_global_memory_writes(32L * num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::md5(data->view()); }); } else if (hash_name == "sha1") { // sha1 creates a 40-byte string - state.add_global_memory_writes(40 * num_rows); + state.add_global_memory_writes(40L * num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::sha1(data->view()); }); } else if (hash_name == "sha224") { // sha224 creates a 56-byte string - state.add_global_memory_writes(56 * num_rows); + state.add_global_memory_writes(56L * num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::sha224(data->view()); }); } else if (hash_name == "sha256") { // sha256 creates a 64-byte string - state.add_global_memory_writes(64 * 
num_rows); + state.add_global_memory_writes(64L * num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::sha256(data->view()); }); } else if (hash_name == "sha384") { // sha384 creates a 96-byte string - state.add_global_memory_writes(96 * num_rows); + state.add_global_memory_writes(96L * num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::sha384(data->view()); }); } else if (hash_name == "sha512") { // sha512 creates a 128-byte string - state.add_global_memory_writes(128 * num_rows); + state.add_global_memory_writes(128L * num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { auto result = cudf::hashing::sha512(data->view()); }); diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp index 645994f3f0d..fe24fb58728 100644 --- a/cpp/benchmarks/io/cuio_common.cpp +++ b/cpp/benchmarks/io/cuio_common.cpp @@ -18,9 +18,9 @@ #include #include +#include #include -#include #include @@ -34,7 +34,7 @@ temp_directory const cuio_source_sink_pair::tmpdir{"cudf_gbench"}; // Don't use cudf's pinned pool for the source data rmm::host_async_resource_ref pinned_memory_resource() { - static rmm::mr::pinned_host_memory_resource mr = rmm::mr::pinned_host_memory_resource{}; + static auto mr = rmm::mr::pinned_host_memory_resource{}; return mr; } diff --git a/cpp/benchmarks/io/json/nested_json.cpp b/cpp/benchmarks/io/json/nested_json.cpp index 9fd8de172a3..ae3528b783c 100644 --- a/cpp/benchmarks/io/json/nested_json.cpp +++ b/cpp/benchmarks/io/json/nested_json.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -170,7 +171,7 @@ void BM_NESTED_JSON(nvbench::state& state) cudf::device_span{input->data(), static_cast(input->size())}, default_options, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); }); auto const time = 
state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); @@ -201,7 +202,7 @@ void BM_NESTED_JSON_DEPTH(nvbench::state& state) state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { // Allocate device-side temporary storage & run algorithm cudf::io::json::detail::device_parse_nested_json( - input, default_options, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + input, default_options, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); }); auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); diff --git a/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp b/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp index e91bf06fdfa..6f20b4bd457 100644 --- a/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp +++ b/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -109,7 +110,7 @@ void BM_orc_multithreaded_read_common(nvbench::state& state, auto const stream = streams[index % num_threads]; cudf::io::orc_reader_options read_opts = cudf::io::orc_reader_options::builder(source_info_vector[index]); - cudf::io::read_orc(read_opts, stream, rmm::mr::get_current_device_resource()); + cudf::io::read_orc(read_opts, stream, cudf::get_current_device_resource_ref()); }; threads.pause(); diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp index 7563c823454..ce115fd7723 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp @@ -32,7 +32,8 @@ constexpr cudf::size_type num_cols = 64; void parquet_read_common(cudf::size_type num_rows_to_read, cudf::size_type num_cols_to_read, cuio_source_sink_pair& source_sink, - nvbench::state& state) + nvbench::state& state, + size_t table_data_size = data_size) { cudf::io::parquet_reader_options read_opts = 
cudf::io::parquet_reader_options::builder(source_sink.make_source_info()); @@ -52,7 +53,7 @@ void parquet_read_common(cudf::size_type num_rows_to_read, }); auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); - state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_element_count(static_cast(table_data_size) / time, "bytes_per_second"); state.add_buffer_size( mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); @@ -231,6 +232,70 @@ void BM_parquet_read_chunks(nvbench::state& state, nvbench::type_list +void BM_parquet_read_wide_tables(nvbench::state& state, + nvbench::type_list> type_list) +{ + auto const d_type = get_type_or_group(static_cast(DataType)); + + auto const n_col = static_cast(state.get_int64("num_cols")); + auto const data_size_bytes = static_cast(state.get_int64("data_size_mb") << 20); + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const run_length = static_cast(state.get_int64("run_length")); + auto const source_type = io_type::DEVICE_BUFFER; + cuio_source_sink_pair source_sink(source_type); + + auto const num_rows_written = [&]() { + auto const tbl = create_random_table( + cycle_dtypes(d_type, n_col), + table_size_bytes{data_size_bytes}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(write_opts); + return view.num_rows(); + }(); + + parquet_read_common(num_rows_written, n_col, source_sink, state, data_size_bytes); +} + +void BM_parquet_read_wide_tables_mixed(nvbench::state& state) +{ + auto const d_type = []() { + auto d_type1 = get_type_or_group(static_cast(data_type::INTEGRAL)); 
+ auto d_type2 = get_type_or_group(static_cast(data_type::FLOAT)); + d_type1.reserve(d_type1.size() + d_type2.size()); + std::move(d_type2.begin(), d_type2.end(), std::back_inserter(d_type1)); + return d_type1; + }(); + + auto const n_col = static_cast(state.get_int64("num_cols")); + auto const data_size_bytes = static_cast(state.get_int64("data_size_mb") << 20); + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const run_length = static_cast(state.get_int64("run_length")); + auto const source_type = io_type::DEVICE_BUFFER; + cuio_source_sink_pair source_sink(source_type); + + auto const num_rows_written = [&]() { + auto const tbl = create_random_table( + cycle_dtypes(d_type, n_col), + table_size_bytes{data_size_bytes}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) + .compression(cudf::io::compression_type::NONE); + cudf::io::write_parquet(write_opts); + return view.num_rows(); + }(); + + parquet_read_common(num_rows_written, n_col, source_sink, state, data_size_bytes); +} + using d_type_list = nvbench::enum_type_list; +NVBENCH_BENCH_TYPES(BM_parquet_read_wide_tables, NVBENCH_TYPE_AXES(d_type_list_wide_table)) + .set_name("parquet_read_wide_tables") + .set_min_samples(4) + .set_type_axes_names({"data_type"}) + .add_int64_axis("data_size_mb", {1024, 2048, 4096}) + .add_int64_axis("num_cols", {256, 512, 1024}) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); + +NVBENCH_BENCH(BM_parquet_read_wide_tables_mixed) + .set_name("parquet_read_wide_tables_mixed") + .set_min_samples(4) + .add_int64_axis("data_size_mb", {1024, 2048, 4096}) + .add_int64_axis("num_cols", {256, 512, 1024}) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); + // a benchmark for structs that only contain 
fixed-width types using d_type_list_struct_only = nvbench::enum_type_list; NVBENCH_BENCH_TYPES(BM_parquet_read_fixed_width_struct, NVBENCH_TYPE_AXES(d_type_list_struct_only)) diff --git a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp index 9e76ebb71ab..7121cb9f034 100644 --- a/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp +++ b/cpp/benchmarks/io/parquet/parquet_reader_multithread.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,7 @@ std::string get_label(std::string const& test_name, nvbench::state const& state) } std::tuple, size_t, size_t> write_file_data( - nvbench::state& state, std::vector const& d_types) + nvbench::state& state, std::vector const& d_types, io_type io_source_type) { cudf::size_type const cardinality = state.get_int64("cardinality"); cudf::size_type const run_length = state.get_int64("run_length"); @@ -62,7 +63,7 @@ std::tuple, size_t, size_t> write_file_data( size_t total_file_size = 0; for (size_t i = 0; i < num_files; ++i) { - cuio_source_sink_pair source_sink{io_type::HOST_BUFFER}; + cuio_source_sink_pair source_sink{io_source_type}; auto const tbl = create_random_table( cycle_dtypes(d_types, num_cols), @@ -91,11 +92,13 @@ void BM_parquet_multithreaded_read_common(nvbench::state& state, { size_t const data_size = state.get_int64("total_data_size"); auto const num_threads = state.get_int64("num_threads"); + auto const source_type = retrieve_io_type_enum(state.get_string("io_type")); auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads); BS::thread_pool threads(num_threads); - auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types); + auto [source_sink_vector, total_file_size, num_files] = + write_file_data(state, d_types, source_type); std::vector source_info_vector; std::transform(source_sink_vector.begin(), source_sink_vector.end(), @@ -111,7 +114,7 @@ void 
BM_parquet_multithreaded_read_common(nvbench::state& state, auto const stream = streams[index % num_threads]; cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder(source_info_vector[index]); - cudf::io::read_parquet(read_opts, stream, rmm::mr::get_current_device_resource()); + cudf::io::read_parquet(read_opts, stream, cudf::get_current_device_resource_ref()); }; threads.pause(); @@ -172,10 +175,12 @@ void BM_parquet_multithreaded_read_chunked_common(nvbench::state& state, auto const num_threads = state.get_int64("num_threads"); size_t const input_limit = state.get_int64("input_limit"); size_t const output_limit = state.get_int64("output_limit"); + auto const source_type = retrieve_io_type_enum(state.get_string("io_type")); auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads); BS::thread_pool threads(num_threads); - auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types); + auto [source_sink_vector, total_file_size, num_files] = + write_file_data(state, d_types, source_type); std::vector source_info_vector; std::transform(source_sink_vector.begin(), source_sink_vector.end(), @@ -263,7 +268,8 @@ NVBENCH_BENCH(BM_parquet_multithreaded_read_mixed) .add_int64_axis("total_data_size", {512 * 1024 * 1024, 1024 * 1024 * 1024}) .add_int64_axis("num_threads", {1, 2, 4, 8}) .add_int64_axis("num_cols", {4}) - .add_int64_axis("run_length", {8}); + .add_int64_axis("run_length", {8}) + .add_string_axis("io_type", {"PINNED_BUFFER"}); NVBENCH_BENCH(BM_parquet_multithreaded_read_fixed_width) .set_name("parquet_multithreaded_read_decode_fixed_width") @@ -272,7 +278,8 @@ NVBENCH_BENCH(BM_parquet_multithreaded_read_fixed_width) .add_int64_axis("total_data_size", {512 * 1024 * 1024, 1024 * 1024 * 1024}) .add_int64_axis("num_threads", {1, 2, 4, 8}) .add_int64_axis("num_cols", {4}) - .add_int64_axis("run_length", {8}); + .add_int64_axis("run_length", {8}) + .add_string_axis("io_type", 
{"PINNED_BUFFER"}); NVBENCH_BENCH(BM_parquet_multithreaded_read_string) .set_name("parquet_multithreaded_read_decode_string") @@ -281,7 +288,8 @@ NVBENCH_BENCH(BM_parquet_multithreaded_read_string) .add_int64_axis("total_data_size", {512 * 1024 * 1024, 1024 * 1024 * 1024}) .add_int64_axis("num_threads", {1, 2, 4, 8}) .add_int64_axis("num_cols", {4}) - .add_int64_axis("run_length", {8}); + .add_int64_axis("run_length", {8}) + .add_string_axis("io_type", {"PINNED_BUFFER"}); NVBENCH_BENCH(BM_parquet_multithreaded_read_list) .set_name("parquet_multithreaded_read_decode_list") @@ -290,7 +298,8 @@ NVBENCH_BENCH(BM_parquet_multithreaded_read_list) .add_int64_axis("total_data_size", {512 * 1024 * 1024, 1024 * 1024 * 1024}) .add_int64_axis("num_threads", {1, 2, 4, 8}) .add_int64_axis("num_cols", {4}) - .add_int64_axis("run_length", {8}); + .add_int64_axis("run_length", {8}) + .add_string_axis("io_type", {"PINNED_BUFFER"}); // mixed data types: fixed width, strings NVBENCH_BENCH(BM_parquet_multithreaded_read_chunked_mixed) @@ -302,7 +311,8 @@ NVBENCH_BENCH(BM_parquet_multithreaded_read_chunked_mixed) .add_int64_axis("num_cols", {4}) .add_int64_axis("run_length", {8}) .add_int64_axis("input_limit", {640 * 1024 * 1024}) - .add_int64_axis("output_limit", {640 * 1024 * 1024}); + .add_int64_axis("output_limit", {640 * 1024 * 1024}) + .add_string_axis("io_type", {"PINNED_BUFFER"}); NVBENCH_BENCH(BM_parquet_multithreaded_read_chunked_fixed_width) .set_name("parquet_multithreaded_read_decode_chunked_fixed_width") @@ -313,7 +323,8 @@ NVBENCH_BENCH(BM_parquet_multithreaded_read_chunked_fixed_width) .add_int64_axis("num_cols", {4}) .add_int64_axis("run_length", {8}) .add_int64_axis("input_limit", {640 * 1024 * 1024}) - .add_int64_axis("output_limit", {640 * 1024 * 1024}); + .add_int64_axis("output_limit", {640 * 1024 * 1024}) + .add_string_axis("io_type", {"PINNED_BUFFER"}); NVBENCH_BENCH(BM_parquet_multithreaded_read_chunked_string) 
.set_name("parquet_multithreaded_read_decode_chunked_string") @@ -324,7 +335,8 @@ NVBENCH_BENCH(BM_parquet_multithreaded_read_chunked_string) .add_int64_axis("num_cols", {4}) .add_int64_axis("run_length", {8}) .add_int64_axis("input_limit", {640 * 1024 * 1024}) - .add_int64_axis("output_limit", {640 * 1024 * 1024}); + .add_int64_axis("output_limit", {640 * 1024 * 1024}) + .add_string_axis("io_type", {"PINNED_BUFFER"}); NVBENCH_BENCH(BM_parquet_multithreaded_read_chunked_list) .set_name("parquet_multithreaded_read_decode_chunked_list") @@ -335,4 +347,5 @@ NVBENCH_BENCH(BM_parquet_multithreaded_read_chunked_list) .add_int64_axis("num_cols", {4}) .add_int64_axis("run_length", {8}) .add_int64_axis("input_limit", {640 * 1024 * 1024}) - .add_int64_axis("output_limit", {640 * 1024 * 1024}); + .add_int64_axis("output_limit", {640 * 1024 * 1024}) + .add_string_axis("io_type", {"PINNED_BUFFER"}); diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp index 46d2927a92b..256e50f0e64 100644 --- a/cpp/benchmarks/io/parquet/parquet_writer.cpp +++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp @@ -202,8 +202,8 @@ NVBENCH_BENCH_TYPES(BM_parq_write_encode, NVBENCH_TYPE_AXES(d_type_list)) .set_name("parquet_write_encode") .set_type_axes_names({"data_type"}) .set_min_samples(4) - .add_int64_axis("cardinality", {0, 1000}) - .add_int64_axis("run_length", {1, 32}); + .add_int64_axis("cardinality", {0, 1000, 10'000, 100'000}) + .add_int64_axis("run_length", {1, 8, 32}); NVBENCH_BENCH_TYPES(BM_parq_write_io_compression, NVBENCH_TYPE_AXES(io_list, compression_list)) .set_name("parquet_write_io_compression") diff --git a/cpp/benchmarks/io/utilities/batched_memset_bench.cpp b/cpp/benchmarks/io/utilities/batched_memset_bench.cpp new file mode 100644 index 00000000000..2905895a63b --- /dev/null +++ b/cpp/benchmarks/io/utilities/batched_memset_bench.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include + +#include + +// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to +// run on most GPUs, but large enough to allow highest throughput +constexpr size_t data_size = 512 << 20; + +void parquet_read_common(cudf::size_type num_rows_to_read, + cudf::size_type num_cols_to_read, + cuio_source_sink_pair& source_sink, + nvbench::state& state) +{ + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(source_sink.make_source_info()); + + auto mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec( + nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) { + try_drop_l3_cache(); + + timer.start(); + auto const result = cudf::io::read_parquet(read_opts); + timer.stop(); + + CUDF_EXPECTS(result.tbl->num_columns() == num_cols_to_read, "Unexpected number of columns"); + CUDF_EXPECTS(result.tbl->num_rows() == num_rows_to_read, "Unexpected number of rows"); + }); + + auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(data_size) / time, "bytes_per_second"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); + 
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size"); +} + +template +void bench_batched_memset(nvbench::state& state, nvbench::type_list>) +{ + auto const d_type = get_type_or_group(static_cast(DataType)); + auto const num_cols = static_cast(state.get_int64("num_cols")); + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const run_length = static_cast(state.get_int64("run_length")); + auto const source_type = retrieve_io_type_enum(state.get_string("io_type")); + auto const compression = cudf::io::compression_type::NONE; + cuio_source_sink_pair source_sink(source_type); + auto const tbl = + create_random_table(cycle_dtypes(d_type, num_cols), + table_size_bytes{data_size}, + data_profile_builder().cardinality(cardinality).avg_run_length(run_length)); + auto const view = tbl->view(); + + cudf::io::parquet_writer_options write_opts = + cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view) + .compression(compression); + cudf::io::write_parquet(write_opts); + auto const num_rows = view.num_rows(); + + parquet_read_common(num_rows, num_cols, source_sink, state); +} + +using d_type_list = nvbench::enum_type_list; + +NVBENCH_BENCH_TYPES(bench_batched_memset, NVBENCH_TYPE_AXES(d_type_list)) + .set_name("batched_memset") + .set_type_axes_names({"data_type"}) + .add_int64_axis("num_cols", {1000}) + .add_string_axis("io_type", {"DEVICE_BUFFER"}) + .set_min_samples(4) + .add_int64_axis("cardinality", {0, 1000}) + .add_int64_axis("run_length", {1, 32}); diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu index ada7a9bd73d..e2576c0d690 100644 --- a/cpp/benchmarks/iterator/iterator.cu +++ b/cpp/benchmarks/iterator/iterator.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -30,7 +31,6 @@ #include #include #include -#include #include #include @@ -139,7 +139,7 @@ void BM_iterator(benchmark::State& state) // Initialize dev_result to false auto 
dev_result = cudf::detail::make_zeroed_device_uvector_sync( - 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + 1, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); for (auto _ : state) { cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 if (cub_or_thrust) { @@ -161,68 +161,6 @@ void BM_iterator(benchmark::State& state) sizeof(TypeParam)); } -// operator+ defined for pair iterator reduction -template -__device__ thrust::pair operator+(thrust::pair lhs, thrust::pair rhs) -{ - return thrust::pair{lhs.first * lhs.second + rhs.first * rhs.second, - lhs.second + rhs.second}; -} -// ----------------------------------------------------------------------------- -template -void pair_iterator_bench_cub(cudf::column_view& col, - rmm::device_uvector>& result) -{ - thrust::pair init{0, false}; - auto d_col = cudf::column_device_view::create(col); - int num_items = col.size(); - auto begin = d_col->pair_begin(); - reduce_by_cub(result.begin(), begin, num_items, init); -} - -template -void pair_iterator_bench_thrust(cudf::column_view& col, - rmm::device_uvector>& result) -{ - thrust::pair init{0, false}; - auto d_col = cudf::column_device_view::create(col); - auto d_in = d_col->pair_begin(); - auto d_end = d_in + col.size(); - thrust::reduce(thrust::device, d_in, d_end, init, cudf::DeviceSum{}); -} - -template -void BM_pair_iterator(benchmark::State& state) -{ - cudf::size_type const column_size{(cudf::size_type)state.range(0)}; - using T = TypeParam; - auto num_gen = thrust::counting_iterator(0); - auto null_gen = - thrust::make_transform_iterator(num_gen, [](cudf::size_type row) { return row % 2 == 0; }); - - cudf::test::fixed_width_column_wrapper wrap_hasnull_F(num_gen, num_gen + column_size); - cudf::test::fixed_width_column_wrapper wrap_hasnull_T( - num_gen, num_gen + column_size, null_gen); - cudf::column_view hasnull_F = wrap_hasnull_F; - cudf::column_view hasnull_T = wrap_hasnull_T; - - // Initialize 
dev_result to false - auto dev_result = cudf::detail::make_zeroed_device_uvector_sync>( - 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); - for (auto _ : state) { - cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - if (cub_or_thrust) { - pair_iterator_bench_cub(hasnull_T, - dev_result); // driven by pair iterator with nulls - } else { - pair_iterator_bench_thrust(hasnull_T, - dev_result); // driven by pair iterator with nulls - } - } - state.SetBytesProcessed(static_cast(state.iterations()) * column_size * - sizeof(TypeParam)); -} - #define ITER_BM_BENCHMARK_DEFINE(name, type, cub_or_thrust, raw_or_iterator) \ BENCHMARK_DEFINE_F(Iterator, name)(::benchmark::State & state) \ { \ @@ -238,17 +176,3 @@ ITER_BM_BENCHMARK_DEFINE(double_cub_raw, double, true, true); ITER_BM_BENCHMARK_DEFINE(double_cub_iter, double, true, false); ITER_BM_BENCHMARK_DEFINE(double_thrust_raw, double, false, true); ITER_BM_BENCHMARK_DEFINE(double_thrust_iter, double, false, false); - -#define PAIRITER_BM_BENCHMARK_DEFINE(name, type, cub_or_thrust) \ - BENCHMARK_DEFINE_F(Iterator, name)(::benchmark::State & state) \ - { \ - BM_pair_iterator(state); \ - } \ - BENCHMARK_REGISTER_F(Iterator, name) \ - ->RangeMultiplier(10) \ - ->Range(1000, 10000000) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -PAIRITER_BM_BENCHMARK_DEFINE(double_cub_pair, double, true); -PAIRITER_BM_BENCHMARK_DEFINE(double_thrust_pair, double, false); diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh index f7984b29d6b..75bbe8174d3 100644 --- a/cpp/benchmarks/join/generate_input_tables.cuh +++ b/cpp/benchmarks/join/generate_input_tables.cuh @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -150,13 +151,8 @@ void generate_input_tables(key_type* const build_tbl, CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor( &num_blocks_init_probe_tbl, init_probe_tbl, block_size, 0)); 
- int dev_id{-1}; - CUDF_CUDA_TRY(cudaGetDevice(&dev_id)); - - int num_sms{-1}; - CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, dev_id)); - - int const num_states = + auto const num_sms = cudf::detail::num_multiprocessors(); + auto const num_states = num_sms * std::max(num_blocks_init_build_tbl, num_blocks_init_probe_tbl) * block_size; rmm::device_uvector devStates(num_states, cudf::get_default_stream()); diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index 3d9d9c57548..1f1ca414ad1 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -86,7 +87,7 @@ void BM_join(state_type& state, Join JoinFunc) validity + size, thrust::identity{}, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); }; std::unique_ptr right_key_column0 = [&]() { diff --git a/cpp/benchmarks/json/json.cu b/cpp/benchmarks/json/json.cu index 06b793bf5f1..6d01f132189 100644 --- a/cpp/benchmarks/json/json.cu +++ b/cpp/benchmarks/json/json.cu @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -171,7 +172,7 @@ auto build_json_string_column(int desired_bytes, int num_rows) json_benchmark_row_builder jb{ desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order}; auto [offsets, chars] = cudf::strings::detail::make_strings_children( - jb, num_rows, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + jb, num_rows, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); return cudf::make_strings_column(num_rows, std::move(offsets), chars.release(), 0, {}); } diff --git a/cpp/benchmarks/lists/copying/scatter_lists.cu b/cpp/benchmarks/lists/copying/scatter_lists.cu index 570decf410f..526a43d9ff5 100644 --- a/cpp/benchmarks/lists/copying/scatter_lists.cu +++ 
b/cpp/benchmarks/lists/copying/scatter_lists.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -38,7 +39,7 @@ template void BM_lists_scatter(::benchmark::State& state) { auto stream = cudf::get_default_stream(); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = cudf::get_current_device_resource_ref(); cudf::size_type const base_size{(cudf::size_type)state.range(0)}; cudf::size_type const num_elements_per_row{(cudf::size_type)state.range(1)}; diff --git a/cpp/benchmarks/lists/set_operations.cpp b/cpp/benchmarks/lists/set_operations.cpp index 6bed33d2570..8a94227c23b 100644 --- a/cpp/benchmarks/lists/set_operations.cpp +++ b/cpp/benchmarks/lists/set_operations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include +#include #include @@ -55,7 +56,7 @@ void nvbench_set_op(nvbench::state& state, BenchFuncPtr bfunc) cudf::null_equality::EQUAL, cudf::nan_equality::ALL_EQUAL, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); }); } diff --git a/cpp/benchmarks/merge/merge_lists.cpp b/cpp/benchmarks/merge/merge_lists.cpp index bcb9f10ac83..2fe8b02055b 100644 --- a/cpp/benchmarks/merge/merge_lists.cpp +++ b/cpp/benchmarks/merge/merge_lists.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ #include #include +#include #include @@ -27,11 +28,11 @@ void nvbench_merge_list(nvbench::state& state) auto const input1 = create_lists_data(state); auto const sorted_input1 = - cudf::detail::sort(*input1, {}, {}, stream, rmm::mr::get_current_device_resource()); + cudf::detail::sort(*input1, {}, {}, stream, cudf::get_current_device_resource_ref()); auto const input2 = create_lists_data(state); auto const sorted_input2 = - cudf::detail::sort(*input2, {}, {}, stream, rmm::mr::get_current_device_resource()); + cudf::detail::sort(*input2, {}, {}, stream, cudf::get_current_device_resource_ref()); stream.synchronize(); @@ -43,7 +44,7 @@ void nvbench_merge_list(nvbench::state& state) {cudf::order::ASCENDING}, {}, stream_view, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); }); } diff --git a/cpp/benchmarks/merge/merge_structs.cpp b/cpp/benchmarks/merge/merge_structs.cpp index 9c56b44b623..cfb44d2737f 100644 --- a/cpp/benchmarks/merge/merge_structs.cpp +++ b/cpp/benchmarks/merge/merge_structs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,6 +18,7 @@ #include #include +#include #include @@ -27,11 +28,11 @@ void nvbench_merge_struct(nvbench::state& state) auto const input1 = create_structs_data(state); auto const sorted_input1 = - cudf::detail::sort(*input1, {}, {}, stream, rmm::mr::get_current_device_resource()); + cudf::detail::sort(*input1, {}, {}, stream, cudf::get_current_device_resource_ref()); auto const input2 = create_structs_data(state); auto const sorted_input2 = - cudf::detail::sort(*input2, {}, {}, stream, rmm::mr::get_current_device_resource()); + cudf::detail::sort(*input2, {}, {}, stream, cudf::get_current_device_resource_ref()); stream.synchronize(); @@ -43,7 +44,7 @@ void nvbench_merge_struct(nvbench::state& state) {cudf::order::ASCENDING}, {}, stream_view, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); }); } diff --git a/cpp/benchmarks/ndsh/README.md b/cpp/benchmarks/ndsh/README.md new file mode 100644 index 00000000000..0a462e1684e --- /dev/null +++ b/cpp/benchmarks/ndsh/README.md @@ -0,0 +1,11 @@ +# NDS-H Benchmarks for `libcudf` + +## Disclaimer + +NDS-H is derived from the TPC-H Benchmarks and as such any results obtained using NDS-H are not +comparable to published TPC-H Benchmark results, as the results obtained from using NDS-H do not +comply with the TPC-H Benchmarks. + +## Current Status + +For now, only Q1, Q5, Q6, Q9, and Q10 have been implemented diff --git a/cpp/examples/tpch/q1.cpp b/cpp/benchmarks/ndsh/q01.cpp similarity index 79% rename from cpp/examples/tpch/q1.cpp rename to cpp/benchmarks/ndsh/q01.cpp index 1bdf039da4a..ef709926ae9 100644 --- a/cpp/examples/tpch/q1.cpp +++ b/cpp/benchmarks/ndsh/q01.cpp @@ -14,16 +14,19 @@ * limitations under the License. */ -#include "../utilities/timer.hpp" -#include "utils.hpp" +#include "utilities.hpp" #include +#include #include #include +#include + +#include /** - * @file q1.cpp - * @brief Implement query 1 of the TPC-H benchmark. 
+ * @file q01.cpp + * @brief Implement query 1 of the NDS-H benchmark. * * create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; * @@ -58,11 +61,11 @@ * @param stream The CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. */ -[[nodiscard]] std::unique_ptr calc_disc_price( +[[nodiscard]] std::unique_ptr calculate_disc_price( cudf::column_view const& discount, cudf::column_view const& extendedprice, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto const one = cudf::numeric_scalar(1); auto const one_minus_discount = @@ -85,11 +88,11 @@ * @param stream The CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. 
*/ -[[nodiscard]] std::unique_ptr calc_charge( +[[nodiscard]] std::unique_ptr calculate_charge( cudf::column_view const& tax, cudf::column_view const& disc_price, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto const one = cudf::numeric_scalar(1); auto const one_plus_tax = @@ -100,16 +103,9 @@ return charge; } -int main(int argc, char const** argv) +void run_ndsh_q1(nvbench::state& state, + std::unordered_map& sources) { - auto const args = parse_args(argc, argv); - - // Use a memory pool - auto resource = create_memory_resource(args.memory_resource_type); - rmm::mr::set_current_device_resource(resource.get()); - - cudf::examples::timer timer; - // Define the column projections and filter predicate for `lineitem` table std::vector const lineitem_cols = {"l_returnflag", "l_linestatus", @@ -124,17 +120,17 @@ int main(int argc, char const** argv) auto shipdate_upper = cudf::timestamp_scalar(days_since_epoch(1998, 9, 2), true); auto const shipdate_upper_literal = cudf::ast::literal(shipdate_upper); - auto lineitem_pred = std::make_unique( + auto const lineitem_pred = std::make_unique( cudf::ast::ast_operator::LESS_EQUAL, shipdate_ref, shipdate_upper_literal); // Read out the `lineitem` table from parquet file auto lineitem = - read_parquet(args.dataset_dir + "/lineitem.parquet", lineitem_cols, std::move(lineitem_pred)); + read_parquet(sources["lineitem"].make_source_info(), lineitem_cols, std::move(lineitem_pred)); // Calculate the discount price and charge columns and append to lineitem table auto disc_price = - calc_disc_price(lineitem->column("l_discount"), lineitem->column("l_extendedprice")); - auto charge = calc_charge(lineitem->column("l_tax"), disc_price->view()); + calculate_disc_price(lineitem->column("l_discount"), lineitem->column("l_extendedprice")); + auto charge = 
calculate_charge(lineitem->column("l_tax"), disc_price->view()); (*lineitem).append(disc_price, "disc_price").append(charge, "charge"); // Perform the group by operation @@ -166,9 +162,21 @@ int main(int argc, char const** argv) {"l_returnflag", "l_linestatus"}, {cudf::order::ASCENDING, cudf::order::ASCENDING}); - timer.print_elapsed_millis(); - // Write query result to a parquet file orderedby_table->to_parquet("q1.parquet"); - return 0; } + +void ndsh_q1(nvbench::state& state) +{ + // Generate the required parquet files in device buffers + double const scale_factor = state.get_float64("scale_factor"); + std::unordered_map sources; + generate_parquet_data_sources(scale_factor, {"lineitem"}, sources); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { run_ndsh_q1(state, sources); }); +} + +NVBENCH_BENCH(ndsh_q1).set_name("ndsh_q1").add_float64_axis("scale_factor", {0.01, 0.1, 1}); diff --git a/cpp/examples/tpch/q5.cpp b/cpp/benchmarks/ndsh/q05.cpp similarity index 73% rename from cpp/examples/tpch/q5.cpp rename to cpp/benchmarks/ndsh/q05.cpp index e56850b94d6..522bc4789c2 100644 --- a/cpp/examples/tpch/q5.cpp +++ b/cpp/benchmarks/ndsh/q05.cpp @@ -14,16 +14,19 @@ * limitations under the License. */ -#include "../utilities/timer.hpp" -#include "utils.hpp" +#include "utilities.hpp" #include +#include #include #include +#include + +#include /** - * @file q5.cpp - * @brief Implement query 5 of the TPC-H benchmark. + * @file q05.cpp + * @brief Implement query 5 of the NDS-H benchmark. 
* * create view customer as select * from '/tables/scale-1/customer.parquet'; * create view orders as select * from '/tables/scale-1/orders.parquet'; @@ -44,14 +47,14 @@ * region * where * c_custkey = o_custkey - * and l_orderkey = o_orderkey - * and l_suppkey = s_suppkey - * and c_nationkey = s_nationkey - * and s_nationkey = n_nationkey - * and n_regionkey = r_regionkey - * and r_name = 'ASIA' - * and o_orderdate >= date '1994-01-01' - * and o_orderdate < date '1995-01-01' + * and l_orderkey = o_orderkey + * and l_suppkey = s_suppkey + * and c_nationkey = s_nationkey + * and s_nationkey = n_nationkey + * and n_regionkey = r_regionkey + * and r_name = 'ASIA' + * and o_orderdate >= date '1994-01-01' + * and o_orderdate < date '1995-01-01' * group by * n_name * order by @@ -66,11 +69,11 @@ * @param stream The CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. */ -[[nodiscard]] std::unique_ptr calc_revenue( +[[nodiscard]] std::unique_ptr calculate_revenue( cudf::column_view const& extendedprice, cudf::column_view const& discount, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto const one = cudf::numeric_scalar(1); auto const one_minus_discount = @@ -85,16 +88,9 @@ return revenue; } -int main(int argc, char const** argv) +void run_ndsh_q5(nvbench::state& state, + std::unordered_map& sources) { - auto const args = parse_args(argc, argv); - - // Use a memory pool - auto resource = create_memory_resource(args.memory_resource_type); - rmm::mr::set_current_device_resource(resource.get()); - - cudf::examples::timer timer; - // Define the column projection and filter predicate for the `orders` table std::vector const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; auto const o_orderdate_ref 
= cudf::ast::column_reference(std::distance( @@ -109,7 +105,7 @@ int main(int argc, char const** argv) auto const o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); auto const o_orderdate_pred_upper = cudf::ast::operation(cudf::ast::ast_operator::LESS, o_orderdate_ref, o_orderdate_upper_limit); - auto orders_pred = std::make_unique( + auto const orders_pred = std::make_unique( cudf::ast::ast_operator::LOGICAL_AND, o_orderdate_pred_lower, o_orderdate_pred_upper); // Define the column projection and filter predicate for the `region` table @@ -118,23 +114,23 @@ int main(int argc, char const** argv) region_cols.begin(), std::find(region_cols.begin(), region_cols.end(), "r_name"))); auto r_name_value = cudf::string_scalar("ASIA"); auto const r_name_literal = cudf::ast::literal(r_name_value); - auto region_pred = std::make_unique( + auto const region_pred = std::make_unique( cudf::ast::ast_operator::EQUAL, r_name_ref, r_name_literal); // Read out the tables from parquet files // while pushing down the column projections and filter predicates auto const customer = - read_parquet(args.dataset_dir + "/customer.parquet", {"c_custkey", "c_nationkey"}); + read_parquet(sources["customer"].make_source_info(), {"c_custkey", "c_nationkey"}); auto const orders = - read_parquet(args.dataset_dir + "/orders.parquet", orders_cols, std::move(orders_pred)); - auto const lineitem = read_parquet(args.dataset_dir + "/lineitem.parquet", + read_parquet(sources["orders"].make_source_info(), orders_cols, std::move(orders_pred)); + auto const lineitem = read_parquet(sources["lineitem"].make_source_info(), {"l_orderkey", "l_suppkey", "l_extendedprice", "l_discount"}); auto const supplier = - read_parquet(args.dataset_dir + "/supplier.parquet", {"s_suppkey", "s_nationkey"}); + read_parquet(sources["supplier"].make_source_info(), {"s_suppkey", "s_nationkey"}); auto const nation = - read_parquet(args.dataset_dir + "/nation.parquet", {"n_nationkey", "n_regionkey", "n_name"}); + 
read_parquet(sources["nation"].make_source_info(), {"n_nationkey", "n_regionkey", "n_name"}); auto const region = - read_parquet(args.dataset_dir + "/region.parquet", region_cols, std::move(region_pred)); + read_parquet(sources["region"].make_source_info(), region_cols, std::move(region_pred)); // Perform the joins auto const join_a = apply_inner_join(region, nation, {"r_regionkey"}, {"n_regionkey"}); @@ -146,7 +142,7 @@ int main(int argc, char const** argv) // Calculate and append the `revenue` column auto revenue = - calc_revenue(joined_table->column("l_extendedprice"), joined_table->column("l_discount")); + calculate_revenue(joined_table->column("l_extendedprice"), joined_table->column("l_discount")); (*joined_table).append(revenue, "revenue"); // Perform the groupby operation @@ -161,9 +157,22 @@ int main(int argc, char const** argv) auto const orderedby_table = apply_orderby(groupedby_table, {"revenue"}, {cudf::order::DESCENDING}); - timer.print_elapsed_millis(); - // Write query result to a parquet file orderedby_table->to_parquet("q5.parquet"); - return 0; } + +void ndsh_q5(nvbench::state& state) +{ + // Generate the required parquet files in device buffers + double const scale_factor = state.get_float64("scale_factor"); + std::unordered_map sources; + generate_parquet_data_sources( + scale_factor, {"customer", "orders", "lineitem", "supplier", "nation", "region"}, sources); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { run_ndsh_q5(state, sources); }); +} + +NVBENCH_BENCH(ndsh_q5).set_name("ndsh_q5").add_float64_axis("scale_factor", {0.01, 0.1, 1}); diff --git a/cpp/examples/tpch/q6.cpp b/cpp/benchmarks/ndsh/q06.cpp similarity index 76% rename from cpp/examples/tpch/q6.cpp rename to cpp/benchmarks/ndsh/q06.cpp index f11b3d6ab3b..04078547973 100644 --- a/cpp/examples/tpch/q6.cpp +++ b/cpp/benchmarks/ndsh/q06.cpp @@ 
-14,16 +14,20 @@ * limitations under the License. */ -#include "../utilities/timer.hpp" -#include "utils.hpp" +#include "utilities.hpp" #include +#include #include #include +#include +#include + +#include /** - * @file q6.cpp - * @brief Implement query 6 of the TPC-H benchmark. + * @file q06.cpp + * @brief Implement query 6 of the NDS-H benchmark. * * create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; * @@ -47,11 +51,11 @@ * @param stream The CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. */ -[[nodiscard]] std::unique_ptr calc_revenue( +[[nodiscard]] std::unique_ptr calculate_revenue( cudf::column_view const& extendedprice, cudf::column_view const& discount, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto const revenue_type = cudf::data_type{cudf::type_id::FLOAT64}; auto revenue = cudf::binary_operation( @@ -59,16 +63,9 @@ return revenue; } -int main(int argc, char const** argv) +void run_ndsh_q6(nvbench::state& state, + std::unordered_map& sources) { - auto const args = parse_args(argc, argv); - - // Use a memory pool - auto resource = create_memory_resource(args.memory_resource_type); - rmm::mr::set_current_device_resource(resource.get()); - - cudf::examples::timer timer; - // Read out the `lineitem` table from parquet file std::vector const lineitem_cols = { "l_extendedprice", "l_discount", "l_shipdate", "l_quantity"}; @@ -84,10 +81,10 @@ int main(int argc, char const** argv) cudf::ast::ast_operator::GREATER_EQUAL, shipdate_ref, shipdate_lower_literal); auto const shipdate_pred_b = cudf::ast::operation(cudf::ast::ast_operator::LESS, shipdate_ref, shipdate_upper_literal); - auto lineitem_pred = std::make_unique( + auto const lineitem_pred = std::make_unique( 
cudf::ast::ast_operator::LOGICAL_AND, shipdate_pred_a, shipdate_pred_b); auto lineitem = - read_parquet(args.dataset_dir + "/lineitem.parquet", lineitem_cols, std::move(lineitem_pred)); + read_parquet(sources["lineitem"].make_source_info(), lineitem_cols, std::move(lineitem_pred)); // Cast the discount and quantity columns to float32 and append to lineitem table auto discout_float = @@ -98,8 +95,8 @@ int main(int argc, char const** argv) (*lineitem).append(discout_float, "l_discount_float").append(quantity_float, "l_quantity_float"); // Apply the filters - auto const discount_ref = cudf::ast::column_reference(lineitem->col_id("l_discount_float")); - auto const quantity_ref = cudf::ast::column_reference(lineitem->col_id("l_quantity_float")); + auto const discount_ref = cudf::ast::column_reference(lineitem->column_id("l_discount_float")); + auto const quantity_ref = cudf::ast::column_reference(lineitem->column_id("l_quantity_float")); auto discount_lower = cudf::numeric_scalar(0.05); auto const discount_lower_literal = cudf::ast::literal(discount_lower); @@ -122,16 +119,28 @@ int main(int argc, char const** argv) auto const filtered_table = apply_filter(lineitem, discount_quantity_pred); // Calculate the `revenue` column - auto revenue = - calc_revenue(filtered_table->column("l_extendedprice"), filtered_table->column("l_discount")); + auto revenue = calculate_revenue(filtered_table->column("l_extendedprice"), + filtered_table->column("l_discount")); // Sum the `revenue` column auto const revenue_view = revenue->view(); auto const result_table = apply_reduction(revenue_view, cudf::aggregation::Kind::SUM, "revenue"); - timer.print_elapsed_millis(); - // Write query result to a parquet file result_table->to_parquet("q6.parquet"); - return 0; } + +void ndsh_q6(nvbench::state& state) +{ + // Generate the required parquet files in device buffers + double const scale_factor = state.get_float64("scale_factor"); + std::unordered_map sources; + 
generate_parquet_data_sources(scale_factor, {"lineitem"}, sources); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { run_ndsh_q6(state, sources); }); +} + +NVBENCH_BENCH(ndsh_q6).set_name("ndsh_q6").add_float64_axis("scale_factor", {0.01, 0.1, 1}); diff --git a/cpp/examples/tpch/q9.cpp b/cpp/benchmarks/ndsh/q09.cpp similarity index 77% rename from cpp/examples/tpch/q9.cpp rename to cpp/benchmarks/ndsh/q09.cpp index d3c218253f9..59218ab8912 100644 --- a/cpp/examples/tpch/q9.cpp +++ b/cpp/benchmarks/ndsh/q09.cpp @@ -14,18 +14,21 @@ * limitations under the License. */ -#include "../utilities/timer.hpp" -#include "utils.hpp" +#include "utilities.hpp" +#include #include #include #include #include #include +#include + +#include /** - * @file q9.cpp - * @brief Implement query 9 of the TPC-H benchmark. + * @file q09.cpp + * @brief Implement query 9 of the NDS-H benchmark. * * create view part as select * from '/tables/scale-1/part.parquet'; * create view supplier as select * from '/tables/scale-1/supplier.parquet'; @@ -78,13 +81,13 @@ * @param stream The CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. 
*/ -[[nodiscard]] std::unique_ptr calc_amount( +[[nodiscard]] std::unique_ptr calculate_amount( cudf::column_view const& discount, cudf::column_view const& extendedprice, cudf::column_view const& supplycost, cudf::column_view const& quantity, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto const one = cudf::numeric_scalar(1); auto const one_minus_discount = @@ -108,28 +111,21 @@ return amount; } -int main(int argc, char const** argv) +void run_ndsh_q9(nvbench::state& state, + std::unordered_map& sources) { - auto const args = parse_args(argc, argv); - - // Use a memory pool - auto resource = create_memory_resource(args.memory_resource_type); - rmm::mr::set_current_device_resource(resource.get()); - - cudf::examples::timer timer; - // Read out the table from parquet files auto const lineitem = read_parquet( - args.dataset_dir + "/lineitem.parquet", + sources["lineitem"].make_source_info(), {"l_suppkey", "l_partkey", "l_orderkey", "l_extendedprice", "l_discount", "l_quantity"}); - auto const nation = read_parquet(args.dataset_dir + "/nation.parquet", {"n_nationkey", "n_name"}); + auto const nation = read_parquet(sources["nation"].make_source_info(), {"n_nationkey", "n_name"}); auto const orders = - read_parquet(args.dataset_dir + "/orders.parquet", {"o_orderkey", "o_orderdate"}); - auto const part = read_parquet(args.dataset_dir + "/part.parquet", {"p_partkey", "p_name"}); - auto const partsupp = read_parquet(args.dataset_dir + "/partsupp.parquet", + read_parquet(sources["orders"].make_source_info(), {"o_orderkey", "o_orderdate"}); + auto const part = read_parquet(sources["part"].make_source_info(), {"p_partkey", "p_name"}); + auto const partsupp = read_parquet(sources["partsupp"].make_source_info(), {"ps_suppkey", "ps_partkey", "ps_supplycost"}); auto const supplier = - 
read_parquet(args.dataset_dir + "/supplier.parquet", {"s_suppkey", "s_nationkey"}); + read_parquet(sources["supplier"].make_source_info(), {"s_suppkey", "s_nationkey"}); // Generating the `profit` table // Filter the part table using `p_name like '%green%'` @@ -149,10 +145,10 @@ int main(int argc, char const** argv) // Calculate the `nation`, `o_year`, and `amount` columns auto n_name = std::make_unique(joined_table->column("n_name")); auto o_year = cudf::datetime::extract_year(joined_table->column("o_orderdate")); - auto amount = calc_amount(joined_table->column("l_discount"), - joined_table->column("l_extendedprice"), - joined_table->column("ps_supplycost"), - joined_table->column("l_quantity")); + auto amount = calculate_amount(joined_table->column("l_discount"), + joined_table->column("l_extendedprice"), + joined_table->column("ps_supplycost"), + joined_table->column("l_quantity")); // Put together the `profit` table std::vector> profit_columns; @@ -174,9 +170,22 @@ int main(int argc, char const** argv) auto const orderedby_table = apply_orderby( groupedby_table, {"nation", "o_year"}, {cudf::order::ASCENDING, cudf::order::DESCENDING}); - timer.print_elapsed_millis(); - // Write query result to a parquet file orderedby_table->to_parquet("q9.parquet"); - return 0; } + +void ndsh_q9(nvbench::state& state) +{ + // Generate the required parquet files in device buffers + double const scale_factor = state.get_float64("scale_factor"); + std::unordered_map sources; + generate_parquet_data_sources( + scale_factor, {"part", "supplier", "lineitem", "partsupp", "orders", "nation"}, sources); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { run_ndsh_q9(state, sources); }); +} + +NVBENCH_BENCH(ndsh_q9).set_name("ndsh_q9").add_float64_axis("scale_factor", {0.01, 0.1, 1}); diff --git a/cpp/benchmarks/ndsh/q10.cpp 
b/cpp/benchmarks/ndsh/q10.cpp new file mode 100644 index 00000000000..a520480020a --- /dev/null +++ b/cpp/benchmarks/ndsh/q10.cpp @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utilities.hpp" + +#include +#include +#include +#include +#include + +#include + +/** + * @file q10.cpp + * @brief Implement query 10 of the NDS-H benchmark. + * + * create view customer as select * from '/tables/scale-1/customer.parquet'; + * create view orders as select * from '/tables/scale-1/orders.parquet'; + * create view lineitem as select * from '/tables/scale-1/lineitem.parquet'; + * create view nation as select * from '/tables/scale-1/nation.parquet'; + * + * select + * c_custkey, + * c_name, + * sum(l_extendedprice * (1 - l_discount)) as revenue, + * c_acctbal, + * n_name, + * c_address, + * c_phone, + * c_comment + * from + * customer, + * orders, + * lineitem, + * nation + * where + * c_custkey = o_custkey + * and l_orderkey = o_orderkey + * and o_orderdate >= date '1993-10-01' + * and o_orderdate < date '1994-01-01' + * and l_returnflag = 'R' + * and c_nationkey = n_nationkey + * group by + * c_custkey, + * c_name, + * c_acctbal, + * c_phone, + * n_name, + * c_address, + * c_comment + * order by + * revenue desc; + */ + +/** + * @brief Calculate the revenue column + * + * @param extendedprice The extended price column + * @param discount The discount column + * @param stream The CUDA 
stream used for device memory operations and kernel launches. + * @param mr Device memory resource used to allocate the returned column's device memory. + */ +[[nodiscard]] std::unique_ptr calculate_revenue( + cudf::column_view const& extendedprice, + cudf::column_view const& discount, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) +{ + auto const one = cudf::numeric_scalar(1); + auto const one_minus_discount = + cudf::binary_operation(one, discount, cudf::binary_operator::SUB, discount.type(), stream, mr); + auto const revenue_type = cudf::data_type{cudf::type_id::FLOAT64}; + auto revenue = cudf::binary_operation(extendedprice, + one_minus_discount->view(), + cudf::binary_operator::MUL, + revenue_type, + stream, + mr); + return revenue; +} + +void run_ndsh_q10(nvbench::state& state, + std::unordered_map& sources) +{ + // Define the column projection and filter predicate for the `orders` table + std::vector const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; + auto const o_orderdate_ref = cudf::ast::column_reference(std::distance( + orders_cols.begin(), std::find(orders_cols.begin(), orders_cols.end(), "o_orderdate"))); + auto o_orderdate_lower = + cudf::timestamp_scalar(days_since_epoch(1993, 10, 1), true); + auto const o_orderdate_lower_limit = cudf::ast::literal(o_orderdate_lower); + auto const o_orderdate_pred_lower = cudf::ast::operation( + cudf::ast::ast_operator::GREATER_EQUAL, o_orderdate_ref, o_orderdate_lower_limit); + auto o_orderdate_upper = + cudf::timestamp_scalar(days_since_epoch(1994, 1, 1), true); + auto const o_orderdate_upper_limit = cudf::ast::literal(o_orderdate_upper); + auto const o_orderdate_pred_upper = + cudf::ast::operation(cudf::ast::ast_operator::LESS, o_orderdate_ref, o_orderdate_upper_limit); + auto const orders_pred = std::make_unique( + cudf::ast::ast_operator::LOGICAL_AND, o_orderdate_pred_lower, o_orderdate_pred_upper); + + 
auto const l_returnflag_ref = cudf::ast::column_reference(3); + auto r_scalar = cudf::string_scalar("R"); + auto const r_literal = cudf::ast::literal(r_scalar); + auto const lineitem_pred = std::make_unique( + cudf::ast::ast_operator::EQUAL, l_returnflag_ref, r_literal); + + // Read out the tables from parquet files + // while pushing down the column projections and filter predicates + auto const customer = read_parquet( + sources["customer"].make_source_info(), + {"c_custkey", "c_name", "c_nationkey", "c_acctbal", "c_address", "c_phone", "c_comment"}); + auto const orders = + read_parquet(sources["orders"].make_source_info(), orders_cols, std::move(orders_pred)); + auto const lineitem = + read_parquet(sources["lineitem"].make_source_info(), + {"l_extendedprice", "l_discount", "l_orderkey", "l_returnflag"}, + std::move(lineitem_pred)); + auto const nation = read_parquet(sources["nation"].make_source_info(), {"n_name", "n_nationkey"}); + + // Perform the joins + auto const join_a = apply_inner_join(customer, nation, {"c_nationkey"}, {"n_nationkey"}); + auto const join_b = apply_inner_join(lineitem, orders, {"l_orderkey"}, {"o_orderkey"}); + auto const joined_table = apply_inner_join(join_a, join_b, {"c_custkey"}, {"o_custkey"}); + + // Calculate and append the `revenue` column + auto revenue = + calculate_revenue(joined_table->column("l_extendedprice"), joined_table->column("l_discount")); + (*joined_table).append(revenue, "revenue"); + + // Perform the groupby operation + auto const groupedby_table = apply_groupby( + joined_table, + groupby_context_t{ + {"c_custkey", "c_name", "c_acctbal", "c_phone", "n_name", "c_address", "c_comment"}, + { + {"revenue", {{cudf::aggregation::Kind::SUM, "revenue"}}}, + }}); + + // Perform the order by operation + auto const orderedby_table = + apply_orderby(groupedby_table, {"revenue"}, {cudf::order::DESCENDING}); + + // Write query result to a parquet file + orderedby_table->to_parquet("q10.parquet"); +} + +void 
ndsh_q10(nvbench::state& state) +{ + // Generate the required parquet files in device buffers + double const scale_factor = state.get_float64("scale_factor"); + std::unordered_map sources; + generate_parquet_data_sources( + scale_factor, {"customer", "orders", "lineitem", "nation"}, sources); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch& launch) { run_ndsh_q10(state, sources); }); +} + +NVBENCH_BENCH(ndsh_q10).set_name("ndsh_q10").add_float64_axis("scale_factor", {0.01, 0.1, 1}); diff --git a/cpp/benchmarks/ndsh/utilities.cpp b/cpp/benchmarks/ndsh/utilities.cpp new file mode 100644 index 00000000000..62116ddf661 --- /dev/null +++ b/cpp/benchmarks/ndsh/utilities.cpp @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "utilities.hpp" + +#include "common/ndsh_data_generator/ndsh_data_generator.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace { + +std::vector const ORDERS_SCHEMA = {"o_orderkey", + "o_custkey", + "o_orderstatus", + "o_totalprice", + "o_orderdate", + "o_orderpriority", + "o_clerk", + "o_shippriority", + "o_comment"}; +std::vector const LINEITEM_SCHEMA = {"l_orderkey", + "l_partkey", + "l_suppkey", + "l_linenumber", + "l_quantity", + "l_extendedprice", + "l_discount", + "l_tax", + "l_returnflag", + "l_linestatus", + "l_shipdate", + "l_commitdate", + "l_receiptdate", + "l_shipinstruct", + "l_shipmode", + "l_comment"}; +std::vector const PART_SCHEMA = {"p_partkey", + "p_name", + "p_mfgr", + "p_brand", + "p_type", + "p_size", + "p_container", + "p_retailprice", + "p_comment"}; +std::vector const PARTSUPP_SCHEMA = { + "ps_partkey", "ps_suppkey", "ps_availqty", "ps_supplycost", "ps_comment"}; +std::vector const SUPPLIER_SCHEMA = { + "s_suppkey", "s_name", "s_address", "s_nationkey", "s_phone", "s_acctbal", "s_comment"}; +std::vector const CUSTOMER_SCHEMA = {"c_custkey", + "c_name", + "c_address", + "c_nationkey", + "c_phone", + "c_acctbal", + "c_mktsegment", + "c_comment"}; +std::vector const NATION_SCHEMA = { + "n_nationkey", "n_name", "n_regionkey", "n_comment"}; +std::vector const REGION_SCHEMA = {"r_regionkey", "r_name", "r_comment"}; + +} // namespace + +cudf::table_view table_with_names::table() const { return tbl->view(); } + +cudf::column_view table_with_names::column(std::string const& col_name) const +{ + return tbl->view().column(column_id(col_name)); +} + +std::vector const& table_with_names::column_names() const { return col_names; } + +cudf::size_type table_with_names::column_id(std::string const& col_name) const +{ + auto it = std::find(col_names.begin(), col_names.end(), col_name); + if (it == col_names.end()) { + std::string err_msg = 
"Column `" + col_name + "` not found"; + throw std::runtime_error(err_msg); + } + return std::distance(col_names.begin(), it); +} + +table_with_names& table_with_names::append(std::unique_ptr& col, + std::string const& col_name) +{ + auto cols = tbl->release(); + cols.push_back(std::move(col)); + tbl = std::make_unique(std::move(cols)); + col_names.push_back(col_name); + return (*this); +} + +cudf::table_view table_with_names::select(std::vector const& col_names) const +{ + CUDF_FUNC_RANGE(); + std::vector col_indices; + for (auto const& col_name : col_names) { + col_indices.push_back(column_id(col_name)); + } + return tbl->select(col_indices); +} + +void table_with_names::to_parquet(std::string const& filepath) const +{ + CUDF_FUNC_RANGE(); + auto const sink_info = cudf::io::sink_info(filepath); + cudf::io::table_metadata metadata; + metadata.schema_info = + std::vector(col_names.begin(), col_names.end()); + auto const table_input_metadata = cudf::io::table_input_metadata{metadata}; + auto builder = cudf::io::parquet_writer_options::builder(sink_info, tbl->view()); + builder.metadata(table_input_metadata); + auto const options = builder.build(); + cudf::io::write_parquet(options); +} + +std::unique_ptr join_and_gather(cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + cudf::null_equality compare_nulls) +{ + CUDF_FUNC_RANGE(); + constexpr auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; + auto const left_selected = left_input.select(left_on); + auto const right_selected = right_input.select(right_on); + auto const [left_join_indices, right_join_indices] = + cudf::inner_join(left_selected, + right_selected, + compare_nulls, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + auto const left_indices_span = cudf::device_span{*left_join_indices}; + auto const right_indices_span = cudf::device_span{*right_join_indices}; + + auto const 
left_indices_col = cudf::column_view{left_indices_span}; + auto const right_indices_col = cudf::column_view{right_indices_span}; + + auto const left_result = cudf::gather(left_input, left_indices_col, oob_policy); + auto const right_result = cudf::gather(right_input, right_indices_col, oob_policy); + + auto joined_cols = left_result->release(); + auto right_cols = right_result->release(); + joined_cols.insert(joined_cols.end(), + std::make_move_iterator(right_cols.begin()), + std::make_move_iterator(right_cols.end())); + return std::make_unique(std::move(joined_cols)); +} + +std::unique_ptr apply_inner_join( + std::unique_ptr const& left_input, + std::unique_ptr const& right_input, + std::vector const& left_on, + std::vector const& right_on, + cudf::null_equality compare_nulls) +{ + CUDF_FUNC_RANGE(); + std::vector left_on_indices; + std::vector right_on_indices; + std::transform( + left_on.begin(), left_on.end(), std::back_inserter(left_on_indices), [&](auto const& col_name) { + return left_input->column_id(col_name); + }); + std::transform(right_on.begin(), + right_on.end(), + std::back_inserter(right_on_indices), + [&](auto const& col_name) { return right_input->column_id(col_name); }); + auto table = join_and_gather( + left_input->table(), right_input->table(), left_on_indices, right_on_indices, compare_nulls); + ; + std::vector merged_column_names; + merged_column_names.reserve(left_input->column_names().size() + + right_input->column_names().size()); + std::copy(left_input->column_names().begin(), + left_input->column_names().end(), + std::back_inserter(merged_column_names)); + std::copy(right_input->column_names().begin(), + right_input->column_names().end(), + std::back_inserter(merged_column_names)); + return std::make_unique(std::move(table), merged_column_names); + return std::make_unique(std::move(table), merged_column_names); +} + +std::unique_ptr apply_filter(std::unique_ptr const& table, + cudf::ast::operation const& predicate) +{ + 
CUDF_FUNC_RANGE(); + auto const boolean_mask = cudf::compute_column(table->table(), predicate); + auto result_table = cudf::apply_boolean_mask(table->table(), boolean_mask->view()); + return std::make_unique(std::move(result_table), table->column_names()); +} + +std::unique_ptr apply_mask(std::unique_ptr const& table, + std::unique_ptr const& mask) +{ + CUDF_FUNC_RANGE(); + auto result_table = cudf::apply_boolean_mask(table->table(), mask->view()); + return std::make_unique(std::move(result_table), table->column_names()); +} + +std::unique_ptr apply_groupby(std::unique_ptr const& table, + groupby_context_t const& ctx) +{ + CUDF_FUNC_RANGE(); + auto const keys = table->select(ctx.keys); + cudf::groupby::groupby groupby_obj(keys); + std::vector result_column_names; + result_column_names.insert(result_column_names.end(), ctx.keys.begin(), ctx.keys.end()); + std::vector requests; + for (auto& [value_col, aggregations] : ctx.values) { + requests.emplace_back(cudf::groupby::aggregation_request()); + for (auto& agg : aggregations) { + if (agg.first == cudf::aggregation::Kind::SUM) { + requests.back().aggregations.push_back( + cudf::make_sum_aggregation()); + } else if (agg.first == cudf::aggregation::Kind::MEAN) { + requests.back().aggregations.push_back( + cudf::make_mean_aggregation()); + } else if (agg.first == cudf::aggregation::Kind::COUNT_ALL) { + requests.back().aggregations.push_back( + cudf::make_count_aggregation()); + } else { + throw std::runtime_error("Unsupported aggregation"); + } + result_column_names.push_back(agg.second); + } + requests.back().values = table->column(value_col); + } + auto agg_results = groupby_obj.aggregate(requests); + std::vector> result_columns; + for (auto i = 0; i < agg_results.first->num_columns(); i++) { + auto col = std::make_unique(agg_results.first->get_column(i)); + result_columns.push_back(std::move(col)); + } + for (size_t i = 0; i < agg_results.second.size(); i++) { + for (size_t j = 0; j < 
agg_results.second[i].results.size(); j++) { + result_columns.push_back(std::move(agg_results.second[i].results[j])); + } + } + auto result_table = std::make_unique(std::move(result_columns)); + return std::make_unique(std::move(result_table), result_column_names); +} + +std::unique_ptr apply_orderby(std::unique_ptr const& table, + std::vector const& sort_keys, + std::vector const& sort_key_orders) +{ + CUDF_FUNC_RANGE(); + std::vector column_views; + for (auto& key : sort_keys) { + column_views.push_back(table->column(key)); + } + auto result_table = + cudf::sort_by_key(table->table(), cudf::table_view{column_views}, sort_key_orders); + return std::make_unique(std::move(result_table), table->column_names()); +} + +std::unique_ptr apply_reduction(cudf::column_view const& column, + cudf::aggregation::Kind const& agg_kind, + std::string const& col_name) +{ + CUDF_FUNC_RANGE(); + auto const agg = cudf::make_sum_aggregation(); + auto const result = cudf::reduce(column, *agg, column.type()); + cudf::size_type const len = 1; + auto col = cudf::make_column_from_scalar(*result, len); + std::vector> columns; + columns.push_back(std::move(col)); + auto result_table = std::make_unique(std::move(columns)); + std::vector col_names = {col_name}; + return std::make_unique(std::move(result_table), col_names); +} + +std::unique_ptr read_parquet( + cudf::io::source_info const& source_info, + std::vector const& columns, + std::unique_ptr const& predicate) +{ + CUDF_FUNC_RANGE(); + auto builder = cudf::io::parquet_reader_options_builder(source_info); + if (!columns.empty()) { builder.columns(columns); } + if (predicate) { builder.filter(*predicate); } + auto const options = builder.build(); + auto table_with_metadata = cudf::io::read_parquet(options); + std::vector column_names; + for (auto const& col_info : table_with_metadata.metadata.schema_info) { + column_names.push_back(col_info.name); + } + return std::make_unique(std::move(table_with_metadata.tbl), column_names); +} + +std::tm 
make_tm(int year, int month, int day) +{ + std::tm tm{}; + tm.tm_year = year - 1900; + tm.tm_mon = month - 1; + tm.tm_mday = day; + return tm; +} + +int32_t days_since_epoch(int year, int month, int day) +{ + std::tm tm = make_tm(year, month, day); + std::tm epoch = make_tm(1970, 1, 1); + std::time_t time = std::mktime(&tm); + std::time_t epoch_time = std::mktime(&epoch); + double diff = std::difftime(time, epoch_time) / (60 * 60 * 24); + return static_cast(diff); +} + +void write_to_parquet_device_buffer(std::unique_ptr const& table, + std::vector const& col_names, + parquet_device_buffer& source) +{ + CUDF_FUNC_RANGE(); + auto const stream = cudf::get_default_stream(); + + // Prepare the table metadata + cudf::io::table_metadata metadata; + std::vector col_name_infos; + for (auto& col_name : col_names) { + col_name_infos.push_back(cudf::io::column_name_info(col_name)); + } + metadata.schema_info = col_name_infos; + auto const table_input_metadata = cudf::io::table_input_metadata{metadata}; + + // Declare a host and device buffer + std::vector h_buffer; + + // Write parquet data to host buffer + auto builder = + cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&h_buffer), table->view()); + builder.metadata(table_input_metadata); + auto const options = builder.build(); + cudf::io::write_parquet(options); + + // Copy host buffer to device buffer + source.d_buffer.resize(h_buffer.size(), stream); + CUDF_CUDA_TRY(cudaMemcpyAsync( + source.d_buffer.data(), h_buffer.data(), h_buffer.size(), cudaMemcpyDefault, stream.value())); +} + +void generate_parquet_data_sources(double scale_factor, + std::vector const& table_names, + std::unordered_map& sources) +{ + CUDF_FUNC_RANGE(); + std::for_each(table_names.begin(), table_names.end(), [&](auto const& table_name) { + sources[table_name] = parquet_device_buffer(); + }); + + auto [orders, lineitem, part] = cudf::datagen::generate_orders_lineitem_part( + scale_factor, cudf::get_default_stream(), 
cudf::get_current_device_resource_ref()); + + auto partsupp = cudf::datagen::generate_partsupp( + scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + + auto supplier = cudf::datagen::generate_supplier( + scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + + auto customer = cudf::datagen::generate_customer( + scale_factor, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + + auto nation = cudf::datagen::generate_nation(cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + auto region = cudf::datagen::generate_region(cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + write_to_parquet_device_buffer(std::move(orders), ORDERS_SCHEMA, sources["orders"]); + write_to_parquet_device_buffer(std::move(lineitem), LINEITEM_SCHEMA, sources["lineitem"]); + write_to_parquet_device_buffer(std::move(part), PART_SCHEMA, sources["part"]); + write_to_parquet_device_buffer(std::move(partsupp), PARTSUPP_SCHEMA, sources["partsupp"]); + write_to_parquet_device_buffer(std::move(customer), CUSTOMER_SCHEMA, sources["customer"]); + write_to_parquet_device_buffer(std::move(supplier), SUPPLIER_SCHEMA, sources["supplier"]); + write_to_parquet_device_buffer(std::move(nation), NATION_SCHEMA, sources["nation"]); + write_to_parquet_device_buffer(std::move(region), REGION_SCHEMA, sources["region"]); +} diff --git a/cpp/benchmarks/ndsh/utilities.hpp b/cpp/benchmarks/ndsh/utilities.hpp new file mode 100644 index 00000000000..762e43deccf --- /dev/null +++ b/cpp/benchmarks/ndsh/utilities.hpp @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +/** + * @brief A class to represent a table with column names attached + */ +class table_with_names { + public: + table_with_names(std::unique_ptr tbl, std::vector col_names) + : tbl(std::move(tbl)), col_names(col_names){}; + /** + * @brief Return the table view + */ + [[nodiscard]] cudf::table_view table() const; + /** + * @brief Return the column view for a given column name + * + * @param col_name The name of the column + */ + [[nodiscard]] cudf::column_view column(std::string const& col_name) const; + /** + * @param Return the column names of the table + */ + [[nodiscard]] std::vector const& column_names() const; + /** + * @brief Translate a column name to a column index + * + * @param col_name The name of the column + */ + [[nodiscard]] cudf::size_type column_id(std::string const& col_name) const; + /** + * @brief Append a column to the table + * + * @param col The column to append + * @param col_name The name of the appended column + */ + table_with_names& append(std::unique_ptr& col, std::string const& col_name); + /** + * @brief Select a subset of columns from the table + * + * @param col_names The names of the columns to select + */ + [[nodiscard]] cudf::table_view select(std::vector const& col_names) const; + /** + * @brief Write the table to a parquet file + * + * @param filepath The path to the parquet file + */ + void to_parquet(std::string const& filepath) const; + + private: + std::unique_ptr tbl; + std::vector col_names; +}; + +/** + * @brief Inner join two tables and gather the 
result + * + * @param left_input The left input table + * @param right_input The right input table + * @param left_on The columns to join on in the left table + * @param right_on The columns to join on in the right table + * @param compare_nulls The null equality policy + */ +[[nodiscard]] std::unique_ptr join_and_gather( + cudf::table_view const& left_input, + cudf::table_view const& right_input, + std::vector const& left_on, + std::vector const& right_on, + cudf::null_equality compare_nulls); + +/** + * @brief Apply an inner join operation to two tables + * + * @param left_input The left input table + * @param right_input The right input table + * @param left_on The columns to join on in the left table + * @param right_on The columns to join on in the right table + * @param compare_nulls The null equality policy + */ +[[nodiscard]] std::unique_ptr apply_inner_join( + std::unique_ptr const& left_input, + std::unique_ptr const& right_input, + std::vector const& left_on, + std::vector const& right_on, + cudf::null_equality compare_nulls = cudf::null_equality::EQUAL); + +/** + * @brief Apply a filter predicate to a table + * + * @param table The input table + * @param predicate The filter predicate + */ +[[nodiscard]] std::unique_ptr apply_filter( + std::unique_ptr const& table, cudf::ast::operation const& predicate); + +/** + * @brief Apply a boolean mask to a table + * + * @param table The input table + * @param mask The boolean mask + */ +[[nodiscard]] std::unique_ptr apply_mask( + std::unique_ptr const& table, std::unique_ptr const& mask); + +/** + * Struct representing group by key columns, value columns, and the type of aggregations to perform + * on the value columns + */ +struct groupby_context_t { + std::vector keys; + std::unordered_map>> + values; +}; + +/** + * @brief Apply a groupby operation to a table + * + * @param table The input table + * @param ctx The groupby context + */ +[[nodiscard]] std::unique_ptr apply_groupby( + std::unique_ptr const& 
table, groupby_context_t const& ctx); + +/** + * @brief Apply an order by operation to a table + * + * @param table The input table + * @param sort_keys The sort keys + * @param sort_key_orders The sort key orders + */ +[[nodiscard]] std::unique_ptr apply_orderby( + std::unique_ptr const& table, + std::vector const& sort_keys, + std::vector const& sort_key_orders); + +/** + * @brief Apply a reduction operation to a column + * + * @param column The input column + * @param agg_kind The aggregation kind + * @param col_name The name of the output column + */ +[[nodiscard]] std::unique_ptr apply_reduction( + cudf::column_view const& column, + cudf::aggregation::Kind const& agg_kind, + std::string const& col_name); + +/** + * @brief Read a parquet file into a table + * + * @param source_info The source of the parquet file + * @param columns The columns to read + * @param predicate The filter predicate to pushdown + */ +[[nodiscard]] std::unique_ptr read_parquet( + cudf::io::source_info const& source_info, + std::vector const& columns = {}, + std::unique_ptr const& predicate = nullptr); + +/** + * @brief Generate the `std::tm` structure from year, month, and day + * + * @param year The year + * @param month The month + * @param day The day + */ +std::tm make_tm(int year, int month, int day); + +/** + * @brief Calculate the number of days since the UNIX epoch + * + * @param year The year + * @param month The month + * @param day The day + */ +int32_t days_since_epoch(int year, int month, int day); + +/** + * @brief Struct representing a parquet device buffer + */ +struct parquet_device_buffer { + parquet_device_buffer() : d_buffer{0, cudf::get_default_stream()} {}; + cudf::io::source_info make_source_info() { return cudf::io::source_info(d_buffer); } + rmm::device_uvector d_buffer; +}; + +/** + * @brief Write a `cudf::table` to a parquet device buffer + * + * @param table The `cudf::table` to write + * @param col_names The column names of the table + * @param 
parquet_device_buffer The parquet device buffer to write the table to + */ +void write_to_parquet_device_buffer(std::unique_ptr const& table, + std::vector const& col_names, + parquet_device_buffer& source); + +/** + * @brief Generate NDS-H tables and write to parquet device buffers + * + * @param scale_factor The scale factor of NDS-H tables to generate + * @param table_names The names of the tables to generate + * @param sources The parquet data sources to populate + */ +void generate_parquet_data_sources(double scale_factor, + std::vector const& table_names, + std::unordered_map& sources); diff --git a/cpp/benchmarks/quantiles/tdigest.cu b/cpp/benchmarks/quantiles/tdigest.cu new file mode 100644 index 00000000000..9d37dbc9a26 --- /dev/null +++ b/cpp/benchmarks/quantiles/tdigest.cu @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include + +#include + +#include +#include +#include + +#include + +void bm_tdigest_merge(nvbench::state& state) +{ + auto const num_tdigests = static_cast(state.get_int64("num_tdigests")); + auto const tdigest_size = static_cast(state.get_int64("tdigest_size")); + auto const tdigests_per_group = + static_cast(state.get_int64("tdigests_per_group")); + auto const max_centroids = static_cast(state.get_int64("max_centroids")); + auto const num_groups = num_tdigests / tdigests_per_group; + auto const total_centroids = num_tdigests * tdigest_size; + + auto stream = cudf::get_default_stream(); + auto mr = rmm::mr::get_current_device_resource(); + + constexpr int base_value = 5; + + // construct inner means/weights + auto val_iter = cudf::detail::make_counting_transform_iterator( + 0, cuda::proclaim_return_type([tdigest_size](cudf::size_type i) { + return static_cast(base_value + (i % tdigest_size)); + })); + auto one_iter = thrust::make_constant_iterator(1); + cudf::test::fixed_width_column_wrapper means(val_iter, val_iter + total_centroids); + cudf::test::fixed_width_column_wrapper weights(one_iter, one_iter + total_centroids); + std::vector> inner_struct_children; + inner_struct_children.push_back(means.release()); + inner_struct_children.push_back(weights.release()); + cudf::test::structs_column_wrapper inner_struct(std::move(inner_struct_children)); + + // construct the tdigest lists themselves + auto offset_iter = cudf::detail::make_counting_transform_iterator( + 0, cuda::proclaim_return_type([tdigest_size](cudf::size_type i) { + return i * tdigest_size; + })); + cudf::test::fixed_width_column_wrapper offsets(offset_iter, offset_iter + num_tdigests + 1); + auto list_col = cudf::make_lists_column( + num_tdigests, offsets.release(), inner_struct.release(), 0, {}, stream, mr); + + // min and max columns + auto min_iter = thrust::make_constant_iterator(base_value); + auto max_iter = thrust::make_constant_iterator(base_value + (tdigest_size 
- 1)); + cudf::test::fixed_width_column_wrapper mins(min_iter, min_iter + num_tdigests); + cudf::test::fixed_width_column_wrapper maxes(max_iter, max_iter + num_tdigests); + + // assemble the whole thing + std::vector> tdigest_children; + tdigest_children.push_back(std::move(list_col)); + tdigest_children.push_back(mins.release()); + tdigest_children.push_back(maxes.release()); + cudf::test::structs_column_wrapper tdigest(std::move(tdigest_children)); + + rmm::device_uvector group_offsets(num_groups + 1, stream, mr); + rmm::device_uvector group_labels(num_tdigests, stream, mr); + auto group_offset_iter = cudf::detail::make_counting_transform_iterator( + 0, + cuda::proclaim_return_type( + [tdigests_per_group] __device__(cudf::size_type i) { return i * tdigests_per_group; })); + thrust::copy(rmm::exec_policy_nosync(stream, mr), + group_offset_iter, + group_offset_iter + num_groups + 1, + group_offsets.begin()); + auto group_label_iter = cudf::detail::make_counting_transform_iterator( + 0, + cuda::proclaim_return_type( + [tdigests_per_group] __device__(cudf::size_type i) { return i / tdigests_per_group; })); + thrust::copy(rmm::exec_policy_nosync(stream, mr), + group_label_iter, + group_label_iter + num_tdigests, + group_labels.begin()); + + state.add_element_count(total_centroids); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync, + [&](nvbench::launch& launch, auto& timer) { + timer.start(); + auto result = cudf::tdigest::detail::group_merge_tdigest( + tdigest, group_offsets, group_labels, num_groups, max_centroids, stream, mr); + timer.stop(); + }); +} + +NVBENCH_BENCH(bm_tdigest_merge) + .set_name("TDigest many tiny groups") + .add_int64_axis("num_tdigests", {500'000}) + .add_int64_axis("tdigest_size", {1, 1000}) + .add_int64_axis("tdigests_per_group", {1}) + .add_int64_axis("max_centroids", {10000, 1000}); + +NVBENCH_BENCH(bm_tdigest_merge) + .set_name("TDigest many small 
groups") + .add_int64_axis("num_tdigests", {500'000}) + .add_int64_axis("tdigest_size", {1, 1000}) + .add_int64_axis("tdigests_per_group", {3}) + .add_int64_axis("max_centroids", {10000, 1000}); diff --git a/cpp/benchmarks/reduction/anyall.cpp b/cpp/benchmarks/reduction/anyall.cpp index e9d23881764..1e578fab181 100644 --- a/cpp/benchmarks/reduction/anyall.cpp +++ b/cpp/benchmarks/reduction/anyall.cpp @@ -16,65 +16,51 @@ #include #include -#include -#include -#include +#include #include #include #include -#include +#include -class Reduction : public cudf::benchmark {}; +#include -template -void BM_reduction_anyall(benchmark::State& state, - std::unique_ptr const& agg) +template +static void reduction_anyall(nvbench::state& state, nvbench::type_list) { - cudf::size_type const column_size{static_cast(state.range(0))}; - auto const dtype = cudf::type_to_id(); - data_profile const profile = data_profile_builder().no_validity().distribution( - dtype, distribution_id::UNIFORM, 0, agg->kind == cudf::aggregation::ANY ? 0 : 100); - auto const values = create_random_column(dtype, row_count{column_size}, profile); + auto const size = static_cast(state.get_int64("size")); + auto const kind_str = state.get_string("kind"); - cudf::data_type output_dtype{cudf::type_id::BOOL8}; + auto const input_type = cudf::type_to_id(); + auto const agg = kind_str == "any" ? cudf::make_any_aggregation() + : cudf::make_all_aggregation(); - for (auto _ : state) { - cuda_event_timer timer(state, true); - auto result = cudf::reduce(*values, *agg, output_dtype); - } + data_profile const profile = + data_profile_builder().no_validity().distribution(input_type, + distribution_id::UNIFORM, + (kind_str == "all" ? 1 : 0), + (kind_str == "any" ? 0 : 100)); + auto const values = create_random_column(input_type, row_count{size}, profile); - // The benchmark takes a column and produces one scalar. 
- set_items_processed(state, column_size + 1); - set_bytes_processed(state, estimate_size(values->view()) + cudf::size_of(output_dtype)); -} + auto const output_type = cudf::data_type{cudf::type_id::BOOL8}; + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.add_element_count(size); + state.add_global_memory_reads(size); + state.add_global_memory_writes(1); -#define concat(a, b, c) a##b##c -#define get_agg(op) concat(cudf::make_, op, _aggregation()) + state.exec(nvbench::exec_tag::sync, [&values, output_type, &agg](nvbench::launch& launch) { + cudf::reduce(*values, *agg, output_type); + }); -// TYPE, OP -#define RBM_BENCHMARK_DEFINE(name, type, aggregation) \ - BENCHMARK_DEFINE_F(Reduction, name)(::benchmark::State & state) \ - { \ - BM_reduction_anyall(state, get_agg(aggregation)); \ - } \ - BENCHMARK_REGISTER_F(Reduction, name) \ - ->UseManualTime() \ - ->Arg(10000) /* 10k */ \ - ->Arg(100000) /* 100k */ \ - ->Arg(1000000) /* 1M */ \ - ->Arg(10000000) /* 10M */ \ - ->Arg(100000000); /* 100M */ + set_throughputs(state); +} -#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \ - RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation) +using Types = nvbench::type_list; -REDUCE_BENCHMARK_DEFINE(bool, all); -REDUCE_BENCHMARK_DEFINE(int8_t, all); -REDUCE_BENCHMARK_DEFINE(int32_t, all); -REDUCE_BENCHMARK_DEFINE(float, all); -REDUCE_BENCHMARK_DEFINE(bool, any); -REDUCE_BENCHMARK_DEFINE(int8_t, any); -REDUCE_BENCHMARK_DEFINE(int32_t, any); -REDUCE_BENCHMARK_DEFINE(float, any); +NVBENCH_BENCH_TYPES(reduction_anyall, NVBENCH_TYPE_AXES(Types)) + .set_name("anyall") + .set_type_axes_names({"DataType"}) + .add_string_axis("kind", {"any", "all"}) + .add_int64_axis("size", {100'000, 1'000'000, 10'000'000, 100'000'000}); diff --git a/cpp/benchmarks/reduction/dictionary.cpp b/cpp/benchmarks/reduction/dictionary.cpp index 5095337dbb3..1bdb50a539a 100644 --- 
a/cpp/benchmarks/reduction/dictionary.cpp +++ b/cpp/benchmarks/reduction/dictionary.cpp @@ -16,79 +16,84 @@ #include #include -#include -#include +#include +#include #include #include #include #include -class ReductionDictionary : public cudf::benchmark {}; +#include -template -void BM_reduction_dictionary(benchmark::State& state, - std::unique_ptr const& agg) +template +static std::unique_ptr make_reduce_aggregation() { - cudf::size_type const column_size{static_cast(state.range(0))}; + switch (kind) { + case cudf::reduce_aggregation::ANY: + return cudf::make_any_aggregation(); + case cudf::reduce_aggregation::ALL: + return cudf::make_all_aggregation(); + case cudf::reduce_aggregation::MIN: + return cudf::make_min_aggregation(); + case cudf::reduce_aggregation::MAX: + return cudf::make_max_aggregation(); + case cudf::reduce_aggregation::MEAN: + return cudf::make_mean_aggregation(); + default: CUDF_FAIL("Unsupported reduce aggregation in this benchmark"); + } +} + +template +static void reduction_dictionary(nvbench::state& state, + nvbench::type_list>) +{ + cudf::size_type const size{static_cast(state.get_int64("size"))}; - // int column and encoded dictionary column data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( cudf::type_to_id(), distribution_id::UNIFORM, - (agg->kind == cudf::aggregation::ALL ? 1 : 0), - (agg->kind == cudf::aggregation::ANY ? 0 : 100)); - auto int_column = create_random_column(cudf::type_to_id(), row_count{column_size}, profile); - auto number_col = cudf::cast(*int_column, cudf::data_type{cudf::type_to_id()}); + (kind == cudf::aggregation::ALL ? 1 : 0), + (kind == cudf::aggregation::ANY ? 
0 : 100)); + auto int_column = create_random_column(cudf::type_to_id(), row_count{size}, profile); + auto number_col = cudf::cast(*int_column, cudf::data_type{cudf::type_to_id()}); auto values = cudf::dictionary::encode(*number_col); - cudf::data_type output_dtype = [&] { - if (agg->kind == cudf::aggregation::ANY || agg->kind == cudf::aggregation::ALL) + cudf::data_type output_type = [&] { + if (kind == cudf::aggregation::ANY || kind == cudf::aggregation::ALL) { return cudf::data_type{cudf::type_id::BOOL8}; - if (agg->kind == cudf::aggregation::MEAN) return cudf::data_type{cudf::type_id::FLOAT64}; - return cudf::data_type{cudf::type_to_id()}; + } + if (kind == cudf::aggregation::MEAN) { return cudf::data_type{cudf::type_id::FLOAT64}; } + return cudf::data_type{cudf::type_to_id()}; }(); - for (auto _ : state) { - cuda_event_timer timer(state, true); - auto result = cudf::reduce(*values, *agg, output_dtype); + auto agg = make_reduce_aggregation(); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.add_element_count(size); + state.add_global_memory_reads(size); + if (kind == cudf::aggregation::ANY || kind == cudf::aggregation::ALL) { + state.add_global_memory_writes(1); // BOOL8s + } else { + state.add_global_memory_writes(1); } - // The benchmark takes a column and produces two scalars. - set_items_processed(state, column_size + 1); + state.exec(nvbench::exec_tag::sync, [&values, output_type, &agg](nvbench::launch& launch) { + cudf::reduce(*values, *agg, output_type); + }); - // We don't set the metrics for the size read/written as row_bit_count() doesn't - // support the dictionary type yet (and so is estimate_size()). - // See https://github.com/rapidsai/cudf/issues/16121 for details. 
+ set_throughputs(state); } -#define concat(a, b, c) a##b##c -#define get_agg(op) concat(cudf::make_, op, _aggregation()) - -// TYPE, OP -#define RBM_BENCHMARK_DEFINE(name, type, aggregation) \ - BENCHMARK_DEFINE_F(ReductionDictionary, name)(::benchmark::State & state) \ - { \ - BM_reduction_dictionary(state, get_agg(aggregation)); \ - } \ - BENCHMARK_REGISTER_F(ReductionDictionary, name) \ - ->UseManualTime() \ - ->Arg(10000) /* 10k */ \ - ->Arg(100000) /* 100k */ \ - ->Arg(1000000) /* 1M */ \ - ->Arg(10000000) /* 10M */ \ - ->Arg(100000000); /* 100M */ - -#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \ - RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation) +using Types = nvbench::type_list; +using AggKinds = nvbench::enum_type_list; -REDUCE_BENCHMARK_DEFINE(int32_t, all); -REDUCE_BENCHMARK_DEFINE(float, all); -REDUCE_BENCHMARK_DEFINE(int32_t, any); -REDUCE_BENCHMARK_DEFINE(float, any); -REDUCE_BENCHMARK_DEFINE(int32_t, min); -REDUCE_BENCHMARK_DEFINE(float, min); -REDUCE_BENCHMARK_DEFINE(int32_t, max); -REDUCE_BENCHMARK_DEFINE(float, max); -REDUCE_BENCHMARK_DEFINE(int32_t, mean); -REDUCE_BENCHMARK_DEFINE(float, mean); +NVBENCH_BENCH_TYPES(reduction_dictionary, NVBENCH_TYPE_AXES(Types, AggKinds)) + .set_name("reduction_dictionary") + .set_type_axes_names({"DataType", "AggKinds"}) + .add_int64_axis("size", {100'000, 1'000'000, 10'000'000, 100'000'000}); diff --git a/cpp/benchmarks/reduction/minmax.cpp b/cpp/benchmarks/reduction/minmax.cpp index 050f2887221..636de303cc4 100644 --- a/cpp/benchmarks/reduction/minmax.cpp +++ b/cpp/benchmarks/reduction/minmax.cpp @@ -16,55 +16,42 @@ #include #include -#include -#include -#include +#include #include #include #include -class Reduction : public cudf::benchmark {}; +#include -template -void BM_reduction(benchmark::State& state) +template +static void reduction_minmax(nvbench::state& state, nvbench::type_list) { - cudf::size_type const column_size{(cudf::size_type)state.range(0)}; - auto const dtype_id = 
cudf::type_to_id(); - auto const input_column = - create_random_column(dtype_id, row_count{column_size}, data_profile_builder().no_validity()); + auto const size = static_cast(state.get_int64("size")); - for (auto _ : state) { - cuda_event_timer timer(state, true); - auto result = cudf::minmax(*input_column); - } + auto const input_type = cudf::type_to_id(); - // The benchmark takes a column and produces two scalars. - set_items_processed(state, column_size + 2); - cudf::data_type dtype = cudf::data_type{dtype_id}; - set_bytes_processed(state, estimate_size(input_column->view()) + 2 * cudf::size_of(dtype)); -} + data_profile const profile = + data_profile_builder().no_validity().distribution(input_type, distribution_id::UNIFORM, 0, 100); + auto const input_column = create_random_column(input_type, row_count{size}, profile); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.add_element_count(size); + state.add_global_memory_reads(size); + state.add_global_memory_writes(2); -#define concat(a, b, c) a##b##c -#define get_agg(op) concat(cudf::make_, op, _aggregation()) + state.exec(nvbench::exec_tag::sync, + [&input_column](nvbench::launch& launch) { cudf::minmax(*input_column); }); + + set_throughputs(state); +} -// TYPE, OP -#define RBM_BENCHMARK_DEFINE(name, type, aggregation) \ - BENCHMARK_DEFINE_F(Reduction, name)(::benchmark::State & state) { BM_reduction(state); } \ - BENCHMARK_REGISTER_F(Reduction, name) \ - ->UseManualTime() \ - ->Arg(10000) /* 10k */ \ - ->Arg(100000) /* 100k */ \ - ->Arg(1000000) /* 1M */ \ - ->Arg(10000000) /* 10M */ \ - ->Arg(100000000); /* 100M */ +NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms"); -#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \ - RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation) +using Types = nvbench::type_list; -REDUCE_BENCHMARK_DEFINE(bool, minmax); 
-REDUCE_BENCHMARK_DEFINE(int8_t, minmax); -REDUCE_BENCHMARK_DEFINE(int32_t, minmax); -using cudf::timestamp_ms; -REDUCE_BENCHMARK_DEFINE(timestamp_ms, minmax); -REDUCE_BENCHMARK_DEFINE(float, minmax); +NVBENCH_BENCH_TYPES(reduction_minmax, NVBENCH_TYPE_AXES(Types)) + .set_name("minmax") + .set_type_axes_names({"DataType"}) + .add_int64_axis("size", {100'000, 1'000'000, 10'000'000, 100'000'000}); diff --git a/cpp/benchmarks/reduction/rank.cpp b/cpp/benchmarks/reduction/rank.cpp index 14876c80d3e..05aeed47fa6 100644 --- a/cpp/benchmarks/reduction/rank.cpp +++ b/cpp/benchmarks/reduction/rank.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -45,7 +46,7 @@ static void nvbench_reduction_scan(nvbench::state& state, nvbench::type_list #include -#include -#include -#include +#include #include #include #include #include +#include + #include -class Reduction : public cudf::benchmark {}; +template +static std::unique_ptr make_reduce_aggregation() +{ + switch (kind) { + case cudf::reduce_aggregation::MIN: + return cudf::make_min_aggregation(); + case cudf::reduce_aggregation::SUM: + return cudf::make_sum_aggregation(); + case cudf::reduce_aggregation::MEAN: + return cudf::make_mean_aggregation(); + case cudf::reduce_aggregation::PRODUCT: + return cudf::make_product_aggregation(); + case cudf::reduce_aggregation::VARIANCE: + return cudf::make_variance_aggregation(); + case cudf::reduce_aggregation::STD: + return cudf::make_std_aggregation(); + default: CUDF_FAIL("Unsupported reduce aggregation in this benchmark"); + } +} -template -void BM_reduction(benchmark::State& state, std::unique_ptr const& agg) +template +static void reduction(nvbench::state& state, nvbench::type_list>) { - cudf::size_type const column_size{(cudf::size_type)state.range(0)}; - auto const dtype = cudf::type_to_id(); + auto const size = static_cast(state.get_int64("size")); + if (cudf::is_chrono() && kind != cudf::aggregation::MIN) { + state.skip("Skip chrono types for some 
aggregations"); + } + + auto const input_type = cudf::type_to_id(); data_profile const profile = - data_profile_builder().no_validity().distribution(dtype, distribution_id::UNIFORM, 0, 100); - auto const input_column = create_random_column(dtype, row_count{column_size}, profile); + data_profile_builder().no_validity().distribution(input_type, distribution_id::UNIFORM, 0, 100); + auto const input_column = create_random_column(input_type, row_count{size}, profile); - cudf::data_type output_dtype = - (agg->kind == cudf::aggregation::MEAN || agg->kind == cudf::aggregation::VARIANCE || - agg->kind == cudf::aggregation::STD) + cudf::data_type output_type = + (kind == cudf::aggregation::MEAN || kind == cudf::aggregation::VARIANCE || + kind == cudf::aggregation::STD) ? cudf::data_type{cudf::type_id::FLOAT64} : input_column->type(); - for (auto _ : state) { - cuda_event_timer timer(state, true); - auto result = cudf::reduce(*input_column, *agg, output_dtype); - } + auto agg = make_reduce_aggregation(); - // The benchmark takes a column and produces two scalars. 
- set_items_processed(state, column_size + 1); - set_bytes_processed(state, estimate_size(input_column->view()) + cudf::size_of(output_dtype)); -} + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.add_element_count(size); + state.add_global_memory_reads(size); + state.add_global_memory_writes(1); -#define concat(a, b, c) a##b##c -#define get_agg(op) concat(cudf::make_, op, _aggregation()) + state.exec(nvbench::exec_tag::sync, [&input_column, output_type, &agg](nvbench::launch& launch) { + cudf::reduce(*input_column, *agg, output_type); + }); -// TYPE, OP -#define RBM_BENCHMARK_DEFINE(name, type, aggregation) \ - BENCHMARK_DEFINE_F(Reduction, name)(::benchmark::State & state) \ - { \ - BM_reduction(state, get_agg(aggregation)); \ - } \ - BENCHMARK_REGISTER_F(Reduction, name) \ - ->UseManualTime() \ - ->Arg(10000) /* 10k */ \ - ->Arg(100000) /* 100k */ \ - ->Arg(1000000) /* 1M */ \ - ->Arg(10000000) /* 10M */ \ - ->Arg(100000000); /* 100M */ + set_throughputs(state); +} -#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \ - RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation) +NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms"); -#define REDUCE_BENCHMARK_NUMERIC(aggregation) \ - REDUCE_BENCHMARK_DEFINE(bool, aggregation); \ - REDUCE_BENCHMARK_DEFINE(int8_t, aggregation); \ - REDUCE_BENCHMARK_DEFINE(int32_t, aggregation); \ - REDUCE_BENCHMARK_DEFINE(int64_t, aggregation); \ - REDUCE_BENCHMARK_DEFINE(float, aggregation); \ - REDUCE_BENCHMARK_DEFINE(double, aggregation); +using Types = nvbench::type_list; +using AggKinds = nvbench::enum_type_list; -REDUCE_BENCHMARK_NUMERIC(sum); -REDUCE_BENCHMARK_DEFINE(int32_t, product); -REDUCE_BENCHMARK_DEFINE(float, product); -REDUCE_BENCHMARK_DEFINE(int64_t, min); -REDUCE_BENCHMARK_DEFINE(double, min); -using cudf::timestamp_ms; -REDUCE_BENCHMARK_DEFINE(timestamp_ms, min); 
-REDUCE_BENCHMARK_DEFINE(int8_t, mean); -REDUCE_BENCHMARK_DEFINE(float, mean); -REDUCE_BENCHMARK_DEFINE(int32_t, variance); -REDUCE_BENCHMARK_DEFINE(double, variance); -REDUCE_BENCHMARK_DEFINE(int64_t, std); -REDUCE_BENCHMARK_DEFINE(float, std); +NVBENCH_BENCH_TYPES(reduction, NVBENCH_TYPE_AXES(Types, AggKinds)) + .set_name("reduction") + .set_type_axes_names({"DataType", "AggKinds"}) + .add_int64_axis("size", {100'000, 1'000'000, 10'000'000, 100'000'000}); diff --git a/cpp/benchmarks/reduction/scan.cpp b/cpp/benchmarks/reduction/scan.cpp index dc05aad9807..f3d67a79498 100644 --- a/cpp/benchmarks/reduction/scan.cpp +++ b/cpp/benchmarks/reduction/scan.cpp @@ -16,9 +16,7 @@ #include #include -#include -#include -#include +#include #include #include @@ -26,43 +24,38 @@ #include #include -class ReductionScan : public cudf::benchmark {}; +#include -template -static void BM_reduction_scan(benchmark::State& state, bool include_nulls) +template +static void reduction_scan(nvbench::state& state, nvbench::type_list) { - cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; - auto const dtype = cudf::type_to_id(); - auto const column = create_random_column(dtype, row_count{n_rows}); - if (!include_nulls) column->set_null_mask(rmm::device_buffer{}, 0); + auto const size = static_cast(state.get_int64("size")); + auto const nulls = state.get_float64("nulls"); + auto const input_type = cudf::type_to_id(); - std::unique_ptr result = nullptr; - for (auto _ : state) { - cuda_event_timer timer(state, true); - result = cudf::scan( - *column, *cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE); - } + data_profile const profile = data_profile_builder().null_probability(nulls).distribution( + input_type, distribution_id::UNIFORM, 0, 100); + auto const input_column = create_random_column(input_type, row_count{size}, profile); - // The benchmark takes a column and produces a new column of the same size as input. 
- set_items_processed(state, n_rows * 2); - set_bytes_processed(state, estimate_size(column->view()) + estimate_size(result->view())); + auto agg = cudf::make_min_aggregation(); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + state.add_element_count(size); + state.add_global_memory_reads(size); + state.add_global_memory_writes(1); + + state.exec(nvbench::exec_tag::sync, [&input_column, &agg](nvbench::launch& launch) { + cudf::scan(*input_column, *agg, cudf::scan_type::INCLUSIVE); + }); + + set_throughputs(state); } -#define SCAN_BENCHMARK_DEFINE(name, type, nulls) \ - BENCHMARK_DEFINE_F(ReductionScan, name) \ - (::benchmark::State & state) { BM_reduction_scan(state, nulls); } \ - BENCHMARK_REGISTER_F(ReductionScan, name) \ - ->UseManualTime() \ - ->Arg(10000) /* 10k */ \ - ->Arg(100000) /* 100k */ \ - ->Arg(1000000) /* 1M */ \ - ->Arg(10000000) /* 10M */ \ - ->Arg(100000000); /* 100M */ +using Types = nvbench::type_list; -SCAN_BENCHMARK_DEFINE(int8_no_nulls, int8_t, false); -SCAN_BENCHMARK_DEFINE(int32_no_nulls, int32_t, false); -SCAN_BENCHMARK_DEFINE(uint64_no_nulls, uint64_t, false); -SCAN_BENCHMARK_DEFINE(float_no_nulls, float, false); -SCAN_BENCHMARK_DEFINE(int16_nulls, int16_t, true); -SCAN_BENCHMARK_DEFINE(uint32_nulls, uint32_t, true); -SCAN_BENCHMARK_DEFINE(double_nulls, double, true); +NVBENCH_BENCH_TYPES(reduction_scan, NVBENCH_TYPE_AXES(Types)) + .set_name("scan") + .set_type_axes_names({"DataType"}) + .add_float64_axis("nulls", {0.0, 0.1}) + .add_int64_axis("size", {100'000, 1'000'000, 10'000'000, 100'000'000}); diff --git a/cpp/benchmarks/reduction/scan_structs.cpp b/cpp/benchmarks/reduction/scan_structs.cpp index a781f75a314..2de1db6dfe5 100644 --- a/cpp/benchmarks/reduction/scan_structs.cpp +++ b/cpp/benchmarks/reduction/scan_structs.cpp @@ -20,6 +20,7 @@ #include #include +#include #include @@ -57,7 +58,7 @@ static void nvbench_structs_scan(nvbench::state& state) 
std::unique_ptr result = nullptr; state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { result = cudf::detail::scan_inclusive( - input_view, *agg, null_policy, stream, rmm::mr::get_current_device_resource()); + input_view, *agg, null_policy, stream, cudf::get_current_device_resource_ref()); }); state.add_element_count(input_view.size()); diff --git a/cpp/benchmarks/search/contains_table.cpp b/cpp/benchmarks/search/contains_table.cpp index 17702d0741c..3bc1ac9c70a 100644 --- a/cpp/benchmarks/search/contains_table.cpp +++ b/cpp/benchmarks/search/contains_table.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,8 +20,7 @@ #include #include #include - -#include +#include #include @@ -58,7 +57,7 @@ static void nvbench_contains_table(nvbench::state& state, nvbench::type_list #include +#include #include @@ -37,7 +38,9 @@ void nvbench_rank_lists(nvbench::state& state, nvbench::type_list #include +#include #include @@ -35,7 +36,9 @@ void nvbench_rank_structs(nvbench::state& state, nvbench::type_list #include +#include #include @@ -33,7 +34,7 @@ void sort_multiple_lists(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { cudf::detail::sorted_order( - *input_table, {}, {}, stream, rmm::mr::get_current_device_resource()); + *input_table, {}, {}, stream, cudf::get_current_device_resource_ref()); }); } @@ -76,7 +77,8 @@ void sort_lists_of_structs(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; - cudf::detail::sorted_order(input_table, {}, {}, stream, rmm::mr::get_current_device_resource()); 
+ cudf::detail::sorted_order( + input_table, {}, {}, stream, cudf::get_current_device_resource_ref()); }); } diff --git a/cpp/benchmarks/sort/sort_structs.cpp b/cpp/benchmarks/sort/sort_structs.cpp index 3a3d1080ba0..fa1cf0279dd 100644 --- a/cpp/benchmarks/sort/sort_structs.cpp +++ b/cpp/benchmarks/sort/sort_structs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include +#include #include @@ -26,7 +27,8 @@ void nvbench_sort_struct(nvbench::state& state) state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { rmm::cuda_stream_view stream_view{launch.get_stream()}; - cudf::detail::sorted_order(*input, {}, {}, stream_view, rmm::mr::get_current_device_resource()); + cudf::detail::sorted_order( + *input, {}, {}, stream_view, cudf::get_current_device_resource_ref()); }); } diff --git a/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp b/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp index 492237474ff..fa017ca9e29 100644 --- a/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp +++ b/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp @@ -15,120 +15,76 @@ */ #include +#include #include +#include -#include -#include +#include namespace { -constexpr cudf::size_type hundredM = 1e8; -constexpr cudf::size_type tenM = 1e7; -constexpr cudf::size_type tenK = 1e4; -constexpr cudf::size_type fifty_percent = 50; - -void percent_range(benchmark::internal::Benchmark* b) -{ - b->Unit(benchmark::kMillisecond); - for (int percent = 0; percent <= 100; percent += 10) - b->Args({hundredM, percent}); -} - -void size_range(benchmark::internal::Benchmark* b) -{ - b->Unit(benchmark::kMillisecond); - for (int size = tenK; size <= hundredM; size *= 10) - b->Args({size, fifty_percent}); -} - template -void 
calculate_bandwidth(benchmark::State& state, cudf::size_type num_columns) +void calculate_bandwidth(nvbench::state& state) { - cudf::size_type const column_size{static_cast(state.range(0))}; - cudf::size_type const percent_true{static_cast(state.range(1))}; - - float const fraction = percent_true / 100.f; - cudf::size_type const column_size_out = fraction * column_size; - int64_t const mask_size = - sizeof(bool) * column_size + cudf::bitmask_allocation_size_bytes(column_size); - int64_t const validity_bytes_in = (fraction >= 1.0f / 32) - ? cudf::bitmask_allocation_size_bytes(column_size) - : 4 * column_size_out; - int64_t const validity_bytes_out = cudf::bitmask_allocation_size_bytes(column_size_out); - int64_t const column_bytes_out = sizeof(T) * column_size_out; + auto const n_rows = static_cast(state.get_int64("rows")); + auto const n_cols = static_cast(state.get_int64("columns")); + auto const percent_true = static_cast(state.get_int64("hits_%")); + + double const fraction = percent_true / 100.0; + cudf::size_type const output_size = fraction * n_rows; + int64_t const mask_size = sizeof(bool) * n_rows + cudf::bitmask_allocation_size_bytes(n_rows); + int64_t const validity_bytes_in = + (fraction >= 1.0 / 32) ? 
cudf::bitmask_allocation_size_bytes(n_rows) : 4 * output_size; + int64_t const validity_bytes_out = cudf::bitmask_allocation_size_bytes(output_size); + int64_t const column_bytes_out = sizeof(T) * output_size; int64_t const column_bytes_in = column_bytes_out; // we only read unmasked inputs - int64_t const bytes_read = - (column_bytes_in + validity_bytes_in) * num_columns + // reading columns - mask_size; // reading boolean mask + int64_t const bytes_read = (column_bytes_in + validity_bytes_in) * n_cols + // reading columns + mask_size; // reading boolean mask int64_t const bytes_written = - (column_bytes_out + validity_bytes_out) * num_columns; // writing columns + (column_bytes_out + validity_bytes_out) * n_cols; // writing columns - state.SetItemsProcessed(state.iterations() * column_size * num_columns); - state.SetBytesProcessed(static_cast(state.iterations()) * (bytes_read + bytes_written)); + state.add_element_count(n_rows * n_cols); + state.add_global_memory_reads(bytes_read); + state.add_global_memory_writes(bytes_written); } } // namespace -template -void BM_apply_boolean_mask(benchmark::State& state, cudf::size_type num_columns) +template +void apply_boolean_mask_benchmark(nvbench::state& state, nvbench::type_list) { - cudf::size_type const column_size{static_cast(state.range(0))}; - cudf::size_type const percent_true{static_cast(state.range(1))}; + auto const n_rows = static_cast(state.get_int64("rows")); + auto const n_cols = static_cast(state.get_int64("columns")); + auto const percent_true = static_cast(state.get_int64("hits_%")); - data_profile profile = data_profile_builder().cardinality(0).null_probability(0.0).distribution( - cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + auto const input_type = cudf::type_to_id(); + data_profile profile = data_profile_builder().cardinality(0).no_validity().distribution( + input_type, distribution_id::UNIFORM, 0, 20); - auto source_table = create_random_table( - cycle_dtypes({cudf::type_to_id()}, 
num_columns), row_count{column_size}, profile); + auto source_table = + create_random_table(cycle_dtypes({input_type}, n_cols), row_count{n_rows}, profile); profile.set_bool_probability_true(percent_true / 100.0); profile.set_null_probability(std::nullopt); // no null mask - auto mask = create_random_column(cudf::type_id::BOOL8, row_count{column_size}, profile); + auto mask = create_random_column(cudf::type_id::BOOL8, row_count{n_rows}, profile); + + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + calculate_bandwidth(state); - for (auto _ : state) { - cuda_event_timer raii(state, true); - auto result = cudf::apply_boolean_mask(*source_table, mask->view()); - } + state.exec(nvbench::exec_tag::sync, [&source_table, &mask](nvbench::launch& launch) { + cudf::apply_boolean_mask(*source_table, mask->view()); + }); - calculate_bandwidth(state, num_columns); + set_throughputs(state); } -template -class ApplyBooleanMask : public cudf::benchmark { - public: - using TypeParam = T; -}; - -#define ABM_BENCHMARK_DEFINE(name, type, n_columns) \ - BENCHMARK_TEMPLATE_DEFINE_F(ApplyBooleanMask, name, type)(::benchmark::State & st) \ - { \ - BM_apply_boolean_mask(st, n_columns); \ - } - -ABM_BENCHMARK_DEFINE(float_1_col, float, 1); -ABM_BENCHMARK_DEFINE(float_2_col, float, 2); -ABM_BENCHMARK_DEFINE(float_4_col, float, 4); - -// shmoo 1, 2, 4 column float across percentage true -BENCHMARK_REGISTER_F(ApplyBooleanMask, float_1_col)->Apply(percent_range); -BENCHMARK_REGISTER_F(ApplyBooleanMask, float_2_col)->Apply(percent_range); -BENCHMARK_REGISTER_F(ApplyBooleanMask, float_4_col)->Apply(percent_range); - -// shmoo 1, 2, 4 column float across column sizes with 50% true -BENCHMARK_REGISTER_F(ApplyBooleanMask, float_1_col)->Apply(size_range); -BENCHMARK_REGISTER_F(ApplyBooleanMask, float_2_col)->Apply(size_range); -BENCHMARK_REGISTER_F(ApplyBooleanMask, float_4_col)->Apply(size_range); - -// spot benchmark other types 
-ABM_BENCHMARK_DEFINE(int8_1_col, int8_t, 1); -ABM_BENCHMARK_DEFINE(int16_1_col, int16_t, 1); -ABM_BENCHMARK_DEFINE(int32_1_col, int32_t, 1); -ABM_BENCHMARK_DEFINE(int64_1_col, int64_t, 1); -ABM_BENCHMARK_DEFINE(double_1_col, double, 1); -BENCHMARK_REGISTER_F(ApplyBooleanMask, int8_1_col)->Args({tenM, fifty_percent}); -BENCHMARK_REGISTER_F(ApplyBooleanMask, int16_1_col)->Args({tenM, fifty_percent}); -BENCHMARK_REGISTER_F(ApplyBooleanMask, int32_1_col)->Args({tenM, fifty_percent}); -BENCHMARK_REGISTER_F(ApplyBooleanMask, int64_1_col)->Args({tenM, fifty_percent}); -BENCHMARK_REGISTER_F(ApplyBooleanMask, double_1_col)->Args({tenM, fifty_percent}); +using data_type = nvbench::type_list; +NVBENCH_BENCH_TYPES(apply_boolean_mask_benchmark, NVBENCH_TYPE_AXES(data_type)) + .set_name("apply_boolean_mask") + .set_type_axes_names({"type"}) + .add_int64_axis("columns", {1, 4}) + .add_int64_axis("rows", {100'000, 1'000'000, 10'000'000}) + .add_int64_axis("hits_%", {10, 50, 100}); diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp index c04b6516903..d7deebca89a 100644 --- a/cpp/benchmarks/stream_compaction/distinct.cpp +++ b/cpp/benchmarks/stream_compaction/distinct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -23,15 +24,29 @@ #include +#include + NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms"); template void nvbench_distinct(nvbench::state& state, nvbench::type_list) { - cudf::size_type const num_rows = state.get_int64("NumRows"); + cudf::size_type const num_rows = state.get_int64("NumRows"); + auto const keep = get_keep(state.get_string("keep")); + cudf::size_type const cardinality = state.get_int64("cardinality"); + + if (cardinality > num_rows) { + state.skip("cardinality > num_rows"); + return; + } - data_profile profile = data_profile_builder().cardinality(0).null_probability(0.01).distribution( - cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + data_profile profile = data_profile_builder() + .cardinality(cardinality) + .null_probability(0.01) + .distribution(cudf::type_to_id(), + distribution_id::UNIFORM, + static_cast(0), + std::numeric_limits::max()); auto source_column = create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); @@ -40,20 +55,19 @@ void nvbench_distinct(nvbench::state& state, nvbench::type_list) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - auto result = cudf::distinct(input_table, - {0}, - cudf::duplicate_keep_option::KEEP_ANY, - cudf::null_equality::EQUAL, - cudf::nan_equality::ALL_EQUAL); + auto result = cudf::distinct( + input_table, {0}, keep, cudf::null_equality::EQUAL, cudf::nan_equality::ALL_EQUAL); }); } -using data_type = nvbench::type_list; +using data_type = nvbench::type_list; NVBENCH_BENCH_TYPES(nvbench_distinct, NVBENCH_TYPE_AXES(data_type)) .set_name("distinct") .set_type_axes_names({"Type"}) - .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000}); + .add_string_axis("keep", {"any", "first", "last", "none"}) + .add_int64_axis("cardinality", {100, 100'000, 10'000'000, 
1'000'000'000}) + .add_int64_axis("NumRows", {100, 100'000, 10'000'000, 1'000'000'000}); template void nvbench_distinct_list(nvbench::state& state, nvbench::type_list) @@ -61,6 +75,7 @@ void nvbench_distinct_list(nvbench::state& state, nvbench::type_list) auto const size = state.get_int64("ColumnSize"); auto const dtype = cudf::type_to_id(); double const null_probability = state.get_float64("null_probability"); + auto const keep = get_keep(state.get_string("keep")); auto builder = data_profile_builder().null_probability(null_probability); if (dtype == cudf::type_id::LIST) { @@ -80,11 +95,8 @@ void nvbench_distinct_list(nvbench::state& state, nvbench::type_list) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - auto result = cudf::distinct(*table, - {0}, - cudf::duplicate_keep_option::KEEP_ANY, - cudf::null_equality::EQUAL, - cudf::nan_equality::ALL_EQUAL); + auto result = + cudf::distinct(*table, {0}, keep, cudf::null_equality::EQUAL, cudf::nan_equality::ALL_EQUAL); }); } @@ -92,5 +104,6 @@ NVBENCH_BENCH_TYPES(nvbench_distinct_list, NVBENCH_TYPE_AXES(nvbench::type_list)) .set_name("distinct_list") .set_type_axes_names({"Type"}) + .add_string_axis("keep", {"any", "first", "last", "none"}) .add_float64_axis("null_probability", {0.0, 0.1}) .add_int64_axis("ColumnSize", {100'000'000}); diff --git a/cpp/benchmarks/stream_compaction/stable_distinct.cpp b/cpp/benchmarks/stream_compaction/stable_distinct.cpp index bcee3048013..0a8836c0583 100644 --- a/cpp/benchmarks/stream_compaction/stable_distinct.cpp +++ b/cpp/benchmarks/stream_compaction/stable_distinct.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,6 +15,7 @@ */ #include +#include #include #include @@ -23,15 +24,29 @@ #include +#include + NVBENCH_DECLARE_TYPE_STRINGS(cudf::timestamp_ms, "cudf::timestamp_ms", "cudf::timestamp_ms"); template void nvbench_stable_distinct(nvbench::state& state, nvbench::type_list) { - cudf::size_type const num_rows = state.get_int64("NumRows"); + cudf::size_type const num_rows = state.get_int64("NumRows"); + auto const keep = get_keep(state.get_string("keep")); + cudf::size_type const cardinality = state.get_int64("cardinality"); + + if (cardinality > num_rows) { + state.skip("cardinality > num_rows"); + return; + } - data_profile profile = data_profile_builder().cardinality(0).null_probability(0.01).distribution( - cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + data_profile profile = data_profile_builder() + .cardinality(cardinality) + .null_probability(0.01) + .distribution(cudf::type_to_id(), + distribution_id::UNIFORM, + static_cast(0), + std::numeric_limits::max()); auto source_column = create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); @@ -40,20 +55,19 @@ void nvbench_stable_distinct(nvbench::state& state, nvbench::type_list) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - auto result = cudf::stable_distinct(input_table, - {0}, - cudf::duplicate_keep_option::KEEP_ANY, - cudf::null_equality::EQUAL, - cudf::nan_equality::ALL_EQUAL); + auto result = cudf::stable_distinct( + input_table, {0}, keep, cudf::null_equality::EQUAL, cudf::nan_equality::ALL_EQUAL); }); } -using data_type = nvbench::type_list; +using data_type = nvbench::type_list; NVBENCH_BENCH_TYPES(nvbench_stable_distinct, NVBENCH_TYPE_AXES(data_type)) .set_name("stable_distinct") .set_type_axes_names({"Type"}) - .add_int64_axis("NumRows", {10'000, 100'000, 1'000'000, 10'000'000}); + .add_string_axis("keep", {"any", "first", "last", "none"}) + 
.add_int64_axis("cardinality", {100, 100'000, 10'000'000, 1'000'000'000}) + .add_int64_axis("NumRows", {100, 100'000, 10'000'000, 1'000'000'000}); template void nvbench_stable_distinct_list(nvbench::state& state, nvbench::type_list) @@ -61,6 +75,7 @@ void nvbench_stable_distinct_list(nvbench::state& state, nvbench::type_list(); double const null_probability = state.get_float64("null_probability"); + auto const keep = get_keep(state.get_string("keep")); auto builder = data_profile_builder().null_probability(null_probability); if (dtype == cudf::type_id::LIST) { @@ -80,11 +95,8 @@ void nvbench_stable_distinct_list(nvbench::state& state, nvbench::type_list)) .set_name("stable_distinct_list") .set_type_axes_names({"Type"}) + .add_string_axis("keep", {"any", "first", "last", "none"}) .add_float64_axis("null_probability", {0.0, 0.1}) .add_int64_axis("ColumnSize", {100'000'000}); diff --git a/cpp/benchmarks/stream_compaction/stream_compaction_common.cpp b/cpp/benchmarks/stream_compaction/stream_compaction_common.cpp new file mode 100644 index 00000000000..8cbb2956777 --- /dev/null +++ b/cpp/benchmarks/stream_compaction/stream_compaction_common.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include + +cudf::duplicate_keep_option get_keep(std::string const& keep_str) +{ + if (keep_str == "any") { + return cudf::duplicate_keep_option::KEEP_ANY; + } else if (keep_str == "first") { + return cudf::duplicate_keep_option::KEEP_FIRST; + } else if (keep_str == "last") { + return cudf::duplicate_keep_option::KEEP_LAST; + } else if (keep_str == "none") { + return cudf::duplicate_keep_option::KEEP_NONE; + } else { + CUDF_FAIL("Unsupported keep option."); + } +} diff --git a/cpp/benchmarks/stream_compaction/stream_compaction_common.hpp b/cpp/benchmarks/stream_compaction/stream_compaction_common.hpp new file mode 100644 index 00000000000..d1ef2b10f41 --- /dev/null +++ b/cpp/benchmarks/stream_compaction/stream_compaction_common.hpp @@ -0,0 +1,19 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +cudf::duplicate_keep_option get_keep(std::string const& keep_str); diff --git a/cpp/benchmarks/string/slice.cpp b/cpp/benchmarks/string/slice.cpp index 0f973a7c8b5..1898f0340b6 100644 --- a/cpp/benchmarks/string/slice.cpp +++ b/cpp/benchmarks/string/slice.cpp @@ -14,11 +14,8 @@ * limitations under the License. 
*/ -#include "string_bench_args.hpp" - #include -#include -#include +#include #include @@ -29,56 +26,56 @@ #include +#include + #include -class StringSlice : public cudf::benchmark {}; +static void bench_slice(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const stype = state.get_string("type"); -enum slice_type { position, multi_position }; + if (static_cast(num_rows) * static_cast(row_width) >= + static_cast(std::numeric_limits::max())) { + state.skip("Skip benchmarks greater than size_type limit"); + } -static void BM_slice(benchmark::State& state, slice_type rt) -{ - cudf::size_type const n_rows{static_cast(state.range(0))}; - cudf::size_type const max_str_length{static_cast(state.range(1))}; data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); - auto starts_itr = thrust::constant_iterator(max_str_length / 3); - auto stops_itr = thrust::constant_iterator(max_str_length / 2); - cudf::test::fixed_width_column_wrapper starts(starts_itr, starts_itr + n_rows); - cudf::test::fixed_width_column_wrapper stops(stops_itr, stops_itr + n_rows); + auto starts_itr = thrust::constant_iterator(row_width / 4); + auto starts = + cudf::test::fixed_width_column_wrapper(starts_itr, starts_itr + num_rows); + auto stops_itr = thrust::constant_iterator(row_width / 3); + auto stops = + cudf::test::fixed_width_column_wrapper(stops_itr, stops_itr + num_rows); - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - switch (rt) { - case position: - 
cudf::strings::slice_strings(input, max_str_length / 3, max_str_length / 2); - break; - case multi_position: cudf::strings::slice_strings(input, starts, stops); break; - } + auto stream = cudf::get_default_stream(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value())); + // gather some throughput statistics as well + auto chars_size = input.chars_size(stream); + state.add_element_count(chars_size, "chars_size"); // number of bytes + state.add_global_memory_reads(chars_size); // all bytes are read + auto output_size = (row_width / 3 - row_width / 4) * num_rows; + state.add_global_memory_writes(output_size); + + if (stype == "multi") { + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::strings::slice_strings(input, starts, stops, stream); + }); + } else { + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + cudf::strings::slice_strings(input, row_width / 4, row_width / 3, 1, stream); + }); } - state.SetBytesProcessed(state.iterations() * input.chars_size(cudf::get_default_stream())); + set_throughputs(state); } -static void generate_bench_args(benchmark::internal::Benchmark* b) -{ - int const min_rows = 1 << 12; - int const max_rows = 1 << 24; - int const row_mult = 8; - int const min_rowlen = 1 << 5; - int const max_rowlen = 1 << 13; - int const len_mult = 2; - generate_string_bench_args(b, min_rows, max_rows, row_mult, min_rowlen, max_rowlen, len_mult); -} - -#define STRINGS_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(StringSlice, name) \ - (::benchmark::State & st) { BM_slice(st, slice_type::name); } \ - BENCHMARK_REGISTER_F(StringSlice, name) \ - ->Apply(generate_bench_args) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); - -STRINGS_BENCHMARK_DEFINE(position) -STRINGS_BENCHMARK_DEFINE(multi_position) +NVBENCH_BENCH(bench_slice) + .set_name("slice") + .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) + .add_int64_axis("num_rows", {262144, 2097152, 16777216}) + 
.add_string_axis("type", {"position", "multi"}); diff --git a/cpp/benchmarks/text/word_minhash.cpp b/cpp/benchmarks/text/word_minhash.cpp new file mode 100644 index 00000000000..adc3dddc59c --- /dev/null +++ b/cpp/benchmarks/text/word_minhash.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include + +#include + +#include + +#include + +static void bench_word_minhash(nvbench::state& state) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const row_width = static_cast(state.get_int64("row_width")); + auto const seed_count = static_cast(state.get_int64("seed_count")); + auto const base64 = state.get_int64("hash_type") == 64; + + data_profile const strings_profile = + data_profile_builder().distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, 5); + auto strings_table = + create_random_table({cudf::type_id::STRING}, row_count{num_rows}, strings_profile); + + auto const num_offsets = (num_rows / row_width) + 1; + auto offsets = cudf::sequence(num_offsets, + cudf::numeric_scalar(0), + cudf::numeric_scalar(row_width)); + + auto source = cudf::make_lists_column(num_offsets - 1, + std::move(offsets), + std::move(strings_table->release().front()), + 0, + rmm::device_buffer{}); + + data_profile const seeds_profile = data_profile_builder().no_validity().distribution( + cudf::type_to_id(), distribution_id::NORMAL, 
0, 256); + auto const seed_type = base64 ? cudf::type_id::UINT64 : cudf::type_id::UINT32; + auto const seeds_table = create_random_table({seed_type}, row_count{seed_count}, seeds_profile); + auto seeds = seeds_table->get_column(0); + + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + + cudf::strings_column_view input(cudf::lists_column_view(source->view()).child()); + auto chars_size = input.chars_size(cudf::get_default_stream()); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(num_rows); // output are hashes + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = base64 ? nvtext::word_minhash64(source->view(), seeds.view()) + : nvtext::word_minhash(source->view(), seeds.view()); + }); +} + +NVBENCH_BENCH(bench_word_minhash) + .set_name("word_minhash") + .add_int64_axis("num_rows", {131072, 262144, 524288, 1048576, 2097152}) + .add_int64_axis("row_width", {10, 100, 1000}) + .add_int64_axis("seed_count", {2, 25}) + .add_int64_axis("hash_type", {32, 64}); diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake index 0afdc526981..07cbf5150f4 100644 --- a/cpp/cmake/thirdparty/get_arrow.cmake +++ b/cpp/cmake/thirdparty/get_arrow.cmake @@ -22,82 +22,8 @@ include_guard(GLOBAL) -# Generate a FindArrow module for the case where we need to search for arrow within a pip install -# pyarrow. -function(find_libarrow_in_python_wheel PYARROW_VERSION) - string(REPLACE "." 
";" PYARROW_VER_COMPONENTS "${PYARROW_VERSION}") - list(GET PYARROW_VER_COMPONENTS 0 PYARROW_MAJOR_VER) - list(GET PYARROW_VER_COMPONENTS 1 PYARROW_MINOR_VER) - - # Ensure that the major and minor versions are two digits long - string(LENGTH ${PYARROW_MAJOR_VER} PYARROW_MAJOR_LENGTH) - string(LENGTH ${PYARROW_MINOR_VER} PYARROW_MINOR_LENGTH) - if(${PYARROW_MAJOR_LENGTH} EQUAL 1) - set(PYARROW_MAJOR_VER "0${PYARROW_MAJOR_VER}") - endif() - if(${PYARROW_MINOR_LENGTH} EQUAL 1) - set(PYARROW_MINOR_VER "0${PYARROW_MINOR_VER}") - endif() - - set(PYARROW_LIB "libarrow.so.${PYARROW_MAJOR_VER}${PYARROW_MINOR_VER}") - - string( - APPEND - initial_code_block - [=[ -find_package(Python 3.9 REQUIRED COMPONENTS Interpreter) -execute_process( - COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])" - OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR - OUTPUT_STRIP_TRAILING_WHITESPACE - COMMAND_ERROR_IS_FATAL ANY -) -list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}") -]=] - ) - string( - APPEND - final_code_block - [=[ -list(POP_BACK CMAKE_PREFIX_PATH) -]=] - ) - rapids_find_generate_module( - Arrow NO_CONFIG - VERSION "${PYARROW_VERSION}" - LIBRARY_NAMES "${PYARROW_LIB}" - BUILD_EXPORT_SET cudf-exports - INSTALL_EXPORT_SET cudf-exports - HEADER_NAMES arrow/python/arrow_to_pandas.h INITIAL_CODE_BLOCK initial_code_block - FINAL_CODE_BLOCK final_code_block - ) - - find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL) - add_library(arrow_shared ALIAS Arrow::Arrow) - - rapids_export_package(BUILD Arrow cudf-exports) - rapids_export_package(INSTALL Arrow cudf-exports) -endfunction() - # This function finds arrow and sets any additional necessary environment variables. 
-function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENABLE_PYTHON - ENABLE_PARQUET PYARROW_LIBARROW -) - - if(PYARROW_LIBARROW) - # Generate a FindArrow.cmake to find pyarrow's libarrow.so - find_libarrow_in_python_wheel(${VERSION}) - set(ARROW_FOUND - TRUE - PARENT_SCOPE - ) - set(ARROW_LIBRARIES - arrow_shared - PARENT_SCOPE - ) - return() - endif() - +function(find_and_configure_arrow VERSION BUILD_STATIC EXCLUDE_FROM_ALL ENABLE_PARQUET) if(BUILD_STATIC) if(TARGET arrow_static) set(ARROW_FOUND @@ -124,10 +50,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB endif() endif() - if(NOT ARROW_ARMV8_ARCH) - set(ARROW_ARMV8_ARCH "armv8-a") - endif() - if(NOT ARROW_SIMD_LEVEL) set(ARROW_SIMD_LEVEL "NONE") endif() @@ -150,14 +72,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB set(ARROW_OPENSSL_USE_SHARED ON) endif() - set(ARROW_PYTHON_OPTIONS "") - if(ENABLE_PYTHON) - list(APPEND ARROW_PYTHON_OPTIONS "ARROW_PYTHON ON") - # Arrow's logic to build Boost from source is busted, so we have to get it from the system. - list(APPEND ARROW_PYTHON_OPTIONS "BOOST_SOURCE SYSTEM") - list(APPEND ARROW_PYTHON_OPTIONS "ARROW_DEPENDENCY_SOURCE AUTO") - endif() - set(ARROW_PARQUET_OPTIONS "") if(ENABLE_PARQUET) # Arrow's logic to build Boost from source is busted, so we have to get it from the system. @@ -174,6 +88,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB GIT_REPOSITORY https://github.com/apache/arrow.git GIT_TAG apache-arrow-${VERSION} GIT_SHALLOW TRUE SOURCE_SUBDIR cpp + EXCLUDE_FROM_ALL ${EXCLUDE_FROM_ALL} OPTIONS "CMAKE_VERBOSE_MAKEFILE ON" "ARROW_ACERO ON" "ARROW_IPC ON" @@ -181,16 +96,14 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB "ARROW_WITH_BACKTRACE ON" "ARROW_CXXFLAGS -w" "ARROW_JEMALLOC OFF" - "ARROW_S3 ${ENABLE_S3}" - "ARROW_ORC ${ENABLE_ORC}" - # e.g. 
needed by blazingsql-io + "ARROW_S3 OFF" + "ARROW_ORC OFF" ${ARROW_PARQUET_OPTIONS} "ARROW_PARQUET ${ENABLE_PARQUET}" "ARROW_FILESYSTEM ON" - ${ARROW_PYTHON_OPTIONS} + "ARROW_PYTHON OFF" # Arrow modifies CMake's GLOBAL RULE_LAUNCH_COMPILE unless this is off "ARROW_USE_CCACHE OFF" - "ARROW_ARMV8_ARCH ${ARROW_ARMV8_ARCH}" "ARROW_SIMD_LEVEL ${ARROW_SIMD_LEVEL}" "ARROW_BUILD_STATIC ${ARROW_BUILD_STATIC}" "ARROW_BUILD_SHARED ${ARROW_BUILD_SHARED}" @@ -269,7 +182,6 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB endif() if(Arrow_ADDED) - set(arrow_code_string [=[ if (TARGET cudf::arrow_shared AND (NOT TARGET arrow_shared)) @@ -324,101 +236,106 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB get_target_property(interface_libs arrow_static INTERFACE_LINK_LIBRARIES) endif() endif() - rapids_export( - BUILD Arrow - VERSION ${VERSION} - EXPORT_SET arrow_targets - GLOBAL_TARGETS arrow_shared arrow_static - NAMESPACE cudf:: - FINAL_CODE_BLOCK arrow_code_string - ) - - if(ENABLE_PARQUET) - - set(arrow_acero_code_string - [=[ - if (TARGET cudf::arrow_acero_shared AND (NOT TARGET arrow_acero_shared)) - add_library(arrow_acero_shared ALIAS cudf::arrow_acero_shared) - endif() - if (TARGET cudf::arrow_acero_static AND (NOT TARGET arrow_acero_static)) - add_library(arrow_acero_static ALIAS cudf::arrow_acero_static) - endif() - ]=] - ) + include(rapids-export) + if(NOT EXCLUDE_FROM_ALL) rapids_export( - BUILD ArrowAcero + BUILD Arrow VERSION ${VERSION} - EXPORT_SET arrow_acero_targets - GLOBAL_TARGETS arrow_acero_shared arrow_acero_static + EXPORT_SET arrow_targets + GLOBAL_TARGETS arrow_shared arrow_static NAMESPACE cudf:: - FINAL_CODE_BLOCK arrow_acero_code_string + FINAL_CODE_BLOCK arrow_code_string ) - set(arrow_dataset_code_string - [=[ - if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared)) - add_library(arrow_dataset_shared ALIAS cudf::arrow_dataset_shared) - endif() - if (TARGET 
cudf::arrow_dataset_static AND (NOT TARGET arrow_dataset_static)) - add_library(arrow_dataset_static ALIAS cudf::arrow_dataset_static) - endif() - ]=] - ) + if(ENABLE_PARQUET) + set(arrow_acero_code_string + [=[ + if (TARGET cudf::arrow_acero_shared AND (NOT TARGET arrow_acero_shared)) + add_library(arrow_acero_shared ALIAS cudf::arrow_acero_shared) + endif() + if (TARGET cudf::arrow_acero_static AND (NOT TARGET arrow_acero_static)) + add_library(arrow_acero_static ALIAS cudf::arrow_acero_static) + endif() + ]=] + ) - rapids_export( - BUILD ArrowDataset - VERSION ${VERSION} - EXPORT_SET arrow_dataset_targets - GLOBAL_TARGETS arrow_dataset_shared arrow_dataset_static - NAMESPACE cudf:: - FINAL_CODE_BLOCK arrow_dataset_code_string - ) + rapids_export( + BUILD ArrowAcero + VERSION ${VERSION} + EXPORT_SET arrow_acero_targets + GLOBAL_TARGETS arrow_acero_shared arrow_acero_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK arrow_acero_code_string + ) - set(parquet_code_string - [=[ - if (TARGET cudf::parquet_shared AND (NOT TARGET parquet_shared)) - add_library(parquet_shared ALIAS cudf::parquet_shared) - endif() - if (TARGET cudf::parquet_static AND (NOT TARGET parquet_static)) - add_library(parquet_static ALIAS cudf::parquet_static) - endif() - ]=] - ) + set(arrow_dataset_code_string + [=[ + if (TARGET cudf::arrow_dataset_shared AND (NOT TARGET arrow_dataset_shared)) + add_library(arrow_dataset_shared ALIAS cudf::arrow_dataset_shared) + endif() + if (TARGET cudf::arrow_dataset_static AND (NOT TARGET arrow_dataset_static)) + add_library(arrow_dataset_static ALIAS cudf::arrow_dataset_static) + endif() + ]=] + ) - rapids_export( - BUILD Parquet - VERSION ${VERSION} - EXPORT_SET parquet_targets - GLOBAL_TARGETS parquet_shared parquet_static - NAMESPACE cudf:: - FINAL_CODE_BLOCK parquet_code_string - ) + rapids_export( + BUILD ArrowDataset + VERSION ${VERSION} + EXPORT_SET arrow_dataset_targets + GLOBAL_TARGETS arrow_dataset_shared arrow_dataset_static + NAMESPACE cudf:: + 
FINAL_CODE_BLOCK arrow_dataset_code_string + ) + set(parquet_code_string + [=[ + if (TARGET cudf::parquet_shared AND (NOT TARGET parquet_shared)) + add_library(parquet_shared ALIAS cudf::parquet_shared) + endif() + if (TARGET cudf::parquet_static AND (NOT TARGET parquet_static)) + add_library(parquet_static ALIAS cudf::parquet_static) + endif() + ]=] + ) + + rapids_export( + BUILD Parquet + VERSION ${VERSION} + EXPORT_SET parquet_targets + GLOBAL_TARGETS parquet_shared parquet_static + NAMESPACE cudf:: + FINAL_CODE_BLOCK parquet_code_string + ) + endif() endif() endif() - # We generate the arrow-configfiles when we built arrow locally, so always do `find_dependency` - rapids_export_package(BUILD Arrow cudf-exports) - rapids_export_package(INSTALL Arrow cudf-exports) - if(ENABLE_PARQUET) - rapids_export_package(BUILD Parquet cudf-exports) - rapids_export_package(BUILD ArrowDataset cudf-exports) - endif() + if(NOT EXCLUDE_FROM_ALL) + # We generate the arrow-configfiles when we built arrow locally, so always do `find_dependency` + rapids_export_package(BUILD Arrow cudf-exports) + rapids_export_package(INSTALL Arrow cudf-exports) - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root( - BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-exports - ) - rapids_export_find_package_root( - BUILD Parquet [=[${CMAKE_CURRENT_LIST_DIR}]=] - EXPORT_SET cudf-exports - CONDITION ENABLE_PARQUET - ) - rapids_export_find_package_root( - BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] - EXPORT_SET cudf-exports - CONDITION ENABLE_PARQUET - ) + if(ENABLE_PARQUET) + rapids_export_package(BUILD Parquet cudf-exports) + rapids_export_package(BUILD ArrowDataset cudf-exports) + endif() + + include("${rapids-cmake-dir}/export/find_package_root.cmake") + rapids_export_find_package_root( + BUILD Arrow [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-exports + ) + rapids_export_find_package_root( + BUILD Parquet [=[${CMAKE_CURRENT_LIST_DIR}]=] 
+ EXPORT_SET cudf-exports + CONDITION ENABLE_PARQUET + ) + rapids_export_find_package_root( + BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] + EXPORT_SET cudf-exports + CONDITION ENABLE_PARQUET + ) + endif() set(ARROW_LIBRARIES "${ARROW_LIBRARIES}" @@ -435,7 +352,21 @@ if(NOT DEFINED CUDF_VERSION_Arrow) ) endif() +# Default to static arrow builds +if(NOT DEFINED CUDF_USE_ARROW_STATIC) + set(CUDF_USE_ARROW_STATIC ON) +endif() + +# Default to excluding from installation since we generally privately and statically link Arrow. +if(NOT DEFINED CUDF_EXCLUDE_ARROW_FROM_ALL) + set(CUDF_EXCLUDE_ARROW_FROM_ALL OFF) +endif() + +if(NOT DEFINED CUDF_ENABLE_ARROW_PARQUET) + set(CUDF_ENABLE_ARROW_PARQUET OFF) +endif() + find_and_configure_arrow( - ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC} - ${CUDF_ENABLE_ARROW_PYTHON} ${CUDF_ENABLE_ARROW_PARQUET} ${USE_LIBARROW_FROM_PYARROW} + ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_EXCLUDE_ARROW_FROM_ALL} + ${CUDF_ENABLE_ARROW_PARQUET} ) diff --git a/cpp/cmake/thirdparty/get_spdlog.cmake b/cpp/cmake/thirdparty/get_spdlog.cmake index c0e07d02d94..90b0f4d8a8e 100644 --- a/cpp/cmake/thirdparty/get_spdlog.cmake +++ b/cpp/cmake/thirdparty/get_spdlog.cmake @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. 
You may obtain a copy of the License at @@ -16,21 +16,12 @@ function(find_and_configure_spdlog) include(${rapids-cmake-dir}/cpm/spdlog.cmake) - rapids_cpm_spdlog(FMT_OPTION "EXTERNAL_FMT_HO" INSTALL_EXPORT_SET cudf-exports) - rapids_export_package(BUILD spdlog cudf-exports) + rapids_cpm_spdlog( + FMT_OPTION "EXTERNAL_FMT_HO" + INSTALL_EXPORT_SET cudf-exports + BUILD_EXPORT_SET cudf-exports + ) - if(spdlog_ADDED) - rapids_export( - BUILD spdlog - EXPORT_SET spdlog - GLOBAL_TARGETS spdlog spdlog_header_only - NAMESPACE spdlog:: - ) - include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root( - BUILD spdlog [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cudf-exports - ) - endif() endfunction() find_and_configure_spdlog() diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md index aa054ba93e9..fce8adb4c06 100644 --- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md +++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md @@ -223,17 +223,17 @@ can be passed to libcudf functions via `rmm::device_async_resource_ref` paramete ### Current Device Memory Resource -RMM provides a "default" memory resource for each device that can be accessed and updated via the -`rmm::mr::get_current_device_resource()` and `rmm::mr::set_current_device_resource(...)` functions, -respectively. All memory resource parameters should be defaulted to use the return value of -`rmm::mr::get_current_device_resource()`. +RMM provides a "default" memory resource for each device and functions to access and set it. libcudf +provides wrappers for these functions in `cpp/include/cudf/utilities/memory_resource.hpp`. +All memory resource parameters should be defaulted to use the return value of +`cudf::get_current_device_resource_ref()`. ### Resource Refs Memory resources are passed via resource ref parameters. 
A resource ref is a memory resource wrapper that enables consumers to specify properties of resources that they expect. These are defined -in the `cuda::mr` namespace of libcu++, but RMM provides some convenience wrappers in -`rmm/resource_ref.hpp`: +in the `cuda::mr` namespace of libcu++, but RMM provides some convenience aliases in +`rmm/resource_ref.hpp`. - `rmm::device_resource_ref` accepts a memory resource that provides synchronous allocation of device-accessible memory. - `rmm::device_async_resource_ref` accepts a memory resource that provides stream-ordered allocation @@ -247,7 +247,8 @@ in the `cuda::mr` namespace of libcu++, but RMM provides some convenience wrappe - `rmm::host_async_resource_ref` accepts a memory resource that provides stream-ordered allocation of host- and device-accessible memory. -See the libcu++ [docs on `resource_ref`](https://nvidia.github.io/cccl/libcudacxx/extended_api/memory_resource/resource_ref.html) for more information. +See the libcu++ [docs on `resource_ref`](https://nvidia.github.io/cccl/libcudacxx/extended_api/memory_resource/resource_ref.html) +for more information. ## cudf::column @@ -515,7 +516,7 @@ For example: // cpp/include/cudf/header.hpp void external_function(..., rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); // cpp/include/cudf/detail/header.hpp namespace detail{ @@ -575,7 +576,7 @@ whose outputs will be returned. 
Example: // Returned `column` contains newly allocated memory, // therefore the API must accept a memory resource pointer std::unique_ptr returns_output_memory( - ..., rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + ..., rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); // This API does not allocate any new *output* memory, therefore // a memory resource is unnecessary @@ -586,17 +587,17 @@ This rule automatically applies to all detail APIs that allocate memory. Any det called by any public API, and therefore could be allocating memory that is returned to the user. To support such uses cases, all detail APIs allocating memory resources should accept an `mr` parameter. Callers are responsible for either passing through a provided `mr` or -`rmm::mr::get_current_device_resource()` as needed. +`cudf::get_current_device_resource_ref()` as needed. ### Temporary Memory Not all memory allocated within a libcudf API is returned to the caller. Often algorithms must allocate temporary, scratch memory for intermediate results. Always use the default resource -obtained from `rmm::mr::get_current_device_resource()` for temporary memory allocations. Example: +obtained from `cudf::get_current_device_resource_ref()` for temporary memory allocations. Example: ```c++ rmm::device_buffer some_function( - ..., rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) { + ..., rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { rmm::device_buffer returned_buffer(..., mr); // Returned buffer uses the passed in MR ... rmm::device_buffer temporary_buffer(...); // Temporary buffer uses default MR @@ -613,7 +614,7 @@ use memory resources for device memory allocation with automated lifetime manage #### rmm::device_buffer Allocates a specified number of bytes of untyped, uninitialized device memory using a memory resource. 
If no `rmm::device_async_resource_ref` is explicitly provided, it uses -`rmm::mr::get_current_device_resource()`. +`cudf::get_current_device_resource_ref()`. `rmm::device_buffer` is movable and copyable on a stream. A copy performs a deep copy of the `device_buffer`'s device memory on the specified stream, whereas a move moves ownership of the @@ -685,7 +686,7 @@ rmm::device_uvector v(100, s); // Initializes the elements to 0 thrust::uninitialized_fill(thrust::cuda::par.on(s.value()), v.begin(), v.end(), int32_t{0}); -rmm::mr::device_memory_resource * mr = new my_custom_resource{...}; +auto mr = new my_custom_resource{...}; // Allocates uninitialized storage for 100 `int32_t` elements on stream `s` using the resource `mr` rmm::device_uvector v2{100, s, mr}; ``` diff --git a/cpp/doxygen/regex.md b/cpp/doxygen/regex.md index 8d206f245dc..6d1c91a5752 100644 --- a/cpp/doxygen/regex.md +++ b/cpp/doxygen/regex.md @@ -17,6 +17,12 @@ The details are based on features documented at https://www.regular-expressions. **Note:** The alternation character is the pipe character `|` and not the character included in the tables on this page. There is an issue including the pipe character inside the table markdown that is rendered by doxygen. +By default, only the `\n` character is recognized as a line break. The [cudf::strings::regex_flags::EXT_NEWLINE](@ref cudf::strings::regex_flags) increases the set of line break characters to include: +- Paragraph separator (Unicode: `2029`, UTF-8: `E280A9`) +- Line separator (Unicode: `2028`, UTF-8: `E280A8`) +- Next line (Unicode: `0085`, UTF-8: `C285`) +- Carriage return (Unicode: `000D`, UTF-8: `0D`) + **Invalid regex patterns will result in undefined behavior**. This includes but is not limited to the following: - Unescaped special characters (listed in the third row of the Characters table below) when they are intended to match as literals. - Unmatched paired special characters like `()`, `[]`, and `{}`. 
diff --git a/cpp/examples/basic/src/process_csv.cpp b/cpp/examples/basic/src/process_csv.cpp index 0d2b6b099ac..d27789a78a6 100644 --- a/cpp/examples/basic/src/process_csv.cpp +++ b/cpp/examples/basic/src/process_csv.cpp @@ -90,7 +90,7 @@ int main(int argc, char** argv) // it being set as the default // Also, call this before the first libcudf API call to ensure all data is allocated by the same // memory resource. - rmm::mr::set_current_device_resource(&mr); + cudf::set_current_device_resource(&mr); // Read data auto stock_table_with_metadata = read_csv("4stock_5day.csv"); diff --git a/cpp/examples/billion_rows/CMakeLists.txt b/cpp/examples/billion_rows/CMakeLists.txt new file mode 100644 index 00000000000..d95bb73b258 --- /dev/null +++ b/cpp/examples/billion_rows/CMakeLists.txt @@ -0,0 +1,34 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +cmake_minimum_required(VERSION 3.26.4) + +include(../set_cuda_architecture.cmake) + +# initialize cuda architecture +rapids_cuda_init_architectures(billion_rows) +rapids_cuda_set_architectures(RAPIDS) + +project( + billion_rows + VERSION 0.0.1 + LANGUAGES CXX CUDA +) + +include(../fetch_dependencies.cmake) + +list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) + +add_library(groupby_results OBJECT groupby_results.cpp) +target_link_libraries(groupby_results PRIVATE cudf::cudf) + +add_executable(brc brc.cpp) +target_link_libraries(brc PRIVATE cudf::cudf nvToolsExt $) +install(TARGETS brc DESTINATION bin/examples/libcudf) + +add_executable(brc_chunks brc_chunks.cpp) +target_link_libraries(brc_chunks PRIVATE cudf::cudf nvToolsExt $) +install(TARGETS brc_chunks DESTINATION bin/examples/libcudf) + +add_executable(brc_pipeline brc_pipeline.cpp) +target_link_libraries(brc_pipeline PRIVATE cudf::cudf nvToolsExt $) +install(TARGETS brc_pipeline DESTINATION bin/examples/libcudf) diff --git a/cpp/examples/billion_rows/README.md b/cpp/examples/billion_rows/README.md new file mode 100644 index 
00000000000..73ff7aa19f0 --- /dev/null +++ b/cpp/examples/billion_rows/README.md @@ -0,0 +1,44 @@ +# libcudf C++ example for the 1 billion row challenge + +This C++ example demonstrates using libcudf APIs to read and process +a table with 1 billion rows. The 1 billion row challenge is described here: +https://github.com/gunnarmorling/1brc + +The examples load the 1 billion row text file using the CSV reader. +The file contains around 400 unique city names (string type) along with +random temperature values (float type). +Once loaded, the examples performs groupby aggregations to find the +minimum, maximum, and average temperature for each city. + +There are three examples included: +1. `brc.cpp` + Loads the file in one call to the CSV reader. + This generally requires a large amount of available GPU memory. +2. `brc_chunks.cpp` + Loads and processes the file in chunks. + The number of chunks to use is a parameter to the executable. +3. `brc_pipeline.cpp` + Loads and processes the file in chunks with separate threads/streams. + The number of chunks and number of threads to use are parameters to the executable. + +An input file can be generated using the instructions from +https://github.com/gunnarmorling/1brc. + +## Compile and execute + +```bash +# Configure project +cmake -S . -B build/ +# Build +cmake --build build/ --parallel $PARALLEL_LEVEL +# Execute +build/brc input.txt +# Execute in chunked mode with 25 chunks (default) +build/brc_chunks input.txt 25 +# Execute in pipeline mode with 25 chunks and 2 threads (defaults) +build/brc_pipeline input.txt 25 2 +``` + +If your machine does not come with a pre-built libcudf binary, expect the +first build to take some time, as it would build libcudf on the host machine. +It may be sped up by configuring the proper `PARALLEL_LEVEL` number. 
diff --git a/cpp/examples/billion_rows/brc.cpp b/cpp/examples/billion_rows/brc.cpp new file mode 100644 index 00000000000..b7b292cf16e --- /dev/null +++ b/cpp/examples/billion_rows/brc.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "common.hpp" +#include "groupby_results.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +using elapsed_t = std::chrono::duration; + +int main(int argc, char const** argv) +{ + if (argc < 2) { + std::cout << "required parameter: input-file-path\n"; + return 1; + } + + auto const input_file = std::string{argv[1]}; + std::cout << "Input: " << input_file << std::endl; + + auto const mr_name = std::string("pool"); + auto resource = create_memory_resource(mr_name); + auto stats_mr = + rmm::mr::statistics_resource_adaptor(resource.get()); + rmm::mr::set_current_device_resource(&stats_mr); + auto stream = cudf::get_default_stream(); + + auto start = std::chrono::steady_clock::now(); + + auto const csv_result = [input_file, stream] { + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{input_file}) + .header(-1) + .delimiter(';') + .doublequote(false) + .dtypes(std::vector{cudf::data_type{cudf::type_id::STRING}, + cudf::data_type{cudf::type_id::FLOAT32}}) + .na_filter(false); + return cudf::io::read_csv(in_opts, 
stream).tbl; + }(); + elapsed_t elapsed = std::chrono::steady_clock::now() - start; + std::cout << "File load time: " << elapsed.count() << " seconds\n"; + auto const csv_table = csv_result->view(); + std::cout << "Input rows: " << csv_table.num_rows() << std::endl; + + auto const cities = csv_table.column(0); + auto const temps = csv_table.column(1); + + std::vector> aggregations; + aggregations.emplace_back(cudf::make_min_aggregation()); + aggregations.emplace_back(cudf::make_max_aggregation()); + aggregations.emplace_back(cudf::make_mean_aggregation()); + + auto result = compute_results(cities, temps, std::move(aggregations), stream); + + // The other 2 examples employ sorting for the sub-aggregates so enabling + // the following line may be more comparable in performance with them. + // + // result = cudf::sort_by_key(result->view(), result->view().select({0}), {}, {}, stream); + + stream.synchronize(); + + elapsed = std::chrono::steady_clock::now() - start; + std::cout << "Number of keys: " << result->num_rows() << std::endl; + std::cout << "Process time: " << elapsed.count() << " seconds\n"; + std::cout << "Peak memory: " << (stats_mr.get_bytes_counter().peak / 1048576.0) << " MB\n"; + + return 0; +} diff --git a/cpp/examples/billion_rows/brc_chunks.cpp b/cpp/examples/billion_rows/brc_chunks.cpp new file mode 100644 index 00000000000..4a65c59e461 --- /dev/null +++ b/cpp/examples/billion_rows/brc_chunks.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "common.hpp" +#include "groupby_results.hpp" + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +using elapsed_t = std::chrono::duration; + +std::unique_ptr load_chunk(std::string const& input_file, + std::size_t start, + std::size_t size, + rmm::cuda_stream_view stream) +{ + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{input_file}) + .header(-1) + .delimiter(';') + .doublequote(false) + .byte_range_offset(start) + .byte_range_size(size) + .dtypes(std::vector{cudf::data_type{cudf::type_id::STRING}, + cudf::data_type{cudf::type_id::FLOAT32}}) + .na_filter(false); + return cudf::io::read_csv(in_opts, stream).tbl; +} + +int main(int argc, char const** argv) +{ + if (argc < 2) { + std::cout << "required parameter: input-file-path\n"; + std::cout << "optional parameter: chunk-count\n"; + return 1; + } + + auto const input_file = std::string{argv[1]}; + auto const divider = (argc < 3) ? 
25 : std::stoi(std::string(argv[2])); + + std::cout << "Input: " << input_file << std::endl; + std::cout << "Chunks: " << divider << std::endl; + + auto const mr_name = std::string("pool"); + auto resource = create_memory_resource(mr_name); + auto stats_mr = + rmm::mr::statistics_resource_adaptor(resource.get()); + rmm::mr::set_current_device_resource(&stats_mr); + auto stream = cudf::get_default_stream(); + + std::filesystem::path p = input_file; + auto const file_size = std::filesystem::file_size(p); + + auto start = std::chrono::steady_clock::now(); + + std::vector> agg_data; + std::size_t chunk_size = file_size / divider + ((file_size % divider) != 0); + std::size_t start_pos = 0; + cudf::size_type total_rows = 0; + do { + auto const input_table = load_chunk(input_file, start_pos, chunk_size, stream); + auto const read_rows = input_table->num_rows(); + if (read_rows == 0) break; + + auto const cities = input_table->view().column(0); + auto const temps = input_table->view().column(1); + + std::vector> aggregations; + aggregations.emplace_back(cudf::make_min_aggregation()); + aggregations.emplace_back(cudf::make_max_aggregation()); + aggregations.emplace_back(cudf::make_sum_aggregation()); + aggregations.emplace_back(cudf::make_count_aggregation()); + auto result = compute_results(cities, temps, std::move(aggregations), stream); + + agg_data.emplace_back( + cudf::sort_by_key(result->view(), result->view().select({0}), {}, {}, stream)); + start_pos += chunk_size; + chunk_size = std::min(chunk_size, file_size - start_pos); + total_rows += read_rows; + } while (start_pos < file_size && chunk_size > 0); + + // now aggregate the aggregate results + auto results = compute_final_aggregates(agg_data, stream); + stream.synchronize(); + + elapsed_t elapsed = std::chrono::steady_clock::now() - start; + std::cout << "Number of keys: " << results->num_rows() << std::endl; + std::cout << "Process time: " << elapsed.count() << " seconds\n"; + std::cout << "Peak memory: " << 
(stats_mr.get_bytes_counter().peak / 1048576.0) << " MB\n"; + + return 0; +} diff --git a/cpp/examples/billion_rows/brc_pipeline.cpp b/cpp/examples/billion_rows/brc_pipeline.cpp new file mode 100644 index 00000000000..c65edc163e1 --- /dev/null +++ b/cpp/examples/billion_rows/brc_pipeline.cpp @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "common.hpp" +#include "groupby_results.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +using elapsed_t = std::chrono::duration; +using byte_range = std::pair; +using result_t = std::unique_ptr; + +std::unique_ptr load_chunk(std::string const& input_file, + std::size_t start, + std::size_t size, + rmm::cuda_stream_view stream) +{ + cudf::io::csv_reader_options in_opts = + cudf::io::csv_reader_options::builder(cudf::io::source_info{input_file}) + .header(-1) + .delimiter(';') + .doublequote(false) + .byte_range_offset(start) + .byte_range_size(size) + .dtypes(std::vector{cudf::data_type{cudf::type_id::STRING}, + cudf::data_type{cudf::type_id::FLOAT32}}) + .na_filter(false); + return cudf::io::read_csv(in_opts, stream).tbl; +} + +struct chunk_fn { + std::string input_file; + std::vector& agg_data; + rmm::cuda_stream_view stream; + + std::vector byte_ranges{}; + bool first_range{}; + + void add_range(std::size_t start, std::size_t size) + { + 
byte_ranges.push_back(byte_range{start, size}); + if (!first_range) { first_range = (start == 0); } + } + + void operator()() + { + using namespace std::chrono_literals; + + // process each byte range assigned to this thread + for (auto& br : byte_ranges) { + auto const input_table = load_chunk(input_file, br.first, br.second, stream); + auto const read_rows = input_table->num_rows(); + if (read_rows == 0) continue; + + auto const cities = input_table->view().column(0); + auto const temps = input_table->view().column(1); + + std::vector> aggregations; + aggregations.emplace_back(cudf::make_min_aggregation()); + aggregations.emplace_back(cudf::make_max_aggregation()); + aggregations.emplace_back(cudf::make_sum_aggregation()); + aggregations.emplace_back(cudf::make_count_aggregation()); + auto result = compute_results(cities, temps, std::move(aggregations), stream); + + agg_data.emplace_back( + cudf::sort_by_key(result->view(), result->view().select({0}), {}, {}, stream)); + } + // done with this stream + stream.synchronize_no_throw(); + } +}; + +int main(int argc, char const** argv) +{ + if (argc < 2) { + std::cout << "required parameter: input-file-path\n"; + std::cout << "optional parameters: chunk-count thread-count\n"; + return 1; + } + + auto const input_file = std::string{argv[1]}; + auto const divider = (argc < 3) ? 25 : std::stoi(std::string(argv[2])); + auto const thread_count = (argc < 4) ? 
2 : std::stoi(std::string(argv[3])); + + std::cout << "Input: " << input_file << std::endl; + std::cout << "Chunks: " << divider << std::endl; + std::cout << "Threads: " << thread_count << std::endl; + + auto const mr_name = std::string("pool"); + auto resource = create_memory_resource(mr_name); + auto stats_mr = + rmm::mr::statistics_resource_adaptor(resource.get()); + rmm::mr::set_current_device_resource(&stats_mr); + auto stream = cudf::get_default_stream(); + + std::filesystem::path p = input_file; + auto const file_size = std::filesystem::file_size(p); + + auto start = std::chrono::steady_clock::now(); + + std::size_t chunk_size = file_size / divider + ((file_size % divider) != 0); + std::size_t start_pos = 0; + + auto stream_pool = rmm::cuda_stream_pool(thread_count); + std::vector> chunk_results(thread_count); + + std::vector chunk_tasks; + for (auto& cr : chunk_results) { + chunk_tasks.emplace_back(chunk_fn{input_file, cr, stream_pool.get_stream()}); + } + for (std::size_t i = 0; i < divider; ++i) { + auto start = i * chunk_size; + auto size = std::min(chunk_size, file_size - start); + chunk_tasks[i % thread_count].add_range(start, size); + } + std::vector threads; + for (auto& c : chunk_tasks) { + threads.emplace_back(std::thread{c}); + } + for (auto& t : threads) { + t.join(); + } + + // in case some kernels are still running on the default stream + stream.synchronize(); + + // combine each thread's agg data into a single vector + std::vector agg_data(divider); + auto begin = agg_data.begin(); + for (auto& c : chunk_results) { + std::move(c.begin(), c.end(), begin); + begin += c.size(); + } + + // now aggregate the aggregate results + auto results = compute_final_aggregates(agg_data, stream); + stream.synchronize(); + + elapsed_t elapsed = std::chrono::steady_clock::now() - start; + std::cout << "Number of keys: " << results->num_rows() << std::endl; + std::cout << "Process time: " << elapsed.count() << " seconds\n"; + std::cout << "Peak memory: " << 
(stats_mr.get_bytes_counter().peak / 1048576.0) << " MB\n"; + + return 0; +} diff --git a/cpp/examples/billion_rows/common.hpp b/cpp/examples/billion_rows/common.hpp new file mode 100644 index 00000000000..d3063034d28 --- /dev/null +++ b/cpp/examples/billion_rows/common.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include + +/** + * @brief Create CUDA memory resource + */ +auto make_cuda_mr() { return std::make_shared(); } + +/** + * @brief Create a pool device memory resource + */ +auto make_pool_mr() +{ + return rmm::mr::make_owning_wrapper( + make_cuda_mr(), rmm::percent_of_free_device_memory(50)); +} + +/** + * @brief Create memory resource for libcudf functions + */ +std::shared_ptr create_memory_resource(std::string const& name) +{ + if (name == "pool") { return make_pool_mr(); } + return make_cuda_mr(); +} diff --git a/cpp/examples/billion_rows/groupby_results.cpp b/cpp/examples/billion_rows/groupby_results.cpp new file mode 100644 index 00000000000..0a7f24830f6 --- /dev/null +++ b/cpp/examples/billion_rows/groupby_results.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "groupby_results.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +std::unique_ptr compute_results( + cudf::column_view const& cities, + cudf::column_view const& temperatures, + std::vector>&& aggregations, + rmm::cuda_stream_view stream) +{ + auto groupby_obj = cudf::groupby::groupby(cudf::table_view({cities})); + auto aggregation_reqs = std::vector{}; + auto& req = aggregation_reqs.emplace_back(); + req.values = temperatures; + req.aggregations = std::move(aggregations); + + auto result = groupby_obj.aggregate(aggregation_reqs, stream); + + auto rtn = result.first->release(); + for (auto& r : result.second.front().results) { + rtn.emplace_back(std::move(r)); + } + + return std::make_unique(std::move(rtn)); +} + +std::unique_ptr compute_final_aggregates( + std::vector>& agg_data, rmm::cuda_stream_view stream) +{ + // first combine all the results into a vectors of columns + std::vector min_cols, max_cols, sum_cols, count_cols; + for (auto& tbl : agg_data) { + auto const tv = tbl->view(); + min_cols.push_back(tv.column(1)); + max_cols.push_back(tv.column(2)); + sum_cols.push_back(tv.column(3)); + count_cols.push_back(tv.column(4)); + } + + // Create single columns out of the aggregate table results. + // This relies on every key appearing in every chunk segment. + // All the values for each key become contiguous within the output column. + // For example, for N=min_cols.size() (number of unique cities): + // All of the mins for city[i] are in row[i] of each column of vector min_cols. 
+ // The interleave_columns API transposes these into a single column where + // the first N rows are values for city[0], + // the next N rows are values for city[1], + // ... + // the last N rows are values for city[N-1] + // The final result for each city is computed using segmented_reduce. + auto mins = cudf::interleave_columns(cudf::table_view{min_cols}); + auto maxes = cudf::interleave_columns(cudf::table_view{max_cols}); + auto sums = cudf::interleave_columns(cudf::table_view{sum_cols}); + auto counts = cudf::interleave_columns(cudf::table_view{count_cols}); + + // Build the offsets needed for segmented reduce. + // These are increasing integer values spaced evenly as per the number of cities (keys). + auto const num_keys = agg_data.front()->num_rows(); + auto const size = static_cast(num_keys) + 1; + auto const start = cudf::numeric_scalar(0, true, stream); + auto const step = cudf::numeric_scalar(agg_data.size(), true, stream); + auto seg_offsets = cudf::sequence(size, start, step, stream); + auto offsets_span = cudf::device_span(seg_offsets->view()); + + // compute the min/max for each key by using segmented reduce + auto min_agg = cudf::make_min_aggregation(); + mins = cudf::segmented_reduce( + mins->view(), offsets_span, *min_agg, mins->type(), cudf::null_policy::EXCLUDE, stream); + auto max_agg = cudf::make_max_aggregation(); + maxes = cudf::segmented_reduce( + maxes->view(), offsets_span, *max_agg, maxes->type(), cudf::null_policy::EXCLUDE, stream); + + // compute the sum and total counts in the same way + auto sum_agg = cudf::make_sum_aggregation(); + sums = cudf::segmented_reduce( + sums->view(), offsets_span, *sum_agg, sums->type(), cudf::null_policy::EXCLUDE, stream); + counts = cudf::segmented_reduce( + counts->view(), offsets_span, *sum_agg, counts->type(), cudf::null_policy::EXCLUDE, stream); + + // compute the means using binary-operation to divide the individual rows sum/count + auto means = cudf::binary_operation( + sums->view(), 
counts->view(), cudf::binary_operator::DIV, sums->type(), stream); + + std::vector> results; + results.emplace_back(std::move(mins)); + results.emplace_back(std::move(maxes)); + results.emplace_back(std::move(means)); + return std::make_unique(std::move(results)); +} diff --git a/cpp/examples/billion_rows/groupby_results.hpp b/cpp/examples/billion_rows/groupby_results.hpp new file mode 100644 index 00000000000..d5a88428329 --- /dev/null +++ b/cpp/examples/billion_rows/groupby_results.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include + +#include + +/** + * @brief Process the cities and temperatures + * + * Perform the given aggregations using the cities as the keys and the + * temperatures as values. 
+ * + * @param cities The city names + * @param temperatures The temperature values + * @param aggregations Which groupby aggregations to perform + * @param stream CUDA stream to use for launching kernels + * @return aggregated results + */ +std::unique_ptr compute_results( + cudf::column_view const& cities, + cudf::column_view const& temperatures, + std::vector>&& aggregations, + rmm::cuda_stream_view stream = cudf::get_default_stream()); + +/** + * @brief Produce the final aggregations from sub-aggregate results + * + * @param agg_data Sub-aggregations to summarize + * @param stream CUDA stream to use for launching kernels + * @return final results + */ +std::unique_ptr compute_final_aggregates( + std::vector>& agg_data, + rmm::cuda_stream_view stream = cudf::get_default_stream()); diff --git a/cpp/examples/build.sh b/cpp/examples/build.sh index dce81fb1677..25984df1b60 100755 --- a/cpp/examples/build.sh +++ b/cpp/examples/build.sh @@ -57,7 +57,8 @@ build_example() { } build_example basic -build_example tpch build_example strings build_example nested_types build_example parquet_io +build_example billion_rows +build_example interop diff --git a/cpp/examples/interop/CMakeLists.txt b/cpp/examples/interop/CMakeLists.txt new file mode 100644 index 00000000000..2816f613d3d --- /dev/null +++ b/cpp/examples/interop/CMakeLists.txt @@ -0,0 +1,27 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +cmake_minimum_required(VERSION 3.26.4) + +include(../set_cuda_architecture.cmake) + +rapids_cuda_init_architectures(interop_example) +rapids_cuda_set_architectures(RAPIDS) + +project( + interop_example + VERSION 0.0.1 + LANGUAGES CXX CUDA +) + +include(../fetch_dependencies.cmake) + +# The Arrow CMake is currently broken if the build type is not set +set(CMAKE_BUILD_TYPE Release) +# No need to install Arrow libs when only the final example executable is shipped. 
+set(CUDF_EXCLUDE_ARROW_FROM_ALL ON) +include(../../cmake/thirdparty/get_arrow.cmake) + +add_executable(interop interop.cpp) +target_link_libraries(interop PRIVATE cudf::cudf) +target_compile_features(interop PRIVATE cxx_std_17) +target_link_libraries(interop PRIVATE ${ARROW_LIBRARIES}) diff --git a/cpp/examples/interop/interop.cpp b/cpp/examples/interop/interop.cpp new file mode 100644 index 00000000000..133a4e3a514 --- /dev/null +++ b/cpp/examples/interop/interop.cpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include + +#include + +#include +#include + +// Helper functuons to create StringViews +inline arrow::StringViewType::c_type to_inline_string_view(const void* data, int32_t const& size) +{ + arrow::StringViewType::c_type out; + out.inlined = {size, {}}; + memcpy(&out.inlined.data, data, size); + return out; +} +inline arrow::StringViewType::c_type to_inline_string_view(std::string_view const& v) +{ + return to_inline_string_view(v.data(), static_cast(v.size())); +} +inline arrow::StringViewType::c_type to_string_view(const void* data, + int32_t const& size, + int32_t const& buffer_index, + int32_t const& offset) +{ + if (size <= arrow::StringViewType::kInlineSize) { return to_inline_string_view(data, size); } + arrow::StringViewType::c_type out; + out.ref = {size, {}, buffer_index, offset}; + memcpy(&out.ref.prefix, data, sizeof(out.ref.prefix)); + return out; +} +inline arrow::StringViewType::c_type to_string_view(std::string_view const& v, + int32_t const& buffer_index, + int32_t const& offset) +{ + return to_string_view(v.data(), static_cast(v.size()), buffer_index, offset); +} + +/** + * @brief Create a StringViewArray + * + * @param data_buffers The data buffers + * @param views The string views + * @param validate Whether to validate the array + */ +arrow::Result> make_string_view_array( + arrow::BufferVector const& data_buffers, + std::vector const& views, + bool validate = true) +{ + auto const length = static_cast(views.size()); + auto const arr = std::make_shared( + arrow::utf8_view(), length, arrow::Buffer::FromVector(views), std::move(data_buffers)); + if (validate) { RETURN_NOT_OK(arr->ValidateFull()); } + return arr; +} + +/** + * @brief Convert a vector of strings into a vector of the + * constituent chars and a vector of offsets. 
+ * + * @param strings The vector of strings + */ +auto make_chars_and_offsets(std::vector const& strings) +{ + std::vector chars{}; + std::vector offsets(1, 0); + for (auto& str : strings) { + chars.insert(chars.end(), std::cbegin(str), std::cend(str)); + auto const last_offset = static_cast(offsets.back()); + auto const next_offset = last_offset + str.length(); + CUDF_EXPECTS( + next_offset < static_cast(std::numeric_limits::max()), + "Cannot use arrow_string_view_to_cudf_column to build a large strings column"); + offsets.push_back(static_cast(next_offset)); + } + return std::make_tuple(std::move(chars), std::move(offsets)); +}; + +/** + * @brief Convert an Arrow StringViewArray to a cudf::column + * + * @param array The Arrow StringViewArray + * @param stream The CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + */ +std::unique_ptr arrow_string_view_to_cudf_column( + std::shared_ptr const& array, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) +{ + // Convert the string views into chars and offsets + std::vector strings; + for (auto i = 0; i < array->length(); i++) { + strings.push_back(array->GetString(i)); + } + auto const [chars, offsets] = make_chars_and_offsets(strings); + + // Copy the chars vector to the device + rmm::device_uvector d_chars(chars.size(), stream, mr); + CUDF_CUDA_TRY(cudaMemcpyAsync( + d_chars.data(), chars.data(), chars.size() * sizeof(char), cudaMemcpyDefault, stream.value())); + + // Copy the offsets vector to the device + // and wrap it in a cudf::column + rmm::device_uvector d_offsets(offsets.size(), stream, mr); + CUDF_CUDA_TRY(cudaMemcpyAsync(d_offsets.data(), + offsets.data(), + offsets.size() * sizeof(cudf::size_type), + cudaMemcpyDefault, + stream.value())); + auto offsets_col = + std::make_unique(std::move(d_offsets), 
rmm::device_buffer{0, stream, mr}, 0); + + // Create a string column out of the chars and offsets + return cudf::make_strings_column(array->length(), + std::move(offsets_col), + d_chars.release(), + 0, + rmm::device_buffer{0, stream, mr}); +} + +int main(int argc, char** argv) +{ + std::vector> data_buffers; + std::vector views; + + // Define the data buffers and string views + auto const buffer_a = + arrow::Buffer::FromString("hello rapids teamapache arrow interopnvidiacudf"); + data_buffers.push_back(buffer_a); + views.push_back(to_string_view("hello rapid steam", 0, 0)); + views.push_back(to_string_view("apache arrow interop", 0, 17)); + views.push_back(to_inline_string_view("nvidia")); + views.push_back(to_inline_string_view("cudf")); + + // Create a StringViewArray + auto const string_view_col = make_string_view_array(data_buffers, views, true).ValueOrDie(); + std::cout << string_view_col->ToString() << std::endl; + + // Convert the StringViewArray to a cudf::column + auto const cudf_col = arrow_string_view_to_cudf_column(string_view_col); + + // Write the cudf::column as CSV + auto const tbl_view = cudf::table_view({cudf_col->view()}); + std::vector const names = {"col_a"}; + + std::vector h_buffer; + cudf::io::csv_writer_options writer_options = + cudf::io::csv_writer_options::builder(cudf::io::sink_info(&h_buffer), tbl_view) + .include_header(not names.empty()) + .names(names); + + cudf::io::write_csv(writer_options); + auto const result = std::string(h_buffer.data(), h_buffer.size()); + std::cout << result << std::endl; + + return 0; +} diff --git a/cpp/examples/nested_types/deduplication.cpp b/cpp/examples/nested_types/deduplication.cpp index c7c54592b70..f067b358f2d 100644 --- a/cpp/examples/nested_types/deduplication.cpp +++ b/cpp/examples/nested_types/deduplication.cpp @@ -192,7 +192,7 @@ int main(int argc, char const** argv) auto pool = mr_name == "pool"; auto resource = create_memory_resource(pool); - 
rmm::mr::set_current_device_resource(resource.get()); + cudf::set_current_device_resource(resource.get()); std::cout << "Reading " << input_filepath << "..." << std::endl; // read input file diff --git a/cpp/examples/parquet_io/parquet_io.cpp b/cpp/examples/parquet_io/parquet_io.cpp index 274a2599189..9cda22d0695 100644 --- a/cpp/examples/parquet_io/parquet_io.cpp +++ b/cpp/examples/parquet_io/parquet_io.cpp @@ -18,6 +18,8 @@ #include "../utilities/timer.hpp" +#include + /** * @file parquet_io.cpp * @brief Demonstrates usage of the libcudf APIs to read and write @@ -123,7 +125,7 @@ int main(int argc, char const** argv) // Create and use a memory pool bool is_pool_used = true; auto resource = create_memory_resource(is_pool_used); - rmm::mr::set_current_device_resource(resource.get()); + cudf::set_current_device_resource(resource.get()); // Read input parquet file // We do not want to time the initial read time as it may include @@ -159,8 +161,11 @@ int main(int argc, char const** argv) // Left anti-join the original and transcoded tables // identical tables should not throw an exception and // return an empty indices vector - auto const indices = cudf::left_anti_join( - input->view(), transcoded_input->view(), cudf::null_equality::EQUAL, resource.get()); + auto const indices = cudf::left_anti_join(input->view(), + transcoded_input->view(), + cudf::null_equality::EQUAL, + cudf::get_default_stream(), + resource.get()); // No exception thrown, check indices auto const valid = indices->size() == 0; diff --git a/cpp/examples/strings/common.hpp b/cpp/examples/strings/common.hpp index 65a9c100c7c..1855374803a 100644 --- a/cpp/examples/strings/common.hpp +++ b/cpp/examples/strings/common.hpp @@ -93,7 +93,7 @@ int main(int argc, char const** argv) auto const mr_name = std::string{argc > 2 ? 
std::string(argv[2]) : std::string("cuda")}; auto resource = create_memory_resource(mr_name); - rmm::mr::set_current_device_resource(resource.get()); + cudf::set_current_device_resource(resource.get()); auto const csv_file = std::string{argv[1]}; auto const csv_result = [csv_file] { diff --git a/cpp/examples/tpch/CMakeLists.txt b/cpp/examples/tpch/CMakeLists.txt deleted file mode 100644 index 1b91d07e148..00000000000 --- a/cpp/examples/tpch/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. - -cmake_minimum_required(VERSION 3.26.4) - -include(../set_cuda_architecture.cmake) - -rapids_cuda_init_architectures(tpch_example) -rapids_cuda_set_architectures(RAPIDS) - -project( - tpch_example - VERSION 0.0.1 - LANGUAGES CXX CUDA -) - -include(../fetch_dependencies.cmake) - -add_executable(tpch_q1 q1.cpp) -target_link_libraries(tpch_q1 PRIVATE cudf::cudf) -target_compile_features(tpch_q1 PRIVATE cxx_std_17) - -add_executable(tpch_q5 q5.cpp) -target_link_libraries(tpch_q5 PRIVATE cudf::cudf) -target_compile_features(tpch_q5 PRIVATE cxx_std_17) - -add_executable(tpch_q6 q6.cpp) -target_link_libraries(tpch_q6 PRIVATE cudf::cudf) -target_compile_features(tpch_q6 PRIVATE cxx_std_17) - -add_executable(tpch_q9 q9.cpp) -target_link_libraries(tpch_q9 PRIVATE cudf::cudf) -target_compile_features(tpch_q9 PRIVATE cxx_std_17) diff --git a/cpp/examples/tpch/README.md b/cpp/examples/tpch/README.md deleted file mode 100644 index 1ea71ae9824..00000000000 --- a/cpp/examples/tpch/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# TPC-H Inspired Examples - -Implements TPC-H queries using `libcudf`. We leverage the data generator (wrapper around official TPC-H datagen) from [Apache Datafusion](https://github.com/apache/datafusion) for generating data in Parquet format. - -## Requirements - -- Rust - -## Generating the Dataset - -1. Clone the datafusion repository. -```bash -git clone git@github.com:apache/datafusion.git -``` - -2. Run the data generator. 
The data will be placed in a `data/` subdirectory. -```bash -cd datafusion/benchmarks/ -./bench.sh data tpch - -# for scale factor 10, -./bench.sh data tpch10 -``` - -## Running Queries - -1. Build the examples. -```bash -cd cpp/examples -./build.sh -``` -The TPC-H query binaries would be built inside `examples/tpch/build`. - -2. Execute the queries. -```bash -./tpch/build/tpch_q1 -``` -A parquet file named `q1.parquet` would be generated holding the results of the query. diff --git a/cpp/examples/tpch/utils.hpp b/cpp/examples/tpch/utils.hpp deleted file mode 100644 index e586da2c802..00000000000 --- a/cpp/examples/tpch/utils.hpp +++ /dev/null @@ -1,457 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -// RMM memory resource creation utilities -inline auto make_cuda() { return std::make_shared(); } -inline auto make_pool() -{ - return rmm::mr::make_owning_wrapper( - make_cuda(), rmm::percent_of_free_device_memory(50)); -} -inline auto make_managed() { return std::make_shared(); } -inline auto make_managed_pool() -{ - return rmm::mr::make_owning_wrapper( - make_managed(), rmm::percent_of_free_device_memory(50)); -} -inline std::shared_ptr create_memory_resource( - std::string const& mode) -{ - if (mode == "cuda") return make_cuda(); - if (mode == "pool") return make_pool(); - if (mode == "managed") return make_managed(); - if (mode == "managed_pool") return make_managed_pool(); - CUDF_FAIL("Unknown rmm_mode parameter: " + mode + - "\nExpecting: cuda, pool, managed, or managed_pool"); -} - -/** - * @brief A class to represent a table with column names attached - */ -class table_with_names { - public: - table_with_names(std::unique_ptr tbl, std::vector col_names) - : tbl(std::move(tbl)), col_names(col_names) - { - } - /** - * @brief Return the table view - */ - [[nodiscard]] cudf::table_view table() const { return tbl->view(); } - /** - * @brief Return the column view for a given column name - * - * @param col_name The name of the column - */ - [[nodiscard]] cudf::column_view column(std::string const& col_name) const - { - return tbl->view().column(col_id(col_name)); - } - /** - * @param Return the column names of the table - */ - [[nodiscard]] std::vector column_names() const { return col_names; } - /** - * @brief Translate a column name to a column index - * - * @param col_name The name of the column - */ - [[nodiscard]] cudf::size_type col_id(std::string const& col_name) const - { - CUDF_FUNC_RANGE(); - auto it = std::find(col_names.begin(), 
col_names.end(), col_name); - if (it == col_names.end()) { throw std::runtime_error("Column not found"); } - return std::distance(col_names.begin(), it); - } - /** - * @brief Append a column to the table - * - * @param col The column to append - * @param col_name The name of the appended column - */ - table_with_names& append(std::unique_ptr& col, std::string const& col_name) - { - CUDF_FUNC_RANGE(); - auto cols = tbl->release(); - cols.push_back(std::move(col)); - tbl = std::make_unique(std::move(cols)); - col_names.push_back(col_name); - return (*this); - } - /** - * @brief Select a subset of columns from the table - * - * @param col_names The names of the columns to select - */ - [[nodiscard]] cudf::table_view select(std::vector const& col_names) const - { - CUDF_FUNC_RANGE(); - std::vector col_indices; - for (auto const& col_name : col_names) { - col_indices.push_back(col_id(col_name)); - } - return tbl->select(col_indices); - } - /** - * @brief Write the table to a parquet file - * - * @param filepath The path to the parquet file - */ - void to_parquet(std::string const& filepath) const - { - CUDF_FUNC_RANGE(); - auto const sink_info = cudf::io::sink_info(filepath); - cudf::io::table_metadata metadata; - metadata.schema_info = - std::vector(col_names.begin(), col_names.end()); - auto const table_input_metadata = cudf::io::table_input_metadata{metadata}; - auto builder = cudf::io::parquet_writer_options::builder(sink_info, tbl->view()); - builder.metadata(table_input_metadata); - auto const options = builder.build(); - cudf::io::write_parquet(options); - } - - private: - std::unique_ptr tbl; - std::vector col_names; -}; - -/** - * @brief Concatenate two vectors - * - * @param lhs The left vector - * @param rhs The right vector - */ -template -std::vector concat(std::vector const& lhs, std::vector const& rhs) -{ - std::vector result; - result.reserve(lhs.size() + rhs.size()); - std::copy(lhs.begin(), lhs.end(), std::back_inserter(result)); - 
std::copy(rhs.begin(), rhs.end(), std::back_inserter(result)); - return result; -} - -/** - * @brief Inner join two tables and gather the result - * - * @param left_input The left input table - * @param right_input The right input table - * @param left_on The columns to join on in the left table - * @param right_on The columns to join on in the right table - * @param compare_nulls The null equality policy - */ -[[nodiscard]] std::unique_ptr join_and_gather( - cudf::table_view const& left_input, - cudf::table_view const& right_input, - std::vector const& left_on, - std::vector const& right_on, - cudf::null_equality compare_nulls) -{ - CUDF_FUNC_RANGE(); - constexpr auto oob_policy = cudf::out_of_bounds_policy::DONT_CHECK; - auto const left_selected = left_input.select(left_on); - auto const right_selected = right_input.select(right_on); - auto const [left_join_indices, right_join_indices] = cudf::inner_join( - left_selected, right_selected, compare_nulls, rmm::mr::get_current_device_resource()); - - auto const left_indices_span = cudf::device_span{*left_join_indices}; - auto const right_indices_span = cudf::device_span{*right_join_indices}; - - auto const left_indices_col = cudf::column_view{left_indices_span}; - auto const right_indices_col = cudf::column_view{right_indices_span}; - - auto const left_result = cudf::gather(left_input, left_indices_col, oob_policy); - auto const right_result = cudf::gather(right_input, right_indices_col, oob_policy); - - auto joined_cols = left_result->release(); - auto right_cols = right_result->release(); - joined_cols.insert(joined_cols.end(), - std::make_move_iterator(right_cols.begin()), - std::make_move_iterator(right_cols.end())); - return std::make_unique(std::move(joined_cols)); -} - -/** - * @brief Apply an inner join operation to two tables - * - * @param left_input The left input table - * @param right_input The right input table - * @param left_on The columns to join on in the left table - * @param right_on The columns 
to join on in the right table - * @param compare_nulls The null equality policy - */ -[[nodiscard]] std::unique_ptr apply_inner_join( - std::unique_ptr const& left_input, - std::unique_ptr const& right_input, - std::vector const& left_on, - std::vector const& right_on, - cudf::null_equality compare_nulls = cudf::null_equality::EQUAL) -{ - CUDF_FUNC_RANGE(); - std::vector left_on_indices; - std::vector right_on_indices; - std::transform( - left_on.begin(), left_on.end(), std::back_inserter(left_on_indices), [&](auto const& col_name) { - return left_input->col_id(col_name); - }); - std::transform(right_on.begin(), - right_on.end(), - std::back_inserter(right_on_indices), - [&](auto const& col_name) { return right_input->col_id(col_name); }); - auto table = join_and_gather( - left_input->table(), right_input->table(), left_on_indices, right_on_indices, compare_nulls); - return std::make_unique( - std::move(table), concat(left_input->column_names(), right_input->column_names())); -} - -/** - * @brief Apply a filter predicated to a table - * - * @param table The input table - * @param predicate The filter predicate - */ -[[nodiscard]] std::unique_ptr apply_filter( - std::unique_ptr const& table, cudf::ast::operation const& predicate) -{ - CUDF_FUNC_RANGE(); - auto const boolean_mask = cudf::compute_column(table->table(), predicate); - auto result_table = cudf::apply_boolean_mask(table->table(), boolean_mask->view()); - return std::make_unique(std::move(result_table), table->column_names()); -} - -/** - * @brief Apply a boolean mask to a table - * - * @param table The input table - * @param mask The boolean mask - */ -[[nodiscard]] std::unique_ptr apply_mask( - std::unique_ptr const& table, std::unique_ptr const& mask) -{ - CUDF_FUNC_RANGE(); - auto result_table = cudf::apply_boolean_mask(table->table(), mask->view()); - return std::make_unique(std::move(result_table), table->column_names()); -} - -struct groupby_context_t { - std::vector keys; - std::unordered_map>> - 
values; -}; - -/** - * @brief Apply a groupby operation to a table - * - * @param table The input table - * @param ctx The groupby context - */ -[[nodiscard]] std::unique_ptr apply_groupby( - std::unique_ptr const& table, groupby_context_t const& ctx) -{ - CUDF_FUNC_RANGE(); - auto const keys = table->select(ctx.keys); - cudf::groupby::groupby groupby_obj(keys); - std::vector result_column_names; - result_column_names.insert(result_column_names.end(), ctx.keys.begin(), ctx.keys.end()); - std::vector requests; - for (auto& [value_col, aggregations] : ctx.values) { - requests.emplace_back(cudf::groupby::aggregation_request()); - for (auto& agg : aggregations) { - if (agg.first == cudf::aggregation::Kind::SUM) { - requests.back().aggregations.push_back( - cudf::make_sum_aggregation()); - } else if (agg.first == cudf::aggregation::Kind::MEAN) { - requests.back().aggregations.push_back( - cudf::make_mean_aggregation()); - } else if (agg.first == cudf::aggregation::Kind::COUNT_ALL) { - requests.back().aggregations.push_back( - cudf::make_count_aggregation()); - } else { - throw std::runtime_error("Unsupported aggregation"); - } - result_column_names.push_back(agg.second); - } - requests.back().values = table->column(value_col); - } - auto agg_results = groupby_obj.aggregate(requests); - std::vector> result_columns; - for (size_t i = 0; i < agg_results.first->num_columns(); i++) { - auto col = std::make_unique(agg_results.first->get_column(i)); - result_columns.push_back(std::move(col)); - } - for (size_t i = 0; i < agg_results.second.size(); i++) { - for (size_t j = 0; j < agg_results.second[i].results.size(); j++) { - result_columns.push_back(std::move(agg_results.second[i].results[j])); - } - } - auto result_table = std::make_unique(std::move(result_columns)); - return std::make_unique(std::move(result_table), result_column_names); -} - -/** - * @brief Apply an order by operation to a table - * - * @param table The input table - * @param sort_keys The sort keys - * 
@param sort_key_orders The sort key orders - */ -[[nodiscard]] std::unique_ptr apply_orderby( - std::unique_ptr const& table, - std::vector const& sort_keys, - std::vector const& sort_key_orders) -{ - CUDF_FUNC_RANGE(); - std::vector column_views; - for (auto& key : sort_keys) { - column_views.push_back(table->column(key)); - } - auto result_table = - cudf::sort_by_key(table->table(), cudf::table_view{column_views}, sort_key_orders); - return std::make_unique(std::move(result_table), table->column_names()); -} - -/** - * @brief Apply a reduction operation to a column - * - * @param column The input column - * @param agg_kind The aggregation kind - * @param col_name The name of the output column - */ -[[nodiscard]] std::unique_ptr apply_reduction( - cudf::column_view const& column, - cudf::aggregation::Kind const& agg_kind, - std::string const& col_name) -{ - CUDF_FUNC_RANGE(); - auto const agg = cudf::make_sum_aggregation(); - auto const result = cudf::reduce(column, *agg, column.type()); - cudf::size_type const len = 1; - auto col = cudf::make_column_from_scalar(*result, len); - std::vector> columns; - columns.push_back(std::move(col)); - auto result_table = std::make_unique(std::move(columns)); - std::vector col_names = {col_name}; - return std::make_unique(std::move(result_table), col_names); -} - -/** - * @brief Read a parquet file into a table - * - * @param filename The path to the parquet file - * @param columns The columns to read - * @param predicate The filter predicate to pushdown - */ -[[nodiscard]] std::unique_ptr read_parquet( - std::string const& filename, - std::vector const& columns = {}, - std::unique_ptr const& predicate = nullptr) -{ - CUDF_FUNC_RANGE(); - auto const source = cudf::io::source_info(filename); - auto builder = cudf::io::parquet_reader_options_builder(source); - if (!columns.empty()) { builder.columns(columns); } - if (predicate) { builder.filter(*predicate); } - auto const options = builder.build(); - auto table_with_metadata = 
cudf::io::read_parquet(options); - std::vector column_names; - for (auto const& col_info : table_with_metadata.metadata.schema_info) { - column_names.push_back(col_info.name); - } - return std::make_unique(std::move(table_with_metadata.tbl), column_names); -} - -/** - * @brief Generate the `std::tm` structure from year, month, and day - * - * @param year The year - * @param month The month - * @param day The day - */ -std::tm make_tm(int year, int month, int day) -{ - std::tm tm{}; - tm.tm_year = year - 1900; - tm.tm_mon = month - 1; - tm.tm_mday = day; - return tm; -} - -/** - * @brief Calculate the number of days since the UNIX epoch - * - * @param year The year - * @param month The month - * @param day The day - */ -int32_t days_since_epoch(int year, int month, int day) -{ - std::tm tm = make_tm(year, month, day); - std::tm epoch = make_tm(1970, 1, 1); - std::time_t time = std::mktime(&tm); - std::time_t epoch_time = std::mktime(&epoch); - double diff = std::difftime(time, epoch_time) / (60 * 60 * 24); - return static_cast(diff); -} - -struct tpch_example_args { - std::string dataset_dir; - std::string memory_resource_type; -}; - -/** - * @brief Parse command line arguments into a struct - * - * @param argc The number of command line arguments - * @param argv The command line arguments - */ -tpch_example_args parse_args(int argc, char const** argv) -{ - if (argc < 3) { - std::string usage_message = "Usage: " + std::string(argv[0]) + - " \n The query result will be " - "saved to a parquet file named q{query_no}.parquet in the current " - "working directory "; - throw std::runtime_error(usage_message); - } - tpch_example_args args; - args.dataset_dir = argv[1]; - args.memory_resource_type = argv[2]; - return args; -} diff --git a/cpp/examples/versions.cmake b/cpp/examples/versions.cmake index 144b3d3721b..44493011673 100644 --- a/cpp/examples/versions.cmake +++ b/cpp/examples/versions.cmake @@ -12,4 +12,4 @@ # the License. 
# ============================================================================= -set(CUDF_TAG branch-24.08) +set(CUDF_TAG branch-24.10) diff --git a/cpp/include/cudf/ast/detail/expression_evaluator.cuh b/cpp/include/cudf/ast/detail/expression_evaluator.cuh index 105d87ff96f..9d8762555d7 100644 --- a/cpp/include/cudf/ast/detail/expression_evaluator.cuh +++ b/cpp/include/cudf/ast/detail/expression_evaluator.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,8 +29,6 @@ #include -#include - namespace cudf { namespace ast { @@ -278,7 +276,7 @@ struct expression_evaluator { detail::device_data_reference const& input_reference, IntermediateDataType* thread_intermediate_storage, cudf::size_type left_row_index, - thrust::optional right_row_index = {}) const + cudf::size_type right_row_index = {}) const { // TODO: Everywhere in the code assumes that the table reference is either // left or right. Should we error-check somewhere to prevent @@ -291,7 +289,7 @@ struct expression_evaluator { // any case where input_reference.table_source == table_reference::RIGHT. // Otherwise, behavior is undefined. auto const row_index = - (input_reference.table_source == table_reference::LEFT) ? left_row_index : *right_row_index; + (input_reference.table_source == table_reference::LEFT) ? left_row_index : right_row_index; if constexpr (has_nulls) { return table.column(input_reference.data_index).is_valid(row_index) ? 
ReturnType(table.column(input_reference.data_index).element(row_index)) @@ -329,7 +327,7 @@ struct expression_evaluator { detail::device_data_reference const& device_data_reference, IntermediateDataType* thread_intermediate_storage, cudf::size_type left_row_index, - thrust::optional right_row_index = {}) const + cudf::size_type right_row_index = {}) const { CUDF_UNREACHABLE("Unsupported type in resolve_input."); } diff --git a/cpp/include/cudf/ast/detail/expression_parser.hpp b/cpp/include/cudf/ast/detail/expression_parser.hpp index da552d95421..a254171ef11 100644 --- a/cpp/include/cudf/ast/detail/expression_parser.hpp +++ b/cpp/include/cudf/ast/detail/expression_parser.hpp @@ -20,8 +20,7 @@ #include #include #include - -#include +#include #include diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp index 51199bb5792..63908f6c870 100644 --- a/cpp/include/cudf/binaryop.hpp +++ b/cpp/include/cudf/binaryop.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include #include @@ -171,7 +169,7 @@ std::unique_ptr binary_operation( binary_operator op, data_type output_type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs a binary operation between a column and a scalar. @@ -202,7 +200,7 @@ std::unique_ptr binary_operation( binary_operator op, data_type output_type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs a binary operation between two columns. 
@@ -232,7 +230,7 @@ std::unique_ptr binary_operation( binary_operator op, data_type output_type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs a binary operation between two columns using a @@ -263,7 +261,7 @@ std::unique_ptr binary_operation( std::string const& ptx, data_type output_type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the `scale` for a `fixed_point` number based on given binary operator `op` @@ -315,7 +313,7 @@ std::pair scalar_col_valid_mask_and( column_view const& col, scalar const& s, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); } // namespace binops diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index 5d1d74c3f28..de19a076cc4 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -19,12 +19,11 @@ #include #include #include +#include #include #include #include -#include -#include #include #include @@ -65,7 +64,7 @@ class column { */ column(column const& other, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Move the contents from `other` to create a new column. 
@@ -143,7 +142,7 @@ class column { */ explicit column(column_view view, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the column's logical element type diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 89fe59bfeaa..c3238cb94fd 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -32,9 +32,9 @@ #include +#include #include #include -#include #include #include @@ -614,7 +614,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { /** * @brief Return an optional iterator to the first element of the column. * - * Dereferencing the returned iterator returns a `thrust::optional`. + * Dereferencing the returned iterator returns a `cuda::std::optional`. * * The element of this iterator contextually converts to bool. The conversion returns true * if the object contains a value and false if it does not contain a value. @@ -739,7 +739,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base { /** * @brief Return an optional iterator to the element following the last element of the column. * - * The returned iterator represents a `thrust::optional` element. + * The returned iterator represents a `cuda::std::optional` element. * * This function does not participate in overload resolution if * `column_device_view::has_element_accessor()` is false. @@ -1272,21 +1272,21 @@ struct value_accessor { * @brief optional accessor of a column * * - * The optional_accessor always returns a `thrust::optional` of `column[i]`. The validity + * The optional_accessor always returns a `cuda::std::optional` of `column[i]`. 
The validity * of the optional is determined by the `Nullate` parameter which may be one of the following: * * - `nullate::YES` means that the column supports nulls and the optional returned * might be valid or invalid. * * - `nullate::NO` means the caller attests that the column has no null values, - * no checks will occur and `thrust::optional{column[i]}` will be + * no checks will occur and `cuda::std::optional{column[i]}` will be * return for each `i`. * * - `nullate::DYNAMIC` defers the assumption of nullability to runtime and the caller * specifies if the column has nulls at runtime. - * For `DYNAMIC{true}` the return value will be `thrust::optional{column[i]}` if - * element `i` is not null and `thrust::optional{}` if element `i` is null. - * For `DYNAMIC{false}` the return value will always be `thrust::optional{column[i]}`. + * For `DYNAMIC{true}` the return value will be `cuda::std::optional{column[i]}` if + * element `i` is not null and `cuda::std::optional{}` if element `i` is null. + * For `DYNAMIC{false}` the return value will always be `cuda::std::optional{column[i]}`. * * @throws cudf::logic_error if column datatype and template T type mismatch. * @throws cudf::logic_error if the column is not nullable and `with_nulls` evaluates to true @@ -1312,19 +1312,19 @@ struct optional_accessor { } /** - * @brief Returns a `thrust::optional` of `column[i]`. + * @brief Returns a `cuda::std::optional` of `column[i]`. * * @param i The index of the element to return - * @return A `thrust::optional` that contains the value of `column[i]` is not null. If that + * @return A `cuda::std::optional` that contains the value of `column[i]` is not null. If that * element is null, the resulting optional will not contain a value. */ - __device__ inline thrust::optional operator()(cudf::size_type i) const + __device__ inline cuda::std::optional operator()(cudf::size_type i) const { if (has_nulls) { - return (col.is_valid_nocheck(i)) ? 
thrust::optional{col.element(i)} - : thrust::optional{thrust::nullopt}; + return (col.is_valid_nocheck(i)) ? cuda::std::optional{col.element(i)} + : cuda::std::optional{cuda::std::nullopt}; } - return thrust::optional{col.element(i)}; + return cuda::std::optional{col.element(i)}; } Nullate has_nulls{}; ///< Indicates if the `col` should be checked for nulls. diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp index c1f295b7ea8..c3b68b52c36 100644 --- a/cpp/include/cudf/column/column_factories.hpp +++ b/cpp/include/cudf/column/column_factories.hpp @@ -18,12 +18,11 @@ #include #include #include +#include #include #include #include -#include -#include #include @@ -78,7 +77,7 @@ std::unique_ptr make_numeric_column( size_type size, mask_state state = mask_state::UNALLOCATED, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct column with sufficient uninitialized storage to hold `size` elements of the @@ -104,7 +103,7 @@ std::unique_ptr make_numeric_column( B&& null_mask, size_type null_count, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type."); return std::make_unique(type, @@ -136,7 +135,7 @@ std::unique_ptr make_fixed_point_column( size_type size, mask_state state = mask_state::UNALLOCATED, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct column with sufficient uninitialized storage to hold `size` elements of the @@ 
-161,7 +160,7 @@ std::unique_ptr make_fixed_point_column( B&& null_mask, size_type null_count, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type."); return std::make_unique(type, @@ -194,7 +193,7 @@ std::unique_ptr make_timestamp_column( size_type size, mask_state state = mask_state::UNALLOCATED, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct column with sufficient uninitialized storage to hold `size` elements of the @@ -220,7 +219,7 @@ std::unique_ptr make_timestamp_column( B&& null_mask, size_type null_count, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type."); return std::make_unique(type, @@ -253,7 +252,7 @@ std::unique_ptr make_duration_column( size_type size, mask_state state = mask_state::UNALLOCATED, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct column with sufficient uninitialized storage to hold `size` elements of the @@ -279,7 +278,7 @@ std::unique_ptr make_duration_column( B&& null_mask, size_type null_count, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = 
cudf::get_current_device_resource_ref()) { CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type."); return std::make_unique(type, @@ -312,7 +311,7 @@ std::unique_ptr make_fixed_width_column( size_type size, mask_state state = mask_state::UNALLOCATED, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct column with sufficient uninitialized storage to hold `size` elements of the @@ -338,7 +337,7 @@ std::unique_ptr make_fixed_width_column( B&& null_mask, size_type null_count, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { CUDF_EXPECTS(is_fixed_width(type), "Invalid, non-fixed-width type."); if (is_timestamp(type)) { @@ -377,7 +376,7 @@ std::unique_ptr make_fixed_width_column( std::unique_ptr make_strings_column( cudf::device_span const> strings, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a STRING type column given a device span of string_view. 
@@ -409,7 +408,7 @@ std::unique_ptr make_strings_column( cudf::device_span string_views, string_view const null_placeholder, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a STRING type column given offsets column, chars columns, and null mask and null @@ -469,7 +468,7 @@ std::unique_ptr make_strings_column(size_type num_strings, * offsets (depth 1) {0, 2, 5, 7} * data (depth 1) * offsets (depth 2) - * data (depth 1) {1, 2, 3, 4, 5, 6, 7} + * data (depth 2) {1, 2, 3, 4, 5, 6, 7} * @endcode * * @param[in] num_rows The number of lists the column represents. @@ -497,7 +496,7 @@ std::unique_ptr make_lists_column( size_type null_count, rmm::device_buffer&& null_mask, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a STRUCT column using specified child columns as members. @@ -528,7 +527,7 @@ std::unique_ptr make_structs_column( size_type null_count, rmm::device_buffer&& null_mask, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a column with size elements that are all equal to the given scalar. @@ -548,7 +547,7 @@ std::unique_ptr make_column_from_scalar( scalar const& s, size_type size, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a dictionary column with size elements that are all equal to the given scalar. 
@@ -568,7 +567,7 @@ std::unique_ptr make_dictionary_from_scalar( scalar const& s, size_type size, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/concatenate.hpp b/cpp/include/cudf/concatenate.hpp index 0935bdf7def..155740dc29e 100644 --- a/cpp/include/cudf/concatenate.hpp +++ b/cpp/include/cudf/concatenate.hpp @@ -19,11 +19,9 @@ #include #include #include +#include #include -#include -#include - #include namespace CUDF_EXPORT cudf { @@ -49,7 +47,7 @@ namespace CUDF_EXPORT cudf { rmm::device_buffer concatenate_masks( host_span views, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Concatenates multiple columns into a single column @@ -66,7 +64,7 @@ rmm::device_buffer concatenate_masks( std::unique_ptr concatenate( host_span columns_to_concat, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Columns of `tables_to_concat` are concatenated vertically to return a @@ -95,7 +93,7 @@ std::unique_ptr concatenate( std::unique_ptr concatenate( host_span tables_to_concat, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/contiguous_split.hpp b/cpp/include/cudf/contiguous_split.hpp index 195dac25268..41eef9559b8 
100644 --- a/cpp/include/cudf/contiguous_split.hpp +++ b/cpp/include/cudf/contiguous_split.hpp @@ -19,8 +19,7 @@ #include #include #include - -#include +#include #include #include @@ -122,7 +121,7 @@ struct packed_table { std::vector contiguous_split( cudf::table_view const& input, std::vector const& splits, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); namespace detail { @@ -154,7 +153,7 @@ struct contiguous_split_state; * // Choose a memory resource (optional). This memory resource is used for scratch/thrust temporary * // data. In memory constrained cases, this can be used to set aside scratch memory * // for `chunked_pack` at the beginning of a program. - * auto mr = rmm::mr::get_current_device_resource(); + * auto mr = cudf::get_current_device_resource_ref(); * * // Define a buffer size for each chunk: the larger the buffer is, the more SMs can be * // occupied by this algorithm. @@ -205,7 +204,7 @@ class chunked_pack { explicit chunked_pack( cudf::table_view const& input, std::size_t user_buffer_size, - rmm::device_async_resource_ref temp_mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref temp_mr = cudf::get_current_device_resource_ref()); /** * @brief Destructor that will be implemented as default. 
Declared with definition here because @@ -270,7 +269,7 @@ class chunked_pack { [[nodiscard]] static std::unique_ptr create( cudf::table_view const& input, std::size_t user_buffer_size, - rmm::device_async_resource_ref temp_mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref temp_mr = cudf::get_current_device_resource_ref()); private: // internal state of contiguous split @@ -290,7 +289,7 @@ class chunked_pack { * and device memory respectively */ packed_columns pack(cudf::table_view const& input, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Produce the metadata used for packing a table stored in a contiguous buffer. diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 3c44ff48fdf..388f19abea2 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -24,9 +24,7 @@ #include #include #include - -#include -#include +#include #include #include @@ -88,7 +86,7 @@ std::unique_ptr
gather( column_view const& gather_map, out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Reverses the rows within a table. @@ -108,7 +106,7 @@ std::unique_ptr
gather( std::unique_ptr
reverse( table_view const& source_table, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Reverses the elements of a column @@ -128,7 +126,7 @@ std::unique_ptr
reverse( std::unique_ptr reverse( column_view const& source_column, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Scatters the rows of the source table into a copy of the target table @@ -177,7 +175,7 @@ std::unique_ptr
scatter( column_view const& scatter_map, table_view const& target, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Scatters a row of scalar values into a copy of the target table @@ -220,7 +218,7 @@ std::unique_ptr
scatter( column_view const& indices, table_view const& target, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Indicates when to allocate a mask, based on an existing mask. @@ -268,7 +266,7 @@ std::unique_ptr allocate_like( column_view const& input, mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Creates an uninitialized new column of the specified size and same type as the `input`. @@ -291,7 +289,7 @@ std::unique_ptr allocate_like( size_type size, mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Creates a table of empty columns with the same types as the `input_table` @@ -383,7 +381,7 @@ std::unique_ptr copy_range( size_type source_end, size_type target_begin, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Creates a new column by shifting all values by an offset. 
@@ -427,7 +425,7 @@ std::unique_ptr shift( size_type offset, scalar const& fill_value, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Slices a `column_view` into a set of `column_view`s according to a set of indices. @@ -630,7 +628,7 @@ std::unique_ptr copy_if_else( column_view const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a new column, where each element is selected from either @p lhs or @@ -656,7 +654,7 @@ std::unique_ptr copy_if_else( column_view const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a new column, where each element is selected from either @p lhs or @@ -682,7 +680,7 @@ std::unique_ptr copy_if_else( scalar const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a new column, where each element is selected from either @p lhs or @@ -706,7 +704,7 @@ std::unique_ptr copy_if_else( scalar const& rhs, column_view const& boolean_mask, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Scatters rows from the input 
table to rows of the output corresponding @@ -750,7 +748,7 @@ std::unique_ptr
boolean_mask_scatter( table_view const& target, column_view const& boolean_mask, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Scatters scalar values to rows of the output corresponding @@ -789,7 +787,7 @@ std::unique_ptr
boolean_mask_scatter( table_view const& target, column_view const& boolean_mask, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Get the element at specified index from a column @@ -809,7 +807,7 @@ std::unique_ptr get_element( column_view const& input, size_type index, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Indicates whether a row can be sampled more than once. @@ -853,7 +851,7 @@ std::unique_ptr
sample( sample_with_replacement replacement = sample_with_replacement::FALSE, int64_t const seed = 0, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Checks if a column or its descendants have non-empty null rows @@ -970,7 +968,7 @@ bool may_have_nonempty_nulls(column_view const& input); std::unique_ptr purge_nonempty_nulls( column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index f7bed8bdc7e..7359a0d5fde 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -17,10 +17,11 @@ #pragma once #include +#include #include +#include -#include -#include +#include #include @@ -42,6 +43,7 @@ namespace datetime { * cudf::column. * * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t years @@ -49,13 +51,15 @@ namespace datetime { */ std::unique_ptr extract_year( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Extracts month from any datetime type and returns an int16_t * cudf::column. 
* * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t months @@ -63,13 +67,15 @@ std::unique_ptr extract_year( */ std::unique_ptr extract_month( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Extracts day from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t days @@ -77,13 +83,15 @@ std::unique_ptr extract_month( */ std::unique_ptr extract_day( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Extracts a weekday from any datetime type and returns an int16_t * cudf::column. 
* * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t days @@ -91,13 +99,15 @@ std::unique_ptr extract_day( */ std::unique_ptr extract_weekday( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Extracts hour from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t hours @@ -105,13 +115,15 @@ std::unique_ptr extract_weekday( */ std::unique_ptr extract_hour( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Extracts minute from any datetime type and returns an int16_t * cudf::column. 
* * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t minutes @@ -119,13 +131,15 @@ std::unique_ptr extract_hour( */ std::unique_ptr extract_minute( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Extracts second from any datetime type and returns an int16_t * cudf::column. * * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t seconds @@ -133,7 +147,8 @@ std::unique_ptr extract_minute( */ std::unique_ptr extract_second( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Extracts millisecond fraction from any datetime type and returns an int16_t @@ -143,6 +158,7 @@ std::unique_ptr extract_second( * For example, the millisecond fraction of 1.234567890 seconds is 234. 
* * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t milliseconds @@ -150,7 +166,8 @@ std::unique_ptr extract_second( */ std::unique_ptr extract_millisecond_fraction( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Extracts microsecond fraction from any datetime type and returns an int16_t @@ -160,6 +177,7 @@ std::unique_ptr extract_millisecond_fraction( * For example, the microsecond fraction of 1.234567890 seconds is 567. * * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t microseconds @@ -167,7 +185,8 @@ std::unique_ptr extract_millisecond_fraction( */ std::unique_ptr extract_microsecond_fraction( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Extracts nanosecond fraction from any datetime type and returns an int16_t @@ -177,6 +196,7 @@ std::unique_ptr extract_microsecond_fraction( * For example, the nanosecond fraction of 1.234567890 seconds is 890. 
* * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of the extracted int16_t nanoseconds @@ -184,7 +204,8 @@ std::unique_ptr extract_microsecond_fraction( */ std::unique_ptr extract_nanosecond_fraction( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group /** @@ -198,6 +219,7 @@ std::unique_ptr extract_nanosecond_fraction( * cudf::column. * * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column containing last day of the month as TIMESTAMP_DAYS @@ -205,13 +227,15 @@ std::unique_ptr extract_nanosecond_fraction( */ std::unique_ptr last_day_of_month( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the day number since the start of the year from the datetime and * returns an int16_t cudf::column. 
The value is between [1, {365-366}] * * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of datatype INT16 containing the day number since the start of the year @@ -219,7 +243,8 @@ std::unique_ptr last_day_of_month( */ std::unique_ptr day_of_year( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Adds or subtracts a number of months from the datetime type and returns a @@ -247,6 +272,7 @@ std::unique_ptr day_of_year( * * @param timestamps cudf::column_view of timestamp type * @param months cudf::column_view of integer type containing the number of months to add + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of timestamp type containing the computed timestamps @@ -254,7 +280,8 @@ std::unique_ptr day_of_year( std::unique_ptr add_calendrical_months( cudf::column_view const& timestamps, cudf::column_view const& months, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Adds or subtracts a number of months from the datetime type and returns a @@ -282,6 +309,7 @@ std::unique_ptr add_calendrical_months( * * @param timestamps cudf::column_view of timestamp type * @param months cudf::scalar of integer type containing the number of months to add + * @param stream CUDA stream used for device memory operations and kernel launches 
* @param mr Device memory resource used to allocate device memory of the returned column * * @return cudf::column of timestamp type containing the computed timestamps @@ -289,7 +317,8 @@ std::unique_ptr add_calendrical_months( std::unique_ptr add_calendrical_months( cudf::column_view const& timestamps, cudf::scalar const& months, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Check if the year of the given date is a leap year @@ -299,6 +328,7 @@ std::unique_ptr add_calendrical_months( * `output[i] is null` if `column[i]` is null * * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @returns cudf::column of datatype BOOL8 truth value of the corresponding date @@ -306,7 +336,8 @@ std::unique_ptr add_calendrical_months( */ std::unique_ptr is_leap_year( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Extract the number of days in the month @@ -317,12 +348,14 @@ std::unique_ptr is_leap_year( * @throw cudf::logic_error if input column datatype is not a TIMESTAMP * * @param column cudf::column_view of the input datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * @return cudf::column of datatype INT16 of days in month of the corresponding date */ std::unique_ptr days_in_month( cudf::column_view const& column, - rmm::device_async_resource_ref mr = 
rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the quarter of the date @@ -333,12 +366,14 @@ std::unique_ptr days_in_month( * @throw cudf::logic_error if input column datatype is not a TIMESTAMP * * @param column The input column containing datetime values + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * @return A column of INT16 type indicating which quarter the date is in */ std::unique_ptr extract_quarter( cudf::column_view const& column, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Fixed frequencies supported by datetime rounding functions ceil, floor, round. @@ -359,6 +394,7 @@ enum class rounding_frequency : int32_t { * * @param column cudf::column_view of the input datetime values * @param freq rounding_frequency indicating the frequency to round up to + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @throw cudf::logic_error if input column datatype is not TIMESTAMP. @@ -367,13 +403,15 @@ enum class rounding_frequency : int32_t { std::unique_ptr ceil_datetimes( cudf::column_view const& column, rounding_frequency freq, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Round datetimes down to the nearest multiple of the given frequency. 
* * @param column cudf::column_view of the input datetime values * @param freq rounding_frequency indicating the frequency to round down to + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @throw cudf::logic_error if input column datatype is not TIMESTAMP. @@ -382,13 +420,15 @@ std::unique_ptr ceil_datetimes( std::unique_ptr floor_datetimes( cudf::column_view const& column, rounding_frequency freq, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Round datetimes to the nearest multiple of the given frequency. * * @param column cudf::column_view of the input datetime values * @param freq rounding_frequency indicating the frequency to round to + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate device memory of the returned column * * @throw cudf::logic_error if input column datatype is not TIMESTAMP. 
@@ -397,7 +437,8 @@ std::unique_ptr floor_datetimes( std::unique_ptr round_datetimes( cudf::column_view const& column, rounding_frequency freq, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index b257eef1e9e..4255faea702 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1497,8 +1497,7 @@ AGG_KIND_MAPPING(aggregation::VARIANCE, var_aggregation); * * @tparam F Type of callable * @param k The `aggregation::Kind` value to dispatch - aram f The callable that accepts an `aggregation::Kind` non-type template - argument. + @param f The callable function object that accepts an `aggregation::Kind` non-type template argument. * @param args Parameter pack forwarded to the `operator()` invocation * @return Forwards the return value of the callable. */ @@ -1626,6 +1625,7 @@ struct dispatch_source { * parameter of the callable `F` * @param k The `aggregation::Kind` used to dispatch an `aggregation::Kind` * non-type template parameter for the second template parameter of the callable + * @param f The callable function object that accepts `data_type` and `aggregation::Kind` template arguments. * @param args Parameter pack forwarded to the `operator()` invocation * `F`. */ @@ -1644,8 +1644,8 @@ CUDF_HOST_DEVICE inline constexpr decltype(auto) dispatch_type_and_aggregation(d * @brief Returns the target `data_type` for the specified aggregation k * performed on elements of type source_type.
* - * aram source_type The element type to be aggregated - * aram k The aggregation + * @param source_type The element type to be aggregated + * @param k The aggregation kind * @return data_type The target_type of k performed on source_type * elements */ diff --git a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp index fe739327a08..91f774839d9 100644 --- a/cpp/include/cudf/detail/binaryop.hpp +++ b/cpp/include/cudf/detail/binaryop.hpp @@ -18,9 +18,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { //! Inner interfaces and implementations diff --git a/cpp/include/cudf/detail/calendrical_month_sequence.cuh b/cpp/include/cudf/detail/calendrical_month_sequence.cuh index a9cf54e29b8..2097411357d 100644 --- a/cpp/include/cudf/detail/calendrical_month_sequence.cuh +++ b/cpp/include/cudf/detail/calendrical_month_sequence.cuh @@ -21,11 +21,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/include/cudf/detail/concatenate.hpp b/cpp/include/cudf/detail/concatenate.hpp index 1be269710b2..51166f6054b 100644 --- a/cpp/include/cudf/detail/concatenate.hpp +++ b/cpp/include/cudf/detail/concatenate.hpp @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/include/cudf/detail/concatenate_masks.hpp b/cpp/include/cudf/detail/concatenate_masks.hpp index fc829361fde..4f9e7f9cd13 100644 --- a/cpp/include/cudf/detail/concatenate_masks.hpp +++ b/cpp/include/cudf/detail/concatenate_masks.hpp @@ -18,12 +18,12 @@ #include #include #include +#include #include #include #include #include -#include namespace CUDF_EXPORT cudf { //! 
Inner interfaces and implementations diff --git a/cpp/include/cudf/detail/contiguous_split.hpp b/cpp/include/cudf/detail/contiguous_split.hpp index 52c51daa917..52ca091e1cd 100644 --- a/cpp/include/cudf/detail/contiguous_split.hpp +++ b/cpp/include/cudf/detail/contiguous_split.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace detail { diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp index 2be432c0825..60aa500f129 100644 --- a/cpp/include/cudf/detail/copy.hpp +++ b/cpp/include/cudf/detail/copy.hpp @@ -20,11 +20,11 @@ #include #include #include +#include #include #include #include -#include #include diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index b6310e6cd2f..dfb646c66c4 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #include #include @@ -37,7 +39,6 @@ #include #include #include -#include #include #include diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh index 8418e279ce7..a70cd5a0661 100644 --- a/cpp/include/cudf/detail/copy_if_else.cuh +++ b/cpp/include/cudf/detail/copy_if_else.cuh @@ -21,12 +21,12 @@ #include #include #include +#include #include -#include +#include #include -#include namespace cudf { namespace detail { @@ -70,7 +70,7 @@ __launch_bounds__(block_size) CUDF_KERNEL while (warp_cur <= warp_end) { auto const index = static_cast(tidx); auto const opt_value = - (index < end) ? (filter(index) ? lhs[index] : rhs[index]) : thrust::nullopt; + (index < end) ? (filter(index) ? 
lhs[index] : rhs[index]) : cuda::std::nullopt; if (opt_value) { out.element(index) = static_cast(*opt_value); } // update validity @@ -156,7 +156,7 @@ std::unique_ptr copy_if_else(bool nullable, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - // This is the type of the thrust::optional element in the passed iterators + // This is the type of the cuda::std::optional element in the passed iterators using Element = typename thrust::iterator_traits::value_type::value_type; size_type size = std::distance(lhs_begin, lhs_end); diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh index 1b3b2056c6c..3aa136d630b 100644 --- a/cpp/include/cudf/detail/copy_range.cuh +++ b/cpp/include/cudf/detail/copy_range.cuh @@ -23,11 +23,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/include/cudf/detail/cuco_helpers.hpp b/cpp/include/cudf/detail/cuco_helpers.hpp index dca5a39bece..926df921715 100644 --- a/cpp/include/cudf/detail/cuco_helpers.hpp +++ b/cpp/include/cudf/detail/cuco_helpers.hpp @@ -36,19 +36,10 @@ static double constexpr CUCO_DESIRED_LOAD_FACTOR = 0.5; * later expects a standard C++ `Allocator` interface. This allocator helper provides a simple way * to handle cuco memory allocation/deallocation with the given `stream` and the rmm default memory * resource. + * + * @tparam T The allocator's value type. 
*/ -class cuco_allocator - : public rmm::mr::stream_allocator_adaptor> { - /// Default stream-ordered allocator type - using default_allocator = rmm::mr::polymorphic_allocator; - /// The base allocator adaptor type - using base_type = rmm::mr::stream_allocator_adaptor; - - public: - /** - * @brief Constructs the allocator adaptor with the given `stream` - */ - cuco_allocator(rmm::cuda_stream_view stream) : base_type{default_allocator{}, stream} {} -}; +template +using cuco_allocator = rmm::mr::stream_allocator_adaptor>; } // namespace cudf::detail diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index 95469de8ae6..9db7e48498f 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -18,8 +18,7 @@ #include #include - -#include +#include #include @@ -27,111 +26,108 @@ namespace CUDF_EXPORT cudf { namespace datetime { namespace detail { /** - * @copydoc cudf::extract_year(cudf::column_view const&, rmm::device_async_resource_ref) + * @copydoc cudf::extract_year(cudf::column_view const&, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_year(cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_month(cudf::column_view const&, rmm::device_async_resource_ref) + * @copydoc cudf::extract_month(cudf::column_view const&, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr extract_month(cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_day(cudf::column_view const&, rmm::device_async_resource_ref) + * @copydoc cudf::extract_day(cudf::column_view const&, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_day(cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_weekday(cudf::column_view const&, rmm::device_async_resource_ref) + * @copydoc cudf::extract_weekday(cudf::column_view const&, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_weekday(cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_hour(cudf::column_view const&, rmm::device_async_resource_ref) + * @copydoc cudf::extract_hour(cudf::column_view const&, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_hour(cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_minute(cudf::column_view const&, rmm::device_async_resource_ref) + * @copydoc cudf::extract_minute(cudf::column_view const&, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr extract_minute(cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_second(cudf::column_view const&, rmm::device_async_resource_ref) + * @copydoc cudf::extract_second(cudf::column_view const&, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_second(cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_millisecond_fraction(cudf::column_view const&, + * @copydoc cudf::extract_millisecond_fraction(cudf::column_view const&, rmm::cuda_stream_view, * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_millisecond_fraction(cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_microsecond_fraction(cudf::column_view const&, + * @copydoc cudf::extract_microsecond_fraction(cudf::column_view const&, rmm::cuda_stream_view, * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr extract_microsecond_fraction(cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::extract_nanosecond_fraction(cudf::column_view const&, + * @copydoc cudf::extract_nanosecond_fraction(cudf::column_view const&, rmm::cuda_stream_view, * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr extract_nanosecond_fraction(cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::device_async_resource_ref) + * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr last_day_of_month(cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::day_of_year(cudf::column_view const&, rmm::device_async_resource_ref) + * @copydoc cudf::day_of_year(cudf::column_view const&, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr day_of_year(cudf::column_view const& column, rmm::cuda_stream_view stream, @@ -139,9 +135,8 @@ std::unique_ptr day_of_year(cudf::column_view const& column, /** * @copydoc cudf::add_calendrical_months(cudf::column_view const&, cudf::column_view const&, - * rmm::device_async_resource_ref) + * rmm::cuda_stream_view, rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr add_calendrical_months(cudf::column_view const& timestamps, cudf::column_view const& months, @@ -150,9 +145,8 @@ std::unique_ptr add_calendrical_months(cudf::column_view const& ti /** * @copydoc cudf::add_calendrical_months(cudf::column_view const&, cudf::scalar const&, - * rmm::device_async_resource_ref) + * rmm::cuda_stream_view, rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ std::unique_ptr add_calendrical_months(cudf::column_view const& timestamps, cudf::scalar const& months, @@ -160,9 +154,9 @@ std::unique_ptr add_calendrical_months(cudf::column_view const& ti rmm::device_async_resource_ref mr); /** - * @copydoc cudf::is_leap_year(cudf::column_view const&, rmm::device_async_resource_ref) + * @copydoc cudf::is_leap_year(cudf::column_view const&, rmm::cuda_stream_view, + * rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr is_leap_year(cudf::column_view const& column, rmm::cuda_stream_view stream, diff --git a/cpp/include/cudf/detail/distinct_hash_join.cuh b/cpp/include/cudf/detail/distinct_hash_join.cuh index c3bc3ad89fa..2acc10105cf 100644 --- a/cpp/include/cudf/detail/distinct_hash_join.cuh +++ b/cpp/include/cudf/detail/distinct_hash_join.cuh @@ -18,10 +18,10 @@ #include #include #include +#include #include #include -#include #include @@ -99,7 +99,7 @@ struct distinct_hash_join { cuda::thread_scope_device, comparator_adapter, probing_scheme_type, - cudf::detail::cuco_allocator, + cudf::detail::cuco_allocator, cuco_storage_type>; bool _has_nulls; ///< true if nulls are present in either build table or probe table diff --git a/cpp/include/cudf/detail/fill.hpp b/cpp/include/cudf/detail/fill.hpp index 82c6af8b611..04b3b63a9ed 100644 --- a/cpp/include/cudf/detail/fill.hpp +++ b/cpp/include/cudf/detail/fill.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include #include diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh index 073c37ccb77..d91c3df719a 100644 --- a/cpp/include/cudf/detail/gather.cuh +++ b/cpp/include/cudf/detail/gather.cuh @@ -33,12 +33,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -518,7 +518,7 @@ struct column_gatherer_impl { * Positive indices are unchanged by this transformation. 
*/ template -struct index_converter : public thrust::unary_function { +struct index_converter { index_converter(size_type n_rows) : n_rows(n_rows) {} __device__ map_type operator()(map_type in) const { return ((in % n_rows) + n_rows) % n_rows; } @@ -582,11 +582,11 @@ void gather_bitmask(table_view const& source, return col->mutable_view().null_mask(); }); auto d_target_masks = - make_device_uvector_async(target_masks, stream, rmm::mr::get_current_device_resource()); + make_device_uvector_async(target_masks, stream, cudf::get_current_device_resource_ref()); auto const device_source = table_device_view::create(source, stream); auto d_valid_counts = make_zeroed_device_uvector_async( - target.size(), stream, rmm::mr::get_current_device_resource()); + target.size(), stream, cudf::get_current_device_resource_ref()); // Dispatch operation enum to get implementation auto const impl = [op]() { @@ -609,7 +609,7 @@ void gather_bitmask(table_view const& source, stream); // Copy the valid counts into each column - auto const valid_counts = make_std_vector_sync(d_valid_counts, stream); + auto const valid_counts = make_host_vector_sync(d_valid_counts, stream); for (size_t i = 0; i < target.size(); ++i) { if (target[i]->nullable()) { auto const null_count = target_rows - valid_counts[i]; diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp index 39cd43934e3..48fb60aa5dd 100644 --- a/cpp/include/cudf/detail/gather.hpp +++ b/cpp/include/cudf/detail/gather.hpp @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/include/cudf/detail/groupby.hpp b/cpp/include/cudf/detail/groupby.hpp index 36eae05ce39..3e9511de5e4 100644 --- a/cpp/include/cudf/detail/groupby.hpp +++ b/cpp/include/cudf/detail/groupby.hpp @@ -17,10 +17,10 @@ #include #include +#include #include #include -#include #include #include diff --git a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp 
b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp index c0910b4d5ae..e3a6f7db2b5 100644 --- a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp +++ b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include namespace CUDF_EXPORT cudf { namespace groupby { namespace detail { diff --git a/cpp/include/cudf/detail/groupby/sort_helper.hpp b/cpp/include/cudf/detail/groupby/sort_helper.hpp index a411a890622..ce8783d8b79 100644 --- a/cpp/include/cudf/detail/groupby/sort_helper.hpp +++ b/cpp/include/cudf/detail/groupby/sort_helper.hpp @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include namespace CUDF_EXPORT cudf { namespace groupby::detail::sort { diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh index dfe79646167..7de79b31bc7 100644 --- a/cpp/include/cudf/detail/hash_reduce_by_row.cuh +++ b/cpp/include/cudf/detail/hash_reduce_by_row.cuh @@ -18,11 +18,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -32,7 +32,7 @@ namespace cudf::detail { using hash_map_type = cuco::legacy:: - static_map; + static_map>; /** * @brief The base struct for customized reduction functor to perform reduce-by-key with keys are diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index c264dff2181..f0510c86c3a 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -22,9 +22,9 @@ #include #include +#include #include #include -#include #include namespace cudf { @@ -376,10 +376,10 @@ struct indexalator_factory { iter = make_input_iterator(col); } - __device__ thrust::optional operator()(size_type i) const + __device__ cuda::std::optional operator()(size_type i) const { - return has_nulls && !bit_is_set(null_mask, i + offset) ? 
thrust::nullopt - : thrust::make_optional(iter[i]); + return has_nulls && !bit_is_set(null_mask, i + offset) ? cuda::std::nullopt + : cuda::std::make_optional(iter[i]); } }; @@ -400,9 +400,9 @@ struct indexalator_factory { iter = indexalator_factory::make_input_iterator(input); } - __device__ thrust::optional operator()(size_type) const + __device__ cuda::std::optional operator()(size_type) const { - return is_null ? thrust::nullopt : thrust::make_optional(*iter); + return is_null ? cuda::std::nullopt : cuda::std::make_optional(*iter); } }; diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp index 0b9319ba663..938d0e95097 100644 --- a/cpp/include/cudf/detail/interop.hpp +++ b/cpp/include/cudf/detail/interop.hpp @@ -16,30 +16,14 @@ #pragma once -// We disable warning 611 because the `arrow::TableBatchReader` only partially -// override the `ReadNext` method of `arrow::RecordBatchReader::ReadNext` -// triggering warning 611-D from nvcc. -#ifdef __CUDACC__ -#pragma nv_diag_suppress 611 -#pragma nv_diag_suppress 2810 -#endif -#include - -#include -#ifdef __CUDACC__ -#pragma nv_diag_default 611 -#pragma nv_diag_default 2810 -#endif - #include #include #include #include +#include #include -#include - namespace CUDF_EXPORT cudf { namespace detail { @@ -61,89 +45,6 @@ DLManagedTensor* to_dlpack(table_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -// Creating arrow as per given type_id and buffer arguments -template -std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... 
args) -{ - switch (id) { - case type_id::BOOL8: return std::make_shared(std::forward(args)...); - case type_id::INT8: return std::make_shared(std::forward(args)...); - case type_id::INT16: return std::make_shared(std::forward(args)...); - case type_id::INT32: return std::make_shared(std::forward(args)...); - case type_id::INT64: return std::make_shared(std::forward(args)...); - case type_id::UINT8: return std::make_shared(std::forward(args)...); - case type_id::UINT16: return std::make_shared(std::forward(args)...); - case type_id::UINT32: return std::make_shared(std::forward(args)...); - case type_id::UINT64: return std::make_shared(std::forward(args)...); - case type_id::FLOAT32: return std::make_shared(std::forward(args)...); - case type_id::FLOAT64: return std::make_shared(std::forward(args)...); - case type_id::TIMESTAMP_DAYS: - return std::make_shared(std::make_shared(), - std::forward(args)...); - case type_id::TIMESTAMP_SECONDS: - return std::make_shared(arrow::timestamp(arrow::TimeUnit::SECOND), - std::forward(args)...); - case type_id::TIMESTAMP_MILLISECONDS: - return std::make_shared(arrow::timestamp(arrow::TimeUnit::MILLI), - std::forward(args)...); - case type_id::TIMESTAMP_MICROSECONDS: - return std::make_shared(arrow::timestamp(arrow::TimeUnit::MICRO), - std::forward(args)...); - case type_id::TIMESTAMP_NANOSECONDS: - return std::make_shared(arrow::timestamp(arrow::TimeUnit::NANO), - std::forward(args)...); - case type_id::DURATION_SECONDS: - return std::make_shared(arrow::duration(arrow::TimeUnit::SECOND), - std::forward(args)...); - case type_id::DURATION_MILLISECONDS: - return std::make_shared(arrow::duration(arrow::TimeUnit::MILLI), - std::forward(args)...); - case type_id::DURATION_MICROSECONDS: - return std::make_shared(arrow::duration(arrow::TimeUnit::MICRO), - std::forward(args)...); - case type_id::DURATION_NANOSECONDS: - return std::make_shared(arrow::duration(arrow::TimeUnit::NANO), - std::forward(args)...); - default: 
CUDF_FAIL("Unsupported type_id conversion to arrow"); - } -} - -// Converting arrow type to cudf type -data_type arrow_to_cudf_type(arrow::DataType const& arrow_type); - -/** - * @copydoc cudf::to_arrow(table_view input, std::vector const& metadata, - * rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr) - */ -std::shared_ptr to_arrow(table_view input, - std::vector const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr); - -/** - * @copydoc cudf::to_arrow(cudf::scalar const& input, column_metadata const& metadata, - * rmm::cuda_stream_view stream, arrow::MemoryPool* ar_mr) - */ -std::shared_ptr to_arrow(cudf::scalar const& input, - column_metadata const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr); -/** - * @copydoc cudf::from_arrow(arrow::Table const& input_table, rmm::cuda_stream_view stream, - * rmm::device_async_resource_ref mr) - */ -std::unique_ptr
from_arrow(arrow::Table const& input_table, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -/** - * @copydoc cudf::from_arrow(arrow::Scalar const& input, rmm::cuda_stream_view stream, - * rmm::device_async_resource_ref mr) - */ -std::unique_ptr from_arrow(arrow::Scalar const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - /** * @brief Return a maximum precision for a given type. * diff --git a/cpp/include/cudf/detail/iterator.cuh b/cpp/include/cudf/detail/iterator.cuh index 9e6227ec19b..4349e1b70fd 100644 --- a/cpp/include/cudf/detail/iterator.cuh +++ b/cpp/include/cudf/detail/iterator.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,10 +37,10 @@ #include #include +#include #include #include #include -#include #include #include @@ -186,7 +186,7 @@ auto make_null_replacement_iterator(column_device_view const& column, /** * @brief Constructs an optional iterator over a column's values and its validity. * - * Dereferencing the returned iterator returns a `thrust::optional`. + * Dereferencing the returned iterator returns a `cuda::std::optional`. * * The element of this iterator contextually converts to bool. The conversion returns true * if the object contains a value and false if it does not contain a value. @@ -237,7 +237,7 @@ auto make_null_replacement_iterator(column_device_view const& column, * @param column The column to iterate * @param has_nulls Indicates whether `column` is checked for nulls. 
* @return Iterator that returns valid column elements and the validity of the - * element in a `thrust::optional` + * element in a `cuda::std::optional` */ template auto make_optional_iterator(column_device_view const& column, Nullate has_nulls) @@ -393,7 +393,7 @@ auto inline make_scalar_iterator(scalar const& scalar_value) /** * @brief Optional accessor for a scalar * - * The `scalar_optional_accessor` always returns a `thrust::optional` of the scalar. + * The `scalar_optional_accessor` always returns a `cuda::std::optional` of the scalar. * The validity of the optional is determined by the `Nullate` parameter which may * be one of the following: * @@ -401,14 +401,14 @@ auto inline make_scalar_iterator(scalar const& scalar_value) * will contain a value only if the scalar is valid. * * - `nullate::NO` means the caller attests that the scalar will always be valid, - * no checks will occur and `thrust::optional{column[i]}` will return a value + * no checks will occur and `cuda::std::optional{column[i]}` will return a value * for each `i`. * * - `nullate::DYNAMIC` defers the assumption of nullability to runtime and the caller * specifies if the scalar may be valid or invalid. - * For `DYNAMIC{true}` the return value will be a `thrust::optional{scalar}` when the - * scalar is valid and a `thrust::optional{}` when the scalar is invalid. - * For `DYNAMIC{false}` the return value will always be a `thrust::optional{scalar}`. + * For `DYNAMIC{true}` the return value will be a `cuda::std::optional{scalar}` when the + * scalar is valid and a `cuda::std::optional{}` when the scalar is invalid. + * For `DYNAMIC{false}` the return value will always be a `cuda::std::optional{scalar}`. * * @throws `cudf::logic_error` if scalar datatype and Element type mismatch. 
* @@ -418,7 +418,7 @@ auto inline make_scalar_iterator(scalar const& scalar_value) template struct scalar_optional_accessor : public scalar_value_accessor { using super_t = scalar_value_accessor; - using value_type = thrust::optional; + using value_type = cuda::std::optional; scalar_optional_accessor(scalar const& scalar_value, Nullate with_nulls) : scalar_value_accessor(scalar_value), has_nulls{with_nulls} @@ -427,7 +427,7 @@ struct scalar_optional_accessor : public scalar_value_accessor { __device__ inline value_type const operator()(size_type) const { - if (has_nulls && !super_t::dscalar.is_valid()) { return value_type{thrust::nullopt}; } + if (has_nulls && !super_t::dscalar.is_valid()) { return value_type{cuda::std::nullopt}; } if constexpr (cudf::is_fixed_point()) { using namespace numeric; @@ -519,7 +519,7 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor`. + * Dereferencing the returned iterator returns a `cuda::std::optional`. * * The element of this iterator contextually converts to bool. The conversion returns true * if the object contains a value and false if it does not contain a value. 
@@ -575,7 +575,7 @@ struct scalar_representation_pair_accessor : public scalar_value_accessor auto inline make_optional_iterator(scalar const& scalar_value, Nullate has_nulls) diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index ff7da4462a2..b084a94cbc8 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -20,11 +20,11 @@ #include #include #include +#include #include #include #include -#include #include @@ -59,7 +59,7 @@ struct hash_join { cuco::static_multimap, cuco::legacy::double_hashing>; hash_join() = delete; diff --git a/cpp/include/cudf/detail/label_bins.hpp b/cpp/include/cudf/detail/label_bins.hpp index 92a417b0132..44fcba0d2d6 100644 --- a/cpp/include/cudf/detail/label_bins.hpp +++ b/cpp/include/cudf/detail/label_bins.hpp @@ -21,11 +21,10 @@ #include #include #include +#include #include #include -#include -#include namespace CUDF_EXPORT cudf { diff --git a/cpp/include/cudf/detail/merge.hpp b/cpp/include/cudf/detail/merge.hpp index 72e34b76158..43a0387ab99 100644 --- a/cpp/include/cudf/detail/merge.hpp +++ b/cpp/include/cudf/detail/merge.hpp @@ -17,9 +17,9 @@ #pragma once #include +#include #include -#include #include diff --git a/cpp/include/cudf/detail/null_mask.cuh b/cpp/include/cudf/detail/null_mask.cuh index ae6db5409cc..327c732716c 100644 --- a/cpp/include/cudf/detail/null_mask.cuh +++ b/cpp/include/cudf/detail/null_mask.cuh @@ -21,12 +21,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -164,7 +164,7 @@ size_type inplace_bitmask_binop(Binop op, CUDF_EXPECTS(std::all_of(masks.begin(), masks.end(), [](auto p) { return p != nullptr; }), "Mask pointer cannot be null"); - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource(); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref(); rmm::device_scalar d_counter{0, stream, mr}; rmm::device_uvector d_masks(masks.size(), stream, mr); 
rmm::device_uvector d_begin_bits(masks_begin_bits.size(), stream, mr); @@ -434,7 +434,7 @@ std::vector segmented_count_bits(bitmask_type const* bitmask, std::distance(indices_begin, indices_end), stream); std::copy(indices_begin, indices_end, std::back_inserter(h_indices)); auto const d_indices = - make_device_uvector_async(h_indices, stream, rmm::mr::get_current_device_resource()); + make_device_uvector_async(h_indices, stream, cudf::get_current_device_resource_ref()); // Compute the bit counts over each segment. auto first_bit_indices_begin = thrust::make_transform_iterator( @@ -449,7 +449,7 @@ std::vector segmented_count_bits(bitmask_type const* bitmask, last_bit_indices_begin, count_bits, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // Copy the results back to the host. return make_std_vector_sync(d_bit_counts, stream); @@ -576,7 +576,7 @@ std::pair segmented_null_mask_reduction( last_bit_indices_begin, cudf::detail::count_bits_policy::SET_BITS, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto const length_and_valid_count = thrust::make_zip_iterator(segment_length_iterator, segment_valid_counts.begin()); return cudf::detail::valid_if( diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp index 67e3617d873..b8c52a4ae2c 100644 --- a/cpp/include/cudf/detail/null_mask.hpp +++ b/cpp/include/cudf/detail/null_mask.hpp @@ -18,10 +18,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/include/cudf/detail/quantiles.hpp b/cpp/include/cudf/detail/quantiles.hpp index 23d5fb73ba3..4f912077e59 100644 --- a/cpp/include/cudf/detail/quantiles.hpp +++ b/cpp/include/cudf/detail/quantiles.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace detail { diff --git a/cpp/include/cudf/detail/repeat.hpp b/cpp/include/cudf/detail/repeat.hpp index 
e17f1b7c5fd..81ac5bf2b14 100644 --- a/cpp/include/cudf/detail/repeat.hpp +++ b/cpp/include/cudf/detail/repeat.hpp @@ -18,9 +18,9 @@ #include #include +#include #include -#include #include diff --git a/cpp/include/cudf/detail/replace.hpp b/cpp/include/cudf/detail/replace.hpp index e2bd729861b..3b18b95ce75 100644 --- a/cpp/include/cudf/detail/replace.hpp +++ b/cpp/include/cudf/detail/replace.hpp @@ -18,9 +18,9 @@ #include #include #include +#include #include -#include #include diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp index 30f8b88b116..aeeed282d8b 100644 --- a/cpp/include/cudf/detail/reshape.hpp +++ b/cpp/include/cudf/detail/reshape.hpp @@ -18,9 +18,9 @@ #include #include +#include #include -#include #include @@ -28,8 +28,6 @@ namespace CUDF_EXPORT cudf { namespace detail { /** * @copydoc cudf::tile - * - * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr
tile(table_view const& input, size_type count, @@ -38,8 +36,6 @@ std::unique_ptr
tile(table_view const& input, /** * @copydoc cudf::interleave_columns - * - * @param stream CUDA stream used for device memory operations and kernel launches */ std::unique_ptr interleave_columns(table_view const& input, rmm::cuda_stream_view, diff --git a/cpp/include/cudf/detail/rolling.hpp b/cpp/include/cudf/detail/rolling.hpp index 5bfa5679531..d8d5506969b 100644 --- a/cpp/include/cudf/detail/rolling.hpp +++ b/cpp/include/cudf/detail/rolling.hpp @@ -20,9 +20,9 @@ #include #include #include +#include #include -#include #include diff --git a/cpp/include/cudf/detail/round.hpp b/cpp/include/cudf/detail/round.hpp index ba3ef1c1ce7..df1faf05dbd 100644 --- a/cpp/include/cudf/detail/round.hpp +++ b/cpp/include/cudf/detail/round.hpp @@ -18,9 +18,9 @@ #include #include +#include #include -#include namespace CUDF_EXPORT cudf { //! Inner interfaces and implementations diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp index bd60309c5c3..313964a6341 100644 --- a/cpp/include/cudf/detail/scan.hpp +++ b/cpp/include/cudf/detail/scan.hpp @@ -18,9 +18,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace detail { diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh index 80bc87731ca..fa93ce4e13c 100644 --- a/cpp/include/cudf/detail/scatter.cuh +++ b/cpp/include/cudf/detail/scatter.cuh @@ -30,12 +30,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -223,7 +223,7 @@ struct column_scatterer_impl { auto target_matched = dictionary::detail::add_keys(target, source.keys(), stream, mr); auto const target_view = dictionary_column_view(target_matched->view()); auto source_matched = dictionary::detail::set_keys( - source, target_view.keys(), stream, rmm::mr::get_current_device_resource()); + source, target_view.keys(), stream, cudf::get_current_device_resource_ref()); auto const source_view = 
dictionary_column_view(source_matched->view()); // now build the new indices by doing a scatter on just the matched indices diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp index 6691ddc5c09..39f973bb611 100644 --- a/cpp/include/cudf/detail/scatter.hpp +++ b/cpp/include/cudf/detail/scatter.hpp @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/include/cudf/detail/search.hpp b/cpp/include/cudf/detail/search.hpp index 72e2cf074bc..da3b98660dc 100644 --- a/cpp/include/cudf/detail/search.hpp +++ b/cpp/include/cudf/detail/search.hpp @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include namespace CUDF_EXPORT cudf { namespace detail { diff --git a/cpp/include/cudf/detail/sequence.hpp b/cpp/include/cudf/detail/sequence.hpp index a08010a610f..41d9fe41080 100644 --- a/cpp/include/cudf/detail/sequence.hpp +++ b/cpp/include/cudf/detail/sequence.hpp @@ -19,16 +19,16 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace detail { /** * @copydoc cudf::sequence(size_type size, scalar const& init, scalar const& step, * rmm::device_async_resource_ref mr = - *rmm::mr::get_current_device_resource()) + *cudf::get_current_device_resource_ref()) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -41,7 +41,7 @@ std::unique_ptr sequence(size_type size, /** * @copydoc cudf::sequence(size_type size, scalar const& init, rmm::device_async_resource_ref mr = - rmm::mr::get_current_device_resource()) + cudf::get_current_device_resource_ref()) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ diff --git a/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh b/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh index 63e4fca8915..88ec0c07dc5 100644 --- a/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh +++ b/cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh @@ -19,11 +19,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp index 08cf329f199..185855e1fc0 100644 --- a/cpp/include/cudf/detail/sorting.hpp +++ b/cpp/include/cudf/detail/sorting.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp index 05194148a70..8a4366bdd63 100644 --- a/cpp/include/cudf/detail/stream_compaction.hpp +++ b/cpp/include/cudf/detail/stream_compaction.hpp @@ -20,18 +20,16 @@ #include #include #include +#include #include #include -#include namespace CUDF_EXPORT cudf { namespace detail { /** * @copydoc cudf::drop_nulls(table_view const&, std::vector const&, - * cudf::size_type, rmm::device_async_resource_ref) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * cudf::size_type, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ std::unique_ptr
drop_nulls(table_view const& input, std::vector const& keys, @@ -41,9 +39,7 @@ std::unique_ptr
drop_nulls(table_view const& input, /** * @copydoc cudf::drop_nans(table_view const&, std::vector const&, - * cudf::size_type, rmm::device_async_resource_ref) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * cudf::size_type, rmm::cuda_stream_view, rmm::device_async_resource_ref) */ std::unique_ptr
drop_nans(table_view const& input, std::vector const& keys, @@ -53,8 +49,6 @@ std::unique_ptr
drop_nans(table_view const& input, /** * @copydoc cudf::apply_boolean_mask - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
apply_boolean_mask(table_view const& input, column_view const& boolean_mask, @@ -63,8 +57,6 @@ std::unique_ptr
apply_boolean_mask(table_view const& input, /** * @copydoc cudf::unique - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
unique(table_view const& input, std::vector const& keys, @@ -75,8 +67,6 @@ std::unique_ptr
unique(table_view const& input, /** * @copydoc cudf::distinct - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
distinct(table_view const& input, std::vector const& keys, @@ -110,9 +100,7 @@ rmm::device_uvector distinct_indices(table_view const& input, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::unique_count(column_view const&, null_policy, nan_policy) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::unique_count(column_view const&, null_policy, nan_policy, rmm::cuda_stream_view) */ cudf::size_type unique_count(column_view const& input, null_policy null_handling, @@ -120,18 +108,14 @@ cudf::size_type unique_count(column_view const& input, rmm::cuda_stream_view stream); /** - * @copydoc cudf::unique_count(table_view const&, null_equality) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::unique_count(table_view const&, null_equality, rmm::cuda_stream_view) */ cudf::size_type unique_count(table_view const& input, null_equality nulls_equal, rmm::cuda_stream_view stream); /** - * @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. + * @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy, rmm::cuda_stream_view) */ cudf::size_type distinct_count(column_view const& input, null_policy null_handling, @@ -139,9 +123,7 @@ cudf::size_type distinct_count(column_view const& input, rmm::cuda_stream_view stream); /** - * @copydoc cudf::distinct_count(table_view const&, null_equality) - * - * @param[in] stream CUDA stream used for device memory operations and kernel launches. 
+ * @copydoc cudf::distinct_count(table_view const&, null_equality, rmm::cuda_stream_view) */ cudf::size_type distinct_count(table_view const& input, null_equality nulls_equal, diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp index 7de68035b19..261c54afd51 100644 --- a/cpp/include/cudf/detail/structs/utilities.hpp +++ b/cpp/include/cudf/detail/structs/utilities.hpp @@ -19,11 +19,11 @@ #include #include #include +#include #include #include #include -#include namespace CUDF_EXPORT cudf { namespace structs::detail { diff --git a/cpp/include/cudf/detail/tdigest/tdigest.hpp b/cpp/include/cudf/detail/tdigest/tdigest.hpp index 10eb3d389c7..80a4460023f 100644 --- a/cpp/include/cudf/detail/tdigest/tdigest.hpp +++ b/cpp/include/cudf/detail/tdigest/tdigest.hpp @@ -19,10 +19,10 @@ #include #include #include +#include #include #include -#include namespace CUDF_EXPORT cudf { namespace tdigest::detail { diff --git a/cpp/include/cudf/detail/timezone.hpp b/cpp/include/cudf/detail/timezone.hpp index c7798ff60ed..f51d1ba42b2 100644 --- a/cpp/include/cudf/detail/timezone.hpp +++ b/cpp/include/cudf/detail/timezone.hpp @@ -16,25 +16,25 @@ #pragma once #include +#include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace detail { /** * @copydoc cudf::make_timezone_transition_table(std::optional, std::string_view, - * rmm::device_async_resource_ref) + * rmm::cuda_stream_view, rmm::device_async_resource_ref) * - * @param stream CUDA stream used for device memory operations and kernel launches. */ std::unique_ptr
make_timezone_transition_table( std::optional tzif_dir, std::string_view timezone_name, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); } // namespace detail } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp index 02849ef023c..4cfa95468f2 100644 --- a/cpp/include/cudf/detail/transform.hpp +++ b/cpp/include/cudf/detail/transform.hpp @@ -20,9 +20,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace detail { diff --git a/cpp/include/cudf/detail/transpose.hpp b/cpp/include/cudf/detail/transpose.hpp index 559b2c32996..22382fa0713 100644 --- a/cpp/include/cudf/detail/transpose.hpp +++ b/cpp/include/cudf/detail/transpose.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace detail { diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp index bb05138bc8c..18b1e9b2d2e 100644 --- a/cpp/include/cudf/detail/unary.hpp +++ b/cpp/include/cudf/detail/unary.hpp @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh index 5007af7f9f1..d31ca3d92d1 100644 --- a/cpp/include/cudf/detail/utilities/cuda.cuh +++ b/cpp/include/cudf/detail/utilities/cuda.cuh @@ -189,35 +189,6 @@ __device__ T single_lane_block_sum_reduce(T lane_value) return result; } -/** - * @brief Get the number of elements that can be processed per thread. 
- * - * @param[in] kernel The kernel for which the elements per thread needs to be assessed - * @param[in] total_size Number of elements - * @param[in] block_size Expected block size - * - * @return cudf::size_type Elements per thread that can be processed for given specification. - */ -template -cudf::size_type elements_per_thread(Kernel kernel, - cudf::size_type total_size, - cudf::size_type block_size, - cudf::size_type max_per_thread = 32) -{ - CUDF_FUNC_RANGE(); - - // calculate theoretical occupancy - int max_blocks = 0; - CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, kernel, block_size, 0)); - - int device = 0; - CUDF_CUDA_TRY(cudaGetDevice(&device)); - int num_sms = 0; - CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device)); - int per_thread = total_size / (max_blocks * num_sms * block_size); - return std::clamp(per_thread, 1, max_per_thread); -} - /** * @brief Finds the smallest value not less than `number_to_round` and modulo `modulus` is * zero. Expects modulus to be a power of 2. diff --git a/cpp/include/cudf/detail/utilities/cuda.hpp b/cpp/include/cudf/detail/utilities/cuda.hpp new file mode 100644 index 00000000000..58c7ae8ed6a --- /dev/null +++ b/cpp/include/cudf/detail/utilities/cuda.hpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +#include + +namespace CUDF_EXPORT cudf { +namespace detail { + +/** + * @brief Get the number of multiprocessors on the device + */ +cudf::size_type num_multiprocessors(); + +/** + * @brief Get the number of elements that can be processed per thread. + * + * @param[in] kernel The kernel for which the elements per thread needs to be assessed + * @param[in] total_size Number of elements + * @param[in] block_size Expected block size + * + * @return cudf::size_type Elements per thread that can be processed for given specification. + */ +template +cudf::size_type elements_per_thread(Kernel kernel, + cudf::size_type total_size, + cudf::size_type block_size, + cudf::size_type max_per_thread = 32) +{ + CUDF_FUNC_RANGE(); + + // calculate theoretical occupancy + int max_blocks = 0; + CUDF_CUDA_TRY(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks, kernel, block_size, 0)); + + int per_thread = total_size / (max_blocks * num_multiprocessors() * block_size); + return std::clamp(per_thread, 1, max_per_thread); +} + +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/detail/utilities/host_memory.hpp b/cpp/include/cudf/detail/utilities/host_memory.hpp index c6775a950c9..c661faf1fbe 100644 --- a/cpp/include/cudf/detail/utilities/host_memory.hpp +++ b/cpp/include/cudf/detail/utilities/host_memory.hpp @@ -18,10 +18,9 @@ #include #include +#include #include -#include - #include namespace cudf::detail { diff --git a/cpp/include/cudf/detail/utilities/host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp index d4dd7b0d626..ecb8f910463 100644 --- a/cpp/include/cudf/detail/utilities/host_vector.hpp +++ b/cpp/include/cudf/detail/utilities/host_vector.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include #include @@ -33,7 +33,7 @@ namespace CUDF_EXPORT cudf { namespace detail { /*! 
\p rmm_host_allocator is a CUDA-specific host memory allocator - * that employs \c a `rmm::host_async_resource_ref` for allocation. + * that employs \c a `cudf::host_async_resource_ref` for allocation. * * \see https://en.cppreference.com/w/cpp/memory/allocator */ @@ -68,10 +68,10 @@ inline constexpr bool contains_property = (cuda::std::is_same_v || ... || false); /*! \p rmm_host_allocator is a CUDA-specific host memory allocator - * that employs \c `rmm::host_async_resource_ref` for allocation. + * that employs \c `cudf::host_async_resource_ref` for allocation. * * The \p rmm_host_allocator provides an interface for host memory allocation through the user - * provided \c `rmm::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of + * provided \c `cudf::host_async_resource_ref`. The \p rmm_host_allocator does not take ownership of * this reference and therefore it is the user's responsibility to ensure its lifetime for the * duration of the lifetime of the \p rmm_host_allocator. 
* diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp index a9d91cdeee1..953ae5b9308 100644 --- a/cpp/include/cudf/detail/utilities/vector_factories.hpp +++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp @@ -27,13 +27,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh index 56a2c76b741..cfb2e70bfed 100644 --- a/cpp/include/cudf/detail/valid_if.cuh +++ b/cpp/include/cudf/detail/valid_if.cuh @@ -22,10 +22,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/include/cudf/dictionary/detail/concatenate.hpp b/cpp/include/cudf/dictionary/detail/concatenate.hpp index 0eb17aa06f4..12f09616295 100644 --- a/cpp/include/cudf/dictionary/detail/concatenate.hpp +++ b/cpp/include/cudf/dictionary/detail/concatenate.hpp @@ -18,10 +18,10 @@ #include #include #include +#include #include #include -#include namespace CUDF_EXPORT cudf { namespace dictionary::detail { diff --git a/cpp/include/cudf/dictionary/detail/encode.hpp b/cpp/include/cudf/dictionary/detail/encode.hpp index cc7ffbd397f..600ba8d6c67 100644 --- a/cpp/include/cudf/dictionary/detail/encode.hpp +++ b/cpp/include/cudf/dictionary/detail/encode.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace dictionary::detail { diff --git a/cpp/include/cudf/dictionary/detail/merge.hpp b/cpp/include/cudf/dictionary/detail/merge.hpp index a1777d412fe..69d0d9fa9b0 100644 --- a/cpp/include/cudf/dictionary/detail/merge.hpp +++ b/cpp/include/cudf/dictionary/detail/merge.hpp @@ -18,9 +18,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace dictionary::detail { diff --git a/cpp/include/cudf/dictionary/detail/replace.hpp b/cpp/include/cudf/dictionary/detail/replace.hpp 
index 1e1ee182fc5..c854e794b17 100644 --- a/cpp/include/cudf/dictionary/detail/replace.hpp +++ b/cpp/include/cudf/dictionary/detail/replace.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace dictionary::detail { diff --git a/cpp/include/cudf/dictionary/detail/search.hpp b/cpp/include/cudf/dictionary/detail/search.hpp index 921acc258a9..09907c9070d 100644 --- a/cpp/include/cudf/dictionary/detail/search.hpp +++ b/cpp/include/cudf/dictionary/detail/search.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace dictionary { diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp index 9eb812eb8ee..0848df64596 100644 --- a/cpp/include/cudf/dictionary/detail/update_keys.hpp +++ b/cpp/include/cudf/dictionary/detail/update_keys.hpp @@ -19,10 +19,10 @@ #include #include #include +#include #include #include -#include namespace CUDF_EXPORT cudf { namespace dictionary::detail { diff --git a/cpp/include/cudf/dictionary/dictionary_factories.hpp b/cpp/include/cudf/dictionary/dictionary_factories.hpp index 2f663c4af61..4a63ee05479 100644 --- a/cpp/include/cudf/dictionary/dictionary_factories.hpp +++ b/cpp/include/cudf/dictionary/dictionary_factories.hpp @@ -18,10 +18,9 @@ #include #include #include +#include #include -#include -#include namespace CUDF_EXPORT cudf { /** @@ -67,7 +66,7 @@ std::unique_ptr make_dictionary_column( column_view const& keys_column, column_view const& indices_column, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a dictionary column by taking ownership of the provided keys @@ -97,7 +96,7 @@ std::unique_ptr make_dictionary_column( rmm::device_buffer&& null_mask, size_type 
null_count, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a dictionary column by taking ownership of the provided keys @@ -124,7 +123,7 @@ std::unique_ptr make_dictionary_column( std::unique_ptr keys_column, std::unique_ptr indices_column, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/dictionary/encode.hpp b/cpp/include/cudf/dictionary/encode.hpp index 9e68c947793..dc81fd74992 100644 --- a/cpp/include/cudf/dictionary/encode.hpp +++ b/cpp/include/cudf/dictionary/encode.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace dictionary { @@ -62,7 +60,7 @@ std::unique_ptr encode( column_view const& column, data_type indices_type = data_type{type_id::UINT32}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a column by gathering the keys from the provided @@ -82,7 +80,7 @@ std::unique_ptr encode( std::unique_ptr decode( dictionary_column_view const& dictionary_column, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace dictionary diff --git a/cpp/include/cudf/dictionary/search.hpp b/cpp/include/cudf/dictionary/search.hpp index 66275de33e9..16d59318dd0 100644 --- 
a/cpp/include/cudf/dictionary/search.hpp +++ b/cpp/include/cudf/dictionary/search.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace dictionary { @@ -46,7 +44,7 @@ std::unique_ptr get_index( dictionary_column_view const& dictionary, scalar const& key, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace dictionary diff --git a/cpp/include/cudf/dictionary/update_keys.hpp b/cpp/include/cudf/dictionary/update_keys.hpp index c02e91f8d78..85e5af8cf22 100644 --- a/cpp/include/cudf/dictionary/update_keys.hpp +++ b/cpp/include/cudf/dictionary/update_keys.hpp @@ -17,11 +17,9 @@ #include #include +#include #include -#include -#include - namespace CUDF_EXPORT cudf { namespace dictionary { /** @@ -61,7 +59,7 @@ std::unique_ptr add_keys( dictionary_column_view const& dictionary_column, column_view const& new_keys, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a new dictionary column by removing the specified keys @@ -93,7 +91,7 @@ std::unique_ptr remove_keys( dictionary_column_view const& dictionary_column, column_view const& keys_to_remove, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a new dictionary column by removing any keys @@ -115,7 +113,7 @@ std::unique_ptr remove_keys( std::unique_ptr remove_unused_keys( dictionary_column_view const& dictionary_column, rmm::cuda_stream_view stream = cudf::get_default_stream(), - 
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a new dictionary column by applying only the specified keys @@ -149,7 +147,7 @@ std::unique_ptr set_keys( dictionary_column_view const& dictionary_column, column_view const& keys, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create new dictionaries that have keys merged from the input dictionaries. @@ -165,7 +163,7 @@ std::unique_ptr set_keys( std::vector> match_dictionaries( cudf::host_span input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace dictionary diff --git a/cpp/include/cudf/filling.hpp b/cpp/include/cudf/filling.hpp index 054f1e859f4..15a21b44f3b 100644 --- a/cpp/include/cudf/filling.hpp +++ b/cpp/include/cudf/filling.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include #include @@ -94,7 +92,7 @@ std::unique_ptr fill( size_type end, scalar const& value, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Repeat rows of a Table. @@ -128,7 +126,7 @@ std::unique_ptr
repeat( table_view const& input_table, column_view const& count, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Repeat rows of a Table. @@ -153,7 +151,7 @@ std::unique_ptr
repeat( table_view const& input_table, size_type count, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Fills a column with a sequence of value specified by an initial value and a step. @@ -184,7 +182,7 @@ std::unique_ptr sequence( scalar const& init, scalar const& step, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Fills a column with a sequence of value specified by an initial value and a step of 1. @@ -211,7 +209,7 @@ std::unique_ptr sequence( size_type size, scalar const& init, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Generate a sequence of timestamps beginning at `init` and incrementing by `months` for @@ -242,7 +240,7 @@ std::unique_ptr calendrical_month_sequence( scalar const& init, size_type months, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index f7df9c1aa9b..11c778408fe 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -22,11 +22,10 @@ #include #include #include +#include #include #include -#include -#include #include #include @@ -186,7 +185,7 @@ class groupby { */ std::pair, std::vector> aggregate( host_span requests, - rmm::device_async_resource_ref mr = 
rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @copydoc aggregate(host_span, rmm::device_async_resource_ref) @@ -196,7 +195,7 @@ class groupby { std::pair, std::vector> aggregate( host_span requests, rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs grouped scans on the specified values. * @@ -250,7 +249,7 @@ class groupby { */ std::pair, std::vector> scan( host_span requests, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs grouped shifts for specified values. @@ -306,7 +305,7 @@ class groupby { table_view const& values, host_span offsets, std::vector> const& fill_values, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief The grouped data corresponding to a groupby operation on a set of values. @@ -335,7 +334,7 @@ class groupby { * @return A `groups` object representing grouped keys and values */ groups get_groups(cudf::table_view values = {}, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs grouped replace nulls on @p value @@ -375,7 +374,7 @@ class groupby { std::pair, std::unique_ptr
> replace_nulls( table_view const& values, host_span replace_policies, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); private: table_view _keys; ///< Keys that determine grouping diff --git a/cpp/include/cudf/hashing.hpp b/cpp/include/cudf/hashing.hpp index b8be2af6967..0c5327edb91 100644 --- a/cpp/include/cudf/hashing.hpp +++ b/cpp/include/cudf/hashing.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { @@ -62,7 +60,7 @@ std::unique_ptr murmurhash3_x86_32( table_view const& input, uint32_t seed = DEFAULT_HASH_SEED, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the MurmurHash3 64-bit hash value of each row in the given table @@ -81,7 +79,7 @@ std::unique_ptr
murmurhash3_x64_128( table_view const& input, uint64_t seed = DEFAULT_HASH_SEED, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the MD5 hash value of each row in the given table @@ -95,7 +93,7 @@ std::unique_ptr
murmurhash3_x64_128( std::unique_ptr md5( table_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the SHA-1 hash value of each row in the given table @@ -109,7 +107,7 @@ std::unique_ptr md5( std::unique_ptr sha1( table_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the SHA-224 hash value of each row in the given table @@ -123,7 +121,7 @@ std::unique_ptr sha1( std::unique_ptr sha224( table_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the SHA-256 hash value of each row in the given table @@ -137,7 +135,7 @@ std::unique_ptr sha224( std::unique_ptr sha256( table_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the SHA-384 hash value of each row in the given table @@ -151,7 +149,7 @@ std::unique_ptr sha256( std::unique_ptr sha384( table_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the SHA-512 hash value of each row in the given table @@ -165,7 +163,7 @@ std::unique_ptr sha384( std::unique_ptr sha512( table_view const& input, 
rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the XXHash_64 hash value of each row in the given table @@ -183,7 +181,7 @@ std::unique_ptr xxhash_64( table_view const& input, uint64_t seed = DEFAULT_HASH_SEED, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); } // namespace hashing diff --git a/cpp/include/cudf/hashing/detail/hashing.hpp b/cpp/include/cudf/hashing/detail/hashing.hpp index 1a459430346..a978e54a1b9 100644 --- a/cpp/include/cudf/hashing/detail/hashing.hpp +++ b/cpp/include/cudf/hashing/detail/hashing.hpp @@ -17,9 +17,9 @@ #include #include +#include #include -#include #include #include diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 9a8f87b4a46..f789d950e51 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -16,32 +16,15 @@ #pragma once -// We disable warning 611 because the `arrow::TableBatchReader` only partially -// override the `ReadNext` method of `arrow::RecordBatchReader::ReadNext` -// triggering warning 611-D from nvcc. -#ifdef __CUDACC__ -#pragma nv_diag_suppress 611 -#pragma nv_diag_suppress 2810 -#endif -#include - -#include -#ifdef __CUDACC__ -#pragma nv_diag_default 611 -#pragma nv_diag_default 2810 -#endif - #include #include #include #include #include #include +#include #include -#include -#include - #include struct DLManagedTensor; @@ -80,7 +63,7 @@ namespace CUDF_EXPORT cudf { */ std::unique_ptr
from_dlpack( DLManagedTensor const* managed_tensor, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Convert a cudf table into a DLPack DLTensor @@ -102,7 +85,7 @@ std::unique_ptr
from_dlpack( */ DLManagedTensor* to_dlpack( table_view const& input, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group @@ -131,59 +114,6 @@ struct column_metadata { column_metadata() = default; }; -/** - * @brief Create `arrow::Table` from cudf table `input` - * - * Converts the `cudf::table_view` to `arrow::Table` with the provided - * metadata `column_names`. - * - * @deprecated Since 24.08. Use cudf::to_arrow_host instead. - * - * @throws cudf::logic_error if `column_names` size doesn't match with number of columns. - * - * @param input table_view that needs to be converted to arrow Table - * @param metadata Contains hierarchy of names of columns and children - * @param stream CUDA stream used for device memory operations and kernel launches - * @param ar_mr arrow memory pool to allocate memory for arrow Table - * @return arrow Table generated from `input` - * - * @note For decimals, since the precision is not stored for them in libcudf, - * it will be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type - * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision - * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be - * converted to Arrow decimal128 of the precision 38. - */ -[[deprecated("Use cudf::to_arrow_host")]] std::shared_ptr to_arrow( - table_view input, - std::vector const& metadata = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); - -/** - * @brief Create `arrow::Scalar` from cudf scalar `input` - * - * Converts the `cudf::scalar` to `arrow::Scalar`. - * - * @deprecated Since 24.08. 
- * - * @param input scalar that needs to be converted to arrow Scalar - * @param metadata Contains hierarchy of names of columns and children - * @param stream CUDA stream used for device memory operations and kernel launches - * @param ar_mr arrow memory pool to allocate memory for arrow Scalar - * @return arrow Scalar generated from `input` - * - * @note For decimals, since the precision is not stored for them in libcudf, - * it will be converted to an Arrow decimal128 that has the widest-precision the cudf decimal type - * supports. For example, numeric::decimal32 will be converted to Arrow decimal128 of the precision - * 9 which is the maximum precision for 32-bit types. Similarly, numeric::decimal128 will be - * converted to Arrow decimal128 of the precision 38. - */ -[[deprecated("Use cudf::to_arrow_host")]] std::shared_ptr to_arrow( - cudf::scalar const& input, - column_metadata const& metadata = {}, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - arrow::MemoryPool* ar_mr = arrow::default_memory_pool()); - /** * @brief typedef for a unique_ptr to an ArrowSchema with custom deleter * @@ -241,7 +171,7 @@ unique_schema_t to_arrow_schema(cudf::table_view const& input, unique_device_array_t to_arrow_device( cudf::table&& table, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create `ArrowDeviceArray` from cudf column and metadata @@ -270,7 +200,7 @@ unique_device_array_t to_arrow_device( unique_device_array_t to_arrow_device( cudf::column&& col, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create `ArrowDeviceArray` from a table view @@ -302,7 +232,7 @@ unique_device_array_t 
to_arrow_device( unique_device_array_t to_arrow_device( cudf::table_view const& table, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create `ArrowDeviceArray` from a column view @@ -334,7 +264,7 @@ unique_device_array_t to_arrow_device( unique_device_array_t to_arrow_device( cudf::column_view const& col, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Copy table view data to host and create `ArrowDeviceArray` for it @@ -359,7 +289,7 @@ unique_device_array_t to_arrow_device( unique_device_array_t to_arrow_host( cudf::table_view const& table, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Copy column view data to host and create `ArrowDeviceArray` for it @@ -384,40 +314,7 @@ unique_device_array_t to_arrow_host( unique_device_array_t to_arrow_host( cudf::column_view const& col, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Create `cudf::table` from given arrow Table input - * - * @deprecated Since 24.08. Use cudf::from_arrow_host instead. - * - * @param input arrow:Table that needs to be converted to `cudf::table` - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate `cudf::table` - * @return cudf table generated from given arrow Table - */ -[[deprecated("Use cudf::from_arrow_host")]] std::unique_ptr
from_arrow( - arrow::Table const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Create `cudf::scalar` from given arrow Scalar input - * - * @deprecated Since 24.08. Use arrow's `MakeArrayFromScalar` on the - * input, followed by `ExportArray` to obtain something that can be - * consumed by `from_arrow_host`. Then use `cudf::get_element` to - * extract a device scalar from the column. - * - * @param input `arrow::Scalar` that needs to be converted to `cudf::scalar` - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate `cudf::scalar` - * @return cudf scalar generated from given arrow Scalar - */ -[[deprecated("See docstring for migration strategies")]] std::unique_ptr from_arrow( - arrow::Scalar const& input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create `cudf::table` from given ArrowArray and ArrowSchema input @@ -438,7 +335,7 @@ std::unique_ptr from_arrow( ArrowSchema const* schema, ArrowArray const* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create `cudf::column` from a given ArrowArray and ArrowSchema input @@ -457,7 +354,7 @@ std::unique_ptr from_arrow_column( ArrowSchema const* schema, ArrowArray const* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create `cudf::table` from given 
ArrowDeviceArray input @@ -481,7 +378,7 @@ std::unique_ptr
from_arrow_host( ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create `cudf::table` from given ArrowArrayStream input @@ -499,7 +396,7 @@ std::unique_ptr
from_arrow_host( std::unique_ptr
from_arrow_stream( ArrowArrayStream* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create `cudf::column` from given ArrowDeviceArray input @@ -522,7 +419,7 @@ std::unique_ptr from_arrow_host_column( ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief typedef for a vector of owning columns, used for conversion from ArrowDeviceArray @@ -603,7 +500,7 @@ unique_table_view_t from_arrow_device( ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief typedef for a unique_ptr to a `cudf::column_view` with custom deleter @@ -646,7 +543,7 @@ unique_column_view_t from_arrow_device_column( ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/arrow_io_source.hpp b/cpp/include/cudf/io/arrow_io_source.hpp deleted file mode 100644 index ed5c839cbb4..00000000000 --- a/cpp/include/cudf/io/arrow_io_source.hpp +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "datasource.hpp" - -#include - -#include -#include - -#include -#include -#include - -namespace CUDF_EXPORT cudf { -namespace io { -/** - * @addtogroup io_datasources - * @{ - * @file - */ - -/** - * @brief Implementation class for reading from an Apache Arrow file. The file - * could be a memory-mapped file or other implementation supported by Arrow. - */ -class arrow_io_source : public datasource { - public: - /** - * @brief Constructs an object from an Apache Arrow Filesystem URI - * - * @param arrow_uri Apache Arrow Filesystem URI - */ - explicit arrow_io_source(std::string const& arrow_uri); - - /** - * @brief Constructs an object from an `arrow` source object. - * - * @param file The `arrow` object from which the data is read - */ - explicit arrow_io_source(std::shared_ptr file) - : arrow_file(std::move(file)) - { - } - - /** - * @brief Returns a buffer with a subset of data from the `arrow` source. - * - * @param offset The offset in bytes from which to read - * @param size The number of bytes to read - * @return A buffer with the read data - */ - std::unique_ptr host_read(size_t offset, size_t size) override; - - /** - * @brief Reads a selected range from the `arrow` source into a preallocated buffer. 
- * - * @param[in] offset The offset in bytes from which to read - * @param[in] size The number of bytes to read - * @param[out] dst The preallocated buffer to read into - * @return The number of bytes read - */ - size_t host_read(size_t offset, size_t size, uint8_t* dst) override; - /** - * @brief Returns the size of the data in the `arrow` source. - * - * @return The size of the data in the `arrow` source - */ - [[nodiscard]] size_t size() const override; - - private: - std::shared_ptr filesystem; - std::shared_ptr arrow_file; -}; - -/** @} */ // end of group -} // namespace io -} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/avro.hpp b/cpp/include/cudf/io/avro.hpp index 63f9ea3a624..b307d05c09d 100644 --- a/cpp/include/cudf/io/avro.hpp +++ b/cpp/include/cudf/io/avro.hpp @@ -20,9 +20,7 @@ #include #include - -#include -#include +#include #include #include @@ -217,7 +215,7 @@ class avro_reader_options_builder { */ table_with_metadata read_avro( avro_reader_options const& options, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace io diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index bbb4636a5a3..dae056ef157 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -20,9 +20,7 @@ #include #include #include - -#include -#include +#include #include #include @@ -1354,7 +1352,7 @@ class csv_reader_options_builder { table_with_metadata read_csv( csv_reader_options options, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group /** diff --git a/cpp/include/cudf/io/detail/avro.hpp b/cpp/include/cudf/io/detail/avro.hpp index 13f695d6866..ab6cb422296 100644 --- 
a/cpp/include/cudf/io/detail/avro.hpp +++ b/cpp/include/cudf/io/detail/avro.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace io::detail::avro { diff --git a/cpp/include/cudf/io/detail/batched_memset.hpp b/cpp/include/cudf/io/detail/batched_memset.hpp new file mode 100644 index 00000000000..1c74be4a9fe --- /dev/null +++ b/cpp/include/cudf/io/detail/batched_memset.hpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace CUDF_EXPORT cudf { +namespace io::detail { + +/** + * @brief A helper function that takes in a vector of device spans and memsets them to the + * value provided using batches sent to the GPU. 
+ * + * @param bufs Vector with device spans of data + * @param value Value to memset all device spans to + * @param _stream Stream used for device memory operations and kernel launches + * + * @return The data in device spans all set to value + */ +template +void batched_memset(std::vector> const& bufs, + T const value, + rmm::cuda_stream_view stream) +{ + // define task and bytes parameters + auto const num_bufs = bufs.size(); + + // copy bufs into device memory and then get sizes + auto gpu_bufs = + cudf::detail::make_device_uvector_async(bufs, stream, cudf::get_current_device_resource_ref()); + + // get a vector with the sizes of all buffers + auto sizes = cudf::detail::make_counting_transform_iterator( + static_cast(0), + cuda::proclaim_return_type( + [gpu_bufs = gpu_bufs.data()] __device__(std::size_t i) { return gpu_bufs[i].size(); })); + + // get an iterator with a constant value to memset + auto iter_in = thrust::make_constant_iterator(thrust::make_constant_iterator(value)); + + // get an iterator pointing to each device span + auto iter_out = thrust::make_transform_iterator( + thrust::counting_iterator(0), + cuda::proclaim_return_type( + [gpu_bufs = gpu_bufs.data()] __device__(std::size_t i) { return gpu_bufs[i].data(); })); + + size_t temp_storage_bytes = 0; + + cub::DeviceCopy::Batched(nullptr, temp_storage_bytes, iter_in, iter_out, sizes, num_bufs, stream); + + rmm::device_buffer d_temp_storage( + temp_storage_bytes, stream, cudf::get_current_device_resource_ref()); + + cub::DeviceCopy::Batched( + d_temp_storage.data(), temp_storage_bytes, iter_in, iter_out, sizes, num_bufs, stream); +} + +} // namespace io::detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp index d4cad2f70fd..409663938a9 100644 --- a/cpp/include/cudf/io/detail/csv.hpp +++ b/cpp/include/cudf/io/detail/csv.hpp @@ -18,9 +18,9 @@ #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace 
io::detail::csv { diff --git a/cpp/include/cudf/io/detail/json.hpp b/cpp/include/cudf/io/detail/json.hpp index 42b10a78ce8..940d03cdb41 100644 --- a/cpp/include/cudf/io/detail/json.hpp +++ b/cpp/include/cudf/io/detail/json.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace io::json::detail { @@ -61,7 +61,7 @@ void write_json(data_sink* sink, * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation */ -void normalize_single_quotes(datasource::owning_buffer>& indata, +void normalize_single_quotes(datasource::owning_buffer& indata, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); @@ -69,11 +69,21 @@ void normalize_single_quotes(datasource::owning_buffer * @brief Normalize unquoted whitespace (space and tab characters) using FST * * @param indata Input device buffer + * @param col_offsets Offsets to column contents in input buffer + * @param col_lengths Length of contents of each row in column * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource to use for device memory allocation + * + * @returns Tuple of the normalized column, offsets to each row in column, and lengths of contents + * of each row */ -void normalize_whitespace(datasource::owning_buffer>& indata, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + normalize_whitespace(device_span d_input, + device_span col_offsets, + device_span col_lengths, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + } // namespace io::json::detail } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp index 7538cf7d29c..4a240d76696 100644 --- a/cpp/include/cudf/io/detail/orc.hpp +++ b/cpp/include/cudf/io/detail/orc.hpp @@ -22,9 
+22,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp index a6945e0b7ab..1528ac0124a 100644 --- a/cpp/include/cudf/io/detail/parquet.hpp +++ b/cpp/include/cudf/io/detail/parquet.hpp @@ -25,10 +25,9 @@ #include #include #include +#include #include -#include -#include #include #include diff --git a/cpp/include/cudf/io/detail/tokenize_json.hpp b/cpp/include/cudf/io/detail/tokenize_json.hpp index 715eb855daa..a5b5caf300f 100644 --- a/cpp/include/cudf/io/detail/tokenize_json.hpp +++ b/cpp/include/cudf/io/detail/tokenize_json.hpp @@ -18,11 +18,11 @@ #include #include +#include #include #include #include -#include namespace cudf::io::json { diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 0cb39d15cd5..6798557e14e 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -20,9 +20,8 @@ #include #include - -#include -#include +#include +#include #include #include @@ -106,6 +105,8 @@ class json_reader_options { char _delimiter = '\n'; // Prune columns on read, selected based on the _dtypes option bool _prune_columns = false; + // Experimental features: new column tree construction + bool _experimental = false; // Bytes to skip from the start size_t _byte_range_offset = 0; @@ -130,6 +131,19 @@ class json_reader_options { // Whether to recover after an invalid JSON line json_recovery_mode_t _recovery_mode = json_recovery_mode_t::FAIL; + // Validation checks for spark + // Should the json validation be strict or not + // Note: strict validation enforces the JSON specification https://www.json.org/json-en.html + bool _strict_validation = false; + // Allow leading zeros for numeric values. 
+ bool _allow_numeric_leading_zeros = true; + // Allow non-numeric numbers: NaN, +INF, -INF, +Infinity, Infinity, -Infinity + bool _allow_nonnumeric_numbers = true; + // Allow unquoted control characters + bool _allow_unquoted_control_chars = true; + // Additional values to recognize as null values + std::vector _na_values; + /** * @brief Constructor from source info. * @@ -265,6 +279,15 @@ class json_reader_options { */ [[nodiscard]] bool is_enabled_prune_columns() const { return _prune_columns; } + /** + * @brief Whether to enable experimental features. + * + * When set to true, experimental features, such as the new column tree construction, + * utf-8 matching of field names will be enabled. + * @return true if experimental features are enabled + */ + [[nodiscard]] bool is_enabled_experimental() const { return _experimental; } + /** * @brief Whether to parse dates as DD/MM versus MM/DD. * @@ -300,6 +323,55 @@ class json_reader_options { */ [[nodiscard]] json_recovery_mode_t recovery_mode() const { return _recovery_mode; } + /** + * @brief Whether json validation should be enforced strictly or not. + * + * @return true if it should be. + */ + [[nodiscard]] bool is_strict_validation() const { return _strict_validation; } + + /** + * @brief Whether leading zeros are allowed in numeric values. + * + * @note: This validation is enforced only if strict validation is enabled. + * + * @return true if leading zeros are allowed in numeric values + */ + [[nodiscard]] bool is_allowed_numeric_leading_zeros() const + { + return _allow_numeric_leading_zeros; + } + + /** + * @brief Whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, Infinity, + * and -Infinity. + * + * @note: This validation is enforced only if strict validation is enabled. 
+ * + * @return true if leading zeros are allowed in numeric values + */ + [[nodiscard]] bool is_allowed_nonnumeric_numbers() const { return _allow_nonnumeric_numbers; } + + /** + * @brief Whether in a quoted string should characters greater than or equal to 0 and less than 32 + * be allowed without some form of escaping. + * + * @note: This validation is enforced only if strict validation is enabled. + * + * @return true if unquoted control chars are allowed. + */ + [[nodiscard]] bool is_allowed_unquoted_control_chars() const + { + return _allow_unquoted_control_chars; + } + + /** + * @brief Returns additional values to recognize as null values. + * + * @return Additional values to recognize as null values + */ + [[nodiscard]] std::vector const& get_na_values() const { return _na_values; } + /** * @brief Set data types for columns to be read. * @@ -392,6 +464,16 @@ class json_reader_options { */ void enable_prune_columns(bool val) { _prune_columns = val; } + /** + * @brief Set whether to enable experimental features. + * + * When set to true, experimental features, such as the new column tree construction, + * utf-8 matching of field names will be enabled. + * + * @param val Boolean value to enable/disable experimental features + */ + void enable_experimental(bool val) { _experimental = val; } + /** * @brief Set whether to parse dates as DD/MM versus MM/DD. * @@ -429,6 +511,63 @@ class json_reader_options { * @param val An enum value to indicate the JSON reader's behavior on invalid JSON lines. */ void set_recovery_mode(json_recovery_mode_t val) { _recovery_mode = val; } + + /** + * @brief Set whether strict validation is enabled or not. + * + * @param val Boolean value to indicate whether strict validation is enabled. + */ + void set_strict_validation(bool val) { _strict_validation = val; } + + /** + * @brief Set whether leading zeros are allowed in numeric values. Strict validation + * must be enabled for this to work. 
+ * + * @throw cudf::logic_error if `strict_validation` is not enabled before setting this option. + * + * @param val Boolean value to indicate whether leading zeros are allowed in numeric values + */ + void allow_numeric_leading_zeros(bool val) + { + CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work."); + _allow_numeric_leading_zeros = val; + } + + /** + * @brief Set whether unquoted number values should be allowed NaN, +INF, -INF, +Infinity, + * Infinity, and -Infinity. Strict validation must be enabled for this to work. + * + * @throw cudf::logic_error if `strict_validation` is not enabled before setting this option. + * + * @param val Boolean value to indicate whether leading zeros are allowed in numeric values + */ + void allow_nonnumeric_numbers(bool val) + { + CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work."); + _allow_nonnumeric_numbers = val; + } + + /** + * @brief Set whether in a quoted string should characters greater than or equal to 0 + * and less than 32 be allowed without some form of escaping. Strict validation must + * be enabled for this to work. + * + * @throw cudf::logic_error if `strict_validation` is not enabled before setting this option. + * + * @param val true to indicate whether unquoted control chars are allowed. + */ + void allow_unquoted_control_chars(bool val) + { + CUDF_EXPECTS(_strict_validation, "Strict validation must be enabled for this to work."); + _allow_unquoted_control_chars = val; + } + + /** + * @brief Sets additional values to recognize as null values. + * + * @param vals Vector of values to be considered to be null + */ + void set_na_values(std::vector vals) { _na_values = std::move(vals); } }; /** @@ -577,6 +716,21 @@ class json_reader_options_builder { return *this; } + /** + * @brief Set whether to enable experimental features. 
+ * + * When set to true, experimental features, such as the new column tree construction, + * utf-8 matching of field names will be enabled. + * + * @param val Boolean value to enable/disable experimental features + * @return this for chaining + */ + json_reader_options_builder& experimental(bool val) + { + options._experimental = val; + return *this; + } + /** * @brief Set whether to parse dates as DD/MM versus MM/DD. * @@ -640,6 +794,76 @@ class json_reader_options_builder { return *this; } + /** + * @brief Set whether json validation should be strict or not. + * + * @param val Boolean value to indicate whether json validation should be strict or not. + * @return this for chaining + */ + json_reader_options_builder& strict_validation(bool val) + { + options.set_strict_validation(val); + return *this; + } + + /** + * @brief Set Whether leading zeros are allowed in numeric values. Strict validation must + * be enabled for this to have any effect. + * + * @throw cudf::logic_error if `strict_validation` is not enabled before setting this option. + * + * @param val Boolean value to indicate whether leading zeros are allowed in numeric values + * @return this for chaining + */ + json_reader_options_builder& numeric_leading_zeros(bool val) + { + options.allow_numeric_leading_zeros(val); + return *this; + } + + /** + * @brief Set whether specific unquoted number values are valid JSON. The values are NaN, + * +INF, -INF, +Infinity, Infinity, and -Infinity. + * Strict validation must be enabled for this to have any effect. + * + * @throw cudf::logic_error if `strict_validation` is not enabled before setting this option. + * + * @param val Boolean value to indicate if unquoted nonnumeric values are valid json or not. 
+ * @return this for chaining + */ + json_reader_options_builder& nonnumeric_numbers(bool val) + { + options.allow_nonnumeric_numbers(val); + return *this; + } + + /** + * @brief Set whether chars >= 0 and < 32 are allowed in a quoted string without + * some form of escaping. Strict validation must be enabled for this to have any effect. + * + * @throw cudf::logic_error if `strict_validation` is not enabled before setting this option. + * + * @param val Boolean value to indicate if unquoted control chars are allowed or not. + * @return this for chaining + */ + json_reader_options_builder& unquoted_control_chars(bool val) + { + options.allow_unquoted_control_chars(val); + return *this; + } + + /** + * @brief Sets additional values to recognize as null values. + * + * @param vals Vector of values to be considered to be null + * @return this for chaining + */ + json_reader_options_builder& na_values(std::vector vals) + { + options.set_na_values(std::move(vals)); + return *this; + } + /** * @brief move json_reader_options member once it's built. 
*/ @@ -675,7 +899,7 @@ class json_reader_options_builder { table_with_metadata read_json( json_reader_options options, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group @@ -696,6 +920,8 @@ class json_writer_options_builder; class json_writer_options { // Specify the sink to use for writer output sink_info _sink; + // maximum number of rows to write in each chunk (limits memory use) + size_type _rows_per_chunk = std::numeric_limits::max(); // Set of columns to output table_view _table; // string to use for null entries @@ -704,8 +930,6 @@ class json_writer_options { bool _include_nulls = false; // Indicates whether to use JSON lines for records format bool _lines = false; - // maximum number of rows to write in each chunk (limits memory use) - size_type _rows_per_chunk = std::numeric_limits::max(); // string to use for values != 0 in INT8 types (default 'true') std::string _true_value = std::string{"true"}; // string to use for values == 0 in INT8 types (default 'false') @@ -720,7 +944,7 @@ class json_writer_options { * @param table Table to be written to output */ explicit json_writer_options(sink_info sink, table_view table) - : _sink(std::move(sink)), _table(std::move(table)), _rows_per_chunk(table.num_rows()) + : _sink(std::move(sink)), _rows_per_chunk(table.num_rows()), _table(std::move(table)) { } diff --git a/cpp/include/cudf/io/nvcomp_adapter.hpp b/cpp/include/cudf/io/nvcomp_adapter.hpp index f3260d0cb53..0d74a4158ad 100644 --- a/cpp/include/cudf/io/nvcomp_adapter.hpp +++ b/cpp/include/cudf/io/nvcomp_adapter.hpp @@ -24,7 +24,7 @@ namespace CUDF_EXPORT cudf { namespace io::nvcomp { -enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4 }; +enum class compression_type { SNAPPY, ZSTD, DEFLATE, LZ4, GZIP }; /** * @brief Set of parameters that impact whether nvCOMP 
features are enabled. @@ -36,33 +36,20 @@ struct feature_status_parameters { int lib_patch_version; ///< patch version bool are_all_integrations_enabled; ///< all integrations bool are_stable_integrations_enabled; ///< stable integrations - int compute_capability_major; ///< cuda compute major version /** - * @brief Default Constructor + * @brief Default constructor using the current version of nvcomp and current environment + * variables */ feature_status_parameters(); /** - * @brief feature_status_parameters Constructor + * @brief Constructor using the current version of nvcomp * - * @param major positive integer representing major value of nvcomp - * @param minor positive integer representing minor value of nvcomp - * @param patch positive integer representing patch value of nvcomp * @param all_enabled if all integrations are enabled * @param stable_enabled if stable integrations are enabled - * @param cc_major CUDA compute capability */ - feature_status_parameters( - int major, int minor, int patch, bool all_enabled, bool stable_enabled, int cc_major) - : lib_major_version{major}, - lib_minor_version{minor}, - lib_patch_version{patch}, - are_all_integrations_enabled{all_enabled}, - are_stable_integrations_enabled{stable_enabled}, - compute_capability_major{cc_major} - { - } + feature_status_parameters(bool all_enabled, bool stable_enabled); }; /** @@ -74,8 +61,7 @@ inline bool operator==(feature_status_parameters const& lhs, feature_status_para lhs.lib_minor_version == rhs.lib_minor_version and lhs.lib_patch_version == rhs.lib_patch_version and lhs.are_all_integrations_enabled == rhs.are_all_integrations_enabled and - lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled and - lhs.compute_capability_major == rhs.compute_capability_major; + lhs.are_stable_integrations_enabled == rhs.are_stable_integrations_enabled; } /** diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp index 8d484b15872..163fa20806d 100644 --- 
a/cpp/include/cudf/io/orc.hpp +++ b/cpp/include/cudf/io/orc.hpp @@ -21,9 +21,7 @@ #include #include #include - -#include -#include +#include #include #include @@ -409,7 +407,7 @@ class orc_reader_options_builder { table_with_metadata read_orc( orc_reader_options const& options, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief The chunked orc reader class to read an ORC file iteratively into a series of @@ -479,7 +477,7 @@ class chunked_orc_reader { size_type output_row_granularity, orc_reader_options const& options, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct the reader from input/output size limits along with other ORC reader options. @@ -500,7 +498,7 @@ class chunked_orc_reader { std::size_t pass_read_limit, orc_reader_options const& options, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct the reader from output size limits along with other ORC reader options. @@ -518,7 +516,7 @@ class chunked_orc_reader { std::size_t chunk_read_limit, orc_reader_options const& options, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Destructor, destroying the internal reader instance. 
diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp index 12897ac77ef..ee03a382bec 100644 --- a/cpp/include/cudf/io/parquet.hpp +++ b/cpp/include/cudf/io/parquet.hpp @@ -22,9 +22,7 @@ #include #include #include - -#include -#include +#include #include #include @@ -41,8 +39,9 @@ namespace io { * @file */ -constexpr size_t default_row_group_size_bytes = 128 * 1024 * 1024; ///< 128MB per row group -constexpr size_type default_row_group_size_rows = 1000000; ///< 1 million rows per row group +constexpr size_t default_row_group_size_bytes = + std::numeric_limits::max(); ///< Infinite bytes per row group +constexpr size_type default_row_group_size_rows = 1'000'000; ///< 1 million rows per row group constexpr size_t default_max_page_size_bytes = 512 * 1024; ///< 512KB per page constexpr size_type default_max_page_size_rows = 20000; ///< 20k rows per page constexpr int32_t default_column_index_truncate_length = 64; ///< truncate to 64 bytes @@ -76,6 +75,8 @@ class parquet_reader_options { bool _use_pandas_metadata = true; // Whether to read and use ARROW schema bool _use_arrow_schema = true; + // Whether to allow reading matching select columns from mismatched Parquet files. + bool _allow_mismatched_pq_schemas = false; // Cast timestamp columns to a specific type data_type _timestamp_type{type_id::EMPTY}; @@ -138,6 +139,18 @@ class parquet_reader_options { */ [[nodiscard]] bool is_enabled_use_arrow_schema() const { return _use_arrow_schema; } + /** + * @brief Returns true/false depending on whether to read matching projected and filter columns + * from mismatched Parquet sources. + * + * @return `true` if mismatched projected and filter columns will be read from mismatched Parquet + * sources. + */ + [[nodiscard]] bool is_enabled_allow_mismatched_pq_schemas() const + { + return _allow_mismatched_pq_schemas; + } + /** * @brief Returns optional tree of metadata. 
* @@ -258,6 +271,15 @@ class parquet_reader_options { */ void enable_use_arrow_schema(bool val) { _use_arrow_schema = val; } + /** + * @brief Sets to enable/disable reading of matching projected and filter columns from mismatched + * Parquet sources. + * + * @param val Boolean value whether to read matching projected and filter columns from mismatched + * Parquet sources. + */ + void enable_allow_mismatched_pq_schemas(bool val) { _allow_mismatched_pq_schemas = val; } + /** * @brief Sets reader column schema. * @@ -382,6 +404,20 @@ class parquet_reader_options_builder { return *this; } + /** + * @brief Sets to enable/disable reading of matching projected and filter columns from mismatched + * Parquet sources. + * + * @param val Boolean value whether to read matching projected and filter columns from mismatched + * Parquet sources. + * @return this for chaining. + */ + parquet_reader_options_builder& allow_mismatched_pq_schemas(bool val) + { + options._allow_mismatched_pq_schemas = val; + return *this; + } + /** * @brief Sets reader metadata. * @@ -465,7 +501,7 @@ class parquet_reader_options_builder { table_with_metadata read_parquet( parquet_reader_options const& options, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief The chunked parquet reader class to read Parquet file iteratively in to a series of @@ -503,7 +539,7 @@ class chunked_parquet_reader { std::size_t chunk_read_limit, parquet_reader_options const& options, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Constructor for chunked reader. 
@@ -529,7 +565,7 @@ class chunked_parquet_reader { std::size_t pass_read_limit, parquet_reader_options const& options, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Destructor, destroying the internal reader instance. diff --git a/cpp/include/cudf/io/text/byte_range_info.hpp b/cpp/include/cudf/io/text/byte_range_info.hpp index 7e9256be1d3..5f3c91dc99c 100644 --- a/cpp/include/cudf/io/text/byte_range_info.hpp +++ b/cpp/include/cudf/io/text/byte_range_info.hpp @@ -16,7 +16,6 @@ #pragma once -#include #include #include @@ -40,53 +39,49 @@ class byte_range_info { int64_t _size{}; ///< size in bytes public: - constexpr byte_range_info() = default; + byte_range_info() = default; /** * @brief Constructs a byte_range_info object * * @param offset offset in bytes * @param size size in bytes */ - constexpr byte_range_info(int64_t offset, int64_t size) : _offset(offset), _size(size) - { - CUDF_EXPECTS(offset >= 0, "offset must be non-negative"); - CUDF_EXPECTS(size >= 0, "size must be non-negative"); - } + byte_range_info(int64_t offset, int64_t size); /** * @brief Copy constructor * * @param other byte_range_info object to copy */ - constexpr byte_range_info(byte_range_info const& other) noexcept = default; + byte_range_info(byte_range_info const& other) noexcept = default; /** * @brief Copy assignment operator * * @param other byte_range_info object to copy * @return this object after copying */ - constexpr byte_range_info& operator=(byte_range_info const& other) noexcept = default; + byte_range_info& operator=(byte_range_info const& other) noexcept = default; /** * @brief Get the offset in bytes * * @return Offset in bytes */ - [[nodiscard]] constexpr int64_t offset() { return _offset; } + [[nodiscard]] int64_t offset() const { return _offset; } /** * @brief Get the size in bytes * * @return 
Size in bytes */ - [[nodiscard]] constexpr int64_t size() { return _size; } + [[nodiscard]] int64_t size() const { return _size; } /** * @brief Returns whether the span is empty. * - * @return true iff the span is empty, i.e. `size() == 0` + * @return true iff the range is empty, i.e. `size() == 0` */ - [[nodiscard]] constexpr bool empty() { return size() == 0; } + [[nodiscard]] bool is_empty() const { return size() == 0; } }; /** diff --git a/cpp/include/cudf/io/text/detail/trie.hpp b/cpp/include/cudf/io/text/detail/trie.hpp index eee3fefc79f..70e06eeac93 100644 --- a/cpp/include/cudf/io/text/detail/trie.hpp +++ b/cpp/include/cudf/io/text/detail/trie.hpp @@ -22,7 +22,6 @@ #include #include -#include #include #include diff --git a/cpp/include/cudf/io/text/multibyte_split.hpp b/cpp/include/cudf/io/text/multibyte_split.hpp index 8624a386d0f..99f9e7534ac 100644 --- a/cpp/include/cudf/io/text/multibyte_split.hpp +++ b/cpp/include/cudf/io/text/multibyte_split.hpp @@ -19,10 +19,9 @@ #include #include #include +#include #include -#include -#include #include #include @@ -94,27 +93,7 @@ std::unique_ptr multibyte_split( std::string const& delimiter, parse_options options = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); - -/** - * @brief Splits the source text into a strings column using a multiple byte delimiter. - * - * @deprecated Since 24.08 - * - * @param source The source input data encoded in UTF-8 - * @param delimiter UTF-8 encoded string for which to find offsets in the source - * @param byte_range The position and size within `source` to produce the column from - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Memory resource to use for the device memory allocation - * @return The strings found by splitting the source by the delimiter within the relevant byte - * range. 
- */ -[[deprecated]] std::unique_ptr multibyte_split( - data_chunk_source const& source, - std::string const& delimiter, - std::optional byte_range, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp index 3df737413fa..a34881942ce 100644 --- a/cpp/include/cudf/io/types.hpp +++ b/cpp/include/cudf/io/types.hpp @@ -54,7 +54,7 @@ namespace io { /** * @brief Compression algorithms */ -enum class compression_type { +enum class compression_type : int32_t { NONE, ///< No compression AUTO, ///< Automatically detect or select compression format SNAPPY, ///< Snappy format, using byte-oriented LZ77 @@ -72,7 +72,7 @@ enum class compression_type { /** * @brief Data source or destination types */ -enum class io_type { +enum class io_type : int32_t { FILEPATH, ///< Input/output is a file path HOST_BUFFER, ///< Input/output is a buffer in host memory DEVICE_BUFFER, ///< Input/output is a buffer in device memory @@ -83,7 +83,7 @@ enum class io_type { /** * @brief Behavior when handling quotations in field data */ -enum class quote_style { +enum class quote_style : int32_t { MINIMAL, ///< Quote only fields which contain special characters ALL, ///< Quote all fields NONNUMERIC, ///< Quote all non-numeric fields @@ -93,7 +93,7 @@ enum class quote_style { /** * @brief Column statistics granularity type for parquet/orc writers */ -enum statistics_freq { +enum statistics_freq : int32_t { STATISTICS_NONE = 0, ///< No column statistics STATISTICS_ROWGROUP = 1, ///< Per-Rowgroup column statistics STATISTICS_PAGE = 2, ///< Per-page column statistics @@ -103,7 +103,7 @@ enum statistics_freq { /** * @brief Valid encodings for use with `column_in_metadata::set_encoding()` */ -enum class column_encoding { +enum class 
column_encoding : int32_t { // Common encodings: USE_DEFAULT = -1, ///< No encoding has been requested, use default encoding DICTIONARY, ///< Use dictionary encoding @@ -222,7 +222,7 @@ class writer_compression_statistics { /** * @brief Control use of dictionary encoding for parquet writer */ -enum dictionary_policy { +enum dictionary_policy : int32_t { NEVER = 0, ///< Never use dictionary encoding ADAPTIVE = 1, ///< Use dictionary when it will not impact compression ALWAYS = 2 ///< Use dictionary regardless of impact on compression diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index f4139721475..a590eb27511 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -22,12 +22,11 @@ #include #include #include +#include #include #include #include -#include -#include #include #include @@ -98,6 +97,7 @@ class distinct_hash_join; * @param[in] right_keys The right table * @param[in] compare_nulls controls whether null join-key values * should match or not. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -109,7 +109,8 @@ std::pair>, inner_join(cudf::table_view const& left_keys, cudf::table_view const& right_keys, null_equality compare_nulls = null_equality::EQUAL, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a pair of row index vectors corresponding to a @@ -138,6 +139,7 @@ inner_join(cudf::table_view const& left_keys, * @param[in] right_keys The right table * @param[in] compare_nulls controls whether null join-key values * should match or not. 
+ * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -149,7 +151,8 @@ std::pair>, left_join(cudf::table_view const& left_keys, cudf::table_view const& right_keys, null_equality compare_nulls = null_equality::EQUAL, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a pair of row index vectors corresponding to a @@ -177,6 +180,7 @@ left_join(cudf::table_view const& left_keys, * @param[in] right_keys The right table * @param[in] compare_nulls controls whether null join-key values * should match or not. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -188,7 +192,8 @@ std::pair>, full_join(cudf::table_view const& left_keys, cudf::table_view const& right_keys, null_equality compare_nulls = null_equality::EQUAL, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a vector of row indices corresponding to a left semi-join @@ -206,6 +211,7 @@ full_join(cudf::table_view const& left_keys, * @param left_keys The left table * @param right_keys The right table * @param compare_nulls Controls whether null join-key values should match or not + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource 
used to allocate the returned table and columns' device memory * * @return A vector `left_indices` that can be used to construct @@ -216,7 +222,8 @@ std::unique_ptr> left_semi_join( cudf::table_view const& left_keys, cudf::table_view const& right_keys, null_equality compare_nulls = null_equality::EQUAL, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a vector of row indices corresponding to a left anti join @@ -237,6 +244,7 @@ std::unique_ptr> left_semi_join( * @param[in] right_keys The right table * @param[in] compare_nulls controls whether null join-key values * should match or not. + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A column `left_indices` that can be used to construct @@ -247,7 +255,8 @@ std::unique_ptr> left_anti_join( cudf::table_view const& left_keys, cudf::table_view const& right_keys, null_equality compare_nulls = null_equality::EQUAL, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs a cross join on two tables (`left`, `right`) @@ -267,6 +276,7 @@ std::unique_ptr> left_anti_join( * * @param left The left table * @param right The right table + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory * * @return Result of cross joining `left` and `right` tables @@ -274,7 +284,8 @@ std::unique_ptr> left_anti_join( std::unique_ptr cross_join( cudf::table_view const& left, cudf::table_view const& right, - 
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief The enum class to specify if any of the input join tables (`build` table and any later @@ -353,7 +364,7 @@ class hash_join { inner_join(cudf::table_view const& probe, std::optional output_size = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; /** * Returns the row indices that can be used to construct the result of performing @@ -378,7 +389,7 @@ class hash_join { left_join(cudf::table_view const& probe, std::optional output_size = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; /** * Returns the row indices that can be used to construct the result of performing @@ -403,7 +414,7 @@ class hash_join { full_join(cudf::table_view const& probe, std::optional output_size = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; /** * Returns the exact number of matches (rows) when performing an inner join with the specified @@ -455,7 +466,7 @@ class hash_join { [[nodiscard]] std::size_t full_join_size( cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; private: const std::unique_ptr _impl; @@ -511,7 
+522,7 @@ class distinct_hash_join { [[nodiscard]] std::pair>, std::unique_ptr>> inner_join(rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; /** * @brief Returns the build table indices that can be used to construct the result of performing @@ -530,7 +541,7 @@ class distinct_hash_join { */ [[nodiscard]] std::unique_ptr> left_join( rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) const; + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) const; private: using impl_type = typename cudf::detail::distinct_hash_join; ///< Implementation type @@ -568,6 +579,7 @@ class distinct_hash_join { * @param right The right table * @param binary_predicate The condition on which to join * @param output_size Optional value which allows users to specify the exact output size + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -579,7 +591,8 @@ conditional_inner_join(table_view const& left, table_view const& right, ast::expression const& binary_predicate, std::optional output_size = {}, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a pair of row index vectors corresponding to all pairs @@ -613,6 +626,7 @@ conditional_inner_join(table_view const& left, * @param right The right table * @param binary_predicate The condition on which to join * @param output_size Optional value which 
allows users to specify the exact output size + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -624,7 +638,8 @@ conditional_left_join(table_view const& left, table_view const& right, ast::expression const& binary_predicate, std::optional output_size = {}, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a pair of row index vectors corresponding to all pairs @@ -656,6 +671,7 @@ conditional_left_join(table_view const& left, * @param left The left table * @param right The right table * @param binary_predicate The condition on which to join + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -666,7 +682,8 @@ std::pair>, conditional_full_join(table_view const& left, table_view const& right, ast::expression const& binary_predicate, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns an index vector corresponding to all rows in the left table @@ -694,6 +711,7 @@ conditional_full_join(table_view const& left, * @param right The right table * @param binary_predicate The condition on which to join * @param output_size Optional value which allows users to specify the exact output size + * @param stream CUDA stream used for device memory operations and 
kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A vector `left_indices` that can be used to construct the result of @@ -705,7 +723,8 @@ std::unique_ptr> conditional_left_semi_join( table_view const& right, ast::expression const& binary_predicate, std::optional output_size = {}, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns an index vector corresponding to all rows in the left table @@ -733,6 +752,7 @@ std::unique_ptr> conditional_left_semi_join( * @param right The right table * @param binary_predicate The condition on which to join * @param output_size Optional value which allows users to specify the exact output size + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A vector `left_indices` that can be used to construct the result of @@ -744,7 +764,8 @@ std::unique_ptr> conditional_left_anti_join( table_view const& right, ast::expression const& binary_predicate, std::optional output_size = {}, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a pair of row index vectors corresponding to all pairs of @@ -787,6 +808,7 @@ std::unique_ptr> conditional_left_anti_join( * @param output_size_data An optional pair of values indicating the exact output size and the * number of matches for each row in the larger of the two input tables, left or right (may be * precomputed using the corresponding mixed_inner_join_size API). 
+ * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -802,7 +824,8 @@ mixed_inner_join( ast::expression const& binary_predicate, null_equality compare_nulls = null_equality::EQUAL, std::optional>> output_size_data = {}, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a pair of row index vectors corresponding to all pairs of @@ -847,6 +870,7 @@ mixed_inner_join( * @param output_size_data An optional pair of values indicating the exact output size and the * number of matches for each row in the larger of the two input tables, left or right (may be * precomputed using the corresponding mixed_left_join_size API). 
+ * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -862,7 +886,8 @@ mixed_left_join( ast::expression const& binary_predicate, null_equality compare_nulls = null_equality::EQUAL, std::optional>> output_size_data = {}, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a pair of row index vectors corresponding to all pairs of @@ -907,6 +932,7 @@ mixed_left_join( * @param output_size_data An optional pair of values indicating the exact output size and the * number of matches for each row in the larger of the two input tables, left or right (may be * precomputed using the corresponding mixed_full_join_size API). 
+ * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -922,7 +948,8 @@ mixed_full_join( ast::expression const& binary_predicate, null_equality compare_nulls = null_equality::EQUAL, std::optional>> output_size_data = {}, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns an index vector corresponding to all rows in the left tables @@ -957,6 +984,7 @@ mixed_full_join( * @param right_conditional The right table used for the conditional join * @param binary_predicate The condition on which to join * @param compare_nulls Whether or not null values join to each other or not + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -969,7 +997,8 @@ std::unique_ptr> mixed_left_semi_join( table_view const& right_conditional, ast::expression const& binary_predicate, null_equality compare_nulls = null_equality::EQUAL, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns an index vector corresponding to all rows in the left tables @@ -1005,6 +1034,7 @@ std::unique_ptr> mixed_left_semi_join( * @param right_conditional The right table used for the conditional join * @param binary_predicate The condition on which to join * @param compare_nulls Whether or not null values 
join to each other or not + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair of vectors [`left_indices`, `right_indices`] that can be used to construct @@ -1017,7 +1047,8 @@ std::unique_ptr> mixed_left_anti_join( table_view const& right_conditional, ast::expression const& binary_predicate, null_equality compare_nulls = null_equality::EQUAL, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the exact number of matches (rows) when performing a @@ -1042,6 +1073,7 @@ std::unique_ptr> mixed_left_anti_join( * @param right_conditional The right table used for the conditional join * @param binary_predicate The condition on which to join * @param compare_nulls Whether or not null values join to each other or not + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair containing the size that would result from performing the @@ -1057,7 +1089,8 @@ std::pair>> mixed_in table_view const& right_conditional, ast::expression const& binary_predicate, null_equality compare_nulls = null_equality::EQUAL, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the exact number of matches (rows) when performing a @@ -1082,6 +1115,7 @@ std::pair>> mixed_in * @param right_conditional The right table used for the conditional join * @param binary_predicate The condition on which to join * @param compare_nulls Whether or not null 
values join to each other or not + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return A pair containing the size that would result from performing the @@ -1097,7 +1131,8 @@ std::pair>> mixed_le table_view const& right_conditional, ast::expression const& binary_predicate, null_equality compare_nulls = null_equality::EQUAL, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the exact number of matches (rows) when performing a @@ -1112,6 +1147,7 @@ std::pair>> mixed_le * @param left The left table * @param right The right table * @param binary_predicate The condition on which to join + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return The size that would result from performing the requested join @@ -1120,7 +1156,8 @@ std::size_t conditional_inner_join_size( table_view const& left, table_view const& right, ast::expression const& binary_predicate, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the exact number of matches (rows) when performing a @@ -1135,6 +1172,7 @@ std::size_t conditional_inner_join_size( * @param left The left table * @param right The right table * @param binary_predicate The condition on which to join + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return The 
size that would result from performing the requested join @@ -1143,7 +1181,8 @@ std::size_t conditional_left_join_size( table_view const& left, table_view const& right, ast::expression const& binary_predicate, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the exact number of matches (rows) when performing a @@ -1158,6 +1197,7 @@ std::size_t conditional_left_join_size( * @param left The left table * @param right The right table * @param binary_predicate The condition on which to join + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return The size that would result from performing the requested join @@ -1166,7 +1206,8 @@ std::size_t conditional_left_semi_join_size( table_view const& left, table_view const& right, ast::expression const& binary_predicate, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the exact number of matches (rows) when performing a @@ -1181,6 +1222,7 @@ std::size_t conditional_left_semi_join_size( * @param left The left table * @param right The right table * @param binary_predicate The condition on which to join + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table and columns' device memory * * @return The size that would result from performing the requested join @@ -1189,6 +1231,7 @@ std::size_t conditional_left_anti_join_size( table_view const& left, table_view const& right, ast::expression const& binary_predicate, - 
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/json/json.hpp b/cpp/include/cudf/json/json.hpp index 48d5dcf7727..2ad3421d27d 100644 --- a/cpp/include/cudf/json/json.hpp +++ b/cpp/include/cudf/json/json.hpp @@ -18,11 +18,7 @@ #include #include #include - -#include -#include - -#include +#include namespace CUDF_EXPORT cudf { @@ -171,7 +167,7 @@ std::unique_ptr get_json_object( cudf::string_scalar const& json_path, get_json_object_options options = get_json_object_options{}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/labeling/label_bins.hpp b/cpp/include/cudf/labeling/label_bins.hpp index 7eb25134ca5..1d0ead35d96 100644 --- a/cpp/include/cudf/labeling/label_bins.hpp +++ b/cpp/include/cudf/labeling/label_bins.hpp @@ -19,10 +19,9 @@ #include #include #include +#include #include -#include -#include namespace CUDF_EXPORT cudf { @@ -76,7 +75,7 @@ std::unique_ptr label_bins( column_view const& right_edges, inclusive right_inclusive, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/lists/combine.hpp b/cpp/include/cudf/lists/combine.hpp index 5a310e6651f..fd2f42cf649 100644 --- a/cpp/include/cudf/lists/combine.hpp +++ b/cpp/include/cudf/lists/combine.hpp @@ -18,9 +18,7 @@ #include #include #include - 
-#include -#include +#include namespace CUDF_EXPORT cudf { @@ -68,7 +66,7 @@ std::unique_ptr concatenate_rows( table_view const& input, concatenate_null_policy null_policy = concatenate_null_policy::IGNORE, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Concatenating multiple lists on the same row of a lists column into a single list. @@ -99,7 +97,7 @@ std::unique_ptr concatenate_list_elements( column_view const& input, concatenate_null_policy null_policy = concatenate_null_policy::IGNORE, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace lists diff --git a/cpp/include/cudf/lists/contains.hpp b/cpp/include/cudf/lists/contains.hpp index cd0a216488c..e498c60682e 100644 --- a/cpp/include/cudf/lists/contains.hpp +++ b/cpp/include/cudf/lists/contains.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace lists { @@ -52,7 +50,7 @@ std::unique_ptr contains( cudf::lists_column_view const& lists, cudf::scalar const& search_key, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a column of `bool` values indicating whether the list rows of the first @@ -76,7 +74,7 @@ std::unique_ptr contains( cudf::lists_column_view const& lists, cudf::column_view const& search_keys, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = 
cudf::get_current_device_resource_ref()); /** * @brief Create a column of `bool` values indicating whether each row in the `lists` column @@ -98,7 +96,7 @@ std::unique_ptr contains( std::unique_ptr contains_nulls( cudf::lists_column_view const& lists, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Option to choose whether `index_of()` returns the first or last match @@ -142,7 +140,7 @@ std::unique_ptr index_of( cudf::scalar const& search_key, duplicate_find_option find_option = duplicate_find_option::FIND_FIRST, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a column of values indicating the position of a search key @@ -179,7 +177,7 @@ std::unique_ptr index_of( cudf::column_view const& search_keys, duplicate_find_option find_option = duplicate_find_option::FIND_FIRST, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace lists diff --git a/cpp/include/cudf/lists/count_elements.hpp b/cpp/include/cudf/lists/count_elements.hpp index a6f2ea6e68a..e7d50f11099 100644 --- a/cpp/include/cudf/lists/count_elements.hpp +++ b/cpp/include/cudf/lists/count_elements.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace lists { @@ -54,7 +52,7 @@ namespace lists { std::unique_ptr count_elements( lists_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = 
rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of lists_elements group diff --git a/cpp/include/cudf/lists/detail/combine.hpp b/cpp/include/cudf/lists/detail/combine.hpp index 07309da2814..ee7a6a465c3 100644 --- a/cpp/include/cudf/lists/detail/combine.hpp +++ b/cpp/include/cudf/lists/detail/combine.hpp @@ -18,8 +18,7 @@ #include #include #include - -#include +#include namespace CUDF_EXPORT cudf { namespace lists::detail { diff --git a/cpp/include/cudf/lists/detail/concatenate.hpp b/cpp/include/cudf/lists/detail/concatenate.hpp index edfa3355dcd..d3a3a48dbb2 100644 --- a/cpp/include/cudf/lists/detail/concatenate.hpp +++ b/cpp/include/cudf/lists/detail/concatenate.hpp @@ -19,10 +19,10 @@ #include #include #include +#include #include #include -#include namespace CUDF_EXPORT cudf { namespace lists::detail { diff --git a/cpp/include/cudf/lists/detail/contains.hpp b/cpp/include/cudf/lists/detail/contains.hpp index 1ca3651b55a..9d30ef90723 100644 --- a/cpp/include/cudf/lists/detail/contains.hpp +++ b/cpp/include/cudf/lists/detail/contains.hpp @@ -17,8 +17,7 @@ #include #include - -#include +#include namespace CUDF_EXPORT cudf { namespace lists::detail { diff --git a/cpp/include/cudf/lists/detail/copying.hpp b/cpp/include/cudf/lists/detail/copying.hpp index 76154ae7064..04e6b18cd27 100644 --- a/cpp/include/cudf/lists/detail/copying.hpp +++ b/cpp/include/cudf/lists/detail/copying.hpp @@ -16,9 +16,9 @@ #pragma once #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace lists::detail { diff --git a/cpp/include/cudf/lists/detail/extract.hpp b/cpp/include/cudf/lists/detail/extract.hpp index e14b93ff912..7448f513788 100644 --- a/cpp/include/cudf/lists/detail/extract.hpp +++ b/cpp/include/cudf/lists/detail/extract.hpp @@ -17,8 +17,7 @@ #include #include - -#include +#include namespace CUDF_EXPORT cudf { namespace lists::detail { diff --git 
a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh index 294282d7caa..31b18c90c68 100644 --- a/cpp/include/cudf/lists/detail/gather.cuh +++ b/cpp/include/cudf/lists/detail/gather.cuh @@ -22,11 +22,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/include/cudf/lists/detail/interleave_columns.hpp b/cpp/include/cudf/lists/detail/interleave_columns.hpp index ae8caa853f3..ebf554f0964 100644 --- a/cpp/include/cudf/lists/detail/interleave_columns.hpp +++ b/cpp/include/cudf/lists/detail/interleave_columns.hpp @@ -17,9 +17,9 @@ #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace lists::detail { diff --git a/cpp/include/cudf/lists/detail/lists_column_factories.hpp b/cpp/include/cudf/lists/detail/lists_column_factories.hpp index 18d66f15b1e..b726264aa65 100644 --- a/cpp/include/cudf/lists/detail/lists_column_factories.hpp +++ b/cpp/include/cudf/lists/detail/lists_column_factories.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace lists::detail { diff --git a/cpp/include/cudf/lists/detail/reverse.hpp b/cpp/include/cudf/lists/detail/reverse.hpp index d10d7784e6c..a5a86f4d44d 100644 --- a/cpp/include/cudf/lists/detail/reverse.hpp +++ b/cpp/include/cudf/lists/detail/reverse.hpp @@ -17,8 +17,7 @@ #include #include - -#include +#include namespace CUDF_EXPORT cudf { namespace lists::detail { diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index be76e456900..51f2fa3cd23 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -26,11 +26,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/include/cudf/lists/detail/scatter_helper.cuh b/cpp/include/cudf/lists/detail/scatter_helper.cuh index fc44e0bc290..49678c97554 100644 --- 
a/cpp/include/cudf/lists/detail/scatter_helper.cuh +++ b/cpp/include/cudf/lists/detail/scatter_helper.cuh @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/include/cudf/lists/detail/set_operations.hpp b/cpp/include/cudf/lists/detail/set_operations.hpp index abfcef72d47..51293969e58 100644 --- a/cpp/include/cudf/lists/detail/set_operations.hpp +++ b/cpp/include/cudf/lists/detail/set_operations.hpp @@ -19,10 +19,10 @@ #include #include #include +#include #include #include -#include namespace CUDF_EXPORT cudf { namespace lists::detail { diff --git a/cpp/include/cudf/lists/detail/sorting.hpp b/cpp/include/cudf/lists/detail/sorting.hpp index 8cbfbbae769..748fb7acfee 100644 --- a/cpp/include/cudf/lists/detail/sorting.hpp +++ b/cpp/include/cudf/lists/detail/sorting.hpp @@ -16,9 +16,9 @@ #pragma once #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace lists::detail { diff --git a/cpp/include/cudf/lists/detail/stream_compaction.hpp b/cpp/include/cudf/lists/detail/stream_compaction.hpp index c11e07cd190..fa7c0c173d2 100644 --- a/cpp/include/cudf/lists/detail/stream_compaction.hpp +++ b/cpp/include/cudf/lists/detail/stream_compaction.hpp @@ -18,18 +18,15 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace lists::detail { /** - * @copydoc cudf::lists::apply_boolean_mask(lists_column_view const&, lists_column_view const&, - * rmm::device_async_resource_ref) - * - * @param stream CUDA stream used for device memory operations and kernel launches + * @copydoc cudf::lists::apply_boolean_mask */ std::unique_ptr apply_boolean_mask(lists_column_view const& input, lists_column_view const& boolean_mask, @@ -37,9 +34,7 @@ std::unique_ptr apply_boolean_mask(lists_column_view const& input, rmm::device_async_resource_ref mr); /** - * @copydoc cudf::list::distinct - * - * @param stream CUDA stream used for device memory operations and kernel launches. 
+ * @copydoc cudf::lists::distinct */ std::unique_ptr distinct(lists_column_view const& input, null_equality nulls_equal, diff --git a/cpp/include/cudf/lists/explode.hpp b/cpp/include/cudf/lists/explode.hpp index a3375887815..23745e8a443 100644 --- a/cpp/include/cudf/lists/explode.hpp +++ b/cpp/include/cudf/lists/explode.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include #include @@ -75,7 +73,7 @@ std::unique_ptr
explode( table_view const& input_table, size_type explode_column_idx, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Explodes a list column's elements and includes a position column. @@ -121,7 +119,7 @@ std::unique_ptr
explode_position( table_view const& input_table, size_type explode_column_idx, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Explodes a list column's elements retaining any null entries or empty lists inside. @@ -165,7 +163,7 @@ std::unique_ptr
explode_outer( table_view const& input_table, size_type explode_column_idx, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Explodes a list column's elements retaining any null entries or empty lists and includes a @@ -211,7 +209,7 @@ std::unique_ptr
explode_outer_position( table_view const& input_table, size_type explode_column_idx, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group diff --git a/cpp/include/cudf/lists/extract.hpp b/cpp/include/cudf/lists/extract.hpp index 29a02308c66..f584dff6bed 100644 --- a/cpp/include/cudf/lists/extract.hpp +++ b/cpp/include/cudf/lists/extract.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace lists { @@ -69,7 +67,7 @@ std::unique_ptr extract_list_element( lists_column_view const& lists_column, size_type index, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a column where each row is a single element from the corresponding sublist @@ -110,7 +108,7 @@ std::unique_ptr extract_list_element( lists_column_view const& lists_column, column_view const& indices, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace lists diff --git a/cpp/include/cudf/lists/filling.hpp b/cpp/include/cudf/lists/filling.hpp index a1f3c37ad9e..d887a844aba 100644 --- a/cpp/include/cudf/lists/filling.hpp +++ b/cpp/include/cudf/lists/filling.hpp @@ -18,10 +18,9 @@ #include #include +#include #include -#include -#include #include @@ -69,7 +68,7 @@ std::unique_ptr sequences( column_view const& starts, column_view const& sizes, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); 
+ rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a lists column in which each row contains a sequence of values specified by a tuple @@ -111,7 +110,7 @@ std::unique_ptr sequences( column_view const& steps, column_view const& sizes, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace lists diff --git a/cpp/include/cudf/lists/gather.hpp b/cpp/include/cudf/lists/gather.hpp index 6359e0488c9..3e3c09cfea1 100644 --- a/cpp/include/cudf/lists/gather.hpp +++ b/cpp/include/cudf/lists/gather.hpp @@ -20,9 +20,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace lists { @@ -77,7 +75,7 @@ std::unique_ptr segmented_gather( lists_column_view const& gather_map_list, out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace lists diff --git a/cpp/include/cudf/lists/reverse.hpp b/cpp/include/cudf/lists/reverse.hpp index f00e6e5117a..0c99dcbe8ae 100644 --- a/cpp/include/cudf/lists/reverse.hpp +++ b/cpp/include/cudf/lists/reverse.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include #include @@ -52,7 +50,7 @@ namespace lists { std::unique_ptr reverse( lists_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group diff --git a/cpp/include/cudf/lists/set_operations.hpp 
b/cpp/include/cudf/lists/set_operations.hpp index 55b1591fc44..f8ea972528c 100644 --- a/cpp/include/cudf/lists/set_operations.hpp +++ b/cpp/include/cudf/lists/set_operations.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace lists { @@ -64,7 +64,7 @@ std::unique_ptr have_overlap( null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a lists column of distinct elements common to two input lists columns. @@ -101,7 +101,7 @@ std::unique_ptr intersect_distinct( null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a lists column of distinct elements found in either of two input lists columns. @@ -138,7 +138,7 @@ std::unique_ptr union_distinct( null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a lists column of distinct elements found only in the left input column. 
@@ -175,7 +175,7 @@ std::unique_ptr difference_distinct( null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace lists diff --git a/cpp/include/cudf/lists/sorting.hpp b/cpp/include/cudf/lists/sorting.hpp index 39c71f6e9fa..ee18ed57c57 100644 --- a/cpp/include/cudf/lists/sorting.hpp +++ b/cpp/include/cudf/lists/sorting.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace lists { @@ -58,7 +56,7 @@ std::unique_ptr sort_lists( order column_order, null_order null_precedence, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Segmented sort of the elements within a list in each row of a list column using stable @@ -71,7 +69,7 @@ std::unique_ptr stable_sort_lists( order column_order, null_order null_precedence, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace lists diff --git a/cpp/include/cudf/lists/stream_compaction.hpp b/cpp/include/cudf/lists/stream_compaction.hpp index 28ef13cd870..59b53c10ac9 100644 --- a/cpp/include/cudf/lists/stream_compaction.hpp +++ b/cpp/include/cudf/lists/stream_compaction.hpp @@ -18,10 +18,9 @@ #include #include #include +#include #include -#include -#include namespace CUDF_EXPORT cudf { namespace lists { @@ -65,7 +64,7 @@ std::unique_ptr apply_boolean_mask( lists_column_view const& input, 
lists_column_view const& boolean_mask, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a new list column without duplicate elements in each list. @@ -92,7 +91,7 @@ std::unique_ptr distinct( null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group diff --git a/cpp/include/cudf/merge.hpp b/cpp/include/cudf/merge.hpp index 83c6ff04500..18701bf8ec6 100644 --- a/cpp/include/cudf/merge.hpp +++ b/cpp/include/cudf/merge.hpp @@ -18,9 +18,7 @@ #include #include - -#include -#include +#include #include #include @@ -109,6 +107,6 @@ std::unique_ptr merge( std::vector const& column_order, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/null_mask.hpp b/cpp/include/cudf/null_mask.hpp index 70ca6aa29c5..fe719bf2c62 100644 --- a/cpp/include/cudf/null_mask.hpp +++ b/cpp/include/cudf/null_mask.hpp @@ -18,11 +18,10 @@ #include #include #include +#include #include #include -#include -#include #include @@ -92,7 +91,7 @@ rmm::device_buffer create_null_mask( size_type size, mask_state state, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Sets a 
pre-allocated bitmask buffer to a given state in the range @@ -135,7 +134,7 @@ rmm::device_buffer copy_bitmask( size_type begin_bit, size_type end_bit, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Copies `view`'s bitmask from the bits @@ -152,7 +151,7 @@ rmm::device_buffer copy_bitmask( rmm::device_buffer copy_bitmask( column_view const& view, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs bitwise AND of the bitmasks of columns of a table. Returns @@ -169,7 +168,7 @@ rmm::device_buffer copy_bitmask( std::pair bitmask_and( table_view const& view, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs bitwise OR of the bitmasks of columns of a table. 
Returns @@ -186,7 +185,7 @@ std::pair bitmask_and( std::pair bitmask_or( table_view const& view, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Given a validity bitmask, counts the number of null elements (unset bits) diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp index 6a53553063e..385da993262 100644 --- a/cpp/include/cudf/partitioning.hpp +++ b/cpp/include/cudf/partitioning.hpp @@ -19,10 +19,9 @@ #include #include #include +#include #include -#include -#include #include #include @@ -80,7 +79,7 @@ std::pair, std::vector> partition( table_view const& t, column_view const& partition_map, size_type num_partitions, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Partitions rows from the input table into multiple output tables. @@ -109,7 +108,7 @@ std::pair, std::vector> hash_partition( hash_id hash_function = hash_id::HASH_MURMUR3, uint32_t seed = DEFAULT_HASH_SEED, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Round-robin partition. 
@@ -252,7 +251,7 @@ std::pair, std::vector> round_robi table_view const& input, cudf::size_type num_partitions, cudf::size_type start_partition = 0, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/quantiles.hpp b/cpp/include/cudf/quantiles.hpp index 47eac2e72f9..f6bae170f03 100644 --- a/cpp/include/cudf/quantiles.hpp +++ b/cpp/include/cudf/quantiles.hpp @@ -21,9 +21,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { /** @@ -61,7 +59,7 @@ std::unique_ptr quantile( interpolation interp = interpolation::LINEAR, column_view const& ordered_indices = {}, bool exact = true, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the rows of the input corresponding to the requested quantiles. @@ -100,7 +98,7 @@ std::unique_ptr
quantiles( cudf::sorted is_input_sorted = sorted::NO, std::vector const& column_order = {}, std::vector const& null_precedence = {}, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Calculate approximate percentiles on an input tdigest column. @@ -127,7 +125,7 @@ std::unique_ptr
quantiles( std::unique_ptr percentile_approx( tdigest::tdigest_column_view const& input, column_view const& percentiles, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp index e42ff5df15d..41be2e70cc3 100644 --- a/cpp/include/cudf/reduction.hpp +++ b/cpp/include/cudf/reduction.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include #include @@ -85,7 +83,7 @@ std::unique_ptr reduce( reduce_aggregation const& agg, data_type output_dtype, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the reduction of the values in all rows of a column with an initial value @@ -109,7 +107,7 @@ std::unique_ptr reduce( data_type output_dtype, std::optional> init, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Compute reduction of each segment in the input column @@ -161,7 +159,7 @@ std::unique_ptr segmented_reduce( data_type output_dtype, null_policy null_handling, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Compute reduction of each segment in the input column with an initial value. 
Only SUM, @@ -188,7 +186,7 @@ std::unique_ptr segmented_reduce( null_policy null_handling, std::optional> init, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the scan of a column. @@ -214,7 +212,7 @@ std::unique_ptr scan( scan_type inclusive, null_policy null_handling = null_policy::EXCLUDE, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Determines the minimum and maximum values of a column. @@ -229,7 +227,7 @@ std::unique_ptr scan( std::pair, std::unique_ptr> minmax( column_view const& col, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group diff --git a/cpp/include/cudf/reduction/detail/histogram.hpp b/cpp/include/cudf/reduction/detail/histogram.hpp index 5b17df47ec7..c990db32977 100644 --- a/cpp/include/cudf/reduction/detail/histogram.hpp +++ b/cpp/include/cudf/reduction/detail/histogram.hpp @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/include/cudf/reduction/detail/reduction.cuh b/cpp/include/cudf/reduction/detail/reduction.cuh index 7d1754d86f2..37e1545bcf2 100644 --- a/cpp/include/cudf/reduction/detail/reduction.cuh +++ b/cpp/include/cudf/reduction/detail/reduction.cuh @@ -20,13 +20,13 @@ #include #include +#include #include #include #include #include #include -#include #include #include diff --git a/cpp/include/cudf/reduction/detail/reduction.hpp b/cpp/include/cudf/reduction/detail/reduction.hpp index a15783fb460..fd0e3abb529 
100644 --- a/cpp/include/cudf/reduction/detail/reduction.hpp +++ b/cpp/include/cudf/reduction/detail/reduction.hpp @@ -20,8 +20,7 @@ #include #include #include - -#include +#include #include diff --git a/cpp/include/cudf/reduction/detail/reduction_functions.hpp b/cpp/include/cudf/reduction/detail/reduction_functions.hpp index fa21dc87e64..b40211a54ad 100644 --- a/cpp/include/cudf/reduction/detail/reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/reduction_functions.hpp @@ -21,9 +21,9 @@ #include #include #include +#include #include -#include #include diff --git a/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp b/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp index 1c55b387454..af45a14874b 100644 --- a/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp +++ b/cpp/include/cudf/reduction/detail/segmented_reduction_functions.hpp @@ -19,11 +19,10 @@ #include #include #include -#include #include +#include #include -#include #include diff --git a/cpp/include/cudf/replace.hpp b/cpp/include/cudf/replace.hpp index 43aabd6c6c6..8d8510da5ea 100644 --- a/cpp/include/cudf/replace.hpp +++ b/cpp/include/cudf/replace.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include #include @@ -58,7 +56,7 @@ std::unique_ptr replace_nulls( column_view const& input, column_view const& replacement, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Replaces all null values in a column with a scalar. 
@@ -77,7 +75,7 @@ std::unique_ptr replace_nulls( column_view const& input, scalar const& replacement, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Replaces all null values in a column with the first non-null value that precedes/follows. @@ -96,7 +94,7 @@ std::unique_ptr replace_nulls( column_view const& input, replace_policy const& replace_policy, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Replaces all NaN values in a column with corresponding values from another column @@ -124,7 +122,7 @@ std::unique_ptr replace_nans( column_view const& input, column_view const& replacement, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Replaces all NaN values in a column with a scalar @@ -151,7 +149,7 @@ std::unique_ptr replace_nans( column_view const& input, scalar const& replacement, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Return a copy of `input_col` replacing any `values_to_replace[i]` @@ -170,7 +168,7 @@ std::unique_ptr find_and_replace_all( column_view const& values_to_replace, column_view const& replacement_values, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * 
@brief Replaces values less than `lo` in `input` with `lo_replace`, @@ -225,7 +223,7 @@ std::unique_ptr clamp( scalar const& hi, scalar const& hi_replace, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Replaces values less than `lo` in `input` with `lo`, @@ -271,7 +269,7 @@ std::unique_ptr clamp( scalar const& lo, scalar const& hi, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Copies from a column of floating-point elements and replaces `-NaN` and `-0.0` with `+NaN` @@ -291,7 +289,7 @@ std::unique_ptr clamp( std::unique_ptr normalize_nans_and_zeros( column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Modifies a column of floating-point elements to replace all `-NaN` and `-0.0` with `+NaN` diff --git a/cpp/include/cudf/reshape.hpp b/cpp/include/cudf/reshape.hpp index a0a7fe694bb..e437e7abfca 100644 --- a/cpp/include/cudf/reshape.hpp +++ b/cpp/include/cudf/reshape.hpp @@ -20,9 +20,7 @@ #include #include #include - -#include -#include +#include #include @@ -47,14 +45,15 @@ namespace CUDF_EXPORT cudf { * @throws cudf::logic_error if input contains no columns. * @throws cudf::logic_error if input columns dtypes are not identical. 
* - * @param[in] input Table containing columns to interleave - * @param[in] mr Device memory resource used to allocate the returned column's device memory - * + * @param input Table containing columns to interleave + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory * @return The interleaved columns as a single column */ std::unique_ptr interleave_columns( table_view const& input, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Repeats the rows from `input` table `count` times to form a new table. @@ -68,16 +67,18 @@ std::unique_ptr interleave_columns( * return = [[8, 4, 7, 8, 4, 7], [5, 2, 3, 5, 2, 3]] * ``` * - * @param[in] input Table containing rows to be repeated - * @param[in] count Number of times to tile "rows". Must be non-negative - * @param[in] mr Device memory resource used to allocate the returned table's device memory + * @param input Table containing rows to be repeated + * @param count Number of times to tile "rows". Must be non-negative + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned table's device memory * * @return The table containing the tiled "rows" */ std::unique_ptr
tile( table_view const& input, size_type count, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Configures whether byte casting flips endianness @@ -95,6 +96,7 @@ enum class flip_endianness : bool { NO, YES }; * * @param input_column Column to be converted to lists of bytes * @param endian_configuration Whether to retain or flip the endianness of the elements + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * * @return The column containing the lists of bytes @@ -102,7 +104,8 @@ enum class flip_endianness : bool { NO, YES }; std::unique_ptr byte_cast( column_view const& input_column, flip_endianness endian_configuration, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group diff --git a/cpp/include/cudf/rolling.hpp b/cpp/include/cudf/rolling.hpp index 5a8c454d8fc..8a717c3f510 100644 --- a/cpp/include/cudf/rolling.hpp +++ b/cpp/include/cudf/rolling.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include #include @@ -70,7 +68,7 @@ std::unique_ptr rolling_window( size_type min_periods, rolling_aggregation const& agg, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief @copybrief rolling_window @@ -95,7 +93,7 @@ std::unique_ptr rolling_window( size_type min_periods, rolling_aggregation const& agg, rmm::cuda_stream_view stream = cudf::get_default_stream(), - 
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Abstraction for window boundary sizes @@ -245,7 +243,7 @@ std::unique_ptr grouped_rolling_window( size_type min_periods, rolling_aggregation const& aggr, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief @copybrief grouped_rolling_window @@ -267,7 +265,7 @@ std::unique_ptr grouped_rolling_window( size_type min_periods, rolling_aggregation const& aggr, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief @copybrief grouped_rolling_window @@ -294,7 +292,7 @@ std::unique_ptr grouped_rolling_window( size_type min_periods, rolling_aggregation const& aggr, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief @copybrief grouped_rolling_window @@ -318,7 +316,7 @@ std::unique_ptr grouped_rolling_window( size_type min_periods, rolling_aggregation const& aggr, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Applies a grouping-aware, timestamp-based rolling window function to the values in a @@ -415,7 +413,7 @@ std::unique_ptr grouped_time_range_rolling_window( size_type min_periods, rolling_aggregation const& aggr, rmm::cuda_stream_view stream = cudf::get_default_stream(), - 
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Applies a grouping-aware, timestamp-based rolling window function to the values in a @@ -446,7 +444,7 @@ std::unique_ptr grouped_time_range_rolling_window( size_type min_periods, rolling_aggregation const& aggr, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Applies a grouping-aware, value range-based rolling window function to the values in a @@ -568,7 +566,7 @@ std::unique_ptr grouped_range_rolling_window( size_type min_periods, rolling_aggregation const& aggr, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Applies a variable-size rolling window function to the values in a column. 
@@ -613,7 +611,7 @@ std::unique_ptr rolling_window( size_type min_periods, rolling_aggregation const& agg, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/round.hpp b/cpp/include/cudf/round.hpp index ef144b328f7..ba56ff34b97 100644 --- a/cpp/include/cudf/round.hpp +++ b/cpp/include/cudf/round.hpp @@ -18,9 +18,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { @@ -76,7 +74,7 @@ std::unique_ptr round( column_view const& input, int32_t decimal_places = 0, rounding_method method = rounding_method::HALF_UP, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp index 2c5cc60fc70..e8a498afc09 100644 --- a/cpp/include/cudf/scalar/scalar.hpp +++ b/cpp/include/cudf/scalar/scalar.hpp @@ -19,13 +19,12 @@ #include #include #include +#include #include #include #include #include -#include -#include /** * @file @@ -114,7 +113,7 @@ class scalar { */ scalar(scalar const& other, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new scalar object. 
@@ -130,7 +129,7 @@ class scalar { scalar(data_type type, bool is_valid = false, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); }; namespace detail { @@ -166,7 +165,7 @@ class fixed_width_scalar : public scalar { */ fixed_width_scalar(fixed_width_scalar const& other, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Set the value of the scalar. @@ -217,7 +216,7 @@ class fixed_width_scalar : public scalar { fixed_width_scalar(T value, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new fixed width scalar object from existing device memory. @@ -230,7 +229,7 @@ class fixed_width_scalar : public scalar { fixed_width_scalar(rmm::device_scalar&& data, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); }; } // namespace detail @@ -266,7 +265,7 @@ class numeric_scalar : public detail::fixed_width_scalar { */ numeric_scalar(numeric_scalar const& other, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new numeric scalar object. 
@@ -279,7 +278,7 @@ class numeric_scalar : public detail::fixed_width_scalar { numeric_scalar(T value, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new numeric scalar object from existing device memory. @@ -292,7 +291,7 @@ class numeric_scalar : public detail::fixed_width_scalar { numeric_scalar(rmm::device_scalar&& data, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); }; /** @@ -329,7 +328,7 @@ class fixed_point_scalar : public scalar { */ fixed_point_scalar(fixed_point_scalar const& other, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new fixed_point scalar object from already shifted value and scale. @@ -344,7 +343,7 @@ class fixed_point_scalar : public scalar { numeric::scale_type scale, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new fixed_point scalar object from a value and default 0-scale. 
@@ -357,7 +356,7 @@ class fixed_point_scalar : public scalar { fixed_point_scalar(rep_type value, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new fixed_point scalar object from a fixed_point number. @@ -370,7 +369,7 @@ class fixed_point_scalar : public scalar { fixed_point_scalar(T value, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new fixed_point scalar object from existing device memory. @@ -385,7 +384,7 @@ class fixed_point_scalar : public scalar { numeric::scale_type scale, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Get the value of the scalar. @@ -454,7 +453,7 @@ class string_scalar : public scalar { */ string_scalar(string_scalar const& other, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new string scalar object. @@ -469,7 +468,7 @@ class string_scalar : public scalar { string_scalar(std::string const& string, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new string scalar object from string_view. 
@@ -484,7 +483,7 @@ class string_scalar : public scalar { string_scalar(value_type const& source, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new string scalar object from string_view in device memory. @@ -499,7 +498,7 @@ class string_scalar : public scalar { string_scalar(rmm::device_scalar& data, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new string scalar object by moving an existing string data buffer. @@ -515,7 +514,7 @@ class string_scalar : public scalar { string_scalar(rmm::device_buffer&& data, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Explicit conversion operator to get the value of the scalar in a host std::string. @@ -587,7 +586,7 @@ class chrono_scalar : public detail::fixed_width_scalar { */ chrono_scalar(chrono_scalar const& other, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new chrono scalar object. 
@@ -600,7 +599,7 @@ class chrono_scalar : public detail::fixed_width_scalar { chrono_scalar(T value, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new chrono scalar object from existing device memory. @@ -613,7 +612,7 @@ class chrono_scalar : public detail::fixed_width_scalar { chrono_scalar(rmm::device_scalar&& data, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); }; /** @@ -646,7 +645,7 @@ class timestamp_scalar : public chrono_scalar { */ timestamp_scalar(timestamp_scalar const& other, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new timestamp scalar object from a duration that is @@ -662,7 +661,7 @@ class timestamp_scalar : public chrono_scalar { timestamp_scalar(Duration2 const& value, bool is_valid, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the duration in number of ticks since the UNIX epoch. 
@@ -702,7 +701,7 @@ class duration_scalar : public chrono_scalar { */ duration_scalar(duration_scalar const& other, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new duration scalar object from tick counts. @@ -715,7 +714,7 @@ class duration_scalar : public chrono_scalar { duration_scalar(rep_type value, bool is_valid, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the duration in number of ticks. @@ -751,7 +750,7 @@ class list_scalar : public scalar { */ list_scalar(list_scalar const& other, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new list scalar object from column_view. @@ -766,7 +765,7 @@ class list_scalar : public scalar { list_scalar(cudf::column_view const& data, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new list scalar object from existing column. @@ -779,7 +778,7 @@ class list_scalar : public scalar { list_scalar(cudf::column&& data, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a non-owning, immutable view to underlying device data. 
@@ -816,7 +815,7 @@ class struct_scalar : public scalar { */ struct_scalar(struct_scalar const& other, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new struct scalar object from table_view. @@ -831,7 +830,7 @@ class struct_scalar : public scalar { struct_scalar(table_view const& data, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new struct scalar object from a host_span of column_views. @@ -846,7 +845,7 @@ class struct_scalar : public scalar { struct_scalar(host_span data, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new struct scalar object from an existing table in device memory. @@ -862,7 +861,7 @@ class struct_scalar : public scalar { struct_scalar(table&& data, bool is_valid = true, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a non-owning, immutable view to underlying device data. 
diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index a422c3bfbe9..87700115996 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -17,10 +17,9 @@ #include #include +#include #include -#include -#include namespace CUDF_EXPORT cudf { /** @@ -45,7 +44,7 @@ namespace CUDF_EXPORT cudf { std::unique_ptr make_numeric_scalar( data_type type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct scalar with uninitialized storage to hold a value of the @@ -62,7 +61,7 @@ std::unique_ptr make_numeric_scalar( std::unique_ptr make_timestamp_scalar( data_type type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct scalar with uninitialized storage to hold a value of the @@ -79,7 +78,7 @@ std::unique_ptr make_timestamp_scalar( std::unique_ptr make_duration_scalar( data_type type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct scalar with uninitialized storage to hold a value of the @@ -96,7 +95,7 @@ std::unique_ptr make_duration_scalar( std::unique_ptr make_fixed_width_scalar( data_type type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct STRING type scalar given a `std::string`. 
@@ -113,7 +112,7 @@ std::unique_ptr make_fixed_width_scalar( std::unique_ptr make_string_scalar( std::string const& string, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Constructs default constructed scalar of type `type` @@ -128,7 +127,7 @@ std::unique_ptr make_string_scalar( std::unique_ptr make_default_constructed_scalar( data_type type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Creates an empty (invalid) scalar of the same type as the `input` column_view. @@ -143,7 +142,7 @@ std::unique_ptr make_default_constructed_scalar( std::unique_ptr make_empty_scalar_like( column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct scalar using the given value of fixed width type @@ -158,7 +157,7 @@ template std::unique_ptr make_fixed_width_scalar( T value, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { return std::make_unique>(value, true, stream, mr); } @@ -178,7 +177,7 @@ std::unique_ptr make_fixed_point_scalar( typename T::rep value, numeric::scale_type scale, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { return std::make_unique>(value, scale, true, stream, 
mr); } @@ -194,7 +193,7 @@ std::unique_ptr make_fixed_point_scalar( std::unique_ptr make_list_scalar( column_view elements, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a struct scalar using the given table_view. @@ -209,7 +208,7 @@ std::unique_ptr make_list_scalar( std::unique_ptr make_struct_scalar( table_view const& data, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a struct scalar using the given span of column views. @@ -224,7 +223,7 @@ std::unique_ptr make_struct_scalar( std::unique_ptr make_struct_scalar( host_span data, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/search.hpp b/cpp/include/cudf/search.hpp index ad170ec726b..e10c8c8b4d2 100644 --- a/cpp/include/cudf/search.hpp +++ b/cpp/include/cudf/search.hpp @@ -21,9 +21,7 @@ #include #include #include - -#include -#include +#include #include @@ -75,7 +73,7 @@ std::unique_ptr lower_bound( std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Find largest indices in a sorted table where values should be inserted to maintain order. 
@@ -117,7 +115,7 @@ std::unique_ptr upper_bound( std::vector const& column_order, std::vector const& null_precedence, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Check if the given `needle` value exists in the `haystack` column. @@ -166,7 +164,7 @@ std::unique_ptr contains( column_view const& haystack, column_view const& needles, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp index 4cb265a2a0b..b773f76defe 100644 --- a/cpp/include/cudf/sorting.hpp +++ b/cpp/include/cudf/sorting.hpp @@ -20,9 +20,7 @@ #include #include #include - -#include -#include +#include #include #include @@ -56,7 +54,7 @@ std::unique_ptr sorted_order( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the row indices that would produce `input` in a stable @@ -71,7 +69,7 @@ std::unique_ptr stable_sorted_order( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Checks whether the rows of a `table` are sorted in a lexicographical @@ -115,7 +113,7 @@ std::unique_ptr
sort( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs a stable lexicographic sort of the rows of a table @@ -127,7 +125,7 @@ std::unique_ptr
stable_sort( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs a key-value sort. @@ -157,7 +155,7 @@ std::unique_ptr
sort_by_key( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs a key-value stable sort. @@ -170,7 +168,7 @@ std::unique_ptr
stable_sort_by_key( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Computes the ranks of input column in sorted order. @@ -210,7 +208,7 @@ std::unique_ptr rank( null_order null_precedence, bool percentage, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns sorted order after sorting each segment in the table. @@ -261,7 +259,7 @@ std::unique_ptr segmented_sorted_order( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns sorted order after stably sorting each segment in the table. @@ -274,7 +272,7 @@ std::unique_ptr stable_segmented_sorted_order( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs a lexicographic segmented sort of a table @@ -330,7 +328,7 @@ std::unique_ptr
segmented_sort_by_key( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Performs a stably lexicographic segmented sort of a table @@ -344,7 +342,7 @@ std::unique_ptr
stable_segmented_sort_by_key( std::vector const& column_order = {}, std::vector const& null_precedence = {}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/stream_compaction.hpp index cfe404ff6ab..ed0730d50a4 100644 --- a/cpp/include/cudf/stream_compaction.hpp +++ b/cpp/include/cudf/stream_compaction.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include #include #include @@ -67,6 +65,7 @@ namespace CUDF_EXPORT cudf { * @param[in] keys vector of indices representing key columns from `input` * @param[in] keep_threshold The minimum number of non-null fields in a row * required to keep the row. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` with at least @p * keep_threshold non-null fields in @p keys. @@ -75,7 +74,8 @@ std::unique_ptr
drop_nulls( table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Filters a table to remove null elements. @@ -99,6 +99,7 @@ std::unique_ptr
drop_nulls( * * @param[in] input The input `table_view` to filter * @param[in] keys vector of indices representing key columns from `input` + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` without nulls in the columns * of @p keys. @@ -106,7 +107,8 @@ std::unique_ptr
drop_nulls( std::unique_ptr
drop_nulls( table_view const& input, std::vector const& keys, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Filters a table to remove NANs with threshold count. @@ -141,6 +143,7 @@ std::unique_ptr
drop_nulls( * @param[in] keys vector of indices representing key columns from `input` * @param[in] keep_threshold The minimum number of non-NAN elements in a row * required to keep the row. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` with at least @p * keep_threshold non-NAN elements in @p keys. @@ -149,7 +152,8 @@ std::unique_ptr
drop_nans( table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Filters a table to remove NANs. @@ -174,6 +178,7 @@ std::unique_ptr
drop_nans( * * @param[in] input The input `table_view` to filter * @param[in] keys vector of indices representing key columns from `input` + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing all rows of the `input` without NANs in the columns * of @p keys. @@ -181,7 +186,8 @@ std::unique_ptr
drop_nans( std::unique_ptr
drop_nans( table_view const& input, std::vector const& keys, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Filters `input` using `boolean_mask` of boolean values as a mask. @@ -200,6 +206,7 @@ std::unique_ptr
drop_nans( * @param[in] input The input table_view to filter * @param[in] boolean_mask A nullable column_view of type type_id::BOOL8 used * as a mask to filter the `input`. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device memory * @return Table containing copy of all rows of @p input passing * the filter defined by @p boolean_mask. @@ -207,7 +214,8 @@ std::unique_ptr
drop_nans( std::unique_ptr
apply_boolean_mask( table_view const& input, column_view const& boolean_mask, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Choices for drop_duplicates API for retainment of duplicate rows @@ -241,6 +249,7 @@ enum class duplicate_keep_option { * @param[in] keep keep any, first, last, or none of the found duplicates * @param[in] nulls_equal flag to denote nulls are equal if null_equality::EQUAL, nulls are not * equal if null_equality::UNEQUAL + * @param[in] stream CUDA stream used for device memory operations and kernel launches * @param[in] mr Device memory resource used to allocate the returned table's device * memory * @@ -251,7 +260,8 @@ std::unique_ptr
unique( std::vector const& keys, duplicate_keep_option keep, null_equality nulls_equal = null_equality::EQUAL, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a new table without duplicate rows. @@ -269,6 +279,7 @@ std::unique_ptr
unique( * @param keep Copy any, first, last, or none of the found duplicates * @param nulls_equal Flag to specify whether null elements should be considered as equal * @param nans_equal Flag to specify whether NaN elements should be considered as equal + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table * @return Table with distinct rows in an unspecified order */ @@ -278,7 +289,8 @@ std::unique_ptr
distinct( duplicate_keep_option keep = duplicate_keep_option::KEEP_ANY, null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a column of indices of all distinct rows in the input table. @@ -300,7 +312,7 @@ std::unique_ptr distinct_indices( null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Create a new table without duplicate rows, preserving input order. @@ -332,7 +344,7 @@ std::unique_ptr
stable_distinct( null_equality nulls_equal = null_equality::EQUAL, nan_equality nans_equal = nan_equality::ALL_EQUAL, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Count the number of consecutive groups of equivalent rows in a column. @@ -346,12 +358,14 @@ std::unique_ptr
stable_distinct( * @param[in] input The column_view whose consecutive groups of equivalent rows will be counted * @param[in] null_handling flag to include or ignore `null` while counting * @param[in] nan_handling flag to consider `NaN==null` or not + * @param[in] stream CUDA stream used for device memory operations and kernel launches * * @return number of consecutive groups of equivalent rows in the column */ cudf::size_type unique_count(column_view const& input, null_policy null_handling, - nan_policy nan_handling); + nan_policy nan_handling, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Count the number of consecutive groups of equivalent rows in a table. @@ -359,11 +373,13 @@ cudf::size_type unique_count(column_view const& input, * @param[in] input Table whose consecutive groups of equivalent rows will be counted * @param[in] nulls_equal flag to denote if null elements should be considered equal * nulls are not equal if null_equality::UNEQUAL. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * * @return number of consecutive groups of equivalent rows in the column */ cudf::size_type unique_count(table_view const& input, - null_equality nulls_equal = null_equality::EQUAL); + null_equality nulls_equal = null_equality::EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Count the distinct elements in the column_view. 
@@ -382,12 +398,14 @@ cudf::size_type unique_count(table_view const& input, * @param[in] input The column_view whose distinct elements will be counted * @param[in] null_handling flag to include or ignore `null` while counting * @param[in] nan_handling flag to consider `NaN==null` or not + * @param[in] stream CUDA stream used for device memory operations and kernel launches * * @return number of distinct rows in the table */ cudf::size_type distinct_count(column_view const& input, null_policy null_handling, - nan_policy nan_handling); + nan_policy nan_handling, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** * @brief Count the distinct rows in a table. @@ -395,11 +413,13 @@ cudf::size_type distinct_count(column_view const& input, * @param[in] input Table whose distinct rows will be counted * @param[in] nulls_equal flag to denote if null elements should be considered equal. * nulls are not equal if null_equality::UNEQUAL. + * @param[in] stream CUDA stream used for device memory operations and kernel launches * * @return number of distinct rows in the table */ cudf::size_type distinct_count(table_view const& input, - null_equality nulls_equal = null_equality::EQUAL); + null_equality nulls_equal = null_equality::EQUAL, + rmm::cuda_stream_view stream = cudf::get_default_stream()); /** @} */ } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/attributes.hpp b/cpp/include/cudf/strings/attributes.hpp index 323290e907c..5f2eda8fa5b 100644 --- a/cpp/include/cudf/strings/attributes.hpp +++ b/cpp/include/cudf/strings/attributes.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { @@ -48,7 +46,7 @@ namespace strings { */ std::unique_ptr count_characters( strings_column_view const& input, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a column containing byte 
lengths @@ -66,7 +64,7 @@ std::unique_ptr count_characters( */ std::unique_ptr count_bytes( strings_column_view const& input, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Creates a numeric column with code point values (integers) for each @@ -86,7 +84,7 @@ std::unique_ptr count_bytes( */ std::unique_ptr code_points( strings_column_view const& input, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of strings_apis group diff --git a/cpp/include/cudf/strings/capitalize.hpp b/cpp/include/cudf/strings/capitalize.hpp index 420b46a05b2..312e3a5bef1 100644 --- a/cpp/include/cudf/strings/capitalize.hpp +++ b/cpp/include/cudf/strings/capitalize.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -63,7 +61,7 @@ std::unique_ptr capitalize( strings_column_view const& input, string_scalar const& delimiters = string_scalar("", true, cudf::get_default_stream()), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Modifies first character of each word to upper-case and lower-cases the rest. @@ -96,7 +94,7 @@ std::unique_ptr title( strings_column_view const& input, string_character_types sequence_type = string_character_types::ALPHA, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Checks if the strings in the input column are title formatted. 
@@ -125,7 +123,7 @@ std::unique_ptr title( std::unique_ptr is_title( strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/case.hpp b/cpp/include/cudf/strings/case.hpp index 45f56a681a6..c2bd559accc 100644 --- a/cpp/include/cudf/strings/case.hpp +++ b/cpp/include/cudf/strings/case.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -46,7 +44,7 @@ namespace strings { std::unique_ptr to_lower( strings_column_view const& strings, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Converts a column of strings to upper case. 
@@ -65,7 +63,7 @@ std::unique_ptr to_lower( std::unique_ptr to_upper( strings_column_view const& strings, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a column of strings converting lower case characters to @@ -85,7 +83,7 @@ std::unique_ptr to_upper( std::unique_ptr swapcase( strings_column_view const& strings, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp index a6af681eec6..3ebe5cb53e9 100644 --- a/cpp/include/cudf/strings/char_types/char_types.hpp +++ b/cpp/include/cudf/strings/char_types/char_types.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -68,7 +66,7 @@ std::unique_ptr all_characters_of_type( string_character_types types, string_character_types verify_types = string_character_types::ALL_TYPES, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Filter specific character types from a column of strings. 
@@ -115,7 +113,7 @@ std::unique_ptr filter_characters_of_type( string_scalar const& replacement = string_scalar(""), string_character_types types_to_keep = string_character_types::ALL_TYPES, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/combine.hpp b/cpp/include/cudf/strings/combine.hpp index 2cade813d78..d766fba0cdc 100644 --- a/cpp/include/cudf/strings/combine.hpp +++ b/cpp/include/cudf/strings/combine.hpp @@ -20,9 +20,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -81,7 +79,7 @@ std::unique_ptr join_strings( string_scalar const& separator = string_scalar(""), string_scalar const& narep = string_scalar("", false), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Concatenates a list of strings columns using separators for each row @@ -149,7 +147,7 @@ std::unique_ptr concatenate( string_scalar const& col_narep = string_scalar("", false), separator_on_nulls separate_nulls = separator_on_nulls::YES, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Row-wise concatenates the given list of strings columns and @@ -204,7 +202,7 @@ std::unique_ptr concatenate( string_scalar const& narep = string_scalar("", false), separator_on_nulls separate_nulls = separator_on_nulls::YES, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = 
rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Given a lists column of strings (each row is a list of strings), concatenates the strings @@ -271,7 +269,7 @@ std::unique_ptr join_list_elements( separator_on_nulls separate_nulls = separator_on_nulls::YES, output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Given a lists column of strings (each row is a list of strings), concatenates the strings @@ -330,7 +328,7 @@ std::unique_ptr join_list_elements( separator_on_nulls separate_nulls = separator_on_nulls::YES, output_if_empty_list empty_list_policy = output_if_empty_list::EMPTY_STRING, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/contains.hpp b/cpp/include/cudf/strings/contains.hpp index 59c9b2dea40..2a25ac79bbb 100644 --- a/cpp/include/cudf/strings/contains.hpp +++ b/cpp/include/cudf/strings/contains.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -61,7 +59,7 @@ std::unique_ptr contains_re( strings_column_view const& input, regex_program const& prog, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a boolean column identifying rows which @@ -89,7 +87,7 @@ std::unique_ptr matches_re( 
strings_column_view const& input, regex_program const& prog, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the number of times the given regex_program's pattern @@ -117,7 +115,7 @@ std::unique_ptr count_re( strings_column_view const& input, regex_program const& prog, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a boolean column identifying rows which @@ -164,7 +162,7 @@ std::unique_ptr like( string_scalar const& pattern, string_scalar const& escape_character = string_scalar(""), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a boolean column identifying rows which @@ -205,7 +203,7 @@ std::unique_ptr like( strings_column_view const& patterns, string_scalar const& escape_character = string_scalar(""), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_booleans.hpp b/cpp/include/cudf/strings/convert/convert_booleans.hpp index d79dd4a80ea..bf7b6c1525b 100644 --- a/cpp/include/cudf/strings/convert/convert_booleans.hpp +++ b/cpp/include/cudf/strings/convert/convert_booleans.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -46,7 +44,7 @@ 
std::unique_ptr to_booleans( strings_column_view const& input, string_scalar const& true_string, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a new strings column converting the boolean values from the @@ -68,7 +66,7 @@ std::unique_ptr from_booleans( string_scalar const& true_string, string_scalar const& false_string, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_datetime.hpp b/cpp/include/cudf/strings/convert/convert_datetime.hpp index c3b3c91ab35..04eba83925d 100644 --- a/cpp/include/cudf/strings/convert/convert_datetime.hpp +++ b/cpp/include/cudf/strings/convert/convert_datetime.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include #include #include @@ -90,7 +88,7 @@ std::unique_ptr to_timestamps( data_type timestamp_type, std::string_view format, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Verifies the given strings column can be parsed to timestamps using the provided format @@ -137,7 +135,7 @@ std::unique_ptr is_timestamp( strings_column_view const& input, std::string_view format, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a new strings column converting a timestamp column into @@ -251,7 +249,7 @@ 
std::unique_ptr from_timestamps( strings_column_view const& names = strings_column_view(column_view{ data_type{type_id::STRING}, 0, nullptr, nullptr, 0}), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_durations.hpp b/cpp/include/cudf/strings/convert/convert_durations.hpp index 8b69968a609..25184cbfd02 100644 --- a/cpp/include/cudf/strings/convert/convert_durations.hpp +++ b/cpp/include/cudf/strings/convert/convert_durations.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -78,7 +76,7 @@ std::unique_ptr to_durations( data_type duration_type, std::string_view format, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a new strings column converting a duration column into @@ -129,7 +127,7 @@ std::unique_ptr from_durations( column_view const& durations, std::string_view format = "%D days %H:%M:%S", rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_fixed_point.hpp b/cpp/include/cudf/strings/convert/convert_fixed_point.hpp index a9c5aea6343..6d5e94a8e02 100644 --- a/cpp/include/cudf/strings/convert/convert_fixed_point.hpp +++ b/cpp/include/cudf/strings/convert/convert_fixed_point.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace 
CUDF_EXPORT cudf { namespace strings { @@ -64,7 +62,7 @@ std::unique_ptr to_fixed_point( strings_column_view const& input, data_type output_type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a new strings column converting the fixed-point values @@ -94,7 +92,7 @@ std::unique_ptr to_fixed_point( std::unique_ptr from_fixed_point( column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a boolean column identifying strings in which all @@ -126,7 +124,7 @@ std::unique_ptr is_fixed_point( strings_column_view const& input, data_type decimal_type = data_type{type_id::DECIMAL64}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_floats.hpp b/cpp/include/cudf/strings/convert/convert_floats.hpp index 64e9bb776f4..52fb47df94f 100644 --- a/cpp/include/cudf/strings/convert/convert_floats.hpp +++ b/cpp/include/cudf/strings/convert/convert_floats.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -50,7 +48,7 @@ std::unique_ptr to_floats( strings_column_view const& strings, data_type output_type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a new 
strings column converting the float values from the @@ -73,7 +71,7 @@ std::unique_ptr to_floats( std::unique_ptr from_floats( column_view const& floats, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a boolean column identifying strings in which all @@ -99,7 +97,7 @@ std::unique_ptr from_floats( std::unique_ptr is_float( strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_integers.hpp b/cpp/include/cudf/strings/convert/convert_integers.hpp index 62eb1fdda4d..9aad32bfba4 100644 --- a/cpp/include/cudf/strings/convert/convert_integers.hpp +++ b/cpp/include/cudf/strings/convert/convert_integers.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -57,7 +55,7 @@ std::unique_ptr to_integers( strings_column_view const& input, data_type output_type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a new strings column converting the integer values from the @@ -78,7 +76,7 @@ std::unique_ptr to_integers( std::unique_ptr from_integers( column_view const& integers, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a boolean column identifying 
strings in which all @@ -107,7 +105,7 @@ std::unique_ptr from_integers( std::unique_ptr is_integer( strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a boolean column identifying strings in which all @@ -141,7 +139,7 @@ std::unique_ptr is_integer( strings_column_view const& input, data_type int_type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a new integer numeric column parsing hexadecimal values from the @@ -171,7 +169,7 @@ std::unique_ptr hex_to_integers( strings_column_view const& input, data_type output_type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a boolean column identifying strings in which all @@ -198,7 +196,7 @@ std::unique_ptr hex_to_integers( std::unique_ptr is_hex( strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a new strings column converting integer columns to hexadecimal @@ -231,7 +229,7 @@ std::unique_ptr is_hex( std::unique_ptr integers_to_hex( column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } 
// namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_ipv4.hpp b/cpp/include/cudf/strings/convert/convert_ipv4.hpp index 04a04907c12..2dd82554cee 100644 --- a/cpp/include/cudf/strings/convert/convert_ipv4.hpp +++ b/cpp/include/cudf/strings/convert/convert_ipv4.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -44,20 +42,17 @@ namespace strings { * No checking is done on the format. If a string is not in IPv4 format, the resulting * integer is undefined. * - * The resulting 32-bit integer is placed in an int64_t to avoid setting the sign-bit - * in an int32_t type. This could be changed if cudf supported a UINT32 type in the future. - * * Any null entries will result in corresponding null entries in the output column. * * @param input Strings instance for this operation * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return New INT64 column converted from strings + * @return New UINT32 column converted from strings */ std::unique_ptr ipv4_to_integers( strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Converts integers into IPv4 addresses as strings. @@ -68,13 +63,11 @@ std::unique_ptr ipv4_to_integers( * Each input integer is dissected into four integers by dividing the input into 8-bit sections. * These sub-integers are then converted into [0-9] characters and placed between '.' characters. * - * No checking is done on the input integer value. Only the lower 32-bits are used. - * * Any null entries will result in corresponding null entries in the output column. * - * @throw cudf::logic_error if the input column is not INT64 type. 
+ * @throw cudf::logic_error if the input column is not UINT32 type. * - * @param integers Integer (INT64) column to convert + * @param integers Integer (UINT32) column to convert * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return New strings column @@ -82,7 +75,7 @@ std::unique_ptr ipv4_to_integers( std::unique_ptr integers_to_ipv4( column_view const& integers, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a boolean column identifying strings in which all @@ -109,7 +102,7 @@ std::unique_ptr integers_to_ipv4( std::unique_ptr is_ipv4( strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_lists.hpp b/cpp/include/cudf/strings/convert/convert_lists.hpp index 85b67907228..80d0511fc1f 100644 --- a/cpp/include/cudf/strings/convert/convert_lists.hpp +++ b/cpp/include/cudf/strings/convert/convert_lists.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -64,7 +62,7 @@ std::unique_ptr format_list_column( strings_column_view const& separators = strings_column_view(column_view{ data_type{type_id::STRING}, 0, nullptr, nullptr, 0}), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group 
} // namespace strings diff --git a/cpp/include/cudf/strings/convert/convert_urls.hpp b/cpp/include/cudf/strings/convert/convert_urls.hpp index a42a5cd2407..d6e87f9d543 100644 --- a/cpp/include/cudf/strings/convert/convert_urls.hpp +++ b/cpp/include/cudf/strings/convert/convert_urls.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -48,7 +46,7 @@ namespace strings { std::unique_ptr url_encode( strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Encodes each string using URL encoding. @@ -71,7 +69,7 @@ std::unique_ptr url_encode( std::unique_ptr url_decode( strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/detail/combine.hpp b/cpp/include/cudf/strings/detail/combine.hpp index 962191eae6a..31698457048 100644 --- a/cpp/include/cudf/strings/detail/combine.hpp +++ b/cpp/include/cudf/strings/detail/combine.hpp @@ -22,9 +22,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace strings::detail { diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp index e038102ab1f..75762e61afe 100644 --- a/cpp/include/cudf/strings/detail/concatenate.hpp +++ b/cpp/include/cudf/strings/detail/concatenate.hpp @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include namespace CUDF_EXPORT cudf { namespace strings::detail { diff --git a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh 
b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh index 5f51da967d3..8440805960e 100644 --- a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh +++ b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh @@ -17,8 +17,8 @@ #include +#include #include -#include #include namespace cudf { @@ -88,7 +88,7 @@ __device__ inline thrust::pair parse_integer( * @return Integer value of the exponent */ template -__device__ thrust::optional parse_exponent(char const* iter, char const* iter_end) +__device__ cuda::std::optional parse_exponent(char const* iter, char const* iter_end) { constexpr uint32_t exponent_max = static_cast(std::numeric_limits::max()); @@ -105,12 +105,12 @@ __device__ thrust::optional parse_exponent(char const* iter, char const while (iter < iter_end) { auto const ch = *iter++; if (ch < '0' || ch > '9') { - if (check_only) { return thrust::nullopt; } + if (check_only) { return cuda::std::nullopt; } break; } uint32_t exp_check = static_cast(exp_ten * 10) + static_cast(ch - '0'); - if (check_only && (exp_check > exponent_max)) { return thrust::nullopt; } // check overflow + if (check_only && (exp_check > exponent_max)) { return cuda::std::nullopt; } // check overflow exp_ten = static_cast(exp_check); } diff --git a/cpp/include/cudf/strings/detail/converters.hpp b/cpp/include/cudf/strings/detail/converters.hpp index 73a97499293..3880b8abc32 100644 --- a/cpp/include/cudf/strings/detail/converters.hpp +++ b/cpp/include/cudf/strings/detail/converters.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace strings::detail { diff --git a/cpp/include/cudf/strings/detail/copy_if_else.cuh b/cpp/include/cudf/strings/detail/copy_if_else.cuh index 4db7651330b..6b025e8659d 100644 --- a/cpp/include/cudf/strings/detail/copy_if_else.cuh +++ b/cpp/include/cudf/strings/detail/copy_if_else.cuh @@ -18,15 +18,15 @@ #include #include #include +#include #include #include #include -#include #include +#include 
#include -#include #include namespace cudf { @@ -41,9 +41,9 @@ namespace detail { * ``` * * @tparam StringIterLeft A random access iterator whose value_type is - * `thrust::optional` where the `optional` has a value iff the element is valid. + * `cuda::std::optional` where the `optional` has a value iff the element is valid. * @tparam StringIterRight A random access iterator whose value_type is - * `thrust::optional` where the `optional` has a value iff the element is valid. + * `cuda::std::optional` where the `optional` has a value iff the element is valid. * @tparam Filter Functor that takes an index and returns a boolean. * * @param lhs_begin Start of first set of data. Used when `filter_fn` returns true. diff --git a/cpp/include/cudf/strings/detail/copy_range.hpp b/cpp/include/cudf/strings/detail/copy_range.hpp index 71dcf9edaf3..33ac74da97f 100644 --- a/cpp/include/cudf/strings/detail/copy_range.hpp +++ b/cpp/include/cudf/strings/detail/copy_range.hpp @@ -17,9 +17,9 @@ #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace strings::detail { diff --git a/cpp/include/cudf/strings/detail/copying.hpp b/cpp/include/cudf/strings/detail/copying.hpp index b4d3362359d..f97cc9f5b5d 100644 --- a/cpp/include/cudf/strings/detail/copying.hpp +++ b/cpp/include/cudf/strings/detail/copying.hpp @@ -20,9 +20,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace strings::detail { diff --git a/cpp/include/cudf/strings/detail/fill.hpp b/cpp/include/cudf/strings/detail/fill.hpp index 1a3ff2c9166..55508b0ac1b 100644 --- a/cpp/include/cudf/strings/detail/fill.hpp +++ b/cpp/include/cudf/strings/detail/fill.hpp @@ -20,9 +20,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace strings::detail { diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh index 4369de317b3..4216523df97 100644 --- 
a/cpp/include/cudf/strings/detail/gather.cuh +++ b/cpp/include/cudf/strings/detail/gather.cuh @@ -24,11 +24,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/include/cudf/strings/detail/merge.hpp b/cpp/include/cudf/strings/detail/merge.hpp index 0aa5c0c2899..92f0fe34576 100644 --- a/cpp/include/cudf/strings/detail/merge.hpp +++ b/cpp/include/cudf/strings/detail/merge.hpp @@ -21,6 +21,7 @@ #include #include +#include namespace CUDF_EXPORT cudf { namespace strings::detail { diff --git a/cpp/include/cudf/strings/detail/replace.hpp b/cpp/include/cudf/strings/detail/replace.hpp index ab092555c48..780a0f6a9f5 100644 --- a/cpp/include/cudf/strings/detail/replace.hpp +++ b/cpp/include/cudf/strings/detail/replace.hpp @@ -20,9 +20,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace strings::detail { diff --git a/cpp/include/cudf/strings/detail/scan.hpp b/cpp/include/cudf/strings/detail/scan.hpp index 4991fd633d5..71fbfadf9ec 100644 --- a/cpp/include/cudf/strings/detail/scan.hpp +++ b/cpp/include/cudf/strings/detail/scan.hpp @@ -17,9 +17,9 @@ #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace strings::detail { diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh index 87f0e7ae47c..e49d6dff40d 100644 --- a/cpp/include/cudf/strings/detail/scatter.cuh +++ b/cpp/include/cudf/strings/detail/scatter.cuh @@ -19,12 +19,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -70,7 +70,7 @@ std::unique_ptr scatter(SourceIterator begin, // create vector of string_view's to scatter into rmm::device_uvector target_vector = - create_string_vector_from_column(target, stream, rmm::mr::get_current_device_resource()); + create_string_vector_from_column(target, stream, cudf::get_current_device_resource_ref()); // this ensures empty strings are not 
mapped to nulls in the make_strings_column function auto const size = thrust::distance(begin, end); diff --git a/cpp/include/cudf/strings/detail/strings_children.cuh b/cpp/include/cudf/strings/detail/strings_children.cuh index 55b59dd4ff2..1283226879b 100644 --- a/cpp/include/cudf/strings/detail/strings_children.cuh +++ b/cpp/include/cudf/strings/detail/strings_children.cuh @@ -23,11 +23,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/include/cudf/strings/detail/strings_column_factories.cuh b/cpp/include/cudf/strings/detail/strings_column_factories.cuh index a3221038eed..6b1b453a752 100644 --- a/cpp/include/cudf/strings/detail/strings_column_factories.cuh +++ b/cpp/include/cudf/strings/detail/strings_column_factories.cuh @@ -22,10 +22,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp index 1fa505501d8..d276c5df7dc 100644 --- a/cpp/include/cudf/strings/detail/utilities.hpp +++ b/cpp/include/cudf/strings/detail/utilities.hpp @@ -19,11 +19,11 @@ #include #include #include +#include #include #include #include -#include namespace CUDF_EXPORT cudf { namespace strings::detail { diff --git a/cpp/include/cudf/strings/extract.hpp b/cpp/include/cudf/strings/extract.hpp index 2ef7308b802..f8bf93b77cf 100644 --- a/cpp/include/cudf/strings/extract.hpp +++ b/cpp/include/cudf/strings/extract.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -64,7 +62,7 @@ std::unique_ptr
extract( strings_column_view const& input, regex_program const& prog, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a lists column of strings where each string column row corresponds to the @@ -100,7 +98,7 @@ std::unique_ptr extract_all_record( strings_column_view const& input, regex_program const& prog, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/find.hpp b/cpp/include/cudf/strings/find.hpp index efba6da9454..e024b116a71 100644 --- a/cpp/include/cudf/strings/find.hpp +++ b/cpp/include/cudf/strings/find.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -59,7 +57,7 @@ std::unique_ptr find( size_type start = 0, size_type stop = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a column of character position values where the target @@ -90,7 +88,7 @@ std::unique_ptr rfind( size_type start = 0, size_type stop = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a column of character position values where the target @@ -117,7 +115,7 @@ std::unique_ptr find( strings_column_view const& target, size_type start = 0, rmm::cuda_stream_view stream = 
cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a column of boolean values for each string where true indicates @@ -138,7 +136,7 @@ std::unique_ptr contains( strings_column_view const& input, string_scalar const& target, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a column of boolean values for each string where true indicates @@ -163,7 +161,7 @@ std::unique_ptr contains( strings_column_view const& input, strings_column_view const& targets, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a column of boolean values for each string where true indicates @@ -185,7 +183,7 @@ std::unique_ptr starts_with( strings_column_view const& input, string_scalar const& target, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a column of boolean values for each string where true indicates @@ -211,7 +209,7 @@ std::unique_ptr starts_with( strings_column_view const& input, strings_column_view const& targets, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a column of boolean values for each string where true indicates @@ -233,7 +231,7 @@ std::unique_ptr ends_with( 
strings_column_view const& input, string_scalar const& target, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a column of boolean values for each string where true indicates @@ -259,7 +257,7 @@ std::unique_ptr ends_with( strings_column_view const& input, strings_column_view const& targets, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/strings/find_multiple.hpp b/cpp/include/cudf/strings/find_multiple.hpp index dea08308ff0..1fe446db8da 100644 --- a/cpp/include/cudf/strings/find_multiple.hpp +++ b/cpp/include/cudf/strings/find_multiple.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -59,7 +57,7 @@ std::unique_ptr find_multiple( strings_column_view const& input, strings_column_view const& targets, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/findall.hpp b/cpp/include/cudf/strings/findall.hpp index 26249b6842c..c6b9bc7e58a 100644 --- a/cpp/include/cudf/strings/findall.hpp +++ b/cpp/include/cudf/strings/findall.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -66,7 +64,7 @@ std::unique_ptr findall( strings_column_view const& input, regex_program const& prog, 
rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/padding.hpp b/cpp/include/cudf/strings/padding.hpp index 11e35f717ae..606a866cb8a 100644 --- a/cpp/include/cudf/strings/padding.hpp +++ b/cpp/include/cudf/strings/padding.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -62,7 +60,7 @@ std::unique_ptr pad( side_type side = side_type::RIGHT, std::string_view fill_char = " ", rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Add '0' as padding to the left of each string. @@ -92,7 +90,7 @@ std::unique_ptr zfill( strings_column_view const& input, size_type width, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/regex/flags.hpp b/cpp/include/cudf/strings/regex/flags.hpp index f7108129dee..4f3fc7086f2 100644 --- a/cpp/include/cudf/strings/regex/flags.hpp +++ b/cpp/include/cudf/strings/regex/flags.hpp @@ -35,10 +35,11 @@ namespace strings { * and to match the Python flag values. */ enum regex_flags : uint32_t { - DEFAULT = 0, ///< default - MULTILINE = 8, ///< the '^' and '$' honor new-line characters - DOTALL = 16, ///< the '.' 
matching includes new-line characters - ASCII = 256 ///< use only ASCII when matching built-in character classes + DEFAULT = 0, ///< default + MULTILINE = 8, ///< the '^' and '$' honor new-line characters + DOTALL = 16, ///< the '.' matching includes new-line characters + ASCII = 256, ///< use only ASCII when matching built-in character classes + EXT_NEWLINE = 512 ///< new-line matches extended characters }; /** @@ -74,6 +75,17 @@ constexpr bool is_ascii(regex_flags const f) return (f & regex_flags::ASCII) == regex_flags::ASCII; } +/** + * @brief Returns true if the given flags contain EXT_NEWLINE + * + * @param f Regex flags to check + * @return true if `f` includes EXT_NEWLINE + */ +constexpr bool is_ext_newline(regex_flags const f) +{ + return (f & regex_flags::EXT_NEWLINE) == regex_flags::EXT_NEWLINE; +} + /** * @brief Capture groups setting * diff --git a/cpp/include/cudf/strings/repeat_strings.hpp b/cpp/include/cudf/strings/repeat_strings.hpp index e160f75390b..af419d9501f 100644 --- a/cpp/include/cudf/strings/repeat_strings.hpp +++ b/cpp/include/cudf/strings/repeat_strings.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -61,7 +59,7 @@ std::unique_ptr repeat_string( string_scalar const& input, size_type repeat_times, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Repeat each string in the given strings column a given number of times @@ -92,7 +90,7 @@ std::unique_ptr repeat_strings( strings_column_view const& input, size_type repeat_times, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Repeat each string in the given strings 
column by the numbers of times given in another @@ -129,7 +127,7 @@ std::unique_ptr repeat_strings( strings_column_view const& input, column_view const& repeat_times, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/replace.hpp b/cpp/include/cudf/strings/replace.hpp index 5b4ffb98f99..c7a87bbb0d0 100644 --- a/cpp/include/cudf/strings/replace.hpp +++ b/cpp/include/cudf/strings/replace.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -70,7 +68,7 @@ std::unique_ptr replace( string_scalar const& repl, cudf::size_type maxrepl = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief This function replaces each string in the column with the provided @@ -112,7 +110,7 @@ std::unique_ptr replace_slice( size_type start = 0, size_type stop = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Replaces substrings matching a list of targets with the corresponding @@ -158,19 +156,7 @@ std::unique_ptr replace_multiple( strings_column_view const& targets, strings_column_view const& repls, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); - -/** - * @copydoc cudf::strings::replace_multiple - * - * @deprecated since 24.08 - */ -[[deprecated]] std::unique_ptr replace( - strings_column_view const& 
input, - strings_column_view const& targets, - strings_column_view const& repls, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp index 6b487072cb2..4a58142cbe6 100644 --- a/cpp/include/cudf/strings/replace_re.hpp +++ b/cpp/include/cudf/strings/replace_re.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include #include @@ -60,7 +58,7 @@ std::unique_ptr replace_re( string_scalar const& replacement = string_scalar(""), std::optional max_replace_count = std::nullopt, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief For each string, replaces any character sequence matching the given patterns @@ -84,7 +82,7 @@ std::unique_ptr replace_re( strings_column_view const& replacements, regex_flags const flags = regex_flags::DEFAULT, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief For each string, replaces any character sequence matching the given regex @@ -109,7 +107,7 @@ std::unique_ptr replace_with_backrefs( regex_program const& prog, std::string_view replacement, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); } // namespace strings } // namespace CUDF_EXPORT cudf diff --git 
a/cpp/include/cudf/strings/reverse.hpp b/cpp/include/cudf/strings/reverse.hpp index fbda2e5fe7c..f9ab34373df 100644 --- a/cpp/include/cudf/strings/reverse.hpp +++ b/cpp/include/cudf/strings/reverse.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -49,7 +47,7 @@ namespace strings { std::unique_ptr reverse( strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/slice.hpp b/cpp/include/cudf/strings/slice.hpp index b0da6976207..754bee4b1f0 100644 --- a/cpp/include/cudf/strings/slice.hpp +++ b/cpp/include/cudf/strings/slice.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -65,7 +63,7 @@ std::unique_ptr slice_strings( numeric_scalar const& stop = numeric_scalar(0, false), numeric_scalar const& step = numeric_scalar(1), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a new strings column that contains substrings of the @@ -110,7 +108,7 @@ std::unique_ptr slice_strings( column_view const& starts, column_view const& stops, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/split/partition.hpp b/cpp/include/cudf/strings/split/partition.hpp index 8f5ae752417..92573a665c9 100644 --- 
a/cpp/include/cudf/strings/split/partition.hpp +++ b/cpp/include/cudf/strings/split/partition.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -63,7 +61,7 @@ std::unique_ptr
partition( strings_column_view const& input, string_scalar const& delimiter = string_scalar(""), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a set of 3 columns by splitting each string using the @@ -97,7 +95,7 @@ std::unique_ptr
rpartition( strings_column_view const& input, string_scalar const& delimiter = string_scalar(""), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/split/split.hpp b/cpp/include/cudf/strings/split/split.hpp index ca371d7abd1..026192d4a0b 100644 --- a/cpp/include/cudf/strings/split/split.hpp +++ b/cpp/include/cudf/strings/split/split.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -58,7 +56,7 @@ std::unique_ptr
split( string_scalar const& delimiter = string_scalar(""), size_type maxsplit = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a list of columns by splitting each string using the @@ -88,7 +86,7 @@ std::unique_ptr
rsplit( string_scalar const& delimiter = string_scalar(""), size_type maxsplit = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Splits individual strings elements into a list of strings. @@ -162,7 +160,7 @@ std::unique_ptr split_record( string_scalar const& delimiter = string_scalar(""), size_type maxsplit = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Splits individual strings elements into a list of strings starting @@ -241,7 +239,7 @@ std::unique_ptr rsplit_record( string_scalar const& delimiter = string_scalar(""), size_type maxsplit = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/split/split_re.hpp b/cpp/include/cudf/strings/split/split_re.hpp index 96ef0b6e830..ce376ab93cf 100644 --- a/cpp/include/cudf/strings/split/split_re.hpp +++ b/cpp/include/cudf/strings/split/split_re.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -85,7 +83,7 @@ std::unique_ptr
split_re( regex_program const& prog, size_type maxsplit = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Splits strings elements into a table of strings columns using a @@ -141,7 +139,7 @@ std::unique_ptr
rsplit_re( regex_program const& prog, size_type maxsplit = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Splits strings elements into a list column of strings @@ -199,7 +197,7 @@ std::unique_ptr split_record_re( regex_program const& prog, size_type maxsplit = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Splits strings elements into a list column of strings using the given @@ -259,7 +257,7 @@ std::unique_ptr rsplit_record_re( regex_program const& prog, size_type maxsplit = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh index abb26d7ccb4..14695c3bb27 100644 --- a/cpp/include/cudf/strings/string_view.cuh +++ b/cpp/include/cudf/strings/string_view.cuh @@ -191,9 +191,14 @@ __device__ inline string_view::const_iterator& string_view::const_iterator::oper __device__ inline string_view::const_iterator& string_view::const_iterator::operator--() { - if (byte_pos > 0) - while (strings::detail::bytes_in_utf8_byte(static_cast(p[--byte_pos])) == 0) - ; + if (byte_pos > 0) { + if (byte_pos == char_pos) { + --byte_pos; + } else { + while (strings::detail::bytes_in_utf8_byte(static_cast(p[--byte_pos])) == 0) + ; + } + } --char_pos; return *this; } diff --git a/cpp/include/cudf/strings/strip.hpp b/cpp/include/cudf/strings/strip.hpp index 4cfba59c72c..396940dbb30 100644 --- 
a/cpp/include/cudf/strings/strip.hpp +++ b/cpp/include/cudf/strings/strip.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -67,7 +65,7 @@ std::unique_ptr strip( side_type side = side_type::BOTH, string_scalar const& to_strip = string_scalar(""), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/translate.hpp b/cpp/include/cudf/strings/translate.hpp index 531753f4a8c..aa69a2e5679 100644 --- a/cpp/include/cudf/strings/translate.hpp +++ b/cpp/include/cudf/strings/translate.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include #include @@ -58,7 +56,7 @@ std::unique_ptr translate( strings_column_view const& input, std::vector> const& chars_table, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Removes or keeps the specified character ranges in cudf::strings::filter_characters @@ -105,7 +103,7 @@ std::unique_ptr filter_characters( filter_type keep_characters = filter_type::KEEP, string_scalar const& replacement = string_scalar(""), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/strings/utilities.hpp b/cpp/include/cudf/strings/utilities.hpp index ae445282382..999fff0f4c8 100644 --- a/cpp/include/cudf/strings/utilities.hpp +++ b/cpp/include/cudf/strings/utilities.hpp @@ 
-17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -35,7 +33,7 @@ namespace strings { rmm::device_uvector create_string_vector_from_column( cudf::strings_column_view const strings, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Return the threshold size for a strings column to use int64 offsets diff --git a/cpp/include/cudf/strings/wrap.hpp b/cpp/include/cudf/strings/wrap.hpp index 465a9d15d00..96ae2fb0582 100644 --- a/cpp/include/cudf/strings/wrap.hpp +++ b/cpp/include/cudf/strings/wrap.hpp @@ -17,9 +17,7 @@ #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { namespace strings { @@ -68,7 +66,7 @@ std::unique_ptr wrap( strings_column_view const& input, size_type width, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings diff --git a/cpp/include/cudf/structs/detail/concatenate.hpp b/cpp/include/cudf/structs/detail/concatenate.hpp index 16be868af52..96964eac31f 100644 --- a/cpp/include/cudf/structs/detail/concatenate.hpp +++ b/cpp/include/cudf/structs/detail/concatenate.hpp @@ -19,10 +19,9 @@ #include #include #include +#include #include -#include - namespace CUDF_EXPORT cudf { namespace structs::detail { diff --git a/cpp/include/cudf/structs/detail/scan.hpp b/cpp/include/cudf/structs/detail/scan.hpp index 6121f63d42f..e9e721c3335 100644 --- a/cpp/include/cudf/structs/detail/scan.hpp +++ b/cpp/include/cudf/structs/detail/scan.hpp @@ -18,9 +18,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT cudf { namespace structs::detail { diff --git 
a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh index f05e5f4ca5c..3f33c70c29a 100644 --- a/cpp/include/cudf/table/experimental/row_operators.cuh +++ b/cpp/include/cudf/table/experimental/row_operators.cuh @@ -211,7 +211,7 @@ struct sorting_physical_element_comparator { } }; -using optional_dremel_view = thrust::optional; +using optional_dremel_view = cuda::std::optional; // The has_nested_columns template parameter of the device_row_comparator is // necessary to help the compiler optimize our code. Without it, the list and @@ -223,12 +223,12 @@ using optional_dremel_view = thrust::optional; // std::optional> in the // preprocessed_table/device_row_comparator (which is always valid when // has_nested_columns and is otherwise invalid) that is then unpacked to a -// thrust::optional at the element_comparator level (which +// cuda::std::optional at the element_comparator level (which // is always valid for a list column and otherwise invalid). We cannot use an // additional template parameter for the element_comparator on a per-column // basis because we cannot conditionally define dremel_device_view member // variables without jumping through extra hoops with inheritance, so the -// thrust::optional member must be an optional rather than +// cuda::std::optional member must be an optional rather than // a raw dremel_device_view. /** * @brief Computes the lexicographic comparison between 2 rows. 
diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp index be2af7ac653..762131a174f 100644 --- a/cpp/include/cudf/table/table.hpp +++ b/cpp/include/cudf/table/table.hpp @@ -18,10 +18,9 @@ #include #include #include +#include #include -#include -#include #include #include @@ -58,7 +57,7 @@ class table { */ explicit table(table const& other, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Moves the contents from a vector of `unique_ptr`s to columns to * construct a new table. @@ -77,7 +76,7 @@ class table { */ table(table_view view, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the number of columns in the table diff --git a/cpp/include/cudf/timezone.hpp b/cpp/include/cudf/timezone.hpp index 8329c64e24f..f6de1056c24 100644 --- a/cpp/include/cudf/timezone.hpp +++ b/cpp/include/cudf/timezone.hpp @@ -15,10 +15,11 @@ */ #pragma once +#include #include +#include -#include -#include +#include #include #include @@ -45,6 +46,7 @@ static constexpr uint32_t solar_cycle_entry_count = 2 * solar_cycle_years; * * @param tzif_dir The directory where the TZif files are located * @param timezone_name standard timezone name (for example, "America/Los_Angeles") + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory. * * @return The transition table for the given timezone @@ -52,6 +54,7 @@ static constexpr uint32_t solar_cycle_entry_count = 2 * solar_cycle_years; std::unique_ptr
make_timezone_transition_table( std::optional tzif_dir, std::string_view timezone_name, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/transform.hpp b/cpp/include/cudf/transform.hpp index adc5bdb2af8..82b8bee1acf 100644 --- a/cpp/include/cudf/transform.hpp +++ b/cpp/include/cudf/transform.hpp @@ -19,9 +19,7 @@ #include #include #include - -#include -#include +#include #include @@ -47,6 +45,7 @@ namespace CUDF_EXPORT cudf { * @param unary_udf The PTX/CUDA string of the unary function to apply * @param output_type The output type that is compatible with the output type in the UDF * @param is_ptx true: the UDF is treated as PTX code; false: the UDF is treated as CUDA code + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return The column resulting from applying the unary function to * every element of the input @@ -56,7 +55,8 @@ std::unique_ptr transform( std::string const& unary_udf, data_type output_type, bool is_ptx, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Creates a null_mask from `input` by converting `NaN` to null and @@ -65,13 +65,15 @@ std::unique_ptr transform( * @throws cudf::logic_error if `input.type()` is a non-floating type * * @param input An immutable view of the input column of floating-point type + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned bitmask * @return A pair containing a `device_buffer` with the new 
bitmask and it's * null count obtained by replacing `NaN` in `input` with null. */ std::pair, size_type> nans_to_nulls( column_view const& input, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Compute a new column by evaluating an expression tree on a table. @@ -83,13 +85,15 @@ std::pair, size_type> nans_to_nulls( * * @param table The table used for expression evaluation * @param expr The root of the expression tree + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource * @return Output column */ std::unique_ptr compute_column( table_view const& table, ast::expression const& expr, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Creates a bitmask from a column of boolean elements. 
@@ -101,6 +105,7 @@ std::unique_ptr compute_column( * @throws cudf::logic_error if `input.type()` is a non-boolean type * * @param input Boolean elements to convert to a bitmask + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned bitmask * @return A pair containing a `device_buffer` with the new bitmask and it's * null count obtained from input considering `true` represent `valid`/`1` and @@ -108,7 +113,8 @@ std::unique_ptr compute_column( */ std::pair, cudf::size_type> bools_to_mask( column_view const& input, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Encode the rows of the given table as integers @@ -130,13 +136,15 @@ std::pair, cudf::size_type> bools_to_mask( * @endcode * * @param input Table containing values to be encoded + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory * @return A pair containing the distinct row of the input table in sorter order, * and a column of integer indices representing the encoded rows. 
*/ std::pair, std::unique_ptr> encode( cudf::table_view const& input, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Encodes `input` by generating a new column for each value in `categories` indicating the @@ -162,13 +170,15 @@ std::pair, std::unique_ptr> encode( * * @param input Column containing values to be encoded * @param categories Column containing categories + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned table's device memory * @return A pair containing the owner to all encoded data and a table view into the data */ std::pair, table_view> one_hot_encode( column_view const& input, column_view const& categories, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Creates a boolean column from given bitmask. 
@@ -188,6 +198,7 @@ std::pair, table_view> one_hot_encode( * @param bitmask A device pointer to the bitmask which needs to be converted * @param begin_bit position of the bit from which the conversion should start * @param end_bit position of the bit before which the conversion should stop + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned columns' device memory * @return A boolean column representing the given mask from [begin_bit, end_bit) */ @@ -195,7 +206,8 @@ std::unique_ptr mask_to_bools( bitmask_type const* bitmask, size_type begin_bit, size_type end_bit, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns an approximate cumulative size in bits of all columns in the `table_view` for @@ -219,11 +231,14 @@ std::unique_ptr mask_to_bools( * row_bit_count(column(x)) >= row_bit_count(gather(column(x))) * * @param t The table view to perform the computation on + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned columns' device memory * @return A 32-bit integer column containing the per-row bit counts */ std::unique_ptr row_bit_count( - table_view const& t, rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + table_view const& t, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns an approximate cumulative size in bits of all columns in the `table_view` for @@ -240,13 +255,15 @@ std::unique_ptr row_bit_count( * * @param t The table view to perform the computation on * @param segment_length The number of rows in each segment for which the total size is 
computed + * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned columns' device memory * @return A 32-bit integer column containing the bit counts for each segment of rows */ std::unique_ptr segmented_row_bit_count( table_view const& t, size_type segment_length, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/transpose.hpp b/cpp/include/cudf/transpose.hpp index f4433c46a06..8b680071e71 100644 --- a/cpp/include/cudf/transpose.hpp +++ b/cpp/include/cudf/transpose.hpp @@ -18,9 +18,7 @@ #include #include #include - -#include -#include +#include namespace CUDF_EXPORT cudf { /** @@ -46,7 +44,7 @@ namespace CUDF_EXPORT cudf { */ std::pair, table_view> transpose( table_view const& input, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp index 55f4c1f5a23..53e0f3a15d2 100644 --- a/cpp/include/cudf/unary.hpp +++ b/cpp/include/cudf/unary.hpp @@ -21,11 +21,9 @@ #include #include #include +#include #include -#include -#include - #include namespace CUDF_EXPORT cudf { @@ -159,7 +157,7 @@ std::unique_ptr unary_operation( cudf::column_view const& input, cudf::unary_operator op, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Creates a column of `type_id::BOOL8` elements where for every element in `input` `true` @@ -175,7 
+173,7 @@ std::unique_ptr unary_operation( std::unique_ptr is_null( cudf::column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Creates a column of `type_id::BOOL8` elements where for every element in `input` `true` @@ -191,7 +189,7 @@ std::unique_ptr is_null( std::unique_ptr is_valid( cudf::column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Casts data from dtype specified in input to dtype specified in output. @@ -210,7 +208,7 @@ std::unique_ptr cast( column_view const& input, data_type out_type, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Check if a cast between two datatypes is supported. 
@@ -238,7 +236,7 @@ bool is_supported_cast(data_type from, data_type to) noexcept; std::unique_ptr is_nan( cudf::column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Creates a column of `type_id::BOOL8` elements indicating the absence of `NaN` values @@ -257,7 +255,7 @@ std::unique_ptr is_nan( std::unique_ptr is_not_nan( cudf::column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT cudf diff --git a/cpp/include/cudf/utilities/memory_resource.hpp b/cpp/include/cudf/utilities/memory_resource.hpp new file mode 100644 index 00000000000..b562574fd79 --- /dev/null +++ b/cpp/include/cudf/utilities/memory_resource.hpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include +#include + +namespace cudf { + +/** + * @addtogroup memory_resource + * @{ + * @file + */ + +/** + * @brief Get the current device memory resource. + * + * @return The current device memory resource. 
+ */ +inline rmm::mr::device_memory_resource* get_current_device_resource() +{ + return rmm::mr::get_current_device_resource(); +} + +/** + * @brief Get the current device memory resource reference. + * + * @return The current device memory resource reference. + */ +inline rmm::device_async_resource_ref get_current_device_resource_ref() +{ + // For now, match current behavior which is to return current resource pointer + return rmm::mr::get_current_device_resource(); +} + +/** + * @brief Set the current device memory resource. + * + * @param mr The new device memory resource. + * @return The previous device memory resource. + */ +inline rmm::mr::device_memory_resource* set_current_device_resource( + rmm::mr::device_memory_resource* mr) +{ + return rmm::mr::set_current_device_resource(mr); +} + +/** + * @brief Set the current device memory resource reference. + * + * @param mr The new device memory resource reference. + * @return The previous device memory resource reference. + */ +inline rmm::device_async_resource_ref set_current_device_resource_ref( + rmm::device_async_resource_ref mr) +{ + return rmm::mr::set_current_device_resource_ref(mr); +} + +/** + * @brief Reset the current device memory resource reference to the initial resource. + * + * @return The previous device memory resource reference. 
+ */ +inline rmm::device_async_resource_ref reset_current_device_resource_ref() +{ + return rmm::mr::reset_current_device_resource_ref(); +} + +/** @} */ // end of group +} // namespace cudf diff --git a/cpp/include/cudf/utilities/pinned_memory.hpp b/cpp/include/cudf/utilities/pinned_memory.hpp index 623a033698f..2cab0aa363e 100644 --- a/cpp/include/cudf/utilities/pinned_memory.hpp +++ b/cpp/include/cudf/utilities/pinned_memory.hpp @@ -17,8 +17,7 @@ #pragma once #include - -#include +#include #include diff --git a/cpp/include/cudf/utilities/type_checks.hpp b/cpp/include/cudf/utilities/type_checks.hpp index 4fcbca09d17..aeb5db57830 100644 --- a/cpp/include/cudf/utilities/type_checks.hpp +++ b/cpp/include/cudf/utilities/type_checks.hpp @@ -22,25 +22,6 @@ namespace CUDF_EXPORT cudf { -/** - * @brief Compare the types of two `column_view`s - * - * @deprecated Since 24.06. Use cudf::have_same_types instead. - * - * This function returns true if the type of `lhs` equals that of `rhs`. - * - For fixed point types, the scale is compared. - * - For dictionary types, the type of the keys are compared if both are - * non-empty columns. - * - For lists types, the type of child columns are compared recursively. - * - For struct types, the type of each field are compared in order. - * - For all other types, the `id` of `data_type` is compared. 
- * - * @param lhs The first `column_view` to compare - * @param rhs The second `column_view` to compare - * @return true if column types match - */ -[[deprecated]] bool column_types_equal(column_view const& lhs, column_view const& rhs); - /** * @brief Compare the type IDs of two `column_view`s * diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index 04bd51e9aa3..7b86f971cae 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -20,11 +20,10 @@ #include #include +#include #include #include -#include -#include namespace CUDF_EXPORT cudf { namespace test { @@ -38,7 +37,7 @@ namespace test { * ``` */ class BaseFixture : public ::testing::Test { - rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref _mr{cudf::get_current_device_resource_ref()}; public: /** @@ -59,7 +58,7 @@ class BaseFixture : public ::testing::Test { */ template class BaseFixtureWithParam : public ::testing::TestWithParam { - rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref _mr{cudf::get_current_device_resource_ref()}; public: /** diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp index 4e504ec1d30..6206c1311d2 100644 --- a/cpp/include/cudf_test/column_wrapper.hpp +++ b/cpp/include/cudf_test/column_wrapper.hpp @@ -33,11 +33,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -771,10 +771,10 @@ class strings_column_wrapper : public detail::column_wrapper { auto all_valid = thrust::make_constant_iterator(true); auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, all_valid); auto d_chars = cudf::detail::make_device_uvector_async( - chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + chars, cudf::test::get_default_stream(), 
cudf::get_current_device_resource_ref()); auto d_offsets = std::make_unique( cudf::detail::make_device_uvector_sync( - offsets, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()), + offsets, cudf::test::get_default_stream(), cudf::get_current_device_resource_ref()), rmm::device_buffer{}, 0); wrapped = @@ -821,14 +821,14 @@ class strings_column_wrapper : public detail::column_wrapper { auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v); auto [null_mask, null_count] = detail::make_null_mask_vector(v, v + num_strings); auto d_chars = cudf::detail::make_device_uvector_async( - chars, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + chars, cudf::test::get_default_stream(), cudf::get_current_device_resource_ref()); auto d_offsets = std::make_unique( cudf::detail::make_device_uvector_async( - offsets, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()), + offsets, cudf::test::get_default_stream(), cudf::get_current_device_resource_ref()), rmm::device_buffer{}, 0); auto d_bitmask = cudf::detail::make_device_uvector_sync( - null_mask, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + null_mask, cudf::test::get_default_stream(), cudf::get_current_device_resource_ref()); wrapped = cudf::make_strings_column( num_strings, std::move(d_offsets), d_chars.release(), null_count, d_bitmask.release()); } @@ -1337,7 +1337,7 @@ class lists_column_wrapper : public detail::column_wrapper { lists_column_wrapper(std::initializer_list elements) : column_wrapper{} { build_from_non_nested( - std::move(cudf::test::fixed_width_column_wrapper(elements).release())); + cudf::test::fixed_width_column_wrapper(elements).release()); } /** @@ -1361,7 +1361,7 @@ class lists_column_wrapper : public detail::column_wrapper { lists_column_wrapper(InputIterator begin, InputIterator end) : column_wrapper{} { build_from_non_nested( - std::move(cudf::test::fixed_width_column_wrapper(begin, 
end).release())); + cudf::test::fixed_width_column_wrapper(begin, end).release()); } /** @@ -1386,7 +1386,7 @@ class lists_column_wrapper : public detail::column_wrapper { : column_wrapper{} { build_from_non_nested( - std::move(cudf::test::fixed_width_column_wrapper(elements, v).release())); + cudf::test::fixed_width_column_wrapper(elements, v).release()); } /** @@ -1413,8 +1413,8 @@ class lists_column_wrapper : public detail::column_wrapper { lists_column_wrapper(InputIterator begin, InputIterator end, ValidityIterator v) : column_wrapper{} { - build_from_non_nested(std::move( - cudf::test::fixed_width_column_wrapper(begin, end, v).release())); + build_from_non_nested( + cudf::test::fixed_width_column_wrapper(begin, end, v).release()); } /** @@ -1435,7 +1435,7 @@ class lists_column_wrapper : public detail::column_wrapper { lists_column_wrapper(std::initializer_list elements) : column_wrapper{} { build_from_non_nested( - std::move(cudf::test::strings_column_wrapper(elements.begin(), elements.end()).release())); + cudf::test::strings_column_wrapper(elements.begin(), elements.end()).release()); } /** @@ -1460,7 +1460,7 @@ class lists_column_wrapper : public detail::column_wrapper { : column_wrapper{} { build_from_non_nested( - std::move(cudf::test::strings_column_wrapper(elements.begin(), elements.end(), v).release())); + cudf::test::strings_column_wrapper(elements.begin(), elements.end(), v).release()); } /** @@ -1651,7 +1651,7 @@ class lists_column_wrapper : public detail::column_wrapper { auto data = children.empty() ? 
cudf::empty_like(expected_hierarchy) : cudf::concatenate(children, cudf::test::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // increment depth depth = expected_depth + 1; @@ -1756,7 +1756,7 @@ class lists_column_wrapper : public detail::column_wrapper { lists_column_view(expected_hierarchy).child()), col.null_count(), cudf::copy_bitmask( - col, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()), + col, cudf::test::get_default_stream(), cudf::get_current_device_resource_ref()), cudf::test::get_default_stream()); } diff --git a/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp b/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp index 4f3c723d195..b4001babe24 100644 --- a/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp +++ b/cpp/include/cudf_test/stream_checking_resource_adaptor.hpp @@ -18,9 +18,9 @@ #include #include +#include #include -#include #include @@ -156,16 +156,4 @@ class stream_checking_resource_adaptor final : public rmm::mr::device_memory_res // cudf::test::get_default_stream() is observed. }; -/** - * @brief Convenience factory to return a `stream_checking_resource_adaptor` around the - * upstream resource `upstream`. 
- * - * @param upstream Reference to the upstream resource - */ -inline stream_checking_resource_adaptor make_stream_checking_resource_adaptor( - rmm::device_async_resource_ref upstream, bool error_on_invalid_stream, bool check_default_stream) -{ - return stream_checking_resource_adaptor{upstream, error_on_invalid_stream, check_default_stream}; -} - } // namespace cudf::test diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh index 5fd2403b0f2..1758790cd64 100644 --- a/cpp/include/cudf_test/tdigest_utilities.cuh +++ b/cpp/include/cudf_test/tdigest_utilities.cuh @@ -24,9 +24,9 @@ #include #include #include +#include #include -#include #include #include @@ -171,7 +171,7 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv, thrust::host_vector> h_spans; h_spans.push_back({input_values.begin(), static_cast(input_values.size())}); auto spans = cudf::detail::make_device_uvector_async( - h_spans, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + h_spans, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto expected_min = cudf::make_fixed_width_column( data_type{type_id::FLOAT64}, spans.size(), mask_state::UNALLOCATED); @@ -271,7 +271,7 @@ void tdigest_simple_all_nulls_aggregation(Func op) // NOTE: an empty tdigest column still has 1 row. 
auto expected = cudf::tdigest::detail::make_empty_tdigest_column( - cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::get_default_stream(), cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected); } @@ -562,12 +562,12 @@ template void tdigest_merge_empty(MergeFunc merge_op) { // 3 empty tdigests all in the same group - auto a = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); - auto b = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); - auto c = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + auto a = cudf::tdigest::detail::make_empty_tdigest_column( + cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + auto b = cudf::tdigest::detail::make_empty_tdigest_column( + cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + auto c = cudf::tdigest::detail::make_empty_tdigest_column( + cudf::get_default_stream(), cudf::get_current_device_resource_ref()); std::vector cols; cols.push_back(*a); cols.push_back(*b); @@ -578,7 +578,7 @@ void tdigest_merge_empty(MergeFunc merge_op) auto result = merge_op(*values, delta); auto expected = cudf::tdigest::detail::make_empty_tdigest_column( - cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::get_default_stream(), cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *result); } diff --git a/cpp/include/cudf_test/testing_main.hpp b/cpp/include/cudf_test/testing_main.hpp index 9866253a9f8..272c91133f8 100644 --- a/cpp/include/cudf_test/testing_main.hpp +++ b/cpp/include/cudf_test/testing_main.hpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -30,7 +31,6 @@ #include #include #include -#include #include namespace CUDF_EXPORT cudf { @@ -161,7 +161,7 @@ 
inline auto make_memory_resource_adaptor(cxxopts::ParseResult const& cmd_opts) { auto const rmm_mode = cmd_opts["rmm_mode"].as(); auto resource = cudf::test::create_memory_resource(rmm_mode); - rmm::mr::set_current_device_resource(resource.get()); + cudf::set_current_device_resource(resource.get()); return resource; } @@ -178,15 +178,15 @@ inline auto make_memory_resource_adaptor(cxxopts::ParseResult const& cmd_opts) */ inline auto make_stream_mode_adaptor(cxxopts::ParseResult const& cmd_opts) { - auto resource = rmm::mr::get_current_device_resource(); + auto resource = cudf::get_current_device_resource_ref(); auto const stream_mode = cmd_opts["stream_mode"].as(); auto const stream_error_mode = cmd_opts["stream_error_mode"].as(); auto const error_on_invalid_stream = (stream_error_mode == "error"); auto const check_default_stream = (stream_mode == "new_cudf_default"); - auto adaptor = cudf::test::make_stream_checking_resource_adaptor( + auto adaptor = cudf::test::stream_checking_resource_adaptor( resource, error_on_invalid_stream, check_default_stream); if ((stream_mode == "new_cudf_default") || (stream_mode == "new_testing_default")) { - rmm::mr::set_current_device_resource(&adaptor); + cudf::set_current_device_resource(&adaptor); } return adaptor; } diff --git a/cpp/include/doxygen_groups.h b/cpp/include/doxygen_groups.h index 7c395ffee42..5f3e7efbbfe 100644 --- a/cpp/include/doxygen_groups.h +++ b/cpp/include/doxygen_groups.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -30,6 +30,7 @@ /** * @defgroup default_stream Default Stream + * @defgroup memory_resource Memory Resource Management * @defgroup cudf_classes Classes * @{ * @defgroup column_classes Column diff --git a/cpp/include/nvtext/byte_pair_encoding.hpp b/cpp/include/nvtext/byte_pair_encoding.hpp index 6559933f696..ab862df044d 100644 --- a/cpp/include/nvtext/byte_pair_encoding.hpp +++ b/cpp/include/nvtext/byte_pair_encoding.hpp @@ -21,8 +21,7 @@ #include #include #include - -#include +#include namespace CUDF_EXPORT nvtext { @@ -49,7 +48,7 @@ struct bpe_merge_pairs { */ bpe_merge_pairs(std::unique_ptr&& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Construct a new bpe merge pairs object @@ -60,7 +59,7 @@ struct bpe_merge_pairs { */ bpe_merge_pairs(cudf::strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); ~bpe_merge_pairs(); bpe_merge_pairs(); @@ -98,7 +97,7 @@ struct bpe_merge_pairs { std::unique_ptr load_merge_pairs( cudf::strings_column_view const& merge_pairs, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Byte pair encode the input strings. 
@@ -130,7 +129,7 @@ std::unique_ptr byte_pair_encoding( cudf::strings_column_view const& input, bpe_merge_pairs const& merges_pairs, cudf::string_scalar const& separator = cudf::string_scalar(" "), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/detail/generate_ngrams.hpp b/cpp/include/nvtext/detail/generate_ngrams.hpp index 7c49421560d..ae48fed4e79 100644 --- a/cpp/include/nvtext/detail/generate_ngrams.hpp +++ b/cpp/include/nvtext/detail/generate_ngrams.hpp @@ -15,10 +15,11 @@ */ #pragma once +#include + #include #include -#include namespace CUDF_EXPORT nvtext { namespace detail { diff --git a/cpp/include/nvtext/detail/load_hash_file.hpp b/cpp/include/nvtext/detail/load_hash_file.hpp index 438a4a9afdd..1334cbf47ea 100644 --- a/cpp/include/nvtext/detail/load_hash_file.hpp +++ b/cpp/include/nvtext/detail/load_hash_file.hpp @@ -16,11 +16,11 @@ #pragma once #include +#include #include #include -#include #include #include diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp index 57ad008f1a9..5e5c78e993f 100644 --- a/cpp/include/nvtext/detail/tokenize.hpp +++ b/cpp/include/nvtext/detail/tokenize.hpp @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include namespace CUDF_EXPORT nvtext { namespace detail { diff --git a/cpp/include/nvtext/edit_distance.hpp b/cpp/include/nvtext/edit_distance.hpp index 102f2cffa18..723ba310a1e 100644 --- a/cpp/include/nvtext/edit_distance.hpp +++ b/cpp/include/nvtext/edit_distance.hpp @@ -19,8 +19,7 @@ #include #include #include - -#include +#include //! 
NVText APIs namespace CUDF_EXPORT nvtext { @@ -64,7 +63,7 @@ std::unique_ptr edit_distance( cudf::strings_column_view const& input, cudf::strings_column_view const& targets, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Compute the edit distance between all the strings in the input column. @@ -102,7 +101,7 @@ std::unique_ptr edit_distance( std::unique_ptr edit_distance_matrix( cudf::strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/generate_ngrams.hpp b/cpp/include/nvtext/generate_ngrams.hpp index ce79d985a49..54282b8ef3c 100644 --- a/cpp/include/nvtext/generate_ngrams.hpp +++ b/cpp/include/nvtext/generate_ngrams.hpp @@ -19,8 +19,7 @@ #include #include #include - -#include +#include namespace CUDF_EXPORT nvtext { /** @@ -62,7 +61,7 @@ std::unique_ptr generate_ngrams( cudf::size_type ngrams, cudf::string_scalar const& separator, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Generates ngrams of characters within each string @@ -91,7 +90,7 @@ std::unique_ptr generate_character_ngrams( cudf::strings_column_view const& input, cudf::size_type ngrams = 2, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Hashes ngrams of characters within each 
string @@ -126,7 +125,7 @@ std::unique_ptr hash_character_ngrams( cudf::strings_column_view const& input, cudf::size_type ngrams = 5, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/jaccard.hpp b/cpp/include/nvtext/jaccard.hpp index 3c3486c079e..e0b924ac658 100644 --- a/cpp/include/nvtext/jaccard.hpp +++ b/cpp/include/nvtext/jaccard.hpp @@ -18,8 +18,7 @@ #include #include #include - -#include +#include namespace CUDF_EXPORT nvtext { /** @@ -76,7 +75,7 @@ std::unique_ptr jaccard_index( cudf::strings_column_view const& input2, cudf::size_type width, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/minhash.hpp b/cpp/include/nvtext/minhash.hpp index fc28ecfb199..7c909f1a948 100644 --- a/cpp/include/nvtext/minhash.hpp +++ b/cpp/include/nvtext/minhash.hpp @@ -17,13 +17,13 @@ #include #include +#include #include #include #include +#include #include -#include - namespace CUDF_EXPORT nvtext { /** * @addtogroup nvtext_minhash @@ -56,7 +56,7 @@ std::unique_ptr minhash( cudf::numeric_scalar seed = 0, cudf::size_type width = 4, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the minhash values for each string per seed @@ -73,7 +73,7 @@ std::unique_ptr minhash( * * @throw std::invalid_argument if the width < 2 * @throw std::invalid_argument if seeds is empty - * @throw 
std::overflow_error if `seeds * input.size()` exceeds the column size limit + * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit * * @param input Strings column to compute minhash * @param seeds Seed values used for the hash algorithm @@ -88,7 +88,7 @@ std::unique_ptr minhash( cudf::device_span seeds, cudf::size_type width = 4, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the minhash value for each string @@ -117,7 +117,7 @@ std::unique_ptr minhash64( cudf::numeric_scalar seed = 0, cudf::size_type width = 4, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the minhash values for each string per seed @@ -134,7 +134,7 @@ std::unique_ptr minhash64( * * @throw std::invalid_argument if the width < 2 * @throw std::invalid_argument if seeds is empty - * @throw std::overflow_error if `seeds * input.size()` exceeds the column size limit + * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit * * @param input Strings column to compute minhash * @param seeds Seed values used for the hash algorithm @@ -149,7 +149,63 @@ std::unique_ptr minhash64( cudf::device_span seeds, cudf::size_type width = 4, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +/** + * @brief Returns the minhash values for each row of strings per seed + * + * Hash values are computed from each string in each row and the + * minimum hash value is returned for each row for each seed. 
+ * Each row of the output list column are seed results for the corresponding + * input row. The order of the elements in each row match the order of + * the seeds provided in the `seeds` parameter. + * + * This function uses MurmurHash3_x86_32 for the hash algorithm. + * + * Any null row entries result in corresponding null output rows. + * + * @throw std::invalid_argument if seeds is empty + * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit + * + * @param input Lists column of strings to compute minhash + * @param seeds Seed values used for the hash algorithm + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return List column of minhash values for each string per seed + */ +std::unique_ptr word_minhash( + cudf::lists_column_view const& input, + cudf::device_span seeds, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + +/** + * @brief Returns the minhash values for each row of strings per seed + * + * Hash values are computed from each string in each row and the + * minimum hash value is returned for each row for each seed. + * Each row of the output list column are seed results for the corresponding + * input row. The order of the elements in each row match the order of + * the seeds provided in the `seeds` parameter. + * + * This function uses MurmurHash3_x64_128 for the hash algorithm though + * only the first 64-bits of the hash are used in computing the output. + * + * Any null row entries result in corresponding null output rows. 
+ * + * @throw std::invalid_argument if seeds is empty + * @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit + * + * @param input Lists column of strings to compute minhash + * @param seeds Seed values used for the hash algorithm + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return List column of minhash values for each string per seed + */ +std::unique_ptr word_minhash64( + cudf::lists_column_view const& input, + cudf::device_span seeds, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/ngrams_tokenize.hpp b/cpp/include/nvtext/ngrams_tokenize.hpp index 1048cd4abad..e3b3c23a7a9 100644 --- a/cpp/include/nvtext/ngrams_tokenize.hpp +++ b/cpp/include/nvtext/ngrams_tokenize.hpp @@ -19,8 +19,7 @@ #include #include #include - -#include +#include namespace CUDF_EXPORT nvtext { /** @@ -84,7 +83,7 @@ std::unique_ptr ngrams_tokenize( cudf::string_scalar const& delimiter, cudf::string_scalar const& separator, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/normalize.hpp b/cpp/include/nvtext/normalize.hpp index ec0b8981f8f..74325f4a406 100644 --- a/cpp/include/nvtext/normalize.hpp +++ b/cpp/include/nvtext/normalize.hpp @@ -18,8 +18,7 @@ #include #include #include - -#include +#include //! 
NVText APIs namespace CUDF_EXPORT nvtext { @@ -55,7 +54,7 @@ namespace CUDF_EXPORT nvtext { std::unique_ptr normalize_spaces( cudf::strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Normalizes strings characters for tokenizing. @@ -106,7 +105,7 @@ std::unique_ptr normalize_characters( cudf::strings_column_view const& input, bool do_lower_case, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/replace.hpp b/cpp/include/nvtext/replace.hpp index eedcd3976ca..bbd0503379b 100644 --- a/cpp/include/nvtext/replace.hpp +++ b/cpp/include/nvtext/replace.hpp @@ -19,8 +19,7 @@ #include #include #include - -#include +#include //! NVText APIs namespace CUDF_EXPORT nvtext { @@ -91,7 +90,7 @@ std::unique_ptr replace_tokens( cudf::strings_column_view const& replacements, cudf::string_scalar const& delimiter = cudf::string_scalar{""}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Removes tokens whose lengths are less than a specified number of characters. 
@@ -140,7 +139,7 @@ std::unique_ptr filter_tokens( cudf::string_scalar const& replacement = cudf::string_scalar{""}, cudf::string_scalar const& delimiter = cudf::string_scalar{""}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/stemmer.hpp b/cpp/include/nvtext/stemmer.hpp index 4607c42ceed..55a4124bfd0 100644 --- a/cpp/include/nvtext/stemmer.hpp +++ b/cpp/include/nvtext/stemmer.hpp @@ -19,8 +19,7 @@ #include #include #include - -#include +#include namespace CUDF_EXPORT nvtext { /** @@ -83,7 +82,7 @@ std::unique_ptr is_letter( letter_type ltype, cudf::size_type character_index, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns boolean column indicating if character at `indices[i]` of `input[i]` @@ -136,7 +135,7 @@ std::unique_ptr is_letter( letter_type ltype, cudf::column_view const& indices, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the Porter Stemmer measurements of a strings column. 
@@ -170,7 +169,7 @@ std::unique_ptr is_letter( std::unique_ptr porter_stemmer_measure( cudf::strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/subword_tokenize.hpp b/cpp/include/nvtext/subword_tokenize.hpp index b5636c8401b..c4210699975 100644 --- a/cpp/include/nvtext/subword_tokenize.hpp +++ b/cpp/include/nvtext/subword_tokenize.hpp @@ -19,8 +19,7 @@ #include #include #include - -#include +#include namespace CUDF_EXPORT nvtext { @@ -68,7 +67,7 @@ struct hashed_vocabulary { */ std::unique_ptr load_vocabulary_file( std::string const& filename_hashed_vocabulary, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Result object for the subword_tokenize functions. 
@@ -158,7 +157,7 @@ tokenizer_result subword_tokenize( uint32_t stride, bool do_lower_case, bool do_truncate, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of group } // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/tokenize.hpp b/cpp/include/nvtext/tokenize.hpp index 833b53efcde..e61601c6fea 100644 --- a/cpp/include/nvtext/tokenize.hpp +++ b/cpp/include/nvtext/tokenize.hpp @@ -19,8 +19,7 @@ #include #include #include - -#include +#include namespace CUDF_EXPORT nvtext { /** @@ -63,7 +62,7 @@ std::unique_ptr tokenize( cudf::strings_column_view const& input, cudf::string_scalar const& delimiter = cudf::string_scalar{""}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a single column of strings by tokenizing the input strings @@ -99,7 +98,7 @@ std::unique_ptr tokenize( cudf::strings_column_view const& input, cudf::strings_column_view const& delimiters, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the number of tokens in each string of a strings column. 
@@ -130,7 +129,7 @@ std::unique_ptr count_tokens( cudf::strings_column_view const& input, cudf::string_scalar const& delimiter = cudf::string_scalar{""}, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the number of tokens in each string of a strings column @@ -162,7 +161,7 @@ std::unique_ptr count_tokens( cudf::strings_column_view const& input, cudf::strings_column_view const& delimiters, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns a single column of strings by converting each character to a string. @@ -188,7 +187,7 @@ std::unique_ptr count_tokens( std::unique_ptr character_tokenize( cudf::strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Creates a strings column from a strings column of tokens and an @@ -229,7 +228,7 @@ std::unique_ptr detokenize( cudf::column_view const& row_indices, cudf::string_scalar const& separator = cudf::string_scalar(" "), rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Vocabulary object to be used with nvtext::tokenize_with_vocabulary @@ -251,7 +250,7 @@ struct tokenize_vocabulary { */ tokenize_vocabulary(cudf::strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = 
rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); ~tokenize_vocabulary(); struct tokenize_vocabulary_impl; @@ -274,7 +273,7 @@ struct tokenize_vocabulary { std::unique_ptr load_vocabulary( cudf::strings_column_view const& input, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** * @brief Returns the token ids for the input string by looking up each delimited @@ -307,7 +306,7 @@ std::unique_ptr tokenize_with_vocabulary( cudf::string_scalar const& delimiter, cudf::size_type default_id = -1, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of tokenize group } // namespace CUDF_EXPORT nvtext diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp index 3ac8547baad..a6c878efbbc 100644 --- a/cpp/src/binaryop/binaryop.cpp +++ b/cpp/src/binaryop/binaryop.cpp @@ -35,13 +35,13 @@ #include #include #include +#include #include #include #include -#include -#include +#include #include @@ -173,7 +173,7 @@ template void fixed_point_binary_operation_validation(binary_operator op, Lhs lhs, Rhs rhs, - thrust::optional output_type = {}) + cuda::std::optional output_type = {}) { CUDF_EXPECTS((is_fixed_point(lhs) or is_fixed_point(rhs)), "One of the inputs must have fixed_point data_type."); diff --git a/cpp/src/binaryop/compiled/binary_ops.cu b/cpp/src/binaryop/compiled/binary_ops.cu index 7a0bc312434..3c558f1e264 100644 --- a/cpp/src/binaryop/compiled/binary_ops.cu +++ b/cpp/src/binaryop/compiled/binary_ops.cu @@ -24,11 +24,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -116,7 +116,7 @@ 
scalar_as_column_view::return_type scalar_as_column_view::operator() #include #include +#include #include -#include #include diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu index d0faeea8336..4ca05f9c335 100644 --- a/cpp/src/bitmask/null_mask.cu +++ b/cpp/src/bitmask/null_mask.cu @@ -27,13 +27,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index 90f719b9516..973b1ffd133 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -30,12 +30,12 @@ #include #include #include +#include #include #include #include #include -#include #include diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp index 0260068d4db..482413d0ccb 100644 --- a/cpp/src/column/column_factories.cpp +++ b/cpp/src/column/column_factories.cpp @@ -23,10 +23,9 @@ #include #include #include +#include #include -#include - #include namespace cudf { diff --git a/cpp/src/column/column_factories.cu b/cpp/src/column/column_factories.cu index bad20d6817c..60405ae7af1 100644 --- a/cpp/src/column/column_factories.cu +++ b/cpp/src/column/column_factories.cu @@ -20,11 +20,11 @@ #include #include #include -#include - -#include +#include +#include #include +#include namespace cudf { @@ -57,15 +57,26 @@ std::unique_ptr column_from_scalar_dispatch::operator() const&>(value); + if (!value.is_valid(stream)) { + return make_strings_column( + size, + make_column_from_scalar(numeric_scalar(0), size + 1, stream, mr), + rmm::device_buffer{}, + size, + cudf::detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr)); + } + + auto& ss = static_cast const&>(value); + auto const d_str = ss.value(stream); // no actual data is copied // fill the column with the scalar - auto output = strings::detail::fill(strings_column_view(sc), 0, size, sv, stream, mr); - - return output; + rmm::device_uvector indices(size, stream); + 
auto const row_value = + d_str.empty() ? cudf::strings::detail::string_index_pair{"", 0} + : cudf::strings::detail::string_index_pair{d_str.data(), d_str.size_bytes()}; + thrust::uninitialized_fill( + rmm::exec_policy_nosync(stream), indices.begin(), indices.end(), row_value); + return cudf::strings::detail::make_strings_column(indices.begin(), indices.end(), stream, mr); } template <> diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index ac9931335ff..b8e140f1fa5 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -32,11 +32,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -82,7 +82,7 @@ auto create_device_views(host_span views, rmm::cuda_stream_vi [](auto const& col) { return *col; }); auto d_views = - make_device_uvector_async(device_views, stream, rmm::mr::get_current_device_resource()); + make_device_uvector_async(device_views, stream, cudf::get_current_device_resource_ref()); // Compute the partition offsets auto offsets = cudf::detail::make_host_vector(views.size() + 1, stream); @@ -94,7 +94,7 @@ auto create_device_views(host_span views, rmm::cuda_stream_vi [](auto const& col) { return col.size(); }, thrust::plus{}); auto d_offsets = - make_device_uvector_async(offsets, stream, rmm::mr::get_current_device_resource()); + make_device_uvector_async(offsets, stream, cudf::get_current_device_resource_ref()); auto const output_size = offsets.back(); return std::make_tuple( diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index 95544742fb7..15aa31ff5ee 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -28,10 +28,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -1939,8 +1939,8 @@ struct contiguous_split_state { std::transform(h_buf_sizes, h_buf_sizes + num_partitions, std::back_inserter(out_buffers), - [stream = stream, - mr = 
mr.value_or(rmm::mr::get_current_device_resource())](std::size_t bytes) { + [stream = stream, mr = mr.value_or(cudf::get_current_device_resource_ref())]( + std::size_t bytes) { return rmm::device_buffer{bytes, stream, mr}; }); } diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 98ee6aa8f68..d60fb5ce110 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -23,10 +23,10 @@ #include #include #include +#include #include #include -#include #include @@ -143,6 +143,12 @@ std::unique_ptr empty_like(column_view const& input) { CUDF_FUNC_RANGE(); + // test_dataframe.py passes an EMPTY column type here; + // this causes is_nested to throw an error since it uses the type-dispatcher + if ((input.type().id() == type_id::EMPTY) || !cudf::is_nested(input.type())) { + return make_empty_column(input.type()); + } + std::vector> children; std::transform(input.child_begin(), input.child_end(), diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index e86a1f8d6f1..e5e2514d035 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -25,13 +25,12 @@ #include #include #include +#include #include #include #include #include -#include -#include #include #include @@ -180,7 +179,7 @@ std::unique_ptr scatter_gather_based_if_else(cudf::column_view const& lh out_of_bounds_policy::DONT_CHECK, negative_index_policy::NOT_ALLOWED, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto result = cudf::detail::scatter( table_view{std::vector{scatter_src_lhs->get_column(0).view()}}, diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu index dd18f99a3c8..bffb48a8ec0 100644 --- a/cpp/src/copying/copy_range.cu +++ b/cpp/src/copying/copy_range.cu @@ -31,11 +31,11 @@ #include #include #include +#include #include #include #include -#include #include @@ -100,7 +100,7 @@ struct out_of_place_copy_range_dispatch { cudf::size_type source_end, cudf::size_type target_begin, 
rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto p_ret = std::make_unique(target, stream, mr); if ((!p_ret->nullable()) && source.has_nulls(source_begin, source_end)) { @@ -157,7 +157,7 @@ std::unique_ptr out_of_place_copy_range_dispatch::operator()view()); auto source_matched = cudf::dictionary::detail::set_keys( - dict_source, target_view.keys(), stream, rmm::mr::get_current_device_resource()); + dict_source, target_view.keys(), stream, cudf::get_current_device_resource_ref()); auto const source_view = cudf::dictionary_column_view(source_matched->view()); // build the new indices by calling in_place_copy_range on just the indices diff --git a/cpp/src/copying/gather.cu b/cpp/src/copying/gather.cu index 5eb039419df..d1ab39d665d 100644 --- a/cpp/src/copying/gather.cu +++ b/cpp/src/copying/gather.cu @@ -23,9 +23,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu index b8860da479c..29a28f81d1a 100644 --- a/cpp/src/copying/get_element.cu +++ b/cpp/src/copying/get_element.cu @@ -27,9 +27,9 @@ #include #include #include +#include #include -#include #include diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp index 819ad593c0a..1282eec6c44 100644 --- a/cpp/src/copying/pack.cpp +++ b/cpp/src/copying/pack.cpp @@ -18,9 +18,9 @@ #include #include #include +#include #include -#include namespace cudf { namespace detail { diff --git a/cpp/src/copying/purge_nonempty_nulls.cu b/cpp/src/copying/purge_nonempty_nulls.cu index 581d0a00924..684deabf038 100644 --- a/cpp/src/copying/purge_nonempty_nulls.cu +++ b/cpp/src/copying/purge_nonempty_nulls.cu @@ -18,8 +18,7 @@ #include #include #include - -#include +#include #include #include diff --git a/cpp/src/copying/reverse.cu b/cpp/src/copying/reverse.cu index 
d3d42e35e26..effbb59f223 100644 --- a/cpp/src/copying/reverse.cu +++ b/cpp/src/copying/reverse.cu @@ -21,12 +21,11 @@ #include #include #include +#include #include #include #include -#include -#include #include #include diff --git a/cpp/src/copying/sample.cu b/cpp/src/copying/sample.cu index ba00527f6b6..dc03856c7cf 100644 --- a/cpp/src/copying/sample.cu +++ b/cpp/src/copying/sample.cu @@ -24,9 +24,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu index 993ee074f14..cd14eb96ec4 100644 --- a/cpp/src/copying/scatter.cu +++ b/cpp/src/copying/scatter.cu @@ -33,10 +33,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -198,7 +198,7 @@ struct column_scalar_scatterer_impl { mr); auto dict_view = dictionary_column_view(dict_target->view()); auto scalar_index = dictionary::detail::get_index( - dict_view, source.get(), stream, rmm::mr::get_current_device_resource()); + dict_view, source.get(), stream, cudf::get_current_device_resource_ref()); auto scalar_iter = thrust::make_permutation_iterator( indexalator_factory::make_input_iterator(*scalar_index), thrust::make_constant_iterator(0)); auto new_indices = std::make_unique(dict_view.get_indices_annotated(), stream, mr); @@ -271,7 +271,7 @@ struct column_scalar_scatterer_impl { auto scatter_functor = column_scalar_scatterer{}; auto fields_iter_begin = make_counting_transform_iterator(0, [&](auto const& i) { auto row_slr = detail::get_element( - typed_s->view().column(i), 0, stream, rmm::mr::get_current_device_resource()); + typed_s->view().column(i), 0, stream, cudf::get_current_device_resource_ref()); return type_dispatcher(row_slr->type(), scatter_functor, *row_slr, @@ -416,7 +416,7 @@ std::unique_ptr boolean_mask_scatter(column_view const& input, // The scatter map is actually a table with only one column, which is scatter map. 
auto scatter_map = detail::apply_boolean_mask( - table_view{{indices->view()}}, boolean_mask, stream, rmm::mr::get_current_device_resource()); + table_view{{indices->view()}}, boolean_mask, stream, cudf::get_current_device_resource_ref()); auto output_table = detail::scatter( table_view{{input}}, scatter_map->get_column(0).view(), table_view{{target}}, stream, mr); diff --git a/cpp/src/copying/segmented_shift.cu b/cpp/src/copying/segmented_shift.cu index b7abc60f240..6ea5c5ab38a 100644 --- a/cpp/src/copying/segmented_shift.cu +++ b/cpp/src/copying/segmented_shift.cu @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu index 91254f21170..674f6dbd28a 100644 --- a/cpp/src/copying/shift.cu +++ b/cpp/src/copying/shift.cu @@ -25,13 +25,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index 7629cad79a9..ddb0dbcd96d 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -29,13 +29,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include @@ -580,142 +580,167 @@ std::unique_ptr extract_quarter(column_view const& column, std::unique_ptr ceil_datetimes(column_view const& column, rounding_frequency freq, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::round_general( - detail::rounding_function::CEIL, freq, column, cudf::get_default_stream(), mr); + return detail::round_general(detail::rounding_function::CEIL, freq, column, stream, mr); } std::unique_ptr floor_datetimes(column_view const& column, rounding_frequency freq, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::round_general( - detail::rounding_function::FLOOR, 
freq, column, cudf::get_default_stream(), mr); + return detail::round_general(detail::rounding_function::FLOOR, freq, column, stream, mr); } std::unique_ptr round_datetimes(column_view const& column, rounding_frequency freq, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::round_general( - detail::rounding_function::ROUND, freq, column, cudf::get_default_stream(), mr); + return detail::round_general(detail::rounding_function::ROUND, freq, column, stream, mr); } -std::unique_ptr extract_year(column_view const& column, rmm::device_async_resource_ref mr) +std::unique_ptr extract_year(column_view const& column, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::extract_year(column, cudf::get_default_stream(), mr); + return detail::extract_year(column, stream, mr); } -std::unique_ptr extract_month(column_view const& column, rmm::device_async_resource_ref mr) +std::unique_ptr extract_month(column_view const& column, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::extract_month(column, cudf::get_default_stream(), mr); + return detail::extract_month(column, stream, mr); } -std::unique_ptr extract_day(column_view const& column, rmm::device_async_resource_ref mr) +std::unique_ptr extract_day(column_view const& column, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::extract_day(column, cudf::get_default_stream(), mr); + return detail::extract_day(column, stream, mr); } std::unique_ptr extract_weekday(column_view const& column, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::extract_weekday(column, cudf::get_default_stream(), mr); + return detail::extract_weekday(column, stream, mr); } -std::unique_ptr extract_hour(column_view const& column, rmm::device_async_resource_ref mr) +std::unique_ptr 
extract_hour(column_view const& column, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::extract_hour(column, cudf::get_default_stream(), mr); + return detail::extract_hour(column, stream, mr); } -std::unique_ptr extract_minute(column_view const& column, rmm::device_async_resource_ref mr) +std::unique_ptr extract_minute(column_view const& column, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::extract_minute(column, cudf::get_default_stream(), mr); + return detail::extract_minute(column, stream, mr); } -std::unique_ptr extract_second(column_view const& column, rmm::device_async_resource_ref mr) +std::unique_ptr extract_second(column_view const& column, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::extract_second(column, cudf::get_default_stream(), mr); + return detail::extract_second(column, stream, mr); } std::unique_ptr extract_millisecond_fraction(column_view const& column, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::extract_millisecond_fraction(column, cudf::get_default_stream(), mr); + return detail::extract_millisecond_fraction(column, stream, mr); } std::unique_ptr extract_microsecond_fraction(column_view const& column, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::extract_microsecond_fraction(column, cudf::get_default_stream(), mr); + return detail::extract_microsecond_fraction(column, stream, mr); } std::unique_ptr extract_nanosecond_fraction(column_view const& column, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::extract_nanosecond_fraction(column, cudf::get_default_stream(), mr); + return detail::extract_nanosecond_fraction(column, stream, mr); } std::unique_ptr last_day_of_month(column_view const& 
column, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::last_day_of_month(column, cudf::get_default_stream(), mr); + return detail::last_day_of_month(column, stream, mr); } -std::unique_ptr day_of_year(column_view const& column, rmm::device_async_resource_ref mr) +std::unique_ptr day_of_year(column_view const& column, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::day_of_year(column, cudf::get_default_stream(), mr); + return detail::day_of_year(column, stream, mr); } std::unique_ptr add_calendrical_months(cudf::column_view const& timestamp_column, cudf::column_view const& months_column, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::add_calendrical_months( - timestamp_column, months_column, cudf::get_default_stream(), mr); + return detail::add_calendrical_months(timestamp_column, months_column, stream, mr); } std::unique_ptr add_calendrical_months(cudf::column_view const& timestamp_column, cudf::scalar const& months, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::add_calendrical_months(timestamp_column, months, cudf::get_default_stream(), mr); + return detail::add_calendrical_months(timestamp_column, months, stream, mr); } -std::unique_ptr is_leap_year(column_view const& column, rmm::device_async_resource_ref mr) +std::unique_ptr is_leap_year(column_view const& column, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::is_leap_year(column, cudf::get_default_stream(), mr); + return detail::is_leap_year(column, stream, mr); } -std::unique_ptr days_in_month(column_view const& column, rmm::device_async_resource_ref mr) +std::unique_ptr days_in_month(column_view const& column, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return 
detail::days_in_month(column, cudf::get_default_stream(), mr); + return detail::days_in_month(column, stream, mr); } std::unique_ptr extract_quarter(column_view const& column, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::extract_quarter(column, cudf::get_default_stream(), mr); + return detail::extract_quarter(column, stream, mr); } } // namespace datetime diff --git a/cpp/src/datetime/timezone.cpp b/cpp/src/datetime/timezone.cpp index 7ca1b51df98..cf239297255 100644 --- a/cpp/src/datetime/timezone.cpp +++ b/cpp/src/datetime/timezone.cpp @@ -18,8 +18,7 @@ #include #include #include - -#include +#include #include #include @@ -381,11 +380,11 @@ static int64_t get_transition_time(dst_transition_s const& trans, int year) std::unique_ptr
make_timezone_transition_table(std::optional tzif_dir, std::string_view timezone_name, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::make_timezone_transition_table( - tzif_dir, timezone_name, cudf::get_default_stream(), mr); + return detail::make_timezone_transition_table(tzif_dir, timezone_name, stream, mr); } namespace detail { diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu index 0ed9006f88b..565055009ba 100644 --- a/cpp/src/dictionary/add_keys.cu +++ b/cpp/src/dictionary/add_keys.cu @@ -30,11 +30,9 @@ #include #include #include +#include #include -#include -#include - namespace cudf { namespace dictionary { namespace detail { @@ -61,7 +59,7 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column // first, concatenate the keys together // [a,b,c,d,f] + [d,b,e] = [a,b,c,d,f,d,b,e] auto combined_keys = cudf::detail::concatenate( - std::vector{old_keys, new_keys}, stream, rmm::mr::get_current_device_resource()); + std::vector{old_keys, new_keys}, stream, cudf::get_current_device_resource_ref()); // Drop duplicates from the combined keys, then sort the result. 
// sort(distinct([a,b,c,d,f,d,b,e])) = [a,b,c,d,e,f] diff --git a/cpp/src/dictionary/decode.cu b/cpp/src/dictionary/decode.cu index 9f05593fc40..fb013586999 100644 --- a/cpp/src/dictionary/decode.cu +++ b/cpp/src/dictionary/decode.cu @@ -23,9 +23,9 @@ #include #include #include +#include #include -#include namespace cudf { namespace dictionary { diff --git a/cpp/src/dictionary/detail/concatenate.cu b/cpp/src/dictionary/detail/concatenate.cu index 72828309425..b3a8bb4cd20 100644 --- a/cpp/src/dictionary/detail/concatenate.cu +++ b/cpp/src/dictionary/detail/concatenate.cu @@ -27,13 +27,12 @@ #include #include #include +#include #include #include #include #include -#include -#include #include #include @@ -120,7 +119,7 @@ struct compute_children_offsets_fn { return offsets_pair{lhs.first + rhs.first, lhs.second + rhs.second}; }); return cudf::detail::make_device_uvector_sync( - offsets, stream, rmm::mr::get_current_device_resource()); + offsets, stream, cudf::get_current_device_resource_ref()); } private: @@ -229,7 +228,7 @@ std::unique_ptr concatenate(host_span columns, return keys; }); auto all_keys = - cudf::detail::concatenate(keys_views, stream, rmm::mr::get_current_device_resource()); + cudf::detail::concatenate(keys_views, stream, cudf::get_current_device_resource_ref()); // sort keys and remove duplicates; // this becomes the keys child for the output dictionary column diff --git a/cpp/src/dictionary/detail/merge.cu b/cpp/src/dictionary/detail/merge.cu index c65aa5d1101..0af71397196 100644 --- a/cpp/src/dictionary/detail/merge.cu +++ b/cpp/src/dictionary/detail/merge.cu @@ -22,10 +22,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/src/dictionary/dictionary_factories.cu b/cpp/src/dictionary/dictionary_factories.cu index 0617d71fa51..3e0c98d36ea 100644 --- a/cpp/src/dictionary/dictionary_factories.cu +++ b/cpp/src/dictionary/dictionary_factories.cu @@ -20,10 +20,10 @@ #include #include #include +#include #include 
#include -#include namespace cudf { namespace { diff --git a/cpp/src/dictionary/encode.cu b/cpp/src/dictionary/encode.cu index ff29d83b80a..c8ccb511e8f 100644 --- a/cpp/src/dictionary/encode.cu +++ b/cpp/src/dictionary/encode.cu @@ -27,9 +27,9 @@ #include #include #include +#include #include -#include namespace cudf { namespace dictionary { diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu index 35387efa56b..119f43a4ae9 100644 --- a/cpp/src/dictionary/remove_keys.cu +++ b/cpp/src/dictionary/remove_keys.cu @@ -27,11 +27,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu index bc17dfd4bab..fe0b103cc55 100644 --- a/cpp/src/dictionary/replace.cu +++ b/cpp/src/dictionary/replace.cu @@ -25,10 +25,10 @@ #include #include #include +#include #include #include -#include namespace cudf { namespace dictionary { @@ -132,7 +132,7 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, input, make_column_from_scalar(replacement, 1, stream)->view(), stream, mr); auto const input_view = dictionary_column_view(input_matched->view()); auto const scalar_index = - get_index(input_view, replacement, stream, rmm::mr::get_current_device_resource()); + get_index(input_view, replacement, stream, cudf::get_current_device_resource_ref()); // now build the new indices by doing replace-null on the updated indices auto const input_indices = input_view.get_indices_annotated(); diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu index 231619836f9..04e2c17635d 100644 --- a/cpp/src/dictionary/search.cu +++ b/cpp/src/dictionary/search.cu @@ -20,13 +20,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu index cf40fda5971..be5c3dd6a26 100644 --- a/cpp/src/dictionary/set_keys.cu 
+++ b/cpp/src/dictionary/set_keys.cu @@ -31,11 +31,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -185,7 +185,7 @@ std::vector> match_dictionaries( { std::vector keys(input.size()); std::transform(input.begin(), input.end(), keys.begin(), [](auto& col) { return col.keys(); }); - auto new_keys = cudf::detail::concatenate(keys, stream, rmm::mr::get_current_device_resource()); + auto new_keys = cudf::detail::concatenate(keys, stream, cudf::get_current_device_resource_ref()); auto keys_view = new_keys->view(); std::vector> result(input.size()); std::transform(input.begin(), input.end(), result.begin(), [keys_view, mr, stream](auto& col) { diff --git a/cpp/src/filling/calendrical_month_sequence.cu b/cpp/src/filling/calendrical_month_sequence.cu index f984f307ddd..f5ad211bd0d 100644 --- a/cpp/src/filling/calendrical_month_sequence.cu +++ b/cpp/src/filling/calendrical_month_sequence.cu @@ -20,11 +20,11 @@ #include #include #include +#include #include #include #include -#include namespace cudf { namespace detail { diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu index 1fc9ed31c09..cfb209c0569 100644 --- a/cpp/src/filling/fill.cu +++ b/cpp/src/filling/fill.cu @@ -32,12 +32,11 @@ #include #include #include +#include #include #include #include -#include -#include #include @@ -175,7 +174,7 @@ std::unique_ptr out_of_place_fill_range_dispatch::operator()view(), value, stream, rmm::mr::get_current_device_resource()); + target_matched->view(), value, stream, cudf::get_current_device_resource_ref()); // now call fill using just the indices column and the new index auto new_indices = cudf::type_dispatcher(target_indices.type(), diff --git a/cpp/src/filling/repeat.cu b/cpp/src/filling/repeat.cu index ff4005d9366..2e78954d78a 100644 --- a/cpp/src/filling/repeat.cu +++ b/cpp/src/filling/repeat.cu @@ -27,13 +27,12 @@ #include #include #include +#include #include #include #include #include -#include -#include #include 
#include diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu index ee1745b8498..d8fd993bbd1 100644 --- a/cpp/src/filling/sequence.cu +++ b/cpp/src/filling/sequence.cu @@ -24,11 +24,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/groupby/common/utils.hpp b/cpp/src/groupby/common/utils.hpp index 82c3c08b501..80849357811 100644 --- a/cpp/src/groupby/common/utils.hpp +++ b/cpp/src/groupby/common/utils.hpp @@ -18,10 +18,9 @@ #include #include +#include #include -#include - #include #include diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index e43dfcb4d98..cc0682b68b9 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -35,12 +35,11 @@ #include #include #include +#include #include #include #include -#include -#include #include @@ -284,7 +283,7 @@ std::pair, std::unique_ptr
> groupby::replace_nulls std::back_inserter(results), [&](auto i) { bool nullable = values.column(i).nullable(); - auto final_mr = nullable ? rmm::mr::get_current_device_resource() : mr; + auto final_mr = nullable ? cudf::get_current_device_resource_ref() : mr; auto grouped_values = helper().grouped_values(values.column(i), stream, final_mr); return nullable ? detail::group_replace_nulls( *grouped_values, group_labels, replace_policies[i], stream, mr) @@ -331,7 +330,7 @@ std::pair, std::unique_ptr
> groupby::shift( std::back_inserter(results), [&](size_type i) { auto grouped_values = - helper().grouped_values(values.column(i), stream, rmm::mr::get_current_device_resource()); + helper().grouped_values(values.column(i), stream, cudf::get_current_device_resource_ref()); return cudf::detail::segmented_shift( grouped_values->view(), group_offsets, offsets[i], fill_values[i].get(), stream, mr); }); diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index 5fe4a5eb30f..f9a80a048b5 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -39,11 +39,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -401,7 +401,7 @@ void sparse_to_dense_results(table_view const& keys, rmm::device_async_resource_ref mr) { auto row_bitmask = - cudf::detail::bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).first; + cudf::detail::bitmask_and(keys, stream, cudf::get_current_device_resource_ref()).first; bool skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE; bitmask_type const* row_bitmask_ptr = skip_key_rows_with_nulls ? static_cast(row_bitmask.data()) : nullptr; @@ -475,13 +475,13 @@ void compute_single_pass_aggs(table_view const& keys, auto d_sparse_table = mutable_table_device_view::create(sparse_table, stream); auto d_values = table_device_view::create(flattened_values, stream); auto const d_aggs = cudf::detail::make_device_uvector_async( - agg_kinds, stream, rmm::mr::get_current_device_resource()); + agg_kinds, stream, cudf::get_current_device_resource_ref()); auto const skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE; auto row_bitmask = skip_key_rows_with_nulls - ? cudf::detail::bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).first + ? 
cudf::detail::bitmask_and(keys, stream, cudf::get_current_device_resource_ref()).first : rmm::device_buffer{}; thrust::for_each_n( @@ -568,15 +568,16 @@ std::unique_ptr
groupby(table_view const& keys, cudf::detail::result_cache sparse_results(requests.size()); auto const comparator_helper = [&](auto const d_key_equal) { - auto const set = cuco::static_set{num_keys, - 0.5, // desired load factor - cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, - d_key_equal, - probing_scheme_type{d_row_hash}, - cuco::thread_scope_device, - cuco::storage<1>{}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto const set = cuco::static_set{ + num_keys, + 0.5, // desired load factor + cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, + d_key_equal, + probing_scheme_type{d_row_hash}, + cuco::thread_scope_device, + cuco::storage<1>{}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; // Compute all single pass aggs first compute_single_pass_aggs(keys, diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index ba59616babe..a9085a1f1fd 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -35,9 +35,9 @@ #include #include #include +#include #include -#include #include #include @@ -435,7 +435,7 @@ void aggregate_result_functor::operator()(aggregation helper.num_groups(stream), null_handling, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto const nulls_equal = dynamic_cast(agg)._nulls_equal; auto const nans_equal = @@ -507,7 +507,7 @@ void aggregate_result_functor::operator()(aggregation c helper.group_offsets(stream), helper.num_groups(stream), stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto const& merge_sets_agg = dynamic_cast(agg); cache.add_result(values, agg, diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp index 057085fe85d..a13866802be 100644 --- a/cpp/src/groupby/sort/functors.hpp +++ b/cpp/src/groupby/sort/functors.hpp @@ -20,9 +20,9 @@ #include #include #include +#include 
#include -#include #include diff --git a/cpp/src/groupby/sort/group_argmax.cu b/cpp/src/groupby/sort/group_argmax.cu index a1d197b1307..7dce341130e 100644 --- a/cpp/src/groupby/sort/group_argmax.cu +++ b/cpp/src/groupby/sort/group_argmax.cu @@ -17,10 +17,10 @@ #include "groupby/sort/group_single_pass_reduction_util.cuh" #include +#include #include #include -#include #include diff --git a/cpp/src/groupby/sort/group_argmin.cu b/cpp/src/groupby/sort/group_argmin.cu index 03243bef836..c4bed330b9f 100644 --- a/cpp/src/groupby/sort/group_argmin.cu +++ b/cpp/src/groupby/sort/group_argmin.cu @@ -17,10 +17,10 @@ #include "groupby/sort/group_single_pass_reduction_util.cuh" #include +#include #include #include -#include #include diff --git a/cpp/src/groupby/sort/group_collect.cu b/cpp/src/groupby/sort/group_collect.cu index 555c5d3ad41..a1cac7ee3bc 100644 --- a/cpp/src/groupby/sort/group_collect.cu +++ b/cpp/src/groupby/sort/group_collect.cu @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_correlation.cu b/cpp/src/groupby/sort/group_correlation.cu index 152aa98a8b9..7f2102dc8ee 100644 --- a/cpp/src/groupby/sort/group_correlation.cu +++ b/cpp/src/groupby/sort/group_correlation.cu @@ -21,12 +21,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_count.cu b/cpp/src/groupby/sort/group_count.cu index 56a4943e272..2e1cb9591c4 100644 --- a/cpp/src/groupby/sort/group_count.cu +++ b/cpp/src/groupby/sort/group_count.cu @@ -18,11 +18,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_count_scan.cu b/cpp/src/groupby/sort/group_count_scan.cu index c076f21e1f8..5897cc341d4 100644 --- a/cpp/src/groupby/sort/group_count_scan.cu +++ b/cpp/src/groupby/sort/group_count_scan.cu @@ -17,11 +17,11 @@ #include #include #include 
+#include #include #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_histogram.cu b/cpp/src/groupby/sort/group_histogram.cu index 1000ec0d470..861d801a070 100644 --- a/cpp/src/groupby/sort/group_histogram.cu +++ b/cpp/src/groupby/sort/group_histogram.cu @@ -23,10 +23,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/src/groupby/sort/group_m2.cu b/cpp/src/groupby/sort/group_m2.cu index 77f33486284..a17a4433d05 100644 --- a/cpp/src/groupby/sort/group_m2.cu +++ b/cpp/src/groupby/sort/group_m2.cu @@ -21,13 +21,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_max.cu b/cpp/src/groupby/sort/group_max.cu index 60b071c25ff..06a759dd25a 100644 --- a/cpp/src/groupby/sort/group_max.cu +++ b/cpp/src/groupby/sort/group_max.cu @@ -16,8 +16,9 @@ #include "groupby/sort/group_single_pass_reduction_util.cuh" +#include + #include -#include namespace cudf { namespace groupby { diff --git a/cpp/src/groupby/sort/group_max_scan.cu b/cpp/src/groupby/sort/group_max_scan.cu index 270059cfcad..21e439a2253 100644 --- a/cpp/src/groupby/sort/group_max_scan.cu +++ b/cpp/src/groupby/sort/group_max_scan.cu @@ -16,8 +16,9 @@ #include "groupby/sort/group_scan_util.cuh" +#include + #include -#include namespace cudf { namespace groupby { diff --git a/cpp/src/groupby/sort/group_merge_lists.cu b/cpp/src/groupby/sort/group_merge_lists.cu index 92cce1aa00e..009530a9915 100644 --- a/cpp/src/groupby/sort/group_merge_lists.cu +++ b/cpp/src/groupby/sort/group_merge_lists.cu @@ -16,11 +16,11 @@ #include #include +#include #include #include #include -#include #include diff --git a/cpp/src/groupby/sort/group_merge_m2.cu b/cpp/src/groupby/sort/group_merge_m2.cu index 4ad8fa5ff07..746c3fe3962 100644 --- a/cpp/src/groupby/sort/group_merge_m2.cu +++ b/cpp/src/groupby/sort/group_merge_m2.cu @@ -20,12 +20,12 @@ #include #include 
#include +#include #include #include #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_min.cu b/cpp/src/groupby/sort/group_min.cu index 22aaf664168..f86aa14430a 100644 --- a/cpp/src/groupby/sort/group_min.cu +++ b/cpp/src/groupby/sort/group_min.cu @@ -16,8 +16,9 @@ #include "groupby/sort/group_single_pass_reduction_util.cuh" +#include + #include -#include namespace cudf { namespace groupby { diff --git a/cpp/src/groupby/sort/group_min_scan.cu b/cpp/src/groupby/sort/group_min_scan.cu index 4ddc10a2e5a..96b7ad95a19 100644 --- a/cpp/src/groupby/sort/group_min_scan.cu +++ b/cpp/src/groupby/sort/group_min_scan.cu @@ -16,8 +16,9 @@ #include "groupby/sort/group_scan_util.cuh" +#include + #include -#include namespace cudf { namespace groupby { diff --git a/cpp/src/groupby/sort/group_nth_element.cu b/cpp/src/groupby/sort/group_nth_element.cu index 1bc1eef908c..a4752b6948b 100644 --- a/cpp/src/groupby/sort/group_nth_element.cu +++ b/cpp/src/groupby/sort/group_nth_element.cu @@ -22,11 +22,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu index de11e70719a..348ab366762 100644 --- a/cpp/src/groupby/sort/group_nunique.cu +++ b/cpp/src/groupby/sort/group_nunique.cu @@ -18,11 +18,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_product.cu b/cpp/src/groupby/sort/group_product.cu index 83ca1059325..5e81c8513c8 100644 --- a/cpp/src/groupby/sort/group_product.cu +++ b/cpp/src/groupby/sort/group_product.cu @@ -17,10 +17,10 @@ #include "groupby/sort/group_single_pass_reduction_util.cuh" #include +#include #include #include -#include namespace cudf { namespace groupby { diff --git a/cpp/src/groupby/sort/group_product_scan.cu b/cpp/src/groupby/sort/group_product_scan.cu index 40c53ceeff1..016f293ac5b 100644 --- 
a/cpp/src/groupby/sort/group_product_scan.cu +++ b/cpp/src/groupby/sort/group_product_scan.cu @@ -16,8 +16,9 @@ #include "groupby/sort/group_scan_util.cuh" +#include + #include -#include namespace cudf { namespace groupby { diff --git a/cpp/src/groupby/sort/group_quantiles.cu b/cpp/src/groupby/sort/group_quantiles.cu index 3156dfaadd0..82d557b9f7e 100644 --- a/cpp/src/groupby/sort/group_quantiles.cu +++ b/cpp/src/groupby/sort/group_quantiles.cu @@ -24,12 +24,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -165,7 +165,7 @@ std::unique_ptr group_quantiles(column_view const& values, rmm::device_async_resource_ref mr) { auto dv_quantiles = cudf::detail::make_device_uvector_async( - quantiles, stream, rmm::mr::get_current_device_resource()); + quantiles, stream, cudf::get_current_device_resource_ref()); auto values_type = cudf::is_dictionary(values.type()) ? dictionary_column_view(values).keys().type() diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu index 0b65889f127..65bd5ac408f 100644 --- a/cpp/src/groupby/sort/group_rank_scan.cu +++ b/cpp/src/groupby/sort/group_rank_scan.cu @@ -23,11 +23,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -226,13 +226,13 @@ std::unique_ptr average_rank_scan(column_view const& grouped_values, group_labels, group_offsets, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto min_rank = min_rank_scan(grouped_values, value_order, group_labels, group_offsets, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto ranks = make_fixed_width_column( data_type{type_to_id()}, group_labels.size(), mask_state::UNALLOCATED, stream, mr); auto mutable_ranks = ranks->mutable_view(); diff --git a/cpp/src/groupby/sort/group_reductions.hpp b/cpp/src/groupby/sort/group_reductions.hpp index 
5e76dc3135a..f8a531094c6 100644 --- a/cpp/src/groupby/sort/group_reductions.hpp +++ b/cpp/src/groupby/sort/group_reductions.hpp @@ -18,10 +18,10 @@ #include #include +#include #include #include -#include #include diff --git a/cpp/src/groupby/sort/group_replace_nulls.cu b/cpp/src/groupby/sort/group_replace_nulls.cu index 566507da230..088ed05e5eb 100644 --- a/cpp/src/groupby/sort/group_replace_nulls.cu +++ b/cpp/src/groupby/sort/group_replace_nulls.cu @@ -19,9 +19,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_scan.hpp b/cpp/src/groupby/sort/group_scan.hpp index 6f2daae5f9d..b5d8ce23a97 100644 --- a/cpp/src/groupby/sort/group_scan.hpp +++ b/cpp/src/groupby/sort/group_scan.hpp @@ -18,10 +18,10 @@ #include #include +#include #include #include -#include #include diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index b360ba2c45d..86835ea8a67 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -29,12 +29,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 5e892710d3b..2358f47bbbb 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -26,11 +26,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_std.cu b/cpp/src/groupby/sort/group_std.cu index 70f64186f21..86ee20dbbe2 100644 --- a/cpp/src/groupby/sort/group_std.cu +++ b/cpp/src/groupby/sort/group_std.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -29,7 +30,6 @@ #include #include #include -#include #include #include diff --git a/cpp/src/groupby/sort/group_sum.cu 
b/cpp/src/groupby/sort/group_sum.cu index 316b6f395bb..fbbc9b5fd15 100644 --- a/cpp/src/groupby/sort/group_sum.cu +++ b/cpp/src/groupby/sort/group_sum.cu @@ -17,10 +17,10 @@ #include "groupby/sort/group_single_pass_reduction_util.cuh" #include +#include #include #include -#include namespace cudf { namespace groupby { diff --git a/cpp/src/groupby/sort/group_sum_scan.cu b/cpp/src/groupby/sort/group_sum_scan.cu index 01c4d0c2c4a..d3af8c8794a 100644 --- a/cpp/src/groupby/sort/group_sum_scan.cu +++ b/cpp/src/groupby/sort/group_sum_scan.cu @@ -16,8 +16,9 @@ #include "groupby/sort/group_scan_util.cuh" +#include + #include -#include namespace cudf { namespace groupby { diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index f211c61b3b7..62bceccdf5f 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -33,9 +33,9 @@ #include #include #include +#include #include -#include #include @@ -145,7 +145,7 @@ void scan_result_functor::operator()(aggregation const& agg) return cudf::detail::sequence(group_labels.size(), *cudf::make_fixed_width_scalar(size_type{0}, stream), stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); } else { auto sort_order = (rank_agg._method == rank_method::FIRST ? 
cudf::detail::stable_sorted_order : cudf::detail::sorted_order); @@ -153,7 +153,7 @@ void scan_result_functor::operator()(aggregation const& agg) {order::ASCENDING, rank_agg._column_order}, {null_order::AFTER, rank_agg._null_precedence}, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); } }(); @@ -172,18 +172,18 @@ void scan_result_functor::operator()(aggregation const& agg) helper.group_labels(stream), helper.group_offsets(stream), stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); if (rank_agg._percentage != rank_percentage::NONE) { auto count = get_grouped_values().nullable() and rank_agg._null_handling == null_policy::EXCLUDE ? detail::group_count_valid(get_grouped_values(), helper.group_labels(stream), helper.num_groups(stream), stream, - rmm::mr::get_current_device_resource()) + cudf::get_current_device_resource_ref()) : detail::group_count_all(helper.group_offsets(stream), helper.num_groups(stream), stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); result = detail::group_rank_to_percentage(rank_agg._method, rank_agg._percentage, *result, diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu index 4da1da089cd..35e3e05a364 100644 --- a/cpp/src/groupby/sort/sort_helper.cu +++ b/cpp/src/groupby/sort/sort_helper.cu @@ -31,11 +31,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -100,7 +100,7 @@ column_view sort_groupby_helper::key_sort_order(rmm::cuda_stream_view stream) numeric_scalar(0, true, stream), numeric_scalar(1, true, stream), stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); return sliced_key_sorted_order(); } @@ -109,7 +109,7 @@ column_view sort_groupby_helper::key_sort_order(rmm::cuda_stream_view stream) ? 
std::vector(_keys.num_columns(), null_order::AFTER) : _null_precedence; _key_sorted_order = cudf::detail::stable_sorted_order( - _keys, {}, precedence, stream, rmm::mr::get_current_device_resource()); + _keys, {}, precedence, stream, cudf::get_current_device_resource_ref()); } else { // Pandas style // Temporarily prepend the keys table with a column that indicates the // presence of a null value within a row. This allows moving all rows that @@ -125,7 +125,7 @@ column_view sort_groupby_helper::key_sort_order(rmm::cuda_stream_view stream) }(); _key_sorted_order = cudf::detail::stable_sorted_order( - augmented_keys, {}, precedence, stream, rmm::mr::get_current_device_resource()); + augmented_keys, {}, precedence, stream, cudf::get_current_device_resource_ref()); // All rows with one or more null values are at the end of the resulting sorted order. } @@ -223,7 +223,7 @@ column_view sort_groupby_helper::unsorted_keys_labels(rmm::cuda_stream_view stre scatter_map, table_view({temp_labels->view()}), stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); _unsorted_keys_labels = std::move(t_unsorted_keys_labels->release()[0]); @@ -235,13 +235,13 @@ column_view sort_groupby_helper::keys_bitmask_column(rmm::cuda_stream_view strea if (_keys_bitmask_column) return _keys_bitmask_column->view(); auto [row_bitmask, null_count] = - cudf::detail::bitmask_and(_keys, stream, rmm::mr::get_current_device_resource()); + cudf::detail::bitmask_and(_keys, stream, cudf::get_current_device_resource_ref()); auto const zero = numeric_scalar(0, true, stream); // Create a temporary variable and only set _keys_bitmask_column right before the return. // This way, a 2nd (parallel) call to this will not be given a partially created object. 
auto keys_bitmask_column = cudf::detail::sequence( - _keys.num_rows(), zero, zero, stream, rmm::mr::get_current_device_resource()); + _keys.num_rows(), zero, zero, stream, cudf::get_current_device_resource_ref()); keys_bitmask_column->set_null_mask(std::move(row_bitmask), null_count); _keys_bitmask_column = std::move(keys_bitmask_column); diff --git a/cpp/src/hash/md5_hash.cu b/cpp/src/hash/md5_hash.cu index 0b559e8e86c..c7bfd4aecf4 100644 --- a/cpp/src/hash/md5_hash.cu +++ b/cpp/src/hash/md5_hash.cu @@ -25,11 +25,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/hash/murmurhash3_x64_128.cu b/cpp/src/hash/murmurhash3_x64_128.cu index 6c91532a193..090bd92af8c 100644 --- a/cpp/src/hash/murmurhash3_x64_128.cu +++ b/cpp/src/hash/murmurhash3_x64_128.cu @@ -19,10 +19,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/src/hash/murmurhash3_x86_32.cu b/cpp/src/hash/murmurhash3_x86_32.cu index eac72f5d995..dd7b19633be 100644 --- a/cpp/src/hash/murmurhash3_x86_32.cu +++ b/cpp/src/hash/murmurhash3_x86_32.cu @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/src/hash/sha1_hash.cu b/cpp/src/hash/sha1_hash.cu index f7609eb26af..3a0c442ed16 100644 --- a/cpp/src/hash/sha1_hash.cu +++ b/cpp/src/hash/sha1_hash.cu @@ -19,11 +19,11 @@ #include #include #include +#include #include #include #include -#include #include diff --git a/cpp/src/hash/sha224_hash.cu b/cpp/src/hash/sha224_hash.cu index cf04504a489..3ac3c5dbbba 100644 --- a/cpp/src/hash/sha224_hash.cu +++ b/cpp/src/hash/sha224_hash.cu @@ -19,11 +19,11 @@ #include #include #include +#include #include #include #include -#include #include diff --git a/cpp/src/hash/sha256_hash.cu b/cpp/src/hash/sha256_hash.cu index 664913c0f4c..8036308f09e 100644 --- a/cpp/src/hash/sha256_hash.cu +++ b/cpp/src/hash/sha256_hash.cu @@ -19,11 +19,11 @@ #include #include #include 
+#include #include #include #include -#include #include diff --git a/cpp/src/hash/sha384_hash.cu b/cpp/src/hash/sha384_hash.cu index 92192f501ec..30fe181d55b 100644 --- a/cpp/src/hash/sha384_hash.cu +++ b/cpp/src/hash/sha384_hash.cu @@ -19,11 +19,11 @@ #include #include #include +#include #include #include #include -#include #include diff --git a/cpp/src/hash/sha512_hash.cu b/cpp/src/hash/sha512_hash.cu index 244206aeeb9..fd74f38423b 100644 --- a/cpp/src/hash/sha512_hash.cu +++ b/cpp/src/hash/sha512_hash.cu @@ -19,11 +19,11 @@ #include #include #include +#include #include #include #include -#include #include diff --git a/cpp/src/hash/sha_hash.cuh b/cpp/src/hash/sha_hash.cuh index 6976241057e..ebaec8e2775 100644 --- a/cpp/src/hash/sha_hash.cuh +++ b/cpp/src/hash/sha_hash.cuh @@ -24,11 +24,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/hash/xxhash_64.cu b/cpp/src/hash/xxhash_64.cu index 4366c12b453..fad8383210b 100644 --- a/cpp/src/hash/xxhash_64.cu +++ b/cpp/src/hash/xxhash_64.cu @@ -19,11 +19,11 @@ #include #include #include +#include #include #include #include -#include #include diff --git a/cpp/src/interop/arrow_utilities.cpp b/cpp/src/interop/arrow_utilities.cpp index 4292552a800..a99262fb3bf 100644 --- a/cpp/src/interop/arrow_utilities.cpp +++ b/cpp/src/interop/arrow_utilities.cpp @@ -21,7 +21,6 @@ #include #include -#include #include #include @@ -98,6 +97,7 @@ ArrowType id_to_arrow_type(cudf::type_id id) ArrowType id_to_arrow_storage_type(cudf::type_id id) { switch (id) { + case cudf::type_id::TIMESTAMP_DAYS: return NANOARROW_TYPE_INT32; case cudf::type_id::TIMESTAMP_SECONDS: case cudf::type_id::TIMESTAMP_MILLISECONDS: case cudf::type_id::TIMESTAMP_MICROSECONDS: diff --git a/cpp/src/interop/arrow_utilities.hpp b/cpp/src/interop/arrow_utilities.hpp index 1cee3071fcb..1b79fbf9eda 100644 --- a/cpp/src/interop/arrow_utilities.hpp +++ b/cpp/src/interop/arrow_utilities.hpp @@ -17,11 +17,10 @@ 
#pragma once #include +#include #include #include -#include -#include #include diff --git a/cpp/src/interop/decimal_conversion_utilities.cu b/cpp/src/interop/decimal_conversion_utilities.cu new file mode 100644 index 00000000000..2f81c754a30 --- /dev/null +++ b/cpp/src/interop/decimal_conversion_utilities.cu @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "decimal_conversion_utilities.cuh" + +#include +#include +#include + +#include + +#include + +#include + +namespace cudf { +namespace detail { + +template +std::unique_ptr convert_decimals_to_decimal128( + cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) +{ + static_assert(std::is_same_v or std::is_same_v, + "Only int32 and int64 decimal types can be converted to decimal128."); + + constexpr size_type BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(DecimalType); + auto buf = std::make_unique(column.size() * sizeof(__int128_t), stream, mr); + + thrust::for_each(rmm::exec_policy_nosync(stream, mr), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(column.size()), + [in = column.begin(), + out = reinterpret_cast(buf->data()), + BIT_WIDTH_RATIO] __device__(auto in_idx) { + auto const out_idx = in_idx * BIT_WIDTH_RATIO; + // the lowest order bits are the value, the remainder + // simply matches the sign bit to satisfy the two's + // complement integer 
representation of negative numbers. + out[out_idx] = in[in_idx]; +#pragma unroll BIT_WIDTH_RATIO - 1 + for (auto i = 1; i < BIT_WIDTH_RATIO; ++i) { + out[out_idx + i] = in[in_idx] < 0 ? -1 : 0; + } + }); + + return buf; +} + +// Instantiate templates for int32_t and int64_t decimal types +template std::unique_ptr convert_decimals_to_decimal128( + cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); + +template std::unique_ptr convert_decimals_to_decimal128( + cudf::column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); + +} // namespace detail +} // namespace cudf diff --git a/cpp/src/interop/decimal_conversion_utilities.cuh b/cpp/src/interop/decimal_conversion_utilities.cuh new file mode 100644 index 00000000000..6b62eb0fee4 --- /dev/null +++ b/cpp/src/interop/decimal_conversion_utilities.cuh @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +#include + +#include + +namespace cudf::detail { + +/** + * @brief Convert decimal32 and decimal64 numeric data to decimal128 and return the device vector + * + * @tparam DecimalType to convert from + * + * @param column A view of the input columns + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + * + * @return A device vector containing the converted decimal128 data + */ +template +std::unique_ptr convert_decimals_to_decimal128( + cudf::column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); + +} // namespace cudf::detail diff --git a/cpp/src/interop/detail/arrow_allocator.cpp b/cpp/src/interop/detail/arrow_allocator.cpp deleted file mode 100644 index 2a19a5360fe..00000000000 --- a/cpp/src/interop/detail/arrow_allocator.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include - -#include - -namespace cudf { -namespace detail { - -/* - Enable Transparent Huge Pages (THP) for large (>4MB) allocations. - `buf` is returned untouched. - Enabling THP can improve performance of device-host memory transfers - significantly, see . 
-*/ -template -T enable_hugepage(T&& buf) -{ - if (buf->size() < (1u << 22u)) { // Smaller than 4 MB - return std::move(buf); - } - -#ifdef MADV_HUGEPAGE - auto const pagesize = sysconf(_SC_PAGESIZE); - void* addr = const_cast(buf->data()); - if (addr == nullptr) { return std::move(buf); } - auto length{static_cast(buf->size())}; - if (std::align(pagesize, pagesize, addr, length)) { - // Intentionally not checking for errors that may be returned by older kernel versions; - // optimistically tries enabling huge pages. - madvise(addr, length, MADV_HUGEPAGE); - } -#endif - return std::move(buf); -} - -std::unique_ptr allocate_arrow_buffer(int64_t const size, arrow::MemoryPool* ar_mr) -{ - /* - nvcc 11.0 generates Internal Compiler Error during codegen when arrow::AllocateBuffer - and `ValueOrDie` are used inside a CUDA compilation unit. - - To work around this issue we compile an allocation shim in C++ and use - that from our cuda sources - */ - arrow::Result> result = arrow::AllocateBuffer(size, ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to allocate Arrow buffer"); - return enable_hugepage(std::move(result).ValueOrDie()); -} - -std::shared_ptr allocate_arrow_bitmap(int64_t const size, arrow::MemoryPool* ar_mr) -{ - /* - nvcc 11.0 generates Internal Compiler Error during codegen when arrow::AllocateBuffer - and `ValueOrDie` are used inside a CUDA compilation unit. 
- - To work around this issue we compile an allocation shim in C++ and use - that from our cuda sources - */ - arrow::Result> result = arrow::AllocateBitmap(size, ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to allocate Arrow bitmap"); - return enable_hugepage(std::move(result).ValueOrDie()); -} - -} // namespace detail -} // namespace cudf diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp index 78ddd7f5ad5..ba5b11b90d8 100644 --- a/cpp/src/interop/dlpack.cpp +++ b/cpp/src/interop/dlpack.cpp @@ -20,12 +20,12 @@ #include #include #include +#include #include #include #include #include -#include #include diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu deleted file mode 100644 index 579820cbae3..00000000000 --- a/cpp/src/interop/from_arrow.cu +++ /dev/null @@ -1,524 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -namespace cudf { - -namespace detail { -data_type arrow_to_cudf_type(arrow::DataType const& arrow_type) -{ - switch (arrow_type.id()) { - case arrow::Type::NA: return data_type(type_id::EMPTY); - case arrow::Type::BOOL: return data_type(type_id::BOOL8); - case arrow::Type::INT8: return data_type(type_id::INT8); - case arrow::Type::INT16: return data_type(type_id::INT16); - case arrow::Type::INT32: return data_type(type_id::INT32); - case arrow::Type::INT64: return data_type(type_id::INT64); - case arrow::Type::UINT8: return data_type(type_id::UINT8); - case arrow::Type::UINT16: return data_type(type_id::UINT16); - case arrow::Type::UINT32: return data_type(type_id::UINT32); - case arrow::Type::UINT64: return data_type(type_id::UINT64); - case arrow::Type::FLOAT: return data_type(type_id::FLOAT32); - case arrow::Type::DOUBLE: return data_type(type_id::FLOAT64); - case arrow::Type::DATE32: return data_type(type_id::TIMESTAMP_DAYS); - case arrow::Type::TIMESTAMP: { - auto type = static_cast(&arrow_type); - switch (type->unit()) { - case arrow::TimeUnit::type::SECOND: return data_type(type_id::TIMESTAMP_SECONDS); - case arrow::TimeUnit::type::MILLI: return data_type(type_id::TIMESTAMP_MILLISECONDS); - case arrow::TimeUnit::type::MICRO: return data_type(type_id::TIMESTAMP_MICROSECONDS); - case arrow::TimeUnit::type::NANO: return data_type(type_id::TIMESTAMP_NANOSECONDS); - default: CUDF_FAIL("Unsupported timestamp unit in arrow"); - } - } - case arrow::Type::DURATION: { - auto type = static_cast(&arrow_type); - switch (type->unit()) { - case arrow::TimeUnit::type::SECOND: return data_type(type_id::DURATION_SECONDS); - case arrow::TimeUnit::type::MILLI: return data_type(type_id::DURATION_MILLISECONDS); - case arrow::TimeUnit::type::MICRO: 
return data_type(type_id::DURATION_MICROSECONDS); - case arrow::TimeUnit::type::NANO: return data_type(type_id::DURATION_NANOSECONDS); - default: CUDF_FAIL("Unsupported duration unit in arrow"); - } - } - case arrow::Type::STRING: return data_type(type_id::STRING); - case arrow::Type::LARGE_STRING: return data_type(type_id::STRING); - case arrow::Type::DICTIONARY: return data_type(type_id::DICTIONARY32); - case arrow::Type::LIST: return data_type(type_id::LIST); - case arrow::Type::DECIMAL: { - auto const type = static_cast(&arrow_type); - return data_type{type_id::DECIMAL128, -type->scale()}; - } - case arrow::Type::STRUCT: return data_type(type_id::STRUCT); - default: CUDF_FAIL("Unsupported type_id conversion to cudf"); - } -} - -namespace { -/** - * @brief Functor to return column for a corresponding arrow array. column - * is formed from buffer underneath the arrow array along with any offset and - * change in length that array has. - */ -struct dispatch_to_cudf_column { - /** - * @brief Returns mask from an array without any offsets. 
- */ - std::unique_ptr get_mask_buffer(arrow::Array const& array, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - if (array.null_bitmap_data() == nullptr) { - return std::make_unique(0, stream, mr); - } - auto const null_bitmap_size = array.null_bitmap()->size(); - auto const allocation_size = - bitmask_allocation_size_bytes(static_cast(null_bitmap_size * CHAR_BIT)); - auto mask = std::make_unique(allocation_size, stream, mr); - auto mask_buffer = array.null_bitmap(); - CUDF_CUDA_TRY(cudaMemcpyAsync(mask->data(), - reinterpret_cast(mask_buffer->address()), - null_bitmap_size, - cudaMemcpyDefault, - stream.value())); - // Zero-initialize trailing padding bytes - auto const num_trailing_bytes = allocation_size - null_bitmap_size; - if (num_trailing_bytes > 0) { - auto trailing_bytes = static_cast(mask->data()) + null_bitmap_size; - CUDF_CUDA_TRY(cudaMemsetAsync(trailing_bytes, 0, num_trailing_bytes, stream.value())); - } - return mask; - } - - template ())> - std::unique_ptr operator()( - arrow::Array const&, data_type, bool, rmm::cuda_stream_view, rmm::device_async_resource_ref) - { - CUDF_FAIL("Unsupported type in from_arrow."); - } - - template ())> - std::unique_ptr operator()(arrow::Array const& array, - data_type type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) - { - auto data_buffer = array.data()->buffers[1]; - size_type const num_rows = array.length(); - auto const has_nulls = skip_mask ? 
false : array.null_bitmap_data() != nullptr; - auto col = make_fixed_width_column(type, num_rows, mask_state::UNALLOCATED, stream, mr); - auto mutable_column_view = col->mutable_view(); - CUDF_CUDA_TRY(cudaMemcpyAsync( - mutable_column_view.data(), - reinterpret_cast(data_buffer->address()) + array.offset() * sizeof(T), - sizeof(T) * num_rows, - cudaMemcpyDefault, - stream.value())); - if (has_nulls) { - auto tmp_mask = get_mask_buffer(array, stream, mr); - - // If array is sliced, we have to copy whole mask and then take copy. - auto out_mask = (num_rows == static_cast(data_buffer->size() / sizeof(T))) - ? std::move(*tmp_mask) - : cudf::detail::copy_bitmask(static_cast(tmp_mask->data()), - array.offset(), - array.offset() + num_rows, - stream, - mr); - - col->set_null_mask(std::move(out_mask), array.null_count()); - } - - return col; - } -}; - -std::unique_ptr get_empty_type_column(size_type size) -{ - // this abomination is required by cuDF Python, which needs to handle - // [PyArrow null arrays](https://arrow.apache.org/docs/python/generated/pyarrow.NullArray.html) - // of finite length - return std::make_unique( - data_type(type_id::EMPTY), size, rmm::device_buffer{}, rmm::device_buffer{}, size); -} - -/** - * @brief Returns cudf column formed from given arrow array - * This has been introduced to take care of compiler error "error: explicit specialization of - * function must precede its first use" - */ -std::unique_ptr get_column(arrow::Array const& array, - data_type type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - using DeviceType = __int128_t; - - auto data_buffer = array.data()->buffers[1]; - auto const num_rows = static_cast(array.length()); - auto col = make_fixed_width_column(type, num_rows, 
mask_state::UNALLOCATED, stream, mr); - auto mutable_column_view = col->mutable_view(); - - CUDF_CUDA_TRY(cudaMemcpyAsync( - mutable_column_view.data(), - reinterpret_cast(data_buffer->address()) + array.offset() * sizeof(DeviceType), - sizeof(DeviceType) * num_rows, - cudaMemcpyDefault, - stream.value())); - - auto null_mask = [&] { - if (not skip_mask and array.null_bitmap_data()) { - auto temp_mask = get_mask_buffer(array, stream, mr); - // If array is sliced, we have to copy whole mask and then take copy. - return (num_rows == static_cast(data_buffer->size() / sizeof(DeviceType))) - ? std::move(*temp_mask.release()) - : cudf::detail::copy_bitmask(static_cast(temp_mask->data()), - array.offset(), - array.offset() + num_rows, - stream, - mr); - } - return rmm::device_buffer{}; - }(); - - col->set_null_mask(std::move(null_mask), array.null_count()); - return col; -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()(arrow::Array const& array, - data_type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto data_buffer = array.data()->buffers[1]; - // mask-to-bools expects the mask to be bitmask_type aligned/padded - auto data = rmm::device_buffer( - cudf::bitmask_allocation_size_bytes(data_buffer->size() * CHAR_BIT), stream, mr); - CUDF_CUDA_TRY(cudaMemcpyAsync(data.data(), - reinterpret_cast(data_buffer->address()), - data_buffer->size(), - cudaMemcpyDefault, - stream.value())); - auto out_col = mask_to_bools(static_cast(data.data()), - array.offset(), - array.offset() + array.length(), - stream, - mr); - - auto const has_nulls = skip_mask ? 
false : array.null_bitmap_data() != nullptr; - if (has_nulls) { - auto out_mask = - detail::copy_bitmask(static_cast(get_mask_buffer(array, stream, mr)->data()), - array.offset(), - array.offset() + array.length(), - stream, - mr); - - out_col->set_null_mask(std::move(out_mask), array.null_count()); - } - - return out_col; -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type, - bool, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - if (array.length() == 0) { return make_empty_column(type_id::STRING); } - - std::unique_ptr offsets_column; - std::unique_ptr char_array; - - if (array.type_id() == arrow::Type::LARGE_STRING) { - auto str_array = static_cast(&array); - auto offset_array = std::make_unique( - str_array->value_offsets()->size() / sizeof(int64_t), str_array->value_offsets(), nullptr); - offsets_column = dispatch_to_cudf_column{}.operator()( - *offset_array, data_type(type_id::INT64), true, stream, mr); - char_array = std::make_unique( - str_array->value_data()->size(), str_array->value_data(), nullptr); - } else if (array.type_id() == arrow::Type::STRING) { - auto str_array = static_cast(&array); - auto offset_array = std::make_unique( - str_array->value_offsets()->size() / sizeof(int32_t), str_array->value_offsets(), nullptr); - offsets_column = dispatch_to_cudf_column{}.operator()( - *offset_array, data_type(type_id::INT32), true, stream, mr); - char_array = std::make_unique( - str_array->value_data()->size(), str_array->value_data(), nullptr); - } else { - throw std::runtime_error("Unsupported array type"); - } - - rmm::device_buffer chars(char_array->length(), stream, mr); - auto data_buffer = char_array->data()->buffers[1]; - CUDF_CUDA_TRY(cudaMemcpyAsync(chars.data(), - reinterpret_cast(data_buffer->address()), - chars.size(), - cudaMemcpyDefault, - stream.value())); - - auto const num_rows = offsets_column->size() - 1; - auto out_col = make_strings_column(num_rows, - 
std::move(offsets_column), - std::move(chars), - array.null_count(), - std::move(*get_mask_buffer(array, stream, mr))); - - return num_rows == array.length() - ? std::move(out_col) - : std::make_unique( - cudf::detail::slice(out_col->view(), - static_cast(array.offset()), - static_cast(array.offset() + array.length()), - stream), - stream, - mr); -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type, - bool, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto dict_array = static_cast(&array); - auto dict_type = arrow_to_cudf_type(*(dict_array->dictionary()->type())); - auto keys_column = get_column(*(dict_array->dictionary()), dict_type, true, stream, mr); - auto ind_type = arrow_to_cudf_type(*(dict_array->indices()->type())); - - auto indices_column = get_column(*(dict_array->indices()), ind_type, false, stream, mr); - // If index type is not of type uint32_t, then cast it to uint32_t - auto const dict_indices_type = data_type{type_id::UINT32}; - if (indices_column->type().id() != dict_indices_type.id()) - indices_column = cudf::detail::cast(indices_column->view(), dict_indices_type, stream, mr); - - // Child columns shouldn't have masks and we need the mask in main column - auto column_contents = indices_column->release(); - indices_column = std::make_unique(dict_indices_type, - static_cast(array.length()), - std::move(*(column_contents.data)), - rmm::device_buffer{}, - 0); - - return make_dictionary_column(std::move(keys_column), - std::move(indices_column), - std::move(*(column_contents.null_mask)), - array.null_count()); -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type, - bool, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto struct_array = static_cast(&array); - std::vector> child_columns; - // Offsets have already been applied to child - arrow::ArrayVector array_children = 
struct_array->fields(); - std::transform(array_children.cbegin(), - array_children.cend(), - std::back_inserter(child_columns), - [&mr, &stream](auto const& child_array) { - auto type = arrow_to_cudf_type(*(child_array->type())); - return get_column(*child_array, type, false, stream, mr); - }); - - auto out_mask = std::move(*(get_mask_buffer(array, stream, mr))); - if (struct_array->null_bitmap_data() != nullptr) { - out_mask = detail::copy_bitmask(static_cast(out_mask.data()), - array.offset(), - array.offset() + array.length(), - stream, - mr); - } - - return make_structs_column( - array.length(), move(child_columns), array.null_count(), std::move(out_mask), stream, mr); -} - -template <> -std::unique_ptr dispatch_to_cudf_column::operator()( - arrow::Array const& array, - data_type, - bool, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto list_array = static_cast(&array); - auto offset_array = std::make_unique( - list_array->value_offsets()->size() / sizeof(int32_t), list_array->value_offsets(), nullptr); - auto offsets_column = dispatch_to_cudf_column{}.operator()( - *offset_array, data_type(type_id::INT32), true, stream, mr); - - auto child_type = arrow_to_cudf_type(*(list_array->values()->type())); - auto child_column = get_column(*(list_array->values()), child_type, false, stream, mr); - - auto const num_rows = offsets_column->size() - 1; - auto out_col = make_lists_column(num_rows, - std::move(offsets_column), - std::move(child_column), - array.null_count(), - std::move(*get_mask_buffer(array, stream, mr)), - stream, - mr); - - return num_rows == array.length() - ? 
std::move(out_col) - : std::make_unique( - cudf::detail::slice(out_col->view(), - static_cast(array.offset()), - static_cast(array.offset() + array.length()), - stream), - stream, - mr); -} - -std::unique_ptr get_column(arrow::Array const& array, - data_type type, - bool skip_mask, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return type.id() != type_id::EMPTY - ? type_dispatcher(type, dispatch_to_cudf_column{}, array, type, skip_mask, stream, mr) - : get_empty_type_column(array.length()); -} - -} // namespace - -std::unique_ptr
from_arrow(arrow::Table const& input_table, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - if (input_table.num_columns() == 0) { return std::make_unique
(); } - std::vector> columns; - auto chunked_arrays = input_table.columns(); - std::transform(chunked_arrays.begin(), - chunked_arrays.end(), - std::back_inserter(columns), - [&mr, &stream](auto const& chunked_array) { - std::vector> concat_columns; - auto cudf_type = arrow_to_cudf_type(*(chunked_array->type())); - auto array_chunks = chunked_array->chunks(); - if (cudf_type.id() == type_id::EMPTY) { - return get_empty_type_column(chunked_array->length()); - } - std::transform(array_chunks.begin(), - array_chunks.end(), - std::back_inserter(concat_columns), - [&cudf_type, &mr, &stream](auto const& array_chunk) { - return get_column(*array_chunk, cudf_type, false, stream, mr); - }); - if (concat_columns.empty()) { - return std::make_unique( - cudf_type, 0, rmm::device_buffer{}, rmm::device_buffer{}, 0); - } else if (concat_columns.size() == 1) { - return std::move(concat_columns[0]); - } - - std::vector column_views; - std::transform(concat_columns.begin(), - concat_columns.end(), - std::back_inserter(column_views), - [](auto const& col) { return col->view(); }); - return cudf::detail::concatenate(column_views, stream, mr); - }); - - return std::make_unique
(std::move(columns)); -} - -std::unique_ptr from_arrow(arrow::Scalar const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto maybe_array = arrow::MakeArrayFromScalar(input, 1); - if (!maybe_array.ok()) { CUDF_FAIL("Failed to create array"); } - auto array = *maybe_array; - - auto field = arrow::field("", input.type); - - auto table = arrow::Table::Make(arrow::schema({field}), {array}); - - auto cudf_table = detail::from_arrow(*table, stream, mr); - - auto cv = cudf_table->view().column(0); - return get_element(cv, 0, stream); -} - -} // namespace detail - -std::unique_ptr
from_arrow(arrow::Table const& input_table, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - - return detail::from_arrow(input_table, stream, mr); -} - -std::unique_ptr from_arrow(arrow::Scalar const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - - return detail::from_arrow(input, stream, mr); -} -} // namespace cudf diff --git a/cpp/src/interop/from_arrow_device.cu b/cpp/src/interop/from_arrow_device.cu index 440df571de0..057e563c86e 100644 --- a/cpp/src/interop/from_arrow_device.cu +++ b/cpp/src/interop/from_arrow_device.cu @@ -28,13 +28,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include diff --git a/cpp/src/interop/from_arrow_host.cu b/cpp/src/interop/from_arrow_host.cu index efde8f2a463..2e9504a6726 100644 --- a/cpp/src/interop/from_arrow_host.cu +++ b/cpp/src/interop/from_arrow_host.cu @@ -31,13 +31,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include diff --git a/cpp/src/interop/from_arrow_stream.cu b/cpp/src/interop/from_arrow_stream.cu index 578105aa90a..deff62be576 100644 --- a/cpp/src/interop/from_arrow_stream.cu +++ b/cpp/src/interop/from_arrow_stream.cu @@ -24,7 +24,6 @@ #include #include -#include #include #include diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu deleted file mode 100644 index 6b163e3441e..00000000000 --- a/cpp/src/interop/to_arrow.cu +++ /dev/null @@ -1,486 +0,0 @@ -/* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "arrow_utilities.hpp" -#include "detail/arrow_allocator.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include - -namespace cudf { -namespace detail { -namespace { - -/** - * @brief Create arrow data buffer from given cudf column - */ -template -std::shared_ptr fetch_data_buffer(device_span input, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - int64_t const data_size_in_bytes = sizeof(T) * input.size(); - - auto data_buffer = allocate_arrow_buffer(data_size_in_bytes, ar_mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - input.data(), - data_size_in_bytes, - cudaMemcpyDefault, - stream.value())); - - return std::move(data_buffer); -} - -/** - * @brief Create arrow buffer of mask from given cudf column - */ -std::shared_ptr fetch_mask_buffer(column_view input_view, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - int64_t const mask_size_in_bytes = cudf::bitmask_allocation_size_bytes(input_view.size()); - - if (input_view.has_nulls()) { - auto mask_buffer = allocate_arrow_bitmap(static_cast(input_view.size()), ar_mr); - CUDF_CUDA_TRY(cudaMemcpyAsync( - mask_buffer->mutable_data(), - (input_view.offset() > 0) - ? 
cudf::detail::copy_bitmask(input_view, stream, rmm::mr::get_current_device_resource()) - .data() - : input_view.null_mask(), - mask_size_in_bytes, - cudaMemcpyDefault, - stream.value())); - - // Resets all padded bits to 0 - mask_buffer->ZeroPadding(); - - return mask_buffer; - } - - return nullptr; -} - -/** - * @brief Functor to convert cudf column to arrow array - */ -struct dispatch_to_arrow { - /** - * @brief Creates vector Arrays from given cudf column children - */ - std::vector> fetch_child_array( - column_view input_view, - std::vector const& metadata, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) - { - std::vector> child_arrays; - std::transform( - input_view.child_begin(), - input_view.child_end(), - metadata.begin(), - std::back_inserter(child_arrays), - [&ar_mr, &stream](auto const& child, auto const& meta) { - return type_dispatcher( - child.type(), dispatch_to_arrow{}, child, child.type().id(), meta, ar_mr, stream); - }); - return child_arrays; - } - - template ())> - std::shared_ptr operator()( - column_view, cudf::type_id, column_metadata const&, arrow::MemoryPool*, rmm::cuda_stream_view) - { - CUDF_FAIL("Unsupported type for to_arrow."); - } - - template ())> - std::shared_ptr operator()(column_view input_view, - cudf::type_id id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) - { - return to_arrow_array( - id, - static_cast(input_view.size()), - fetch_data_buffer( - device_span(input_view.data(), input_view.size()), ar_mr, stream), - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); - } -}; - -// Convert decimal types from libcudf to arrow where those types are not -// directly supported by Arrow. These types must be fit into 128 bits, the -// smallest decimal resolution supported by Arrow. 
-template -std::shared_ptr unsupported_decimals_to_arrow(column_view input, - int32_t precision, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - auto buf = - detail::decimals_to_arrow(input, stream, rmm::mr::get_current_device_resource()); - - auto const buf_size_in_bytes = buf->size(); - auto data_buffer = allocate_arrow_buffer(buf_size_in_bytes, ar_mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - buf->data(), - buf_size_in_bytes, - cudaMemcpyDefault, - stream.value())); - - auto type = arrow::decimal(precision, -input.type().scale()); - auto mask = fetch_mask_buffer(input, ar_mr, stream); - auto buffers = std::vector>{mask, std::move(data_buffer)}; - auto data = std::make_shared(type, input.size(), buffers); - - return std::make_shared(data); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - using DeviceType = int32_t; - return unsupported_decimals_to_arrow( - input, cudf::detail::max_precision(), ar_mr, stream); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - using DeviceType = int64_t; - return unsupported_decimals_to_arrow( - input, cudf::detail::max_precision(), ar_mr, stream); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - using DeviceType = __int128_t; - auto const max_precision = cudf::detail::max_precision(); - - rmm::device_uvector buf(input.size(), stream); - - thrust::copy(rmm::exec_policy(stream), // - input.begin(), - input.end(), - buf.begin()); - - auto const buf_size_in_bytes = buf.size() * sizeof(DeviceType); - auto data_buffer = allocate_arrow_buffer(buf_size_in_bytes, ar_mr); 
- - CUDF_CUDA_TRY(cudaMemcpyAsync( - data_buffer->mutable_data(), buf.data(), buf_size_in_bytes, cudaMemcpyDefault, stream.value())); - - auto type = arrow::decimal(max_precision, -input.type().scale()); - auto mask = fetch_mask_buffer(input, ar_mr, stream); - auto buffers = std::vector>{mask, std::move(data_buffer)}; - auto data = std::make_shared(type, input.size(), buffers); - - return std::make_shared(data); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()(column_view input, - cudf::type_id id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - auto bitmask = bools_to_mask(input, stream, rmm::mr::get_current_device_resource()); - - auto data_buffer = allocate_arrow_buffer(static_cast(bitmask.first->size()), ar_mr); - - CUDF_CUDA_TRY(cudaMemcpyAsync(data_buffer->mutable_data(), - bitmask.first->data(), - bitmask.first->size(), - cudaMemcpyDefault, - stream.value())); - return to_arrow_array(id, - static_cast(input.size()), - std::move(data_buffer), - fetch_mask_buffer(input, ar_mr, stream), - static_cast(input.null_count())); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const&, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - std::unique_ptr tmp_column = - ((input.offset() != 0) or - ((input.num_children() == 1) and (input.child(0).size() - 1 != input.size()))) - ? std::make_unique(input, stream) - : nullptr; - - column_view input_view = (tmp_column != nullptr) ? 
tmp_column->view() : input; - auto child_arrays = fetch_child_array(input_view, {{}, {}}, ar_mr, stream); - if (child_arrays.empty()) { - // Empty string will have only one value in offset of 4 bytes - auto tmp_offset_buffer = allocate_arrow_buffer(sizeof(int32_t), ar_mr); - auto tmp_data_buffer = allocate_arrow_buffer(0, ar_mr); - memset(tmp_offset_buffer->mutable_data(), 0, sizeof(int32_t)); - - return std::make_shared( - 0, std::move(tmp_offset_buffer), std::move(tmp_data_buffer)); - } - auto offset_buffer = child_arrays[strings_column_view::offsets_column_index]->data()->buffers[1]; - auto const sview = strings_column_view{input_view}; - auto data_buffer = fetch_data_buffer( - device_span{sview.chars_begin(stream), - static_cast(sview.chars_size(stream))}, - ar_mr, - stream); - if (sview.offsets().type().id() == cudf::type_id::INT64) { - return std::make_shared(static_cast(input_view.size()), - offset_buffer, - data_buffer, - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); - } else { - return std::make_shared(static_cast(input_view.size()), - offset_buffer, - data_buffer, - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); - } -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const& metadata, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - CUDF_EXPECTS(metadata.children_meta.size() == static_cast(input.num_children()), - "Number of field names and number of children doesn't match\n"); - std::unique_ptr tmp_column = nullptr; - - if (input.offset() != 0) { tmp_column = std::make_unique(input, stream); } - - column_view input_view = (tmp_column != nullptr) ? 
tmp_column->view() : input; - auto child_arrays = fetch_child_array(input_view, metadata.children_meta, ar_mr, stream); - auto mask = fetch_mask_buffer(input_view, ar_mr, stream); - - std::vector> fields; - std::transform(child_arrays.cbegin(), - child_arrays.cend(), - metadata.children_meta.cbegin(), - std::back_inserter(fields), - [](auto const array, auto const meta) { - return std::make_shared( - meta.name, array->type(), array->null_count() > 0); - }); - auto dtype = std::make_shared(fields); - - return std::make_shared(dtype, - static_cast(input_view.size()), - child_arrays, - mask, - static_cast(input_view.null_count())); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const& metadata, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - CUDF_EXPECTS(metadata.children_meta.empty() || - metadata.children_meta.size() == static_cast(input.num_children()), - "Number of field names and number of children do not match\n"); - std::unique_ptr tmp_column = nullptr; - if ((input.offset() != 0) or - ((input.num_children() == 2) and (input.child(0).size() - 1 != input.size()))) { - tmp_column = std::make_unique(input, stream); - } - - column_view input_view = (tmp_column != nullptr) ? tmp_column->view() : input; - auto children_meta = - metadata.children_meta.empty() ? std::vector{{}, {}} : metadata.children_meta; - auto child_arrays = fetch_child_array(input_view, children_meta, ar_mr, stream); - if (child_arrays.empty() || child_arrays[0]->data()->length == 0) { - auto element_type = child_arrays.empty() ? 
arrow::null() : child_arrays[1]->type(); - auto result = arrow::MakeEmptyArray(arrow::list(element_type), ar_mr); - CUDF_EXPECTS(result.ok(), "Failed to construct empty arrow list array\n"); - return result.ValueUnsafe(); - } - - auto offset_buffer = child_arrays[0]->data()->buffers[1]; - auto data = child_arrays[1]; - return std::make_shared(arrow::list(data->type()), - static_cast(input_view.size()), - offset_buffer, - data, - fetch_mask_buffer(input_view, ar_mr, stream), - static_cast(input_view.null_count())); -} - -template <> -std::shared_ptr dispatch_to_arrow::operator()( - column_view input, - cudf::type_id, - column_metadata const& metadata, - arrow::MemoryPool* ar_mr, - rmm::cuda_stream_view stream) -{ - // Arrow dictionary requires indices to be signed integer - std::unique_ptr dict_indices = - detail::cast(cudf::dictionary_column_view(input).get_indices_annotated(), - cudf::data_type{type_id::INT32}, - stream, - rmm::mr::get_current_device_resource()); - auto indices = dispatch_to_arrow{}.operator()( - dict_indices->view(), dict_indices->type().id(), {}, ar_mr, stream); - auto dict_keys = cudf::dictionary_column_view(input).keys(); - auto dictionary = - type_dispatcher(dict_keys.type(), - dispatch_to_arrow{}, - dict_keys, - dict_keys.type().id(), - metadata.children_meta.empty() ? 
column_metadata{} : metadata.children_meta[0], - ar_mr, - stream); - - return std::make_shared( - arrow::dictionary(indices->type(), dictionary->type()), indices, dictionary); -} -} // namespace - -std::shared_ptr to_arrow(table_view input, - std::vector const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr) -{ - CUDF_EXPECTS((metadata.size() == static_cast(input.num_columns())), - "columns' metadata should be equal to number of columns in table"); - - std::vector> arrays; - std::vector> fields; - - std::transform( - input.begin(), - input.end(), - metadata.begin(), - std::back_inserter(arrays), - [&](auto const& c, auto const& meta) { - return c.type().id() != type_id::EMPTY - ? type_dispatcher( - c.type(), detail::dispatch_to_arrow{}, c, c.type().id(), meta, ar_mr, stream) - : std::make_shared(c.size()); - }); - - std::transform( - arrays.begin(), - arrays.end(), - metadata.begin(), - std::back_inserter(fields), - [](auto const& array, auto const& meta) { return arrow::field(meta.name, array->type()); }); - - auto result = arrow::Table::Make(arrow::schema(fields), arrays); - - // synchronize the stream because after the return the data may be accessed from the host before - // the above `cudaMemcpyAsync` calls have completed their copies (especially if pinned host - // memory is used). 
- stream.synchronize(); - - return result; -} - -std::shared_ptr to_arrow(cudf::scalar const& input, - column_metadata const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr) -{ - auto const column = cudf::make_column_from_scalar(input, 1, stream); - cudf::table_view const tv{{column->view()}}; - auto const arrow_table = detail::to_arrow(tv, {metadata}, stream, ar_mr); - auto const ac = arrow_table->column(0); - auto const maybe_scalar = ac->GetScalar(0); - if (!maybe_scalar.ok()) { CUDF_FAIL("Failed to produce a scalar"); } - return maybe_scalar.ValueOrDie(); -} -} // namespace detail - -std::shared_ptr to_arrow(table_view input, - std::vector const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr) -{ - CUDF_FUNC_RANGE(); - return detail::to_arrow(input, metadata, stream, ar_mr); -} - -std::shared_ptr to_arrow(cudf::scalar const& input, - column_metadata const& metadata, - rmm::cuda_stream_view stream, - arrow::MemoryPool* ar_mr) -{ - CUDF_FUNC_RANGE(); - return detail::to_arrow(input, metadata, stream, ar_mr); -} -} // namespace cudf diff --git a/cpp/src/interop/to_arrow_device.cu b/cpp/src/interop/to_arrow_device.cu index 2eb9b912054..a2874b46b06 100644 --- a/cpp/src/interop/to_arrow_device.cu +++ b/cpp/src/interop/to_arrow_device.cu @@ -15,6 +15,7 @@ */ #include "arrow_utilities.hpp" +#include "decimal_conversion_utilities.cuh" #include #include @@ -29,14 +30,13 @@ #include #include #include +#include #include #include #include #include #include -#include -#include #include #include @@ -141,7 +141,9 @@ int construct_decimals(cudf::column_view input, nanoarrow::UniqueArray tmp; NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, input)); - auto buf = detail::decimals_to_arrow(input, stream, mr); + auto buf = detail::convert_decimals_to_decimal128(input, stream, mr); + // Synchronize stream here to ensure the decimal128 buffer is ready. 
+ stream.synchronize(); NANOARROW_RETURN_NOT_OK(set_buffer(std::move(buf), fixed_width_data_buffer_idx, tmp.get())); ArrowArrayMove(tmp.get(), out); @@ -197,7 +199,7 @@ int dispatch_to_arrow_device::operator()(cudf::column&& column, nanoarrow::UniqueArray tmp; NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_BOOL, column)); - auto bitmask = bools_to_mask(column.view(), stream, mr); + auto bitmask = detail::bools_to_mask(column.view(), stream, mr); auto contents = column.release(); NANOARROW_RETURN_NOT_OK(set_null_mask(contents, tmp.get())); NANOARROW_RETURN_NOT_OK( @@ -439,7 +441,7 @@ int dispatch_to_arrow_device_view::operator()(ArrowArray* out) const nanoarrow::UniqueArray tmp; NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_BOOL, column)); - auto bitmask = bools_to_mask(column, stream, mr); + auto bitmask = detail::bools_to_mask(column, stream, mr); NANOARROW_RETURN_NOT_OK( set_buffer(std::move(bitmask.first), fixed_width_data_buffer_idx, tmp.get())); NANOARROW_RETURN_NOT_OK(set_null_mask(column, tmp.get())); diff --git a/cpp/src/interop/to_arrow_host.cu b/cpp/src/interop/to_arrow_host.cu index c9e53ebaab7..79fb7550044 100644 --- a/cpp/src/interop/to_arrow_host.cu +++ b/cpp/src/interop/to_arrow_host.cu @@ -15,6 +15,7 @@ */ #include "arrow_utilities.hpp" +#include "decimal_conversion_utilities.cuh" #include #include @@ -29,14 +30,13 @@ #include #include #include +#include #include #include #include #include #include -#include -#include #include #include @@ -50,41 +50,6 @@ namespace cudf { namespace detail { -template -std::unique_ptr decimals_to_arrow(cudf::column_view input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - constexpr size_type BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(DeviceType); - auto buf = std::make_unique(input.size() * sizeof(__int128_t), stream, mr); - - auto count = thrust::counting_iterator(0); - thrust::for_each(rmm::exec_policy(stream, mr), - count, - count + 
input.size(), - [in = input.begin(), - out = reinterpret_cast(buf->data()), - BIT_WIDTH_RATIO] __device__(auto in_idx) { - auto const out_idx = in_idx * BIT_WIDTH_RATIO; - // the lowest order bits are the value, the remainder - // simply matches the sign bit to satisfy the two's - // complement integer representation of negative numbers. - out[out_idx] = in[in_idx]; -#pragma unroll BIT_WIDTH_RATIO - 1 - for (auto i = 1; i < BIT_WIDTH_RATIO; ++i) { - out[out_idx + i] = in[in_idx] < 0 ? -1 : 0; - } - }); - - return buf; -} - -template std::unique_ptr decimals_to_arrow( - cudf::column_view input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); - -template std::unique_ptr decimals_to_arrow( - cudf::column_view input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); - namespace { struct dispatch_to_arrow_host { @@ -156,7 +121,9 @@ struct dispatch_to_arrow_host { NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_DECIMAL128, column)); NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get()))); - auto buf = detail::decimals_to_arrow(column, stream, mr); + auto buf = detail::convert_decimals_to_decimal128(column, stream, mr); + // No need to synchronize stream here as populate_data_buffer uses the same stream to copy data + // to host. 
NANOARROW_RETURN_NOT_OK( populate_data_buffer(device_span<__int128_t const>( reinterpret_cast(buf->data()), column.size()), @@ -179,7 +146,7 @@ int dispatch_to_arrow_host::operator()(ArrowArray* out) const NANOARROW_RETURN_NOT_OK(initialize_array(tmp.get(), NANOARROW_TYPE_BOOL, column)); NANOARROW_RETURN_NOT_OK(populate_validity_bitmap(ArrowArrayValidityBitmap(tmp.get()))); - auto bitmask = bools_to_mask(column, stream, mr); + auto bitmask = detail::bools_to_mask(column, stream, mr); NANOARROW_RETURN_NOT_OK(populate_data_buffer( device_span(reinterpret_cast(bitmask.first->data()), bitmask.first->size()), diff --git a/cpp/src/interop/to_arrow_schema.cpp b/cpp/src/interop/to_arrow_schema.cpp index b98ca8a7bed..5afed772656 100644 --- a/cpp/src/interop/to_arrow_schema.cpp +++ b/cpp/src/interop/to_arrow_schema.cpp @@ -170,8 +170,9 @@ int dispatch_to_arrow_type::operator()(column_view input, NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(out, NANOARROW_TYPE_LIST)); auto child = input.child(cudf::lists_column_view::child_column_index); ArrowSchemaInit(out->children[0]); - auto child_meta = - metadata.children_meta.empty() ? column_metadata{"element"} : metadata.children_meta[0]; + auto child_meta = metadata.children_meta.empty() + ? column_metadata{"element"} + : metadata.children_meta[cudf::lists_column_view::child_column_index]; out->flags = input.has_nulls() ? 
ARROW_FLAG_NULLABLE : 0; NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(out->children[0], child_meta.name.c_str())); diff --git a/cpp/src/io/avro/reader_impl.cu b/cpp/src/io/avro/reader_impl.cu index 69a0e982a5b..f0a92f7554d 100644 --- a/cpp/src/io/avro/reader_impl.cu +++ b/cpp/src/io/avro/reader_impl.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -33,7 +34,6 @@ #include #include #include -#include #include #include @@ -448,7 +448,7 @@ std::vector decode_data(metadata& meta, } auto block_list = cudf::detail::make_device_uvector_async( - meta.block_list, stream, rmm::mr::get_current_device_resource()); + meta.block_list, stream, cudf::get_current_device_resource_ref()); schema_desc.host_to_device_async(stream); @@ -578,9 +578,9 @@ table_with_metadata read_avro(std::unique_ptr&& source, } d_global_dict = cudf::detail::make_device_uvector_async( - h_global_dict, stream, rmm::mr::get_current_device_resource()); + h_global_dict, stream, cudf::get_current_device_resource_ref()); d_global_dict_data = cudf::detail::make_device_uvector_async( - h_global_dict_data, stream, rmm::mr::get_current_device_resource()); + h_global_dict_data, stream, cudf::get_current_device_resource_ref()); stream.synchronize(); } diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu index 861820f47e7..72649dbe427 100644 --- a/cpp/src/io/comp/debrotli.cu +++ b/cpp/src/io/comp/debrotli.cu @@ -58,6 +58,7 @@ THE SOFTWARE. 
#include "gpuinflate.hpp" #include "io/utilities/block_utils.cuh" +#include #include #include @@ -2047,19 +2048,14 @@ CUDF_KERNEL void __launch_bounds__(block_size, 2) */ size_t __host__ get_gpu_debrotli_scratch_size(int max_num_inputs) { - int sm_count = 0; - int dev = 0; uint32_t max_fb_size, min_fb_size, fb_size; - CUDF_CUDA_TRY(cudaGetDevice(&dev)); - if (cudaSuccess == cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, dev)) { - // printf("%d SMs on device %d\n", sm_count, dev); - max_num_inputs = - min(max_num_inputs, sm_count * 3); // no more than 3 blocks/sm at most due to 32KB smem use - if (max_num_inputs <= 0) { - max_num_inputs = sm_count * 2; // Target 2 blocks/SM by default for scratch mem computation - } + auto const sm_count = cudf::detail::num_multiprocessors(); + // no more than 3 blocks/sm at most due to 32KB smem use + max_num_inputs = std::min(max_num_inputs, sm_count * 3); + if (max_num_inputs <= 0) { + max_num_inputs = sm_count * 2; // Target 2 blocks/SM by default for scratch mem computation } - max_num_inputs = min(max(max_num_inputs, 1), 512); + max_num_inputs = std::min(std::max(max_num_inputs, 1), 512); // Max fb size per block occurs if all huffman tables for all 3 group types fail local_alloc() // with num_htrees=256 (See HuffmanTreeGroupAlloc) max_fb_size = 256 * (630 + 1080 + 920) * 2; // 1.3MB diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp index 5d0c6a8c83b..c3187f73a95 100644 --- a/cpp/src/io/comp/nvcomp_adapter.cpp +++ b/cpp/src/io/comp/nvcomp_adapter.cpp @@ -22,95 +22,47 @@ #include #include +#include +#include #include #include +#include #include -#define NVCOMP_DEFLATE_HEADER -#if __has_include(NVCOMP_DEFLATE_HEADER) -#include NVCOMP_DEFLATE_HEADER -#endif - -#define NVCOMP_ZSTD_HEADER -#if __has_include(NVCOMP_ZSTD_HEADER) -#include NVCOMP_ZSTD_HEADER -#endif - -// When building with nvcomp 4.0 or newer, map the new version macros to the old ones -#ifndef 
NVCOMP_MAJOR_VERSION -#define NVCOMP_MAJOR_VERSION NVCOMP_VER_MAJOR -#define NVCOMP_MINOR_VERSION NVCOMP_VER_MINOR -#define NVCOMP_PATCH_VERSION NVCOMP_VER_PATCH -#endif - -#define NVCOMP_HAS_ZSTD_DECOMP(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 3)) - -#define NVCOMP_HAS_ZSTD_COMP(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 4)) - -#define NVCOMP_HAS_DEFLATE(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 5)) - -#define NVCOMP_HAS_DECOMP_TEMPSIZE_EX(MAJOR, MINOR, PATCH) \ - (MAJOR > 2 or (MAJOR == 2 and MINOR > 3) or (MAJOR == 2 and MINOR == 3 and PATCH >= 1)) - -#define NVCOMP_HAS_COMP_TEMPSIZE_EX(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 6)) - -// ZSTD is stable for nvcomp 2.3.2 or newer -#define NVCOMP_ZSTD_DECOMP_IS_STABLE(MAJOR, MINOR, PATCH) \ - (MAJOR > 2 or (MAJOR == 2 and MINOR > 3) or (MAJOR == 2 and MINOR == 3 and PATCH >= 2)) - namespace cudf::io::nvcomp { // Dispatcher for nvcompBatchedDecompressGetTempSizeEx template -std::optional batched_decompress_get_temp_size_ex(compression_type compression, - Args&&... args) +auto batched_decompress_get_temp_size_ex(compression_type compression, Args&&... 
args) { -#if NVCOMP_HAS_DECOMP_TEMPSIZE_EX(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) switch (compression) { case compression_type::SNAPPY: return nvcompBatchedSnappyDecompressGetTempSizeEx(std::forward(args)...); case compression_type::ZSTD: -#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward(args)...); -#else - return std::nullopt; -#endif case compression_type::LZ4: return nvcompBatchedLZ4DecompressGetTempSizeEx(std::forward(args)...); - case compression_type::DEFLATE: [[fallthrough]]; - default: return std::nullopt; - } -#endif - return std::nullopt; -} - -// Dispatcher for nvcompBatchedDecompressGetTempSize -template -auto batched_decompress_get_temp_size(compression_type compression, Args&&... args) -{ - switch (compression) { - case compression_type::SNAPPY: - return nvcompBatchedSnappyDecompressGetTempSize(std::forward(args)...); - case compression_type::ZSTD: -#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - return nvcompBatchedZstdDecompressGetTempSize(std::forward(args)...); -#else - CUDF_FAIL("Decompression error: " + - nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD).value()); -#endif case compression_type::DEFLATE: -#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - return nvcompBatchedDeflateDecompressGetTempSize(std::forward(args)...); -#else - CUDF_FAIL("Decompression error: " + - nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE).value()); -#endif - case compression_type::LZ4: - return nvcompBatchedLZ4DecompressGetTempSize(std::forward(args)...); + return nvcompBatchedDeflateDecompressGetTempSizeEx(std::forward(args)...); + case compression_type::GZIP: + return nvcompBatchedGzipDecompressGetTempSizeEx(std::forward(args)...); default: CUDF_FAIL("Unsupported compression type"); } } +size_t 
batched_decompress_temp_size(compression_type compression, + size_t num_chunks, + size_t max_uncomp_chunk_size, + size_t max_total_uncomp_size) +{ + size_t temp_size = 0; + nvcompStatus_t nvcomp_status = batched_decompress_get_temp_size_ex( + compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size); + + CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, + "Unable to get scratch size for decompression"); + return temp_size; +} // Dispatcher for nvcompBatchedDecompressAsync template @@ -120,20 +72,12 @@ auto batched_decompress_async(compression_type compression, Args&&... args) case compression_type::SNAPPY: return nvcompBatchedSnappyDecompressAsync(std::forward(args)...); case compression_type::ZSTD: -#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) return nvcompBatchedZstdDecompressAsync(std::forward(args)...); -#else - CUDF_FAIL("Decompression error: " + - nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD).value()); -#endif case compression_type::DEFLATE: -#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) return nvcompBatchedDeflateDecompressAsync(std::forward(args)...); -#else - CUDF_FAIL("Decompression error: " + - nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE).value()); -#endif case compression_type::LZ4: return nvcompBatchedLZ4DecompressAsync(std::forward(args)...); + case compression_type::GZIP: + return nvcompBatchedGzipDecompressAsync(std::forward(args)...); default: CUDF_FAIL("Unsupported compression type"); } } @@ -145,31 +89,11 @@ std::string compression_type_name(compression_type compression) case compression_type::ZSTD: return "Zstandard"; case compression_type::DEFLATE: return "Deflate"; case compression_type::LZ4: return "LZ4"; + case compression_type::GZIP: return "GZIP"; } return "compression_type(" + std::to_string(static_cast(compression)) + ")"; } -size_t 
batched_decompress_temp_size(compression_type compression, - size_t num_chunks, - size_t max_uncomp_chunk_size, - size_t max_total_uncomp_size) -{ - size_t temp_size = 0; - auto nvcomp_status = batched_decompress_get_temp_size_ex( - compression, num_chunks, max_uncomp_chunk_size, &temp_size, max_total_uncomp_size); - - if (nvcomp_status.value_or(nvcompStatus_t::nvcompErrorInternal) != - nvcompStatus_t::nvcompSuccess) { - nvcomp_status = - batched_decompress_get_temp_size(compression, num_chunks, max_uncomp_chunk_size, &temp_size); - } - - CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, - "Unable to get scratch size for decompression"); - - return temp_size; -} - void batched_decompress(compression_type compression, device_span const> inputs, device_span const> outputs, @@ -204,54 +128,10 @@ void batched_decompress(compression_type compression, update_compression_results(nvcomp_statuses, actual_uncompressed_data_sizes, results, stream); } -// Wrapper for nvcompBatchedCompressGetTempSize -auto batched_compress_get_temp_size(compression_type compression, - size_t batch_size, - size_t max_uncompressed_chunk_bytes) -{ - size_t temp_size = 0; - nvcompStatus_t nvcomp_status = nvcompStatus_t::nvcompSuccess; - switch (compression) { - case compression_type::SNAPPY: - nvcomp_status = nvcompBatchedSnappyCompressGetTempSize( - batch_size, max_uncompressed_chunk_bytes, nvcompBatchedSnappyDefaultOpts, &temp_size); - break; - case compression_type::DEFLATE: -#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - nvcomp_status = nvcompBatchedDeflateCompressGetTempSize( - batch_size, max_uncompressed_chunk_bytes, nvcompBatchedDeflateDefaultOpts, &temp_size); - break; -#else - CUDF_FAIL("Compression error: " + - nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE).value()); -#endif - case compression_type::ZSTD: -#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - nvcomp_status = 
nvcompBatchedZstdCompressGetTempSize( - batch_size, max_uncompressed_chunk_bytes, nvcompBatchedZstdDefaultOpts, &temp_size); - break; -#else - CUDF_FAIL("Compression error: " + - nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value()); -#endif - case compression_type::LZ4: - nvcomp_status = nvcompBatchedLZ4CompressGetTempSize( - batch_size, max_uncompressed_chunk_bytes, nvcompBatchedLZ4DefaultOpts, &temp_size); - break; - default: CUDF_FAIL("Unsupported compression type"); - } - - CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, - "Unable to get scratch size for compression"); - return temp_size; -} - -#if NVCOMP_HAS_COMP_TEMPSIZE_EX(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) -// Wrapper for nvcompBatchedCompressGetTempSizeEx -auto batched_compress_get_temp_size_ex(compression_type compression, - size_t batch_size, - size_t max_uncompressed_chunk_bytes, - size_t max_total_uncompressed_bytes) +size_t batched_compress_temp_size(compression_type compression, + size_t batch_size, + size_t max_uncompressed_chunk_bytes, + size_t max_total_uncompressed_bytes) { size_t temp_size = 0; nvcompStatus_t nvcomp_status = nvcompStatus_t::nvcompSuccess; @@ -291,28 +171,8 @@ auto batched_compress_get_temp_size_ex(compression_type compression, "Unable to get scratch size for compression"); return temp_size; } -#endif - -size_t batched_compress_temp_size(compression_type compression, - size_t num_chunks, - size_t max_uncomp_chunk_size, - size_t max_total_uncomp_size) -{ -#if NVCOMP_HAS_COMP_TEMPSIZE_EX(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - try { - return batched_compress_get_temp_size_ex( - compression, num_chunks, max_uncomp_chunk_size, max_total_uncomp_size); - } catch (...) 
{ - // Ignore errors in the expanded version; fall back to the old API in case of failure - CUDF_LOG_WARN( - "CompressGetTempSizeEx call failed, falling back to CompressGetTempSize; this may increase " - "the memory usage"); - } -#endif - - return batched_compress_get_temp_size(compression, num_chunks, max_uncomp_chunk_size); -} +// Wrapper for nvcompBatchedCompressGetMaxOutputChunkSize size_t compress_max_output_chunk_size(compression_type compression, uint32_t max_uncompressed_chunk_bytes) { @@ -328,23 +188,13 @@ size_t compress_max_output_chunk_size(compression_type compression, capped_uncomp_bytes, nvcompBatchedSnappyDefaultOpts, &max_comp_chunk_size); break; case compression_type::DEFLATE: -#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) status = nvcompBatchedDeflateCompressGetMaxOutputChunkSize( capped_uncomp_bytes, nvcompBatchedDeflateDefaultOpts, &max_comp_chunk_size); break; -#else - CUDF_FAIL("Compression error: " + - nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE).value()); -#endif case compression_type::ZSTD: -#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) status = nvcompBatchedZstdCompressGetMaxOutputChunkSize( capped_uncomp_bytes, nvcompBatchedZstdDefaultOpts, &max_comp_chunk_size); break; -#else - CUDF_FAIL("Compression error: " + - nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value()); -#endif case compression_type::LZ4: status = nvcompBatchedLZ4CompressGetMaxOutputChunkSize( capped_uncomp_bytes, nvcompBatchedLZ4DefaultOpts, &max_comp_chunk_size); @@ -384,7 +234,6 @@ static void batched_compress_async(compression_type compression, stream.value()); break; case compression_type::DEFLATE: -#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) nvcomp_status = nvcompBatchedDeflateCompressAsync(device_uncompressed_ptrs, device_uncompressed_bytes, max_uncompressed_chunk_bytes, @@ -396,12 +245,7 @@ 
static void batched_compress_async(compression_type compression, nvcompBatchedDeflateDefaultOpts, stream.value()); break; -#else - CUDF_FAIL("Compression error: " + - nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE).value()); -#endif case compression_type::ZSTD: -#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) nvcomp_status = nvcompBatchedZstdCompressAsync(device_uncompressed_ptrs, device_uncompressed_bytes, max_uncompressed_chunk_bytes, @@ -413,10 +257,6 @@ static void batched_compress_async(compression_type compression, nvcompBatchedZstdDefaultOpts, stream.value()); break; -#else - CUDF_FAIL("Compression error: " + - nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value()); -#endif case compression_type::LZ4: nvcomp_status = nvcompBatchedLZ4CompressAsync(device_uncompressed_ptrs, device_uncompressed_bytes, @@ -478,16 +318,18 @@ void batched_compress(compression_type compression, } feature_status_parameters::feature_status_parameters() - : lib_major_version{NVCOMP_MAJOR_VERSION}, - lib_minor_version{NVCOMP_MINOR_VERSION}, - lib_patch_version{NVCOMP_PATCH_VERSION}, - are_all_integrations_enabled{nvcomp_integration::is_all_enabled()}, - are_stable_integrations_enabled{nvcomp_integration::is_stable_enabled()} + : feature_status_parameters(nvcomp_integration::is_all_enabled(), + nvcomp_integration::is_stable_enabled()) +{ +} + +feature_status_parameters::feature_status_parameters(bool all_enabled, bool stable_enabled) + : lib_major_version{NVCOMP_VER_MAJOR}, + lib_minor_version{NVCOMP_VER_MINOR}, + lib_patch_version{NVCOMP_VER_PATCH}, + are_all_integrations_enabled{all_enabled}, + are_stable_integrations_enabled{stable_enabled} { - int device; - CUDF_CUDA_TRY(cudaGetDevice(&device)); - CUDF_CUDA_TRY( - cudaDeviceGetAttribute(&compute_capability_major, cudaDevAttrComputeCapabilityMajor, device)); } // Represents all parameters required to determine status of a compression/decompression feature 
@@ -510,43 +352,21 @@ std::optional is_compression_disabled_impl(compression_type compres { switch (compression) { case compression_type::DEFLATE: { - if (not NVCOMP_HAS_DEFLATE( - params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { - return "nvCOMP 2.5 or newer is required for Deflate compression"; - } if (not params.are_all_integrations_enabled) { return "DEFLATE compression is experimental, you can enable it through " "`LIBCUDF_NVCOMP_POLICY` environment variable."; } return std::nullopt; } - case compression_type::SNAPPY: { - if (not params.are_stable_integrations_enabled) { - return "Snappy compression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " - "environment variable."; - } - return std::nullopt; - } - case compression_type::ZSTD: { - if (not NVCOMP_HAS_ZSTD_COMP( - params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { - return "nvCOMP 2.4 or newer is required for Zstandard compression"; - } - if (not params.are_stable_integrations_enabled) { - return "Zstandard compression is experimental, you can enable it through " - "`LIBCUDF_NVCOMP_POLICY` environment variable."; - } - return std::nullopt; - } case compression_type::LZ4: + case compression_type::SNAPPY: + case compression_type::ZSTD: if (not params.are_stable_integrations_enabled) { - return "LZ4 compression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " - "environment variable."; + return "nvCOMP use is disabled through the `LIBCUDF_NVCOMP_POLICY` environment variable."; } return std::nullopt; default: return "Unsupported compression type"; } - return "Unsupported compression type"; } std::optional is_compression_disabled(compression_type compression, @@ -578,58 +398,26 @@ std::optional is_compression_disabled(compression_type compression, return reason; } -std::optional is_zstd_decomp_disabled(feature_status_parameters const& params) -{ - if (not NVCOMP_HAS_ZSTD_DECOMP( - params.lib_major_version, params.lib_minor_version, 
params.lib_patch_version)) { - return "nvCOMP 2.3 or newer is required for Zstandard decompression"; - } - - if (NVCOMP_ZSTD_DECOMP_IS_STABLE( - params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { - if (not params.are_stable_integrations_enabled) { - return "Zstandard decompression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " - "environment variable."; - } - } else if (not params.are_all_integrations_enabled) { - return "Zstandard decompression is experimental, you can enable it through " - "`LIBCUDF_NVCOMP_POLICY` environment variable."; - } - - return std::nullopt; -} - std::optional is_decompression_disabled_impl(compression_type compression, feature_status_parameters params) { switch (compression) { - case compression_type::DEFLATE: { - if (not NVCOMP_HAS_DEFLATE( - params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) { - return "nvCOMP 2.5 or newer is required for Deflate decompression"; - } + case compression_type::DEFLATE: + case compression_type::GZIP: { if (not params.are_all_integrations_enabled) { return "DEFLATE decompression is experimental, you can enable it through " "`LIBCUDF_NVCOMP_POLICY` environment variable."; } return std::nullopt; } - case compression_type::SNAPPY: { - if (not params.are_stable_integrations_enabled) { - return "Snappy decompression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " - "environment variable."; - } - return std::nullopt; - } - case compression_type::ZSTD: return is_zstd_decomp_disabled(params); - case compression_type::LZ4: { + case compression_type::LZ4: + case compression_type::SNAPPY: + case compression_type::ZSTD: { if (not params.are_stable_integrations_enabled) { - return "LZ4 decompression has been disabled through the `LIBCUDF_NVCOMP_POLICY` " - "environment variable."; + return "nvCOMP use is disabled through the `LIBCUDF_NVCOMP_POLICY` environment variable."; } return std::nullopt; } - default: return "Unsupported compression type"; 
} return "Unsupported compression type"; } @@ -663,24 +451,14 @@ std::optional is_decompression_disabled(compression_type compressio return reason; } -size_t compress_input_alignment_bits(compression_type compression) -{ - switch (compression) { - case compression_type::DEFLATE: return 0; - case compression_type::SNAPPY: return 0; - case compression_type::ZSTD: return 2; - case compression_type::LZ4: return 2; - default: CUDF_FAIL("Unsupported compression type"); - } -} - -size_t compress_output_alignment_bits(compression_type compression) +size_t required_alignment(compression_type compression) { switch (compression) { - case compression_type::DEFLATE: return 3; - case compression_type::SNAPPY: return 0; - case compression_type::ZSTD: return 0; - case compression_type::LZ4: return 2; + case compression_type::GZIP: + case compression_type::DEFLATE: return nvcompDeflateRequiredAlignment; + case compression_type::SNAPPY: return nvcompSnappyRequiredAlignment; + case compression_type::ZSTD: return nvcompZstdRequiredAlignment; + case compression_type::LZ4: return nvcompLZ4RequiredAlignment; default: CUDF_FAIL("Unsupported compression type"); } } @@ -688,17 +466,11 @@ size_t compress_output_alignment_bits(compression_type compression) std::optional compress_max_allowed_chunk_size(compression_type compression) { switch (compression) { - case compression_type::DEFLATE: return 64 * 1024; - case compression_type::SNAPPY: return std::nullopt; - case compression_type::ZSTD: -#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION) - return nvcompZstdCompressionMaxAllowedChunkSize; -#else - CUDF_FAIL("Compression error: " + - nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value()); -#endif - case compression_type::LZ4: return 16 * 1024 * 1024; - default: return std::nullopt; + case compression_type::DEFLATE: return nvcompDeflateCompressionMaxAllowedChunkSize; + case compression_type::SNAPPY: return 
nvcompSnappyCompressionMaxAllowedChunkSize; + case compression_type::ZSTD: return nvcompZstdCompressionMaxAllowedChunkSize; + case compression_type::LZ4: return nvcompLZ4CompressionMaxAllowedChunkSize; + default: CUDF_FAIL("Unsupported compression type"); } } diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp index 43c79e32375..583bd6a3523 100644 --- a/cpp/src/io/comp/nvcomp_adapter.hpp +++ b/cpp/src/io/comp/nvcomp_adapter.hpp @@ -75,20 +75,12 @@ size_t batched_decompress_temp_size(compression_type compression, uint32_t max_uncomp_chunk_size); /** - * @brief Gets input alignment requirements for the given compression type. + * @brief Gets input and output alignment requirements for the given compression type. * * @param compression Compression type - * @returns required alignment, in bits + * @returns required alignment */ -[[nodiscard]] size_t compress_input_alignment_bits(compression_type compression); - -/** - * @brief Gets output alignment requirements for the given compression type. - * - * @param compression Compression type - * @returns required alignment, in bits - */ -[[nodiscard]] size_t compress_output_alignment_bits(compression_type compression); +[[nodiscard]] size_t required_alignment(compression_type compression); /** * @brief Maximum size of uncompressed chunks that can be compressed with nvCOMP. 
diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index ab516dd585d..602ff1734b6 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -510,7 +511,7 @@ size_t decompress_zstd(host_span src, { // Init device span of spans (source) auto const d_src = - cudf::detail::make_device_uvector_async(src, stream, rmm::mr::get_current_device_resource()); + cudf::detail::make_device_uvector_async(src, stream, cudf::get_current_device_resource_ref()); auto hd_srcs = cudf::detail::hostdevice_vector>(1, stream); hd_srcs[0] = d_src; hd_srcs.host_to_device_async(stream); diff --git a/cpp/src/io/csv/csv_gpu.cu b/cpp/src/io/csv/csv_gpu.cu index 7a05d0aebaf..273e82edf8b 100644 --- a/cpp/src/io/csv/csv_gpu.cu +++ b/cpp/src/io/csv/csv_gpu.cu @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -794,7 +795,7 @@ device_span __host__ remove_blank_rows(cudf::io::parse_options_view co return row_offsets.subspan(0, new_end - row_offsets.begin()); } -std::vector detect_column_types( +cudf::detail::host_vector detect_column_types( cudf::io::parse_options_view const& options, device_span const data, device_span const column_flags, @@ -807,12 +808,12 @@ std::vector detect_column_types( int const grid_size = (row_starts.size() + block_size - 1) / block_size; auto d_stats = detail::make_zeroed_device_uvector_async( - num_active_columns, stream, rmm::mr::get_current_device_resource()); + num_active_columns, stream, cudf::get_current_device_resource_ref()); data_type_detection<<>>( options, data, column_flags, row_starts, d_stats); - return detail::make_std_vector_sync(d_stats, stream); + return detail::make_host_vector_sync(d_stats, stream); } void decode_row_column_data(cudf::io::parse_options_view const& options, diff --git a/cpp/src/io/csv/csv_gpu.hpp b/cpp/src/io/csv/csv_gpu.hpp index 06c60319371..aa3d9f6c7b7 100644 --- a/cpp/src/io/csv/csv_gpu.hpp +++ 
b/cpp/src/io/csv/csv_gpu.hpp @@ -199,7 +199,7 @@ device_span remove_blank_rows(cudf::io::parse_options_view const& opti * * @return stats Histogram of each dtypes' occurrence for each column */ -std::vector detect_column_types( +cudf::detail::host_vector detect_column_types( cudf::io::parse_options_view const& options, device_span data, device_span column_flags, diff --git a/cpp/src/io/csv/durations.cu b/cpp/src/io/csv/durations.cu index 918951d5902..eac86b2f199 100644 --- a/cpp/src/io/csv/durations.cu +++ b/cpp/src/io/csv/durations.cu @@ -22,9 +22,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/src/io/csv/durations.hpp b/cpp/src/io/csv/durations.hpp index f671f435eeb..62f31dcd09c 100644 --- a/cpp/src/io/csv/durations.hpp +++ b/cpp/src/io/csv/durations.hpp @@ -17,10 +17,9 @@ #pragma once #include +#include #include -#include -#include #include diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu index 40d4372ae9d..ebca334a715 100644 --- a/cpp/src/io/csv/reader_impl.cu +++ b/cpp/src/io/csv/reader_impl.cu @@ -37,10 +37,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -532,7 +532,7 @@ void infer_column_types(parse_options const& parse_opts, auto const column_stats = cudf::io::csv::gpu::detect_column_types( parse_opts.view(), data, - make_device_uvector_async(column_flags, stream, rmm::mr::get_current_device_resource()), + make_device_uvector_async(column_flags, stream, cudf::get_current_device_resource_ref()), row_offsets, num_inferred_columns, stream); @@ -601,20 +601,20 @@ std::vector decode_data(parse_options const& parse_opts, } auto d_valid_counts = cudf::detail::make_zeroed_device_uvector_async( - num_active_columns, stream, rmm::mr::get_current_device_resource()); + num_active_columns, stream, cudf::get_current_device_resource_ref()); cudf::io::csv::gpu::decode_row_column_data( parse_opts.view(), data, - make_device_uvector_async(column_flags, 
stream, rmm::mr::get_current_device_resource()), + make_device_uvector_async(column_flags, stream, cudf::get_current_device_resource_ref()), row_offsets, - make_device_uvector_async(column_types, stream, rmm::mr::get_current_device_resource()), - make_device_uvector_async(h_data, stream, rmm::mr::get_current_device_resource()), - make_device_uvector_async(h_valid, stream, rmm::mr::get_current_device_resource()), + make_device_uvector_async(column_types, stream, cudf::get_current_device_resource_ref()), + make_device_uvector_async(h_data, stream, cudf::get_current_device_resource_ref()), + make_device_uvector_async(h_valid, stream, cudf::get_current_device_resource_ref()), d_valid_counts, stream); - auto const h_valid_counts = cudf::detail::make_std_vector_sync(d_valid_counts, stream); + auto const h_valid_counts = cudf::detail::make_host_vector_sync(d_valid_counts, stream); for (int i = 0; i < num_active_columns; ++i) { out_buffers[i].null_count() = num_records - h_valid_counts[i]; } diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index 00a6dcb2286..b84446b5f3e 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -38,11 +38,10 @@ #include #include #include +#include #include #include -#include -#include #include #include @@ -436,7 +435,7 @@ void write_csv(data_sink* out_sink, // (even for tables with no rows) // write_chunked_begin( - out_sink, table, user_column_names, options, stream, rmm::mr::get_current_device_resource()); + out_sink, table, user_column_names, options, stream, cudf::get_current_device_resource_ref()); if (table.num_rows() > 0) { // no need to check same-size columns constraint; auto-enforced by table_view @@ -470,7 +469,7 @@ void write_csv(data_sink* out_sink, // convert each chunk to CSV: // - column_to_strings_fn converter{options, stream, rmm::mr::get_current_device_resource()}; + column_to_strings_fn converter{options, stream, cudf::get_current_device_resource_ref()}; for (auto&& 
sub_view : vector_views) { // Skip if the table has no rows if (sub_view.num_rows() == 0) continue; @@ -505,13 +504,13 @@ void write_csv(data_sink* out_sink, options_narep, strings::separator_on_nulls::YES, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); return cudf::strings::detail::replace_nulls( - str_table_view.column(0), options_narep, stream, rmm::mr::get_current_device_resource()); + str_table_view.column(0), options_narep, stream, cudf::get_current_device_resource_ref()); }(); write_chunked( - out_sink, str_concat_col->view(), options, stream, rmm::mr::get_current_device_resource()); + out_sink, str_concat_col->view(), options, stream, cudf::get_current_device_resource_ref()); } } } diff --git a/cpp/src/io/fst/agent_dfa.cuh b/cpp/src/io/fst/agent_dfa.cuh index bc5b94e2718..0e70984b39c 100644 --- a/cpp/src/io/fst/agent_dfa.cuh +++ b/cpp/src/io/fst/agent_dfa.cuh @@ -791,7 +791,7 @@ __launch_bounds__(int32_t(AgentDFAPolicy::BLOCK_THREADS)) CUDF_KERNEL can_use_smem_cache; using DFASimulationCallbackWrapperT = - typename cub::If::Type; + cuda::std::conditional_t; // Stage 1: Compute the state-transition vector if (IS_TRANS_VECTOR_PASS || IS_SINGLE_PASS) { diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index 62c3c5cd245..0ca54da5aaf 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -35,8 +35,7 @@ #include #include #include - -#include +#include #include diff --git a/cpp/src/io/json/byte_range_info.cu b/cpp/src/io/json/byte_range_info.cu deleted file mode 100644 index 258a40b0dd3..00000000000 --- a/cpp/src/io/json/byte_range_info.cu +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include - -#include -#include - -#include - -namespace cudf::io::json::detail { - -// Extract the first character position in the string. -size_type find_first_delimiter(device_span d_data, - char const delimiter, - rmm::cuda_stream_view stream) -{ - auto const first_delimiter_position = - thrust::find(rmm::exec_policy(stream), d_data.begin(), d_data.end(), delimiter); - return first_delimiter_position != d_data.end() ? first_delimiter_position - d_data.begin() : -1; -} - -} // namespace cudf::io::json::detail diff --git a/cpp/src/io/json/column_tree_construction.cu b/cpp/src/io/json/column_tree_construction.cu new file mode 100644 index 00000000000..c4fe7926706 --- /dev/null +++ b/cpp/src/io/json/column_tree_construction.cu @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "nested_json.hpp" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cudf::io::json { + +using row_offset_t = size_type; + +#ifdef CSR_DEBUG_PRINT +template +void print(device_span d_vec, std::string name, rmm::cuda_stream_view stream) +{ + stream.synchronize(); + auto h_vec = cudf::detail::make_std_vector_sync(d_vec, stream); + std::cout << name << " = "; + for (auto e : h_vec) { + std::cout << e << " "; + } + std::cout << std::endl; +} +#endif + +namespace experimental::detail { + +struct level_ordering { + device_span node_levels; + device_span col_ids; + device_span parent_node_ids; + __device__ bool operator()(NodeIndexT lhs_node_id, NodeIndexT rhs_node_id) const + { + auto lhs_parent_col_id = parent_node_ids[lhs_node_id] == parent_node_sentinel + ? parent_node_sentinel + : col_ids[parent_node_ids[lhs_node_id]]; + auto rhs_parent_col_id = parent_node_ids[rhs_node_id] == parent_node_sentinel + ? parent_node_sentinel + : col_ids[parent_node_ids[rhs_node_id]]; + + return (node_levels[lhs_node_id] < node_levels[rhs_node_id]) || + (node_levels[lhs_node_id] == node_levels[rhs_node_id] && + lhs_parent_col_id < rhs_parent_col_id) || + (node_levels[lhs_node_id] == node_levels[rhs_node_id] && + lhs_parent_col_id == rhs_parent_col_id && col_ids[lhs_node_id] < col_ids[rhs_node_id]); + } +}; + +struct parent_nodeids_to_colids { + device_span rev_mapped_col_ids; + __device__ auto operator()(NodeIndexT parent_node_id) -> NodeIndexT + { + return parent_node_id == parent_node_sentinel ? parent_node_sentinel + : rev_mapped_col_ids[parent_node_id]; + } +}; + +/** + * @brief Reduces node tree representation to column tree CSR representation. 
+ * + * @param node_tree Node tree representation of JSON string + * @param original_col_ids Column ids of nodes + * @param row_offsets Row offsets of nodes + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A tuple of column tree representation of JSON string, column ids of columns, and + * max row offsets of columns + */ +std::tuple reduce_to_column_tree( + tree_meta_t& node_tree, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT row_array_parent_col_id, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + + if (original_col_ids.empty()) { + rmm::device_uvector empty_row_idx(0, stream); + rmm::device_uvector empty_col_idx(0, stream); + rmm::device_uvector empty_column_categories(0, stream); + rmm::device_uvector empty_max_row_offsets(0, stream); + rmm::device_uvector empty_mapped_col_ids(0, stream); + return std::tuple{compressed_sparse_row{std::move(empty_row_idx), std::move(empty_col_idx)}, + column_tree_properties{std::move(empty_column_categories), + std::move(empty_max_row_offsets), + std::move(empty_mapped_col_ids)}}; + } + + auto [unpermuted_tree, unpermuted_col_ids, unpermuted_max_row_offsets] = + cudf::io::json::detail::reduce_to_column_tree(node_tree, + original_col_ids, + sorted_col_ids, + ordered_node_ids, + row_offsets, + is_array_of_arrays, + row_array_parent_col_id, + stream); + + NodeIndexT num_columns = unpermuted_col_ids.size(); + + auto mapped_col_ids = cudf::detail::make_device_uvector_async( + unpermuted_col_ids, stream, cudf::get_current_device_resource_ref()); + rmm::device_uvector rev_mapped_col_ids(num_columns, stream); + rmm::device_uvector reordering_index(unpermuted_col_ids.size(), stream); + + thrust::sequence( + 
rmm::exec_policy_nosync(stream), reordering_index.begin(), reordering_index.end()); + // Reorder nodes and column ids in level-wise fashion + thrust::sort_by_key( + rmm::exec_policy_nosync(stream), + reordering_index.begin(), + reordering_index.end(), + mapped_col_ids.begin(), + level_ordering{ + unpermuted_tree.node_levels, unpermuted_col_ids, unpermuted_tree.parent_node_ids}); + + { + auto mapped_col_ids_copy = cudf::detail::make_device_uvector_async( + mapped_col_ids, stream, cudf::get_current_device_resource_ref()); + thrust::sequence( + rmm::exec_policy_nosync(stream), rev_mapped_col_ids.begin(), rev_mapped_col_ids.end()); + thrust::sort_by_key(rmm::exec_policy_nosync(stream), + mapped_col_ids_copy.begin(), + mapped_col_ids_copy.end(), + rev_mapped_col_ids.begin()); + } + + rmm::device_uvector parent_col_ids(num_columns, stream); + thrust::transform_output_iterator parent_col_ids_it(parent_col_ids.begin(), + parent_nodeids_to_colids{rev_mapped_col_ids}); + rmm::device_uvector max_row_offsets(num_columns, stream); + rmm::device_uvector column_categories(num_columns, stream); + thrust::copy_n( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(thrust::make_permutation_iterator( + unpermuted_tree.parent_node_ids.begin(), reordering_index.begin()), + thrust::make_permutation_iterator(unpermuted_max_row_offsets.begin(), + reordering_index.begin()), + thrust::make_permutation_iterator( + unpermuted_tree.node_categories.begin(), reordering_index.begin())), + num_columns, + thrust::make_zip_iterator( + parent_col_ids_it, max_row_offsets.begin(), column_categories.begin())); + +#ifdef CSR_DEBUG_PRINT + print(reordering_index, "h_reordering_index", stream); + print(mapped_col_ids, "h_mapped_col_ids", stream); + print(rev_mapped_col_ids, "h_rev_mapped_col_ids", stream); + print(parent_col_ids, "h_parent_col_ids", stream); + print(max_row_offsets, "h_max_row_offsets", stream); +#endif + + auto construct_row_idx = [&stream](NodeIndexT num_columns, + device_span 
parent_col_ids) { + auto row_idx = cudf::detail::make_zeroed_device_uvector_async( + static_cast(num_columns + 1), stream, cudf::get_current_device_resource_ref()); + // Note that the first element of csr_parent_col_ids is -1 (parent_node_sentinel) + // children adjacency + + auto num_non_leaf_columns = thrust::unique_count( + rmm::exec_policy_nosync(stream), parent_col_ids.begin() + 1, parent_col_ids.end()); + rmm::device_uvector non_leaf_nodes(num_non_leaf_columns, stream); + rmm::device_uvector non_leaf_nodes_children(num_non_leaf_columns, stream); + thrust::reduce_by_key(rmm::exec_policy_nosync(stream), + parent_col_ids.begin() + 1, + parent_col_ids.end(), + thrust::make_constant_iterator(1), + non_leaf_nodes.begin(), + non_leaf_nodes_children.begin(), + thrust::equal_to()); + + thrust::scatter(rmm::exec_policy_nosync(stream), + non_leaf_nodes_children.begin(), + non_leaf_nodes_children.end(), + non_leaf_nodes.begin(), + row_idx.begin() + 1); + + if (num_columns > 1) { + thrust::transform_inclusive_scan( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(thrust::make_counting_iterator(1), row_idx.begin() + 1), + thrust::make_zip_iterator(thrust::make_counting_iterator(1) + num_columns, row_idx.end()), + row_idx.begin() + 1, + cuda::proclaim_return_type([] __device__(auto a) { + auto n = thrust::get<0>(a); + auto idx = thrust::get<1>(a); + return n == 1 ? 
idx : idx + 1; + }), + thrust::plus{}); + } else { + auto single_node = 1; + row_idx.set_element_async(1, single_node, stream); + } + +#ifdef CSR_DEBUG_PRINT + print(row_idx, "h_row_idx", stream); +#endif + return row_idx; + }; + + auto construct_col_idx = [&stream](NodeIndexT num_columns, + device_span parent_col_ids, + device_span row_idx) { + rmm::device_uvector col_idx((num_columns - 1) * 2, stream); + thrust::fill(rmm::exec_policy_nosync(stream), col_idx.begin(), col_idx.end(), -1); + // excluding root node, construct scatter map + rmm::device_uvector map(num_columns - 1, stream); + thrust::inclusive_scan_by_key(rmm::exec_policy_nosync(stream), + parent_col_ids.begin() + 1, + parent_col_ids.end(), + thrust::make_constant_iterator(1), + map.begin()); + thrust::for_each_n(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(1), + num_columns - 1, + [row_idx = row_idx.begin(), + map = map.begin(), + parent_col_ids = parent_col_ids.begin()] __device__(auto i) { + auto parent_col_id = parent_col_ids[i]; + if (parent_col_id == 0) + --map[i - 1]; + else + map[i - 1] += row_idx[parent_col_id]; + }); + thrust::scatter(rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(1), + thrust::make_counting_iterator(1) + num_columns - 1, + map.begin(), + col_idx.begin()); + + // Skip the parent of root node + thrust::scatter(rmm::exec_policy_nosync(stream), + parent_col_ids.begin() + 1, + parent_col_ids.end(), + row_idx.begin() + 1, + col_idx.begin()); + +#ifdef CSR_DEBUG_PRINT + print(col_idx, "h_col_idx", stream); +#endif + + return col_idx; + }; + + /* + 5. CSR construction: + a. Sort column levels and get their ordering + b. For each column node coln iterated according to sorted_column_levels; do + i. Find nodes that have coln as the parent node -> set adj_coln + ii. row idx[coln] = size of adj_coln + 1 + iii. 
col idx[coln] = adj_coln U {parent_col_id[coln]} + */ + auto row_idx = construct_row_idx(num_columns, parent_col_ids); + auto col_idx = construct_col_idx(num_columns, parent_col_ids, row_idx); + + return std::tuple{ + compressed_sparse_row{std::move(row_idx), std::move(col_idx)}, + column_tree_properties{ + std::move(column_categories), std::move(max_row_offsets), std::move(mapped_col_ids)}}; +} + +} // namespace experimental::detail +} // namespace cudf::io::json diff --git a/cpp/src/io/json/host_tree_algorithms.cu b/cpp/src/io/json/host_tree_algorithms.cu new file mode 100644 index 00000000000..5855f1b5a5f --- /dev/null +++ b/cpp/src/io/json/host_tree_algorithms.cu @@ -0,0 +1,1404 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "io/utilities/parsing_utils.cuh" +#include "io/utilities/string_parsing.hpp" +#include "nested_json.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace cudf::io::json::detail { + +/** + * @brief Get the column indices for the values column for array of arrays rows + * + * @param row_array_children_level The level of the row array's children + * @param d_tree The tree metadata + * @param col_ids The column ids + * @param num_columns The number of columns + * @param stream The stream to use + * @return The value columns' indices + */ +rmm::device_uvector get_values_column_indices(TreeDepthT const row_array_children_level, + tree_meta_t const& d_tree, + device_span col_ids, + size_type const num_columns, + rmm::cuda_stream_view stream) +{ + auto [level2_nodes, level2_indices] = get_array_children_indices( + row_array_children_level, d_tree.node_levels, d_tree.parent_node_ids, stream); + auto col_id_location = thrust::make_permutation_iterator(col_ids.begin(), level2_nodes.begin()); + rmm::device_uvector values_column_indices(num_columns, stream); + thrust::scatter(rmm::exec_policy_nosync(stream), + level2_indices.begin(), + level2_indices.end(), + col_id_location, + values_column_indices.begin()); + return values_column_indices; +} + +/** + * @brief Copies strings specified by pair of begin, end offsets to host vector of strings. 
+ * + * @param input String device buffer + * @param node_range_begin Begin offset of the strings + * @param node_range_end End offset of the strings + * @param stream CUDA stream + * @return Vector of strings + */ +std::vector copy_strings_to_host_sync( + device_span input, + device_span node_range_begin, + device_span node_range_end, + rmm::cuda_stream_view stream) +{ + auto const num_strings = node_range_begin.size(); + rmm::device_uvector string_offsets(num_strings, stream); + rmm::device_uvector string_lengths(num_strings, stream); + auto d_offset_pairs = thrust::make_zip_iterator(node_range_begin.begin(), node_range_end.begin()); + thrust::transform(rmm::exec_policy_nosync(stream), + d_offset_pairs, + d_offset_pairs + num_strings, + thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin()), + [] __device__(auto const& offsets) { + // Note: first character for non-field columns + return thrust::make_tuple( + static_cast(thrust::get<0>(offsets)), + static_cast(thrust::get<1>(offsets) - thrust::get<0>(offsets))); + }); + + cudf::io::parse_options_view options_view{}; + options_view.quotechar = '\0'; // no quotes + options_view.keepquotes = true; + auto d_offset_length_it = + thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin()); + auto d_column_names = parse_data(input.data(), + d_offset_length_it, + num_strings, + data_type{type_id::STRING}, + rmm::device_buffer{}, + 0, + options_view, + stream, + cudf::get_current_device_resource_ref()); + auto to_host = [stream](auto const& col) { + if (col.is_empty()) return std::vector{}; + auto const scv = cudf::strings_column_view(col); + auto const h_chars = cudf::detail::make_host_vector_async( + cudf::device_span(scv.chars_begin(stream), scv.chars_size(stream)), stream); + auto const h_offsets = cudf::detail::make_host_vector_async( + cudf::device_span(scv.offsets().data() + scv.offset(), + scv.size() + 1), + stream); + stream.synchronize(); + + // build std::string vector from chars 
and offsets + std::vector host_data; + host_data.reserve(col.size()); + std::transform( + std::begin(h_offsets), + std::end(h_offsets) - 1, + std::begin(h_offsets) + 1, + std::back_inserter(host_data), + [&](auto start, auto end) { return std::string(h_chars.data() + start, end - start); }); + return host_data; + }; + return to_host(d_column_names->view()); +} + +/** + * @brief Checks if all strings in each string column in the tree are nulls. + * For non-string columns, it's set as true. If any of rows in a string column is false, it's set as + * false. + * + * @param input Input JSON string device data + * @param d_column_tree column tree representation of JSON string + * @param tree Node tree representation of the JSON string + * @param col_ids Column ids of the nodes in the tree + * @param options Parsing options specifying the parsing behaviour + * @param stream CUDA stream used for device memory operations and kernel launches + * @return Array of bytes where each byte indicate if it is all nulls string column. 
+ */ +rmm::device_uvector is_all_nulls_each_column(device_span input, + tree_meta_t const& d_column_tree, + tree_meta_t const& tree, + device_span col_ids, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream) +{ + auto const num_nodes = col_ids.size(); + auto const num_cols = d_column_tree.node_categories.size(); + rmm::device_uvector is_all_nulls(num_cols, stream); + thrust::fill(rmm::exec_policy_nosync(stream), is_all_nulls.begin(), is_all_nulls.end(), true); + + auto parse_opt = parsing_options(options, stream); + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::counting_iterator(0), + num_nodes, + [options = parse_opt.view(), + data = input.data(), + column_categories = d_column_tree.node_categories.begin(), + col_ids = col_ids.begin(), + range_begin = tree.node_range_begin.begin(), + range_end = tree.node_range_end.begin(), + is_all_nulls = is_all_nulls.begin()] __device__(size_type i) { + auto const node_category = column_categories[col_ids[i]]; + if (node_category == NC_STR or node_category == NC_VAL) { + auto const is_null_literal = serialized_trie_contains( + options.trie_na, + {data + range_begin[i], static_cast(range_end[i] - range_begin[i])}); + if (!is_null_literal) is_all_nulls[col_ids[i]] = false; + } + }); + return is_all_nulls; +} + +NodeIndexT get_row_array_parent_col_id(device_span col_ids, + bool is_enabled_lines, + rmm::cuda_stream_view stream) +{ + NodeIndexT value = parent_node_sentinel; + if (!col_ids.empty()) { + auto const list_node_index = is_enabled_lines ? 
0 : 1; + CUDF_CUDA_TRY(cudaMemcpyAsync(&value, + col_ids.data() + list_node_index, + sizeof(NodeIndexT), + cudaMemcpyDefault, + stream.value())); + stream.synchronize(); + } + return value; +} +/** + * @brief Holds member data pointers of `d_json_column` + * + */ +struct json_column_data { + using row_offset_t = json_column::row_offset_t; + row_offset_t* string_offsets; + row_offset_t* string_lengths; + row_offset_t* child_offsets; + bitmask_type* validity; +}; + +using hashmap_of_device_columns = + std::unordered_map>; + +std::pair, hashmap_of_device_columns> build_tree( + device_json_column& root, + host_span is_str_column_all_nulls, + tree_meta_t& d_column_tree, + device_span d_unique_col_ids, + device_span d_max_row_offsets, + std::vector const& column_names, + NodeIndexT row_array_parent_col_id, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); +void scatter_offsets(tree_meta_t const& tree, + device_span col_ids, + device_span row_offsets, + device_span node_ids, + device_span sorted_col_ids, // Reuse this for parent_col_ids + tree_meta_t const& d_column_tree, + host_span ignore_vals, + hashmap_of_device_columns const& columns, + rmm::cuda_stream_view stream); + +/** + * @brief Constructs `d_json_column` from node tree representation + * Newly constructed columns are inserted into `root`'s children. + * `root` must be a list type. 
+ * + * @param input Input JSON string device data + * @param tree Node tree representation of the JSON string + * @param col_ids Column ids of the nodes in the tree + * @param row_offsets Row offsets of the nodes in the tree + * @param root Root node of the `d_json_column` tree + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param options Parsing options specifying the parsing behaviour + * options affecting behaviour are + * is_enabled_lines: Whether the input is a line-delimited JSON + * is_enabled_mixed_types_as_string: Whether to enable reading mixed types as string + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the device memory + * of child_offets and validity members of `d_json_column` + */ +void make_device_json_column(device_span input, + tree_meta_t const& tree, + device_span col_ids, + device_span row_offsets, + device_json_column& root, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + bool const is_enabled_lines = options.is_enabled_lines(); + bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); + // make a copy + auto sorted_col_ids = cudf::detail::make_device_uvector_async( + col_ids, stream, cudf::get_current_device_resource_ref()); + + // sort by {col_id} on {node_ids} stable + rmm::device_uvector node_ids(col_ids.size(), stream); + thrust::sequence(rmm::exec_policy_nosync(stream), node_ids.begin(), node_ids.end()); + thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + node_ids.begin()); + + NodeIndexT const row_array_parent_col_id = + get_row_array_parent_col_id(col_ids, is_enabled_lines, stream); + + // 1. gather column information. 
+ auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] = + reduce_to_column_tree(tree, + col_ids, + sorted_col_ids, + node_ids, + row_offsets, + is_array_of_arrays, + row_array_parent_col_id, + stream); + auto num_columns = d_unique_col_ids.size(); + std::vector column_names = copy_strings_to_host_sync( + input, d_column_tree.node_range_begin, d_column_tree.node_range_end, stream); + // array of arrays column names + if (is_array_of_arrays) { + auto const unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); + auto const column_parent_ids = + cudf::detail::make_host_vector_async(d_column_tree.parent_node_ids, stream); + TreeDepthT const row_array_children_level = is_enabled_lines ? 1 : 2; + auto values_column_indices = + get_values_column_indices(row_array_children_level, tree, col_ids, num_columns, stream); + auto h_values_column_indices = + cudf::detail::make_host_vector_sync(values_column_indices, stream); + std::transform(unique_col_ids.begin(), + unique_col_ids.end(), + column_names.cbegin(), + column_names.begin(), + [&h_values_column_indices, &column_parent_ids, row_array_parent_col_id]( + auto col_id, auto name) mutable { + return column_parent_ids[col_id] == row_array_parent_col_id + ? 
std::to_string(h_values_column_indices[col_id]) + : name; + }); + } + + auto const is_str_column_all_nulls = [&, &column_tree = d_column_tree]() { + if (is_enabled_mixed_types_as_string) { + return cudf::detail::make_std_vector_sync( + is_all_nulls_each_column(input, column_tree, tree, col_ids, options, stream), stream); + } + return std::vector(); + }(); + auto const [ignore_vals, columns] = build_tree(root, + is_str_column_all_nulls, + d_column_tree, + d_unique_col_ids, + d_max_row_offsets, + column_names, + row_array_parent_col_id, + is_array_of_arrays, + options, + stream, + mr); + + scatter_offsets(tree, + col_ids, + row_offsets, + node_ids, + sorted_col_ids, + d_column_tree, + ignore_vals, + columns, + stream); +} + +std::pair, hashmap_of_device_columns> build_tree( + device_json_column& root, + host_span is_str_column_all_nulls, + tree_meta_t& d_column_tree, + device_span d_unique_col_ids, + device_span d_max_row_offsets, + std::vector const& column_names, + NodeIndexT row_array_parent_col_id, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); + auto unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); + auto column_categories = + cudf::detail::make_host_vector_async(d_column_tree.node_categories, stream); + auto const column_parent_ids = + cudf::detail::make_host_vector_async(d_column_tree.parent_node_ids, stream); + auto column_range_beg = + cudf::detail::make_host_vector_async(d_column_tree.node_range_begin, stream); + auto const max_row_offsets = cudf::detail::make_host_vector_async(d_max_row_offsets, stream); + auto num_columns = d_unique_col_ids.size(); + stream.synchronize(); + + auto to_json_col_type = [](auto category) { + switch (category) { + case NC_STRUCT: return json_col_t::StructColumn; + case NC_LIST: return 
json_col_t::ListColumn; + case NC_STR: [[fallthrough]]; + case NC_VAL: return json_col_t::StringColumn; + default: return json_col_t::Unknown; + } + }; + auto init_to_zero = [stream](auto& v) { + thrust::uninitialized_fill(rmm::exec_policy_nosync(stream), v.begin(), v.end(), 0); + }; + + auto initialize_json_columns = [&](auto i, auto& col, auto column_category) { + if (column_category == NC_ERR || column_category == NC_FN) { + return; + } else if (column_category == NC_VAL || column_category == NC_STR) { + col.string_offsets.resize(max_row_offsets[i] + 1, stream); + col.string_lengths.resize(max_row_offsets[i] + 1, stream); + init_to_zero(col.string_offsets); + init_to_zero(col.string_lengths); + } else if (column_category == NC_LIST) { + col.child_offsets.resize(max_row_offsets[i] + 2, stream); + init_to_zero(col.child_offsets); + } + col.num_rows = max_row_offsets[i] + 1; + col.validity = + cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); + col.type = to_json_col_type(column_category); + }; + + auto reinitialize_as_string = [&](auto i, auto& col) { + col.string_offsets.resize(max_row_offsets[i] + 1, stream); + col.string_lengths.resize(max_row_offsets[i] + 1, stream); + init_to_zero(col.string_offsets); + init_to_zero(col.string_lengths); + col.num_rows = max_row_offsets[i] + 1; + col.validity = + cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); + col.type = json_col_t::StringColumn; + // destroy references of all child columns after this step, by calling remove_child_columns + }; + + path_from_tree tree_path{column_categories, + column_parent_ids, + column_names, + is_array_of_arrays, + row_array_parent_col_id}; + + // 2. generate nested columns tree and its device_memory + // reorder unique_col_ids w.r.t. column_range_begin for order of column to be in field order. 
+ auto h_range_col_id_it = + thrust::make_zip_iterator(column_range_beg.begin(), unique_col_ids.begin()); + std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) { + return thrust::get<0>(a) < thrust::get<0>(b); + }); + + // use hash map because we may skip field name's col_ids + hashmap_of_device_columns columns; + // map{parent_col_id, child_col_name}> = child_col_id, used for null value column tracking + std::map, NodeIndexT> mapped_columns; + // find column_ids which are values, but should be ignored in validity + auto ignore_vals = cudf::detail::make_host_vector(num_columns, stream); + std::fill(ignore_vals.begin(), ignore_vals.end(), false); + std::vector is_mixed_type_column(num_columns, 0); + std::vector is_pruned(num_columns, 0); + // for columns that are not mixed type but have been forced as string + std::vector forced_as_string_column(num_columns); + columns.try_emplace(parent_node_sentinel, std::ref(root)); + + std::function remove_child_columns = + [&](NodeIndexT this_col_id, device_json_column& col) { + for (auto const& col_name : col.column_order) { + auto child_id = mapped_columns[{this_col_id, col_name}]; + is_mixed_type_column[child_id] = 1; + remove_child_columns(child_id, col.child_columns.at(col_name)); + mapped_columns.erase({this_col_id, col_name}); + columns.erase(child_id); + } + col.child_columns.clear(); // their references are deleted above. 
+ col.column_order.clear(); + }; + + auto name_and_parent_index = [&is_array_of_arrays, + &row_array_parent_col_id, + &column_parent_ids, + &column_categories, + &column_names](auto this_col_id) { + std::string name = ""; + auto parent_col_id = column_parent_ids[this_col_id]; + if (parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST) { + if (is_array_of_arrays && parent_col_id == row_array_parent_col_id) { + name = column_names[this_col_id]; + } else { + name = list_child_name; + } + } else if (column_categories[parent_col_id] == NC_FN) { + auto field_name_col_id = parent_col_id; + parent_col_id = column_parent_ids[parent_col_id]; + name = column_names[field_name_col_id]; + } else { + CUDF_FAIL("Unexpected parent column category"); + } + return std::pair{name, parent_col_id}; + }; + + // Prune columns that are not required to be parsed. + if (options.is_enabled_prune_columns()) { + for (auto const this_col_id : unique_col_ids) { + if (column_categories[this_col_id] == NC_ERR || column_categories[this_col_id] == NC_FN) { + continue; + } + // Struct, List, String, Value + auto [name, parent_col_id] = name_and_parent_index(this_col_id); + // get path of this column, and get its dtype if present in options + auto const nt = tree_path.get_path(this_col_id); + std::optional const user_dtype = get_path_data_type(nt, options); + if (!user_dtype.has_value() and parent_col_id != parent_node_sentinel) { + is_pruned[this_col_id] = 1; + continue; + } else { + // make sure all its parents are not pruned. + while (parent_col_id != parent_node_sentinel and is_pruned[parent_col_id] == 1) { + is_pruned[parent_col_id] = 0; + parent_col_id = column_parent_ids[parent_col_id]; + } + } + } + } + + // Build the column tree, also, handles mixed types. 
+ for (auto const this_col_id : unique_col_ids) { + if (column_categories[this_col_id] == NC_ERR || column_categories[this_col_id] == NC_FN) { + continue; + } + // Struct, List, String, Value + auto [name, parent_col_id] = name_and_parent_index(this_col_id); + + // if parent is mixed type column or this column is pruned or if parent + // has been forced as string, ignore this column. + if (parent_col_id != parent_node_sentinel && + (is_mixed_type_column[parent_col_id] || is_pruned[this_col_id]) || + forced_as_string_column[parent_col_id]) { + ignore_vals[this_col_id] = true; + if (is_mixed_type_column[parent_col_id]) { is_mixed_type_column[this_col_id] = 1; } + if (forced_as_string_column[parent_col_id]) { forced_as_string_column[this_col_id] = true; } + continue; + } + + // If the child is already found, + // replace if this column is a nested column and the existing was a value column + // ignore this column if this column is a value column and the existing was a nested column + auto it = columns.find(parent_col_id); + CUDF_EXPECTS(it != columns.end(), "Parent column not found"); + auto& parent_col = it->second.get(); + bool replaced = false; + if (mapped_columns.count({parent_col_id, name}) > 0) { + auto const old_col_id = mapped_columns[{parent_col_id, name}]; + // If mixed type as string is enabled, make both of them strings and merge them. + // All child columns will be ignored when parsing. + if (is_enabled_mixed_types_as_string) { + bool const is_mixed_type = [&]() { + // If new or old is STR and they are all not null, make it mixed type, else ignore. 
+ if (column_categories[this_col_id] == NC_VAL || + column_categories[this_col_id] == NC_STR) { + if (is_str_column_all_nulls[this_col_id]) return false; + } + if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) { + if (is_str_column_all_nulls[old_col_id]) return false; + } + return true; + }(); + if (is_mixed_type) { + is_mixed_type_column[this_col_id] = 1; + is_mixed_type_column[old_col_id] = 1; + // if old col type (not cat) is list or struct, replace with string. + auto& col = columns.at(old_col_id).get(); + if (col.type == json_col_t::ListColumn or col.type == json_col_t::StructColumn) { + reinitialize_as_string(old_col_id, col); + remove_child_columns(old_col_id, col); + // all its children (which are already inserted) are ignored later. + } + col.forced_as_string_column = true; + columns.try_emplace(this_col_id, columns.at(old_col_id)); + continue; + } + } + + if (column_categories[this_col_id] == NC_VAL || column_categories[this_col_id] == NC_STR) { + ignore_vals[this_col_id] = true; + continue; + } + if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) { + // remap + ignore_vals[old_col_id] = true; + mapped_columns.erase({parent_col_id, name}); + columns.erase(old_col_id); + parent_col.child_columns.erase(name); + replaced = true; // to skip duplicate name in column_order + } else { + // If this is a nested column but we're trying to insert either (a) a list node into a + // struct column or (b) a struct node into a list column, we fail + CUDF_EXPECTS(not((column_categories[old_col_id] == NC_LIST and + column_categories[this_col_id] == NC_STRUCT) or + (column_categories[old_col_id] == NC_STRUCT and + column_categories[this_col_id] == NC_LIST)), + "A mix of lists and structs within the same column is not supported"); + } + } + + auto this_column_category = column_categories[this_col_id]; + // get path of this column, check if it is a struct/list forced as string, and enforce it + auto 
const nt = tree_path.get_path(this_col_id); + std::optional const user_dtype = get_path_data_type(nt, options); + if ((column_categories[this_col_id] == NC_STRUCT or + column_categories[this_col_id] == NC_LIST) and + user_dtype.has_value() and user_dtype.value().id() == type_id::STRING) { + this_column_category = NC_STR; + } + + CUDF_EXPECTS(parent_col.child_columns.count(name) == 0, "duplicate column name: " + name); + // move into parent + device_json_column col(stream, mr); + initialize_json_columns(this_col_id, col, this_column_category); + if ((column_categories[this_col_id] == NC_STRUCT or + column_categories[this_col_id] == NC_LIST) and + user_dtype.has_value() and user_dtype.value().id() == type_id::STRING) { + col.forced_as_string_column = true; + forced_as_string_column[this_col_id] = true; + } + + auto inserted = parent_col.child_columns.try_emplace(name, std::move(col)).second; + CUDF_EXPECTS(inserted, "child column insertion failed, duplicate column name in the parent"); + if (not replaced) parent_col.column_order.push_back(name); + columns.try_emplace(this_col_id, std::ref(parent_col.child_columns.at(name))); + mapped_columns.try_emplace(std::make_pair(parent_col_id, name), this_col_id); + } + + if (is_enabled_mixed_types_as_string) { + // ignore all children of mixed type columns + for (auto const this_col_id : unique_col_ids) { + auto parent_col_id = column_parent_ids[this_col_id]; + if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 1) { + is_mixed_type_column[this_col_id] = 1; + ignore_vals[this_col_id] = true; + columns.erase(this_col_id); + } + // Convert only mixed type columns as string (so to copy), but not its children + if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 0 and + is_mixed_type_column[this_col_id] == 1) + column_categories[this_col_id] = NC_STR; + } + cudf::detail::cuda_memcpy_async(d_column_tree.node_categories.begin(), + column_categories.data(), + 
column_categories.size() * sizeof(column_categories[0]), + cudf::detail::host_memory_kind::PAGEABLE, + stream); + } + + // ignore all children of columns forced as string + for (auto const this_col_id : unique_col_ids) { + auto parent_col_id = column_parent_ids[this_col_id]; + if (parent_col_id != parent_node_sentinel and forced_as_string_column[parent_col_id]) { + forced_as_string_column[this_col_id] = true; + ignore_vals[this_col_id] = true; + } + // Convert only mixed type columns as string (so to copy), but not its children + if (parent_col_id != parent_node_sentinel and not forced_as_string_column[parent_col_id] and + forced_as_string_column[this_col_id]) + column_categories[this_col_id] = NC_STR; + } + cudf::detail::cuda_memcpy_async(d_column_tree.node_categories.begin(), + column_categories.data(), + column_categories.size() * sizeof(column_categories[0]), + cudf::detail::host_memory_kind::PAGEABLE, + stream); + + // restore unique_col_ids order + std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) { + return thrust::get<1>(a) < thrust::get<1>(b); + }); + return {ignore_vals, columns}; +} + +void scatter_offsets(tree_meta_t const& tree, + device_span col_ids, + device_span row_offsets, + device_span node_ids, + device_span sorted_col_ids, // Reuse this for parent_col_ids + tree_meta_t const& d_column_tree, + host_span ignore_vals, + hashmap_of_device_columns const& columns, + rmm::cuda_stream_view stream) +{ + auto const num_nodes = col_ids.size(); + auto const num_columns = d_column_tree.node_categories.size(); + // move columns data to device. 
+ auto columns_data = cudf::detail::make_host_vector(num_columns, stream); + for (auto& [col_id, col_ref] : columns) { + if (col_id == parent_node_sentinel) continue; + auto& col = col_ref.get(); + columns_data[col_id] = json_column_data{col.string_offsets.data(), + col.string_lengths.data(), + col.child_offsets.data(), + static_cast(col.validity.data())}; + } + + auto d_ignore_vals = cudf::detail::make_device_uvector_async( + ignore_vals, stream, cudf::get_current_device_resource_ref()); + auto d_columns_data = cudf::detail::make_device_uvector_async( + columns_data, stream, cudf::get_current_device_resource_ref()); + + // 3. scatter string offsets to respective columns, set validity bits + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::counting_iterator(0), + num_nodes, + [column_categories = d_column_tree.node_categories.begin(), + col_ids = col_ids.begin(), + row_offsets = row_offsets.begin(), + range_begin = tree.node_range_begin.begin(), + range_end = tree.node_range_end.begin(), + d_ignore_vals = d_ignore_vals.begin(), + d_columns_data = d_columns_data.begin()] __device__(size_type i) { + if (d_ignore_vals[col_ids[i]]) return; + auto const node_category = column_categories[col_ids[i]]; + switch (node_category) { + case NC_STRUCT: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; + case NC_LIST: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; + case NC_STR: [[fallthrough]]; + case NC_VAL: + if (d_ignore_vals[col_ids[i]]) break; + set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); + d_columns_data[col_ids[i]].string_offsets[row_offsets[i]] = range_begin[i]; + d_columns_data[col_ids[i]].string_lengths[row_offsets[i]] = range_end[i] - range_begin[i]; + break; + default: break; + } + }); + + // 4. scatter List offset + // copy_if only node's whose parent is list, (node_id, parent_col_id) + // stable_sort by parent_col_id of {node_id}. 
+ // For all unique parent_node_id of (i==0, i-1!=i), write start offset. + // (i==last, i+1!=i), write end offset. + // unique_copy_by_key {parent_node_id} {row_offset} to + // col[parent_col_id].child_offsets[row_offset[parent_node_id]] + + auto& parent_col_ids = sorted_col_ids; // reuse sorted_col_ids + auto parent_col_id = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type( + [col_ids = col_ids.begin(), + parent_node_ids = tree.parent_node_ids.begin()] __device__(size_type node_id) { + return parent_node_ids[node_id] == parent_node_sentinel ? parent_node_sentinel + : col_ids[parent_node_ids[node_id]]; + })); + auto const list_children_end = thrust::copy_if( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id), + thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id) + + num_nodes, + thrust::make_counting_iterator(0), + thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()), + [d_ignore_vals = d_ignore_vals.begin(), + parent_node_ids = tree.parent_node_ids.begin(), + column_categories = d_column_tree.node_categories.begin(), + col_ids = col_ids.begin()] __device__(size_type node_id) { + auto parent_node_id = parent_node_ids[node_id]; + return parent_node_id != parent_node_sentinel and + column_categories[col_ids[parent_node_id]] == NC_LIST and + (!d_ignore_vals[col_ids[parent_node_id]]); + }); + + auto const num_list_children = + list_children_end - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()); + thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), + parent_col_ids.begin(), + parent_col_ids.begin() + num_list_children, + node_ids.begin()); + thrust::for_each_n( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + num_list_children, + [node_ids = node_ids.begin(), + parent_node_ids = tree.parent_node_ids.begin(), + parent_col_ids = parent_col_ids.begin(), + row_offsets = 
row_offsets.begin(), + d_columns_data = d_columns_data.begin(), + num_list_children] __device__(size_type i) { + auto const node_id = node_ids[i]; + auto const parent_node_id = parent_node_ids[node_id]; + // scatter to list_offset + if (i == 0 or parent_node_ids[node_ids[i - 1]] != parent_node_id) { + d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id]] = + row_offsets[node_id]; + } + // last value of list child_offset is its size. + if (i == num_list_children - 1 or parent_node_ids[node_ids[i + 1]] != parent_node_id) { + d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id] + 1] = + row_offsets[node_id] + 1; + } + }); + + // 5. scan on offsets. + for (auto& [id, col_ref] : columns) { + auto& col = col_ref.get(); + if (col.type == json_col_t::StringColumn) { + thrust::inclusive_scan(rmm::exec_policy_nosync(stream), + col.string_offsets.begin(), + col.string_offsets.end(), + col.string_offsets.begin(), + thrust::maximum{}); + } else if (col.type == json_col_t::ListColumn) { + thrust::inclusive_scan(rmm::exec_policy_nosync(stream), + col.child_offsets.begin(), + col.child_offsets.end(), + col.child_offsets.begin(), + thrust::maximum{}); + } + } + stream.synchronize(); +} + +namespace experimental { + +std::map unified_schema(cudf::io::json_reader_options const& options) +{ + return std::visit( + cudf::detail::visitor_overload{ + [](std::vector const& user_dtypes) { + std::map dnew; + std::transform(thrust::counting_iterator(0), + thrust::counting_iterator(user_dtypes.size()), + std::inserter(dnew, dnew.end()), + [&user_dtypes](auto i) { + return std::pair(std::to_string(i), schema_element{user_dtypes[i]}); + }); + return dnew; + }, + [](std::map const& user_dtypes) { + std::map dnew; + std::transform(user_dtypes.begin(), + user_dtypes.end(), + std::inserter(dnew, dnew.end()), + [](auto key_dtype) { + return std::pair(key_dtype.first, schema_element{key_dtype.second}); + }); + return dnew; + }, + [](std::map const& 
user_dtypes) { return user_dtypes; }}, + options.get_dtypes()); +} + +std::pair, hashmap_of_device_columns> build_tree( + device_json_column& root, + host_span is_str_column_all_nulls, + tree_meta_t& d_column_tree, + device_span d_unique_col_ids, + device_span d_max_row_offsets, + std::vector const& column_names, + NodeIndexT row_array_parent_col_id, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +/** + * @brief Constructs `d_json_column` from node tree representation + * Newly constructed columns are inserted into `root`'s children. + * `root` must be a list type. + * + * @param input Input JSON string device data + * @param tree Node tree representation of the JSON string + * @param col_ids Column ids of the nodes in the tree + * @param row_offsets Row offsets of the nodes in the tree + * @param root Root node of the `d_json_column` tree + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param options Parsing options specifying the parsing behaviour + * options affecting behaviour are + * is_enabled_lines: Whether the input is a line-delimited JSON + * is_enabled_mixed_types_as_string: Whether to enable reading mixed types as string + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the device memory + * of child_offets and validity members of `d_json_column` + */ +void make_device_json_column(device_span input, + tree_meta_t const& tree, + device_span col_ids, + device_span row_offsets, + device_json_column& root, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + bool const is_enabled_lines = options.is_enabled_lines(); + bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); + // make a copy + auto sorted_col_ids = 
cudf::detail::make_device_uvector_async( + col_ids, stream, cudf::get_current_device_resource_ref()); + + // sort by {col_id} on {node_ids} stable + rmm::device_uvector node_ids(col_ids.size(), stream); + thrust::sequence(rmm::exec_policy_nosync(stream), node_ids.begin(), node_ids.end()); + thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + node_ids.begin()); + + NodeIndexT const row_array_parent_col_id = + get_row_array_parent_col_id(col_ids, is_enabled_lines, stream); + + // 1. gather column information. + auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] = + reduce_to_column_tree(tree, + col_ids, + sorted_col_ids, + node_ids, + row_offsets, + is_array_of_arrays, + row_array_parent_col_id, + stream); + + auto num_columns = d_unique_col_ids.size(); + std::vector column_names = copy_strings_to_host_sync( + input, d_column_tree.node_range_begin, d_column_tree.node_range_end, stream); + // array of arrays column names + if (is_array_of_arrays) { + auto const unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); + auto const column_parent_ids = + cudf::detail::make_host_vector_async(d_column_tree.parent_node_ids, stream); + TreeDepthT const row_array_children_level = is_enabled_lines ? 1 : 2; + auto values_column_indices = + get_values_column_indices(row_array_children_level, tree, col_ids, num_columns, stream); + auto h_values_column_indices = + cudf::detail::make_host_vector_sync(values_column_indices, stream); + std::transform(unique_col_ids.begin(), + unique_col_ids.end(), + column_names.cbegin(), + column_names.begin(), + [&h_values_column_indices, &column_parent_ids, row_array_parent_col_id]( + auto col_id, auto name) mutable { + return column_parent_ids[col_id] == row_array_parent_col_id + ? 
std::to_string(h_values_column_indices[col_id]) + : name; + }); + } + + auto const is_str_column_all_nulls = [&, &column_tree = d_column_tree]() { + if (is_enabled_mixed_types_as_string) { + return cudf::detail::make_std_vector_sync( + is_all_nulls_each_column(input, column_tree, tree, col_ids, options, stream), stream); + } + return std::vector(); + }(); + auto const [ignore_vals, columns] = build_tree(root, + is_str_column_all_nulls, + d_column_tree, + d_unique_col_ids, + d_max_row_offsets, + column_names, + row_array_parent_col_id, + is_array_of_arrays, + options, + stream, + mr); + if (ignore_vals.empty()) return; + scatter_offsets(tree, + col_ids, + row_offsets, + node_ids, + sorted_col_ids, + d_column_tree, + ignore_vals, + columns, + stream); +} + +std::pair, hashmap_of_device_columns> build_tree( + device_json_column& root, + host_span is_str_column_all_nulls, + tree_meta_t& d_column_tree, + device_span d_unique_col_ids, + device_span d_max_row_offsets, + std::vector const& column_names, + NodeIndexT row_array_parent_col_id, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + bool const is_enabled_lines = options.is_enabled_lines(); + bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); + auto unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); + auto column_categories = + cudf::detail::make_host_vector_async(d_column_tree.node_categories, stream); + auto const column_parent_ids = + cudf::detail::make_host_vector_async(d_column_tree.parent_node_ids, stream); + auto column_range_beg = + cudf::detail::make_host_vector_async(d_column_tree.node_range_begin, stream); + auto const max_row_offsets = cudf::detail::make_host_vector_async(d_max_row_offsets, stream); + auto num_columns = d_unique_col_ids.size(); + stream.synchronize(); + + auto to_json_col_type = [](auto category) { + switch (category) { + 
case NC_STRUCT: return json_col_t::StructColumn; + case NC_LIST: return json_col_t::ListColumn; + case NC_STR: [[fallthrough]]; + case NC_VAL: return json_col_t::StringColumn; + default: return json_col_t::Unknown; + } + }; + + auto initialize_json_columns = [&](auto i, auto& col_ref, auto column_category) { + auto& col = col_ref.get(); + if (col.type != json_col_t::Unknown) { return; } + if (column_category == NC_ERR || column_category == NC_FN) { + return; + } else if (column_category == NC_VAL || column_category == NC_STR) { + col.string_offsets.resize(max_row_offsets[i] + 1, stream); + col.string_lengths.resize(max_row_offsets[i] + 1, stream); + thrust::fill( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(col.string_offsets.begin(), col.string_lengths.begin()), + thrust::make_zip_iterator(col.string_offsets.end(), col.string_lengths.end()), + thrust::make_tuple(0, 0)); + } else if (column_category == NC_LIST) { + col.child_offsets.resize(max_row_offsets[i] + 2, stream); + thrust::uninitialized_fill( + rmm::exec_policy_nosync(stream), col.child_offsets.begin(), col.child_offsets.end(), 0); + } + col.num_rows = max_row_offsets[i] + 1; + col.validity = + cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); + col.type = to_json_col_type(column_category); + }; + + // 2. generate nested columns tree and its device_memory + // reorder unique_col_ids w.r.t. column_range_begin for order of column to be in field order. 
+ auto h_range_col_id_it = + thrust::make_zip_iterator(column_range_beg.begin(), unique_col_ids.begin()); + std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) { + return thrust::get<0>(a) < thrust::get<0>(b); + }); + // adjacency list construction + std::map> adj; + for (auto const this_col_id : unique_col_ids) { + auto parent_col_id = column_parent_ids[this_col_id]; + adj[parent_col_id].push_back(this_col_id); + } + + // Pruning + auto is_pruned = cudf::detail::make_host_vector(num_columns, stream); + std::fill_n(is_pruned.begin(), num_columns, options.is_enabled_prune_columns()); + + // prune all children of a column, but not self. + auto ignore_all_children = [&](auto parent_col_id) { + std::deque offspring; + if (adj.count(parent_col_id)) { + for (auto const& child : adj[parent_col_id]) { + offspring.push_back(child); + } + } + while (!offspring.empty()) { + auto this_id = offspring.front(); + offspring.pop_front(); + is_pruned[this_id] = true; + if (adj.count(this_id)) { + for (auto const& child : adj[this_id]) { + offspring.push_back(child); + } + } + } + }; + + // Pruning: iterate through schema and mark only those columns and enforce type. + // NoPruning: iterate through schema and enforce type. + + if (adj[parent_node_sentinel].empty()) + return {cudf::detail::make_host_vector(0, stream), {}}; // for empty file + CUDF_EXPECTS(adj[parent_node_sentinel].size() == 1, "Should be 1"); + auto expected_types = cudf::detail::make_host_vector(num_columns, stream); + std::fill_n(expected_types.begin(), num_columns, NUM_NODE_CLASSES); + + auto lookup_names = [&column_names](auto child_ids, auto name) { + for (auto const& child_id : child_ids) { + if (column_names[child_id] == name) return child_id; + } + return -1; + }; + // recursive lambda on schema to mark columns as pruned. 
+ std::function mark_is_pruned; + mark_is_pruned = [&is_pruned, + &mark_is_pruned, + &adj, + &lookup_names, + &column_categories, + &expected_types, + &ignore_all_children](NodeIndexT root, schema_element const& schema) -> void { + if (root == -1) return; + bool pass = + (schema.type == data_type{type_id::STRUCT} and column_categories[root] == NC_STRUCT) or + (schema.type == data_type{type_id::LIST} and column_categories[root] == NC_LIST) or + (schema.type != data_type{type_id::STRUCT} and schema.type != data_type{type_id::LIST} and + column_categories[root] != NC_FN); + if (!pass) { + // ignore all children of this column and prune this column. + is_pruned[root] = true; + ignore_all_children(root); + return; + } + is_pruned[root] = false; + auto expected_type = [](auto type, auto cat) { + if (type == data_type{type_id::STRUCT} and cat == NC_STRUCT) return NC_STRUCT; + if (type == data_type{type_id::LIST} and cat == NC_LIST) return NC_LIST; + if (type != data_type{type_id::STRUCT} and type != data_type{type_id::LIST}) return NC_STR; + return NC_ERR; + }(schema.type, column_categories[root]); + expected_types[root] = expected_type; // forced type. + // ignore children of nested columns, but not self. + if (expected_type == NC_STR and + (column_categories[root] == NC_STRUCT or column_categories[root] == NC_LIST)) + ignore_all_children(root); + if (not(schema.type == data_type{type_id::STRUCT} or schema.type == data_type{type_id::LIST})) + return; // no children to mark for non-nested. + auto child_ids = adj.count(root) ? 
adj[root] : std::vector{}; + if (schema.type == data_type{type_id::STRUCT}) { + for (auto const& key_pair : schema.child_types) { + auto col_id = lookup_names(child_ids, key_pair.first); + if (col_id == -1) continue; + is_pruned[col_id] = false; + for (auto const& child_id : adj[col_id]) // children of field (>1 if mixed) + mark_is_pruned(child_id, key_pair.second); + } + } else if (schema.type == data_type{type_id::LIST}) { + // partial solution for list children to have any name. + auto this_list_child_name = + schema.child_types.size() == 1 ? schema.child_types.begin()->first : list_child_name; + if (schema.child_types.count(this_list_child_name) == 0) return; + auto list_child = schema.child_types.at(this_list_child_name); + for (auto const& child_id : child_ids) + mark_is_pruned(child_id, list_child); + } + }; + if (is_array_of_arrays) { + if (adj[adj[parent_node_sentinel][0]].empty()) + return {cudf::detail::make_host_vector(0, stream), {}}; + auto root_list_col_id = + is_enabled_lines ? adj[parent_node_sentinel][0] : adj[adj[parent_node_sentinel][0]][0]; + // mark root and row array col_id as not pruned. + if (!is_enabled_lines) { + auto top_level_list_id = adj[parent_node_sentinel][0]; + is_pruned[top_level_list_id] = false; + } + is_pruned[root_list_col_id] = false; + std::visit(cudf::detail::visitor_overload{ + [&root_list_col_id, &adj, &mark_is_pruned, &column_names]( + std::vector const& user_dtypes) -> void { + for (size_t i = 0; i < adj[root_list_col_id].size() && i < user_dtypes.size(); + i++) { + NodeIndexT const first_child_id = adj[root_list_col_id][i]; + auto name = column_names[first_child_id]; + auto value_id = std::stol(name); + if (value_id >= 0 and value_id < static_cast(user_dtypes.size())) + mark_is_pruned(first_child_id, schema_element{user_dtypes[value_id]}); + // Note: mixed type - forced type, will work here. 
+ } + }, + [&root_list_col_id, &adj, &mark_is_pruned, &column_names]( + std::map const& user_dtypes) -> void { + for (size_t i = 0; i < adj[root_list_col_id].size(); i++) { + auto const first_child_id = adj[root_list_col_id][i]; + auto name = column_names[first_child_id]; + if (user_dtypes.count(name)) + mark_is_pruned(first_child_id, schema_element{user_dtypes.at(name)}); + } + }, + [&root_list_col_id, &adj, &mark_is_pruned, &column_names]( + std::map const& user_dtypes) -> void { + for (size_t i = 0; i < adj[root_list_col_id].size(); i++) { + auto const first_child_id = adj[root_list_col_id][i]; + auto name = column_names[first_child_id]; + if (user_dtypes.count(name)) + mark_is_pruned(first_child_id, user_dtypes.at(name)); + } + }}, + options.get_dtypes()); + } else { + auto root_struct_col_id = + is_enabled_lines + ? adj[parent_node_sentinel][0] + : (adj[adj[parent_node_sentinel][0]].empty() ? -1 : adj[adj[parent_node_sentinel][0]][0]); + // mark root and row struct col_id as not pruned. 
+ if (!is_enabled_lines) { + auto top_level_list_id = adj[parent_node_sentinel][0]; + is_pruned[top_level_list_id] = false; + } + is_pruned[root_struct_col_id] = false; + schema_element u_schema{data_type{type_id::STRUCT}}; + u_schema.child_types = unified_schema(options); + std::visit( + cudf::detail::visitor_overload{ + [&is_pruned, &root_struct_col_id, &adj, &mark_is_pruned]( + std::vector const& user_dtypes) -> void { + for (size_t i = 0; i < adj[root_struct_col_id].size() && i < user_dtypes.size(); i++) { + NodeIndexT const first_field_id = adj[root_struct_col_id][i]; + is_pruned[first_field_id] = false; + for (auto const& child_id : adj[first_field_id]) // children of field (>1 if mixed) + mark_is_pruned(child_id, schema_element{user_dtypes[i]}); + } + }, + [&root_struct_col_id, &adj, &mark_is_pruned, &u_schema]( + std::map const& user_dtypes) -> void { + mark_is_pruned(root_struct_col_id, u_schema); + }, + [&root_struct_col_id, &adj, &mark_is_pruned, &u_schema]( + std::map const& user_dtypes) -> void { + mark_is_pruned(root_struct_col_id, u_schema); + }}, + options.get_dtypes()); + } + // Useful for array of arrays + auto named_level = + is_enabled_lines + ? adj[parent_node_sentinel][0] + : (adj[adj[parent_node_sentinel][0]].empty() ? -1 : adj[adj[parent_node_sentinel][0]][0]); + + auto handle_mixed_types = [&column_categories, + &is_str_column_all_nulls, + &is_pruned, + &expected_types, + &is_enabled_mixed_types_as_string, + &ignore_all_children](std::vector& child_ids) { + // do these on unpruned columns only. + // when mixed types is disabled, ignore string sibling of nested column. + // when mixed types is disabled, and both list and struct columns are siblings, error out. 
+ // when mixed types is enabled, force string type on all columns + + // Remove pruned children (forced type will not clash here because other types are already + // pruned) + child_ids.erase( + std::remove_if(child_ids.begin(), + child_ids.end(), + [&is_pruned](NodeIndexT child_id) { return is_pruned[child_id]; }), + child_ids.end()); + // find string id, struct id, list id. + NodeIndexT str_col_id{-1}, struct_col_id{-1}, list_col_id{-1}; + for (auto const& child_id : child_ids) { + if (column_categories[child_id] == NC_VAL || column_categories[child_id] == NC_STR) + str_col_id = child_id; + else if (column_categories[child_id] == NC_STRUCT) + struct_col_id = child_id; + else if (column_categories[child_id] == NC_LIST) + list_col_id = child_id; + } + // conditions for handling mixed types. + if (is_enabled_mixed_types_as_string) { + if (struct_col_id != -1 and list_col_id != -1) { + expected_types[struct_col_id] = NC_STR; + expected_types[list_col_id] = NC_STR; + // ignore children of nested columns. + ignore_all_children(struct_col_id); + ignore_all_children(list_col_id); + } + if ((struct_col_id != -1 or list_col_id != -1) and str_col_id != -1) { + if (is_str_column_all_nulls[str_col_id]) + is_pruned[str_col_id] = true; + else { + // ignore children of nested columns. + if (struct_col_id != -1) { + expected_types[struct_col_id] = NC_STR; + ignore_all_children(struct_col_id); + } + if (list_col_id != -1) { + expected_types[list_col_id] = NC_STR; + ignore_all_children(list_col_id); + } + } + } + } else { + // if both are present, error out. + CUDF_EXPECTS(struct_col_id == -1 or list_col_id == -1, + "A mix of lists and structs within the same column is not supported"); + // either one only: so ignore str column. 
+ if ((struct_col_id != -1 or list_col_id != -1) and str_col_id != -1) { + is_pruned[str_col_id] = true; + } + } + }; + + using dev_ref = std::reference_wrapper; + std::unordered_map columns; + columns.try_emplace(parent_node_sentinel, std::ref(root)); + // convert adjaceny list to tree. + dev_ref parent_ref = std::ref(root); + // creates children column + std::function construct_tree; + construct_tree = [&](NodeIndexT root, dev_ref ref) -> void { + if (is_pruned[root]) return; + auto expected_category = + expected_types[root] == NUM_NODE_CLASSES ? column_categories[root] : expected_types[root]; + initialize_json_columns(root, ref, expected_category); + auto child_ids = adj.count(root) ? adj[root] : std::vector{}; + if (expected_category == NC_STRUCT) { + // find field column ids, and its children and create columns. + for (auto const& field_id : child_ids) { + auto name = column_names[field_id]; + if (is_pruned[field_id]) continue; + auto inserted = + ref.get().child_columns.try_emplace(name, device_json_column(stream, mr)).second; + ref.get().column_order.emplace_back(name); + CUDF_EXPECTS(inserted, + "struct child column insertion failed, duplicate column name in the parent"); + auto this_ref = std::ref(ref.get().child_columns.at(name)); + // Mixed type handling + auto& value_col_ids = adj[field_id]; + handle_mixed_types(value_col_ids); + if (value_col_ids.empty()) { + // If no column is present, remove the uninitialized column. + ref.get().child_columns.erase(name); + ref.get().column_order.pop_back(); + continue; + } + for (auto const& child_id : value_col_ids) // children of field (>1 if mixed) + { + if (is_pruned[child_id]) continue; + columns.try_emplace(child_id, this_ref); + construct_tree(child_id, this_ref); + } + } + } else if (expected_category == NC_LIST) { + // array of arrays interpreted as array of structs. 
+ if (is_array_of_arrays and root == named_level) { + // create column names + std::map> array_values; + for (auto const& child_id : child_ids) { + if (is_pruned[child_id]) continue; + auto name = column_names[child_id]; + array_values[std::stoi(name)].push_back(child_id); + } + // + for (auto const& value_id_pair : array_values) { + auto [value_id, value_col_ids] = value_id_pair; + auto name = std::to_string(value_id); + auto inserted = + ref.get().child_columns.try_emplace(name, device_json_column(stream, mr)).second; + ref.get().column_order.emplace_back(name); + CUDF_EXPECTS(inserted, + "list child column insertion failed, duplicate column name in the parent"); + auto this_ref = std::ref(ref.get().child_columns.at(name)); + handle_mixed_types(value_col_ids); + if (value_col_ids.empty()) { + // If no column is present, remove the uninitialized column. + ref.get().child_columns.erase(name); + ref.get().column_order.pop_back(); + continue; + } + for (auto const& child_id : value_col_ids) // children of field (>1 if mixed) + { + if (is_pruned[child_id]) continue; + columns.try_emplace(child_id, this_ref); + construct_tree(child_id, this_ref); + } + } + } else { + if (child_ids.empty()) return; + auto inserted = + ref.get() + .child_columns.try_emplace(list_child_name, device_json_column(stream, mr)) + .second; + CUDF_EXPECTS(inserted, + "list child column insertion failed, duplicate column name in the parent"); + ref.get().column_order.emplace_back(list_child_name); + auto this_ref = std::ref(ref.get().child_columns.at(list_child_name)); + // Mixed type handling + handle_mixed_types(child_ids); + if (child_ids.empty()) { + // If no column is present, remove the uninitialized column. 
+ ref.get().child_columns.erase(list_child_name); + } + for (auto const& child_id : child_ids) { + if (is_pruned[child_id]) continue; + columns.try_emplace(child_id, this_ref); + construct_tree(child_id, this_ref); + } + } + } + }; + auto inserted = parent_ref.get() + .child_columns.try_emplace(list_child_name, device_json_column(stream, mr)) + .second; + CUDF_EXPECTS(inserted, "child column insertion failed, duplicate column name in the parent"); + parent_ref = std::ref(parent_ref.get().child_columns.at(list_child_name)); + columns.try_emplace(adj[parent_node_sentinel][0], parent_ref); + construct_tree(adj[parent_node_sentinel][0], parent_ref); + + // Forced string type due to input schema and mixed type as string. + for (size_t i = 0; i < expected_types.size(); i++) { + if (expected_types[i] == NC_STR) { + if (columns.count(i)) { columns.at(i).get().forced_as_string_column = true; } + } + } + std::transform(expected_types.cbegin(), + expected_types.cend(), + column_categories.cbegin(), + expected_types.begin(), + [](auto exp, auto cat) { return exp == NUM_NODE_CLASSES ? 
cat : exp; }); + cudaMemcpyAsync(d_column_tree.node_categories.begin(), + expected_types.data(), + expected_types.size() * sizeof(column_categories[0]), + cudaMemcpyDefault, + stream.value()); + + return {is_pruned, columns}; +} +} // namespace experimental + +} // namespace cudf::io::json::detail diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index 17fa7abdffe..912e93d52ae 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -23,38 +23,30 @@ #include #include #include -#include +#include #include #include +#include #include #include #include #include -#include #include #include -#include #include #include #include -#include #include #include #include #include -#include -#include #include #include -#include -#include - namespace cudf::io::json::detail { -// DEBUG prints auto to_cat = [](auto v) -> std::string { switch (v) { case NC_STRUCT: return " S"; @@ -77,16 +69,16 @@ void print_tree(host_span input, tree_meta_t const& d_gpu_tree, rmm::cuda_stream_view stream) { - print_vec(cudf::detail::make_std_vector_sync(d_gpu_tree.node_categories, stream), + print_vec(cudf::detail::make_host_vector_sync(d_gpu_tree.node_categories, stream), "node_categories", to_cat); - print_vec(cudf::detail::make_std_vector_sync(d_gpu_tree.parent_node_ids, stream), + print_vec(cudf::detail::make_host_vector_sync(d_gpu_tree.parent_node_ids, stream), "parent_node_ids", to_int); print_vec( - cudf::detail::make_std_vector_sync(d_gpu_tree.node_levels, stream), "node_levels", to_int); - auto node_range_begin = cudf::detail::make_std_vector_sync(d_gpu_tree.node_range_begin, stream); - auto node_range_end = cudf::detail::make_std_vector_sync(d_gpu_tree.node_range_end, stream); + cudf::detail::make_host_vector_sync(d_gpu_tree.node_levels, stream), "node_levels", to_int); + auto node_range_begin = cudf::detail::make_host_vector_sync(d_gpu_tree.node_range_begin, stream); + auto node_range_end = 
cudf::detail::make_host_vector_sync(d_gpu_tree.node_range_end, stream); print_vec(node_range_begin, "node_range_begin", to_int); print_vec(node_range_end, "node_range_end", to_int); for (int i = 0; i < int(node_range_begin.size()); i++) { @@ -112,19 +104,20 @@ void print_tree(host_span input, * max row offsets of columns */ std::tuple, rmm::device_uvector> -reduce_to_column_tree(tree_meta_t& tree, - device_span original_col_ids, - device_span sorted_col_ids, - device_span ordered_node_ids, - device_span row_offsets, +reduce_to_column_tree(tree_meta_t const& tree, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, bool is_array_of_arrays, NodeIndexT const row_array_parent_col_id, rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); + // 1. column count for allocation - auto const num_columns = - thrust::unique_count(rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end()); + auto const num_columns = thrust::unique_count( + rmm::exec_policy_nosync(stream), sorted_col_ids.begin(), sorted_col_ids.end()); // 2. reduce_by_key {col_id}, {row_offset}, max. rmm::device_uvector unique_col_ids(num_columns, stream); @@ -169,30 +162,34 @@ reduce_to_column_tree(tree_meta_t& tree, }); // 4. 
unique_copy parent_node_ids, ranges - rmm::device_uvector column_levels(0, stream); // not required + rmm::device_uvector column_levels(num_columns, stream); // not required rmm::device_uvector parent_col_ids(num_columns, stream); rmm::device_uvector col_range_begin(num_columns, stream); // Field names rmm::device_uvector col_range_end(num_columns, stream); rmm::device_uvector unique_node_ids(num_columns, stream); - thrust::unique_by_key_copy(rmm::exec_policy(stream), + thrust::unique_by_key_copy(rmm::exec_policy_nosync(stream), sorted_col_ids.begin(), sorted_col_ids.end(), ordered_node_ids.begin(), thrust::make_discard_iterator(), unique_node_ids.begin()); + thrust::copy_n( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), thrust::make_zip_iterator( + thrust::make_permutation_iterator(tree.node_levels.begin(), unique_node_ids.begin()), thrust::make_permutation_iterator(tree.parent_node_ids.begin(), unique_node_ids.begin()), thrust::make_permutation_iterator(tree.node_range_begin.begin(), unique_node_ids.begin()), thrust::make_permutation_iterator(tree.node_range_end.begin(), unique_node_ids.begin())), unique_node_ids.size(), - thrust::make_zip_iterator( - parent_col_ids.begin(), col_range_begin.begin(), col_range_end.begin())); + thrust::make_zip_iterator(column_levels.begin(), + parent_col_ids.begin(), + col_range_begin.begin(), + col_range_end.begin())); // convert parent_node_ids to parent_col_ids thrust::transform( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), parent_col_ids.begin(), parent_col_ids.end(), parent_col_ids.begin(), @@ -210,18 +207,17 @@ reduce_to_column_tree(tree_meta_t& tree, column_categories[parent_col_id] == NC_LIST && (!is_array_of_arrays || parent_col_id != row_array_parent_col_id)); }; + // Mixed types in List children go to different columns, // so all immediate children of list column should have same max_row_offsets. // create list's children max_row_offsets array. 
(initialize to zero) // atomicMax on children max_row_offsets array. // gather the max_row_offsets from children row offset array. { - rmm::device_uvector list_parents_children_max_row_offsets(num_columns, stream); - thrust::fill(rmm::exec_policy(stream), - list_parents_children_max_row_offsets.begin(), - list_parents_children_max_row_offsets.end(), - 0); - thrust::for_each(rmm::exec_policy(stream), + auto list_parents_children_max_row_offsets = + cudf::detail::make_zeroed_device_uvector_async( + static_cast(num_columns), stream, cudf::get_current_device_resource_ref()); + thrust::for_each(rmm::exec_policy_nosync(stream), unique_col_ids.begin(), unique_col_ids.end(), [column_categories = column_categories.begin(), @@ -237,8 +233,9 @@ reduce_to_column_tree(tree_meta_t& tree, ref.fetch_max(max_row_offsets[col_id], cuda::std::memory_order_relaxed); } }); + thrust::gather_if( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), parent_col_ids.begin(), parent_col_ids.end(), parent_col_ids.begin(), @@ -253,7 +250,7 @@ reduce_to_column_tree(tree_meta_t& tree, // copy lists' max_row_offsets to children. // all structs should have same size. 
thrust::transform_if( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), unique_col_ids.begin(), unique_col_ids.end(), max_row_offsets.begin(), @@ -279,7 +276,7 @@ reduce_to_column_tree(tree_meta_t& tree, // For Struct and List (to avoid copying entire strings when mixed type as string is enabled) thrust::transform_if( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), col_range_begin.begin(), col_range_begin.end(), column_categories.begin(), @@ -296,649 +293,6 @@ reduce_to_column_tree(tree_meta_t& tree, std::move(max_row_offsets)}; } -/** - * @brief Get the column indices for the values column for array of arrays rows - * - * @param row_array_children_level The level of the row array's children - * @param d_tree The tree metadata - * @param col_ids The column ids - * @param num_columns The number of columns - * @param stream The stream to use - * @return The value columns' indices - */ -rmm::device_uvector get_values_column_indices(TreeDepthT const row_array_children_level, - tree_meta_t const& d_tree, - device_span col_ids, - size_type const num_columns, - rmm::cuda_stream_view stream) -{ - CUDF_FUNC_RANGE(); - auto [level2_nodes, level2_indices] = get_array_children_indices( - row_array_children_level, d_tree.node_levels, d_tree.parent_node_ids, stream); - auto col_id_location = thrust::make_permutation_iterator(col_ids.begin(), level2_nodes.begin()); - rmm::device_uvector values_column_indices(num_columns, stream); - thrust::scatter(rmm::exec_policy(stream), - level2_indices.begin(), - level2_indices.end(), - col_id_location, - values_column_indices.begin()); - return values_column_indices; -} - -/** - * @brief Copies strings specified by pair of begin, end offsets to host vector of strings. 
- * - * @param input String device buffer - * @param node_range_begin Begin offset of the strings - * @param node_range_end End offset of the strings - * @param stream CUDA stream - * @return Vector of strings - */ -std::vector copy_strings_to_host_sync( - device_span input, - device_span node_range_begin, - device_span node_range_end, - rmm::cuda_stream_view stream) -{ - CUDF_FUNC_RANGE(); - auto const num_strings = node_range_begin.size(); - rmm::device_uvector string_offsets(num_strings, stream); - rmm::device_uvector string_lengths(num_strings, stream); - auto d_offset_pairs = thrust::make_zip_iterator(node_range_begin.begin(), node_range_end.begin()); - thrust::transform(rmm::exec_policy(stream), - d_offset_pairs, - d_offset_pairs + num_strings, - thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin()), - [] __device__(auto const& offsets) { - // Note: first character for non-field columns - return thrust::make_tuple( - static_cast(thrust::get<0>(offsets)), - static_cast(thrust::get<1>(offsets) - thrust::get<0>(offsets))); - }); - - cudf::io::parse_options_view options_view{}; - options_view.quotechar = '\0'; // no quotes - options_view.keepquotes = true; - auto d_offset_length_it = - thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin()); - auto d_column_names = parse_data(input.data(), - d_offset_length_it, - num_strings, - data_type{type_id::STRING}, - rmm::device_buffer{}, - 0, - options_view, - stream, - rmm::mr::get_current_device_resource()); - auto to_host = [stream](auto const& col) { - if (col.is_empty()) return std::vector{}; - auto const scv = cudf::strings_column_view(col); - auto const h_chars = cudf::detail::make_std_vector_async( - cudf::device_span(scv.chars_begin(stream), scv.chars_size(stream)), stream); - auto const h_offsets = cudf::detail::make_std_vector_async( - cudf::device_span(scv.offsets().data() + scv.offset(), - scv.size() + 1), - stream); - stream.synchronize(); - - // build std::string vector 
from chars and offsets - std::vector host_data; - host_data.reserve(col.size()); - std::transform( - std::begin(h_offsets), - std::end(h_offsets) - 1, - std::begin(h_offsets) + 1, - std::back_inserter(host_data), - [&](auto start, auto end) { return std::string(h_chars.data() + start, end - start); }); - return host_data; - }; - return to_host(d_column_names->view()); -} - -/** - * @brief Checks if all strings in each string column in the tree are nulls. - * For non-string columns, it's set as true. If any of rows in a string column is false, it's set as - * false. - * - * @param input Input JSON string device data - * @param d_column_tree column tree representation of JSON string - * @param tree Node tree representation of the JSON string - * @param col_ids Column ids of the nodes in the tree - * @param options Parsing options specifying the parsing behaviour - * @param stream CUDA stream used for device memory operations and kernel launches - * @return Array of bytes where each byte indicate if it is all nulls string column. 
- */ -rmm::device_uvector is_all_nulls_each_column(device_span input, - tree_meta_t const& d_column_tree, - tree_meta_t const& tree, - device_span col_ids, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream) -{ - auto const num_nodes = col_ids.size(); - auto const num_cols = d_column_tree.node_categories.size(); - rmm::device_uvector is_all_nulls(num_cols, stream); - thrust::fill(rmm::exec_policy(stream), is_all_nulls.begin(), is_all_nulls.end(), true); - - auto parse_opt = parsing_options(options, stream); - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::counting_iterator(0), - num_nodes, - [options = parse_opt.view(), - data = input.data(), - column_categories = d_column_tree.node_categories.begin(), - col_ids = col_ids.begin(), - range_begin = tree.node_range_begin.begin(), - range_end = tree.node_range_end.begin(), - is_all_nulls = is_all_nulls.begin()] __device__(size_type i) { - auto const node_category = column_categories[col_ids[i]]; - if (node_category == NC_STR or node_category == NC_VAL) { - auto const is_null_literal = serialized_trie_contains( - options.trie_na, - {data + range_begin[i], static_cast(range_end[i] - range_begin[i])}); - if (!is_null_literal) is_all_nulls[col_ids[i]] = false; - } - }); - return is_all_nulls; -} - -/** - * @brief Holds member data pointers of `d_json_column` - * - */ -struct json_column_data { - using row_offset_t = json_column::row_offset_t; - row_offset_t* string_offsets; - row_offset_t* string_lengths; - row_offset_t* child_offsets; - bitmask_type* validity; -}; - -/** - * @brief Constructs `d_json_column` from node tree representation - * Newly constructed columns are insert into `root`'s children. - * `root` must be a list type. 
- * - * @param input Input JSON string device data - * @param tree Node tree representation of the JSON string - * @param col_ids Column ids of the nodes in the tree - * @param row_offsets Row offsets of the nodes in the tree - * @param root Root node of the `d_json_column` tree - * @param is_array_of_arrays Whether the tree is an array of arrays - * @param options Parsing options specifying the parsing behaviour - * options affecting behaviour are - * is_enabled_lines: Whether the input is a line-delimited JSON - * is_enabled_mixed_types_as_string: Whether to enable reading mixed types as string - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the device memory - * of child_offets and validity members of `d_json_column` - */ -void make_device_json_column(device_span input, - tree_meta_t& tree, - device_span col_ids, - device_span row_offsets, - device_json_column& root, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - - bool const is_enabled_lines = options.is_enabled_lines(); - bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); - auto const num_nodes = col_ids.size(); - rmm::device_uvector sorted_col_ids(col_ids.size(), stream); // make a copy - thrust::copy(rmm::exec_policy(stream), col_ids.begin(), col_ids.end(), sorted_col_ids.begin()); - - // sort by {col_id} on {node_ids} stable - rmm::device_uvector node_ids(col_ids.size(), stream); - thrust::sequence(rmm::exec_policy(stream), node_ids.begin(), node_ids.end()); - thrust::stable_sort_by_key( - rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end(), node_ids.begin()); - - NodeIndexT const row_array_parent_col_id = [&]() { - NodeIndexT value = parent_node_sentinel; - if (!col_ids.empty()) { - auto const list_node_index = is_enabled_lines ? 
0 : 1; - CUDF_CUDA_TRY(cudaMemcpyAsync(&value, - col_ids.data() + list_node_index, - sizeof(NodeIndexT), - cudaMemcpyDefault, - stream.value())); - stream.synchronize(); - } - return value; - }(); - - // 1. gather column information. - auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] = - reduce_to_column_tree(tree, - col_ids, - sorted_col_ids, - node_ids, - row_offsets, - is_array_of_arrays, - row_array_parent_col_id, - stream); - auto num_columns = d_unique_col_ids.size(); - auto unique_col_ids = cudf::detail::make_std_vector_async(d_unique_col_ids, stream); - auto column_categories = - cudf::detail::make_std_vector_async(d_column_tree.node_categories, stream); - auto column_parent_ids = - cudf::detail::make_std_vector_async(d_column_tree.parent_node_ids, stream); - auto column_range_beg = - cudf::detail::make_std_vector_async(d_column_tree.node_range_begin, stream); - auto max_row_offsets = cudf::detail::make_std_vector_async(d_max_row_offsets, stream); - std::vector column_names = copy_strings_to_host_sync( - input, d_column_tree.node_range_begin, d_column_tree.node_range_end, stream); - stream.synchronize(); - // array of arrays column names - if (is_array_of_arrays) { - TreeDepthT const row_array_children_level = is_enabled_lines ? 1 : 2; - auto values_column_indices = - get_values_column_indices(row_array_children_level, tree, col_ids, num_columns, stream); - auto h_values_column_indices = - cudf::detail::make_std_vector_async(values_column_indices, stream); - stream.synchronize(); - std::transform(unique_col_ids.begin(), - unique_col_ids.end(), - column_names.begin(), - column_names.begin(), - [&h_values_column_indices, &column_parent_ids, row_array_parent_col_id]( - auto col_id, auto name) mutable { - return column_parent_ids[col_id] == row_array_parent_col_id - ? 
std::to_string(h_values_column_indices[col_id]) - : name; - }); - } - - auto to_json_col_type = [](auto category) { - switch (category) { - case NC_STRUCT: return json_col_t::StructColumn; - case NC_LIST: return json_col_t::ListColumn; - case NC_STR: [[fallthrough]]; - case NC_VAL: return json_col_t::StringColumn; - default: return json_col_t::Unknown; - } - }; - auto init_to_zero = [stream](auto& v) { - thrust::uninitialized_fill(rmm::exec_policy_nosync(stream), v.begin(), v.end(), 0); - }; - - auto initialize_json_columns = [&](auto i, auto& col) { - if (column_categories[i] == NC_ERR || column_categories[i] == NC_FN) { - return; - } else if (column_categories[i] == NC_VAL || column_categories[i] == NC_STR) { - col.string_offsets.resize(max_row_offsets[i] + 1, stream); - col.string_lengths.resize(max_row_offsets[i] + 1, stream); - init_to_zero(col.string_offsets); - init_to_zero(col.string_lengths); - } else if (column_categories[i] == NC_LIST) { - col.child_offsets.resize(max_row_offsets[i] + 2, stream); - init_to_zero(col.child_offsets); - } - col.num_rows = max_row_offsets[i] + 1; - col.validity = - cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); - col.type = to_json_col_type(column_categories[i]); - }; - - auto reinitialize_as_string = [&](auto i, auto& col) { - col.string_offsets.resize(max_row_offsets[i] + 1, stream); - col.string_lengths.resize(max_row_offsets[i] + 1, stream); - init_to_zero(col.string_offsets); - init_to_zero(col.string_lengths); - col.num_rows = max_row_offsets[i] + 1; - col.validity = - cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); - col.type = json_col_t::StringColumn; - // destroy references of all child columns after this step, by calling remove_child_columns - }; - - path_from_tree tree_path{column_categories, - column_parent_ids, - column_names, - is_array_of_arrays, - row_array_parent_col_id}; - - // 2. 
generate nested columns tree and its device_memory - // reorder unique_col_ids w.r.t. column_range_begin for order of column to be in field order. - auto h_range_col_id_it = - thrust::make_zip_iterator(column_range_beg.begin(), unique_col_ids.begin()); - std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) { - return thrust::get<0>(a) < thrust::get<0>(b); - }); - - std::vector is_str_column_all_nulls{}; - if (is_enabled_mixed_types_as_string) { - is_str_column_all_nulls = cudf::detail::make_std_vector_sync( - is_all_nulls_each_column(input, d_column_tree, tree, col_ids, options, stream), stream); - } - - // use hash map because we may skip field name's col_ids - std::unordered_map> columns; - // map{parent_col_id, child_col_name}> = child_col_id, used for null value column tracking - std::map, NodeIndexT> mapped_columns; - // find column_ids which are values, but should be ignored in validity - auto ignore_vals = cudf::detail::make_host_vector(num_columns, stream); - std::vector is_mixed_type_column(num_columns, 0); - std::vector is_pruned(num_columns, 0); - columns.try_emplace(parent_node_sentinel, std::ref(root)); - - std::function remove_child_columns = - [&](NodeIndexT this_col_id, device_json_column& col) { - for (auto col_name : col.column_order) { - auto child_id = mapped_columns[{this_col_id, col_name}]; - is_mixed_type_column[child_id] = 1; - remove_child_columns(child_id, col.child_columns.at(col_name)); - mapped_columns.erase({this_col_id, col_name}); - columns.erase(child_id); - } - col.child_columns.clear(); // their references are deleted above. 
- col.column_order.clear(); - }; - - auto name_and_parent_index = [&is_array_of_arrays, - &row_array_parent_col_id, - &column_parent_ids, - &column_categories, - &column_names](auto this_col_id) { - std::string name = ""; - auto parent_col_id = column_parent_ids[this_col_id]; - if (parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST) { - if (is_array_of_arrays && parent_col_id == row_array_parent_col_id) { - name = column_names[this_col_id]; - } else { - name = list_child_name; - } - } else if (column_categories[parent_col_id] == NC_FN) { - auto field_name_col_id = parent_col_id; - parent_col_id = column_parent_ids[parent_col_id]; - name = column_names[field_name_col_id]; - } else { - CUDF_FAIL("Unexpected parent column category"); - } - return std::pair{name, parent_col_id}; - }; - - // Prune columns that are not required to be parsed. - if (options.is_enabled_prune_columns()) { - for (auto const this_col_id : unique_col_ids) { - if (column_categories[this_col_id] == NC_ERR || column_categories[this_col_id] == NC_FN) { - continue; - } - // Struct, List, String, Value - auto [name, parent_col_id] = name_and_parent_index(this_col_id); - // get path of this column, and get its dtype if present in options - auto const nt = tree_path.get_path(this_col_id); - std::optional const user_dtype = get_path_data_type(nt, options); - if (!user_dtype.has_value() and parent_col_id != parent_node_sentinel) { - is_pruned[this_col_id] = 1; - continue; - } else { - // make sure all its parents are not pruned. - while (parent_col_id != parent_node_sentinel and is_pruned[parent_col_id] == 1) { - is_pruned[parent_col_id] = 0; - parent_col_id = column_parent_ids[parent_col_id]; - } - } - } - } - - // Build the column tree, also, handles mixed types. 
- for (auto const this_col_id : unique_col_ids) { - if (column_categories[this_col_id] == NC_ERR || column_categories[this_col_id] == NC_FN) { - continue; - } - // Struct, List, String, Value - auto [name, parent_col_id] = name_and_parent_index(this_col_id); - - // if parent is mixed type column or this column is pruned, ignore this column. - if (parent_col_id != parent_node_sentinel && - (is_mixed_type_column[parent_col_id] || is_pruned[this_col_id])) { - ignore_vals[this_col_id] = 1; - if (is_mixed_type_column[parent_col_id]) { is_mixed_type_column[this_col_id] = 1; } - continue; - } - - // If the child is already found, - // replace if this column is a nested column and the existing was a value column - // ignore this column if this column is a value column and the existing was a nested column - auto it = columns.find(parent_col_id); - CUDF_EXPECTS(it != columns.end(), "Parent column not found"); - auto& parent_col = it->second.get(); - bool replaced = false; - if (mapped_columns.count({parent_col_id, name}) > 0) { - auto const old_col_id = mapped_columns[{parent_col_id, name}]; - // If mixed type as string is enabled, make both of them strings and merge them. - // All child columns will be ignored when parsing. - if (is_enabled_mixed_types_as_string) { - bool const is_mixed_type = [&]() { - // If new or old is STR and they are all not null, make it mixed type, else ignore. - if (column_categories[this_col_id] == NC_VAL || - column_categories[this_col_id] == NC_STR) { - if (is_str_column_all_nulls[this_col_id]) return false; - } - if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) { - if (is_str_column_all_nulls[old_col_id]) return false; - } - return true; - }(); - if (is_mixed_type) { - is_mixed_type_column[this_col_id] = 1; - is_mixed_type_column[old_col_id] = 1; - // if old col type (not cat) is list or struct, replace with string. 
- auto& col = columns.at(old_col_id).get(); - if (col.type == json_col_t::ListColumn or col.type == json_col_t::StructColumn) { - reinitialize_as_string(old_col_id, col); - remove_child_columns(old_col_id, col); - // all its children (which are already inserted) are ignored later. - } - col.forced_as_string_column = true; - columns.try_emplace(this_col_id, columns.at(old_col_id)); - continue; - } - } - - if (column_categories[this_col_id] == NC_VAL || column_categories[this_col_id] == NC_STR) { - ignore_vals[this_col_id] = 1; - continue; - } - if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) { - // remap - ignore_vals[old_col_id] = 1; - mapped_columns.erase({parent_col_id, name}); - columns.erase(old_col_id); - parent_col.child_columns.erase(name); - replaced = true; // to skip duplicate name in column_order - } else { - // If this is a nested column but we're trying to insert either (a) a list node into a - // struct column or (b) a struct node into a list column, we fail - CUDF_EXPECTS(not((column_categories[old_col_id] == NC_LIST and - column_categories[this_col_id] == NC_STRUCT) or - (column_categories[old_col_id] == NC_STRUCT and - column_categories[this_col_id] == NC_LIST)), - "A mix of lists and structs within the same column is not supported"); - } - } - - if (is_enabled_mixed_types_as_string) { - // get path of this column, check if it is a struct forced as string, and enforce it - auto const nt = tree_path.get_path(this_col_id); - std::optional const user_dtype = get_path_data_type(nt, options); - if (column_categories[this_col_id] == NC_STRUCT and user_dtype.has_value() and - user_dtype.value().id() == type_id::STRING) { - is_mixed_type_column[this_col_id] = 1; - column_categories[this_col_id] = NC_STR; - } - } - - CUDF_EXPECTS(parent_col.child_columns.count(name) == 0, "duplicate column name: " + name); - // move into parent - device_json_column col(stream, mr); - initialize_json_columns(this_col_id, col); - auto 
inserted = parent_col.child_columns.try_emplace(name, std::move(col)).second; - CUDF_EXPECTS(inserted, "child column insertion failed, duplicate column name in the parent"); - if (not replaced) parent_col.column_order.push_back(name); - columns.try_emplace(this_col_id, std::ref(parent_col.child_columns.at(name))); - mapped_columns.try_emplace(std::make_pair(parent_col_id, name), this_col_id); - } - - if (is_enabled_mixed_types_as_string) { - // ignore all children of mixed type columns - for (auto const this_col_id : unique_col_ids) { - auto parent_col_id = column_parent_ids[this_col_id]; - if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 1) { - is_mixed_type_column[this_col_id] = 1; - ignore_vals[this_col_id] = 1; - columns.erase(this_col_id); - } - // Convert only mixed type columns as string (so to copy), but not its children - if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 0 and - is_mixed_type_column[this_col_id] == 1) - column_categories[this_col_id] = NC_STR; - } - cudaMemcpyAsync(d_column_tree.node_categories.begin(), - column_categories.data(), - column_categories.size() * sizeof(column_categories[0]), - cudaMemcpyDefault, - stream.value()); - } - - // restore unique_col_ids order - std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) { - return thrust::get<1>(a) < thrust::get<1>(b); - }); - // move columns data to device. 
- auto columns_data = cudf::detail::make_host_vector(num_columns, stream); - for (auto& [col_id, col_ref] : columns) { - if (col_id == parent_node_sentinel) continue; - auto& col = col_ref.get(); - columns_data[col_id] = json_column_data{col.string_offsets.data(), - col.string_lengths.data(), - col.child_offsets.data(), - static_cast(col.validity.data())}; - } - - auto d_ignore_vals = cudf::detail::make_device_uvector_async( - ignore_vals, stream, rmm::mr::get_current_device_resource()); - auto d_columns_data = cudf::detail::make_device_uvector_async( - columns_data, stream, rmm::mr::get_current_device_resource()); - - // 3. scatter string offsets to respective columns, set validity bits - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::counting_iterator(0), - num_nodes, - [column_categories = d_column_tree.node_categories.begin(), - col_ids = col_ids.begin(), - row_offsets = row_offsets.begin(), - range_begin = tree.node_range_begin.begin(), - range_end = tree.node_range_end.begin(), - d_ignore_vals = d_ignore_vals.begin(), - d_columns_data = d_columns_data.begin()] __device__(size_type i) { - if (d_ignore_vals[col_ids[i]]) return; - auto const node_category = column_categories[col_ids[i]]; - switch (node_category) { - case NC_STRUCT: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; - case NC_LIST: set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); break; - case NC_STR: [[fallthrough]]; - case NC_VAL: - if (d_ignore_vals[col_ids[i]]) break; - set_bit(d_columns_data[col_ids[i]].validity, row_offsets[i]); - d_columns_data[col_ids[i]].string_offsets[row_offsets[i]] = range_begin[i]; - d_columns_data[col_ids[i]].string_lengths[row_offsets[i]] = range_end[i] - range_begin[i]; - break; - default: break; - } - }); - - // 4. scatter List offset - // copy_if only node's whose parent is list, (node_id, parent_col_id) - // stable_sort by parent_col_id of {node_id}. 
- // For all unique parent_node_id of (i==0, i-1!=i), write start offset. - // (i==last, i+1!=i), write end offset. - // unique_copy_by_key {parent_node_id} {row_offset} to - // col[parent_col_id].child_offsets[row_offset[parent_node_id]] - - auto& parent_col_ids = sorted_col_ids; // reuse sorted_col_ids - auto parent_col_id = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - cuda::proclaim_return_type( - [col_ids = col_ids.begin(), - parent_node_ids = tree.parent_node_ids.begin()] __device__(size_type node_id) { - return parent_node_ids[node_id] == parent_node_sentinel ? parent_node_sentinel - : col_ids[parent_node_ids[node_id]]; - })); - auto const list_children_end = thrust::copy_if( - rmm::exec_policy(stream), - thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id), - thrust::make_zip_iterator(thrust::make_counting_iterator(0), parent_col_id) + - num_nodes, - thrust::make_counting_iterator(0), - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()), - [d_ignore_vals = d_ignore_vals.begin(), - parent_node_ids = tree.parent_node_ids.begin(), - column_categories = d_column_tree.node_categories.begin(), - col_ids = col_ids.begin()] __device__(size_type node_id) { - auto parent_node_id = parent_node_ids[node_id]; - return parent_node_id != parent_node_sentinel and - column_categories[col_ids[parent_node_id]] == NC_LIST and - (!d_ignore_vals[col_ids[parent_node_id]]); - }); - - auto const num_list_children = - list_children_end - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()); - thrust::stable_sort_by_key(rmm::exec_policy(stream), - parent_col_ids.begin(), - parent_col_ids.begin() + num_list_children, - node_ids.begin()); - thrust::for_each_n( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - num_list_children, - [node_ids = node_ids.begin(), - parent_node_ids = tree.parent_node_ids.begin(), - parent_col_ids = parent_col_ids.begin(), - row_offsets = row_offsets.begin(), - 
d_columns_data = d_columns_data.begin(), - num_list_children] __device__(size_type i) { - auto const node_id = node_ids[i]; - auto const parent_node_id = parent_node_ids[node_id]; - // scatter to list_offset - if (i == 0 or parent_node_ids[node_ids[i - 1]] != parent_node_id) { - d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id]] = - row_offsets[node_id]; - } - // last value of list child_offset is its size. - if (i == num_list_children - 1 or parent_node_ids[node_ids[i + 1]] != parent_node_id) { - d_columns_data[parent_col_ids[i]].child_offsets[row_offsets[parent_node_id] + 1] = - row_offsets[node_id] + 1; - } - }); - - // 5. scan on offsets. - for (auto& [id, col_ref] : columns) { - auto& col = col_ref.get(); - if (col.type == json_col_t::StringColumn) { - thrust::inclusive_scan(rmm::exec_policy_nosync(stream), - col.string_offsets.begin(), - col.string_offsets.end(), - col.string_offsets.begin(), - thrust::maximum{}); - } else if (col.type == json_col_t::ListColumn) { - thrust::inclusive_scan(rmm::exec_policy_nosync(stream), - col.child_offsets.begin(), - col.child_offsets.end(), - col.child_offsets.begin(), - thrust::maximum{}); - } - } - stream.synchronize(); -} - std::pair, std::vector> device_json_column_to_cudf_column( device_json_column& json_col, device_span d_input, @@ -963,7 +317,7 @@ std::pair, std::vector> device_json_co // Note: json_col modified here, moves this memory }; - auto get_child_schema = [schema](auto child_name) -> std::optional { + auto get_child_schema = [&schema](auto child_name) -> std::optional { if (schema.has_value()) { auto const result = schema.value().child_types.find(child_name); if (result != std::end(schema.value().child_types)) { return result->second; } @@ -971,6 +325,13 @@ std::pair, std::vector> device_json_co return {}; }; + auto get_list_child_schema = [&schema]() -> std::optional { + if (schema.has_value()) { + if (schema.value().child_types.size() > 0) return 
schema.value().child_types.begin()->second; + } + return {}; + }; + switch (json_col.type) { case json_col_t::StringColumn: { // move string_offsets to GPU and transform to string column @@ -980,39 +341,58 @@ std::pair, std::vector> device_json_co "string offset, string length mismatch"); rmm::device_uvector d_string_data(col_size, stream); // TODO how about directly storing pair in json_column? - auto offset_length_it = - thrust::make_zip_iterator(json_col.string_offsets.begin(), json_col.string_lengths.begin()); - data_type target_type{}; + auto [result_bitmask, null_count] = make_validity(json_col); - if (schema.has_value()) { + data_type target_type{}; + std::unique_ptr col{}; + if (options.normalize_whitespace && json_col.forced_as_string_column) { + CUDF_EXPECTS(prune_columns || options.mixed_types_as_string, + "Whitespace normalization of nested columns requested as string requires " + "either prune_columns or mixed_types_as_string to be enabled"); + auto [normalized_d_input, col_offsets, col_lengths] = + cudf::io::json::detail::normalize_whitespace( + d_input, json_col.string_offsets, json_col.string_lengths, stream, mr); + auto offset_length_it = thrust::make_zip_iterator(col_offsets.begin(), col_lengths.begin()); + target_type = data_type{type_id::STRING}; + // Convert strings to the inferred data type + col = parse_data(normalized_d_input.data(), + offset_length_it, + col_size, + target_type, + std::move(result_bitmask), + null_count, + options.view(), + stream, + mr); + } else { + auto offset_length_it = thrust::make_zip_iterator(json_col.string_offsets.begin(), + json_col.string_lengths.begin()); + if (schema.has_value()) { #ifdef NJP_DEBUG_PRINT - std::cout << "-> explicit type: " - << (schema.has_value() ? std::to_string(static_cast(schema->type.id())) - : "n/a"); + std::cout << "-> explicit type: " + << (schema.has_value() ? 
std::to_string(static_cast(schema->type.id())) + : "n/a"); #endif - target_type = schema.value().type; - } else if (json_col.forced_as_string_column) { - target_type = data_type{type_id::STRING}; - } - // Infer column type, if we don't have an explicit type for it - else { - target_type = cudf::io::detail::infer_data_type( - options.json_view(), d_input, offset_length_it, col_size, stream); + target_type = schema.value().type; + } + // Infer column type, if we don't have an explicit type for it + else { + target_type = cudf::io::detail::infer_data_type( + options.json_view(), d_input, offset_length_it, col_size, stream); + } + // Convert strings to the inferred data type + col = parse_data(d_input.data(), + offset_length_it, + col_size, + target_type, + std::move(result_bitmask), + null_count, + options.view(), + stream, + mr); } - auto [result_bitmask, null_count] = make_validity(json_col); - // Convert strings to the inferred data type - auto col = parse_data(d_input.data(), - offset_length_it, - col_size, - target_type, - std::move(result_bitmask), - null_count, - options.view(), - stream, - mr); - // Reset nullable if we do not have nulls // This is to match the existing JSON reader's behaviour: // - Non-string columns will always be returned as nullable @@ -1066,9 +446,8 @@ std::pair, std::vector> device_json_co rmm::device_buffer{}, 0); // Create children column - auto child_schema_element = json_col.child_columns.empty() - ? std::optional{} - : get_child_schema(json_col.child_columns.begin()->first); + auto child_schema_element = + json_col.child_columns.empty() ? std::optional{} : get_list_child_schema(); auto [child_column, names] = json_col.child_columns.empty() or (prune_columns and !child_schema_element.has_value()) ? std::pair, @@ -1106,6 +485,16 @@ std::pair, std::vector> device_json_co } } +template +auto make_device_json_column_dispatch(bool experimental, Args&&... 
args) +{ + if (experimental) { + return experimental::make_device_json_column(std::forward(args)...); + } else { + return make_device_json_column(std::forward(args)...); + } +} + table_with_metadata device_parse_nested_json(device_span d_input, cudf::io::json_reader_options const& options, rmm::cuda_stream_view stream, @@ -1116,13 +505,17 @@ table_with_metadata device_parse_nested_json(device_span d_input, auto gpu_tree = [&]() { // Parse the JSON and get the token stream const auto [tokens_gpu, token_indices_gpu] = - get_token_stream(d_input, options, stream, rmm::mr::get_current_device_resource()); + get_token_stream(d_input, options, stream, cudf::get_current_device_resource_ref()); // gpu tree generation - return get_tree_representation(tokens_gpu, - token_indices_gpu, - options.is_enabled_mixed_types_as_string(), - stream, - rmm::mr::get_current_device_resource()); + // Note that to normalize whitespaces in nested columns coerced to be string, we need the column + // to either be of mixed type or we need to request the column to be returned as string by + // pruning it with the STRING dtype + return get_tree_representation( + tokens_gpu, + token_indices_gpu, + options.is_enabled_mixed_types_as_string() || options.is_enabled_prune_columns(), + stream, + cudf::get_current_device_resource_ref()); }(); // IILE used to free memory of token data. 
#ifdef NJP_DEBUG_PRINT auto h_input = cudf::detail::make_host_vector_async(d_input, stream); @@ -1147,8 +540,9 @@ table_with_metadata device_parse_nested_json(device_span d_input, gpu_tree, is_array_of_arrays, options.is_enabled_lines(), + options.is_enabled_experimental(), stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); device_json_column root_column(stream, mr); root_column.type = json_col_t::ListColumn; @@ -1159,15 +553,16 @@ table_with_metadata device_parse_nested_json(device_span d_input, 0); // Get internal JSON column - make_device_json_column(d_input, - gpu_tree, - gpu_col_id, - gpu_row_offsets, - root_column, - is_array_of_arrays, - options, - stream, - mr); + make_device_json_column_dispatch(options.is_enabled_experimental(), + d_input, + gpu_tree, + gpu_col_id, + gpu_row_offsets, + root_column, + is_array_of_arrays, + options, + stream, + mr); // data_root refers to the root column of the data represented by the given JSON string auto& data_root = diff --git a/cpp/src/io/json/json_normalization.cu b/cpp/src/io/json/json_normalization.cu index 760b2214365..2d435dc8e1a 100644 --- a/cpp/src/io/json/json_normalization.cu +++ b/cpp/src/io/json/json_normalization.cu @@ -16,16 +16,27 @@ #include "io/fst/lookup_tables.cuh" +#include +#include #include #include +#include #include #include #include #include -#include - +#include + +#include +#include +#include +#include +#include +#include #include +#include +#include #include #include @@ -214,14 +225,6 @@ std::array, NUM_SYMBOL_GROUPS - 1> const wna_sgs{ * | state is necessary to process escaped double-quote characters. Without this * | state, whitespaces following escaped double quotes inside strings may be removed. * - * NOTE: An important case NOT handled by this FST is that of whitespace following newline - * characters within a string. 
Consider the following example - * Input: {"a":"x\n y"} - * FST output: {"a":"x\ny"} - * Expected output: {"a":"x\n y"} - * Such strings are not part of the JSON standard (characters allowed within quotes should - * have ASCII at least 0x20 i.e. space character and above) but may be encountered while - * reading JSON files */ enum class dfa_states : StateT { TT_OOS = 0U, TT_DQS, TT_DEC, TT_NUM_STATES }; // Aliases for readability of the transition table @@ -254,17 +257,17 @@ struct TransduceToNormalizedWS { // Let the alphabet set be Sigma // --------------------------------------- // ---------- NON-SPECIAL CASES: ---------- - // Output symbol same as input symbol + // Input symbol translates to output symbol // state | read_symbol -> output_symbol - // DQS | Sigma -> Sigma - // OOS | Sigma\{,\t} -> Sigma\{,\t} - // DEC | Sigma -> Sigma + // DQS | Sigma -> + // OOS | Sigma\{,\t} -> + // DEC | Sigma -> // ---------- SPECIAL CASES: -------------- - // Input symbol translates to output symbol - // OOS | {} -> - // OOS | {\t} -> + // Output symbol same as input symbol + // OOS | {} -> {} + // OOS | {\t} -> {\t} - // Case when read symbol is a space or tab but is unquoted + // Case when read symbol is not an unquoted space or tab // This will be the same condition as in `operator()(state_id, match_id, read_symbol)` function // However, since there is no output in this case i.e. the count returned by // operator()(state_id, match_id, read_symbol) is zero, this function is never called. 
@@ -286,8 +289,8 @@ struct TransduceToNormalizedWS { SymbolT const read_symbol) const { // Case when read symbol is a space or tab but is unquoted - if (match_id == static_cast(dfa_symbol_group_id::WHITESPACE_SYMBOLS) && - state_id == static_cast(dfa_states::TT_OOS)) { + if (!(match_id == static_cast(dfa_symbol_group_id::WHITESPACE_SYMBOLS) && + state_id == static_cast(dfa_states::TT_OOS))) { return 0; } return 1; @@ -298,10 +301,11 @@ struct TransduceToNormalizedWS { namespace detail { -void normalize_single_quotes(datasource::owning_buffer>& indata, +void normalize_single_quotes(datasource::owning_buffer& indata, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { + CUDF_FUNC_RANGE(); static constexpr std::int32_t min_out = 0; static constexpr std::int32_t max_out = 2; auto parser = @@ -311,47 +315,141 @@ void normalize_single_quotes(datasource::owning_buffer outbuf(indata.size() * 2, stream, mr); + rmm::device_buffer outbuf(indata.size() * 2, stream, mr); rmm::device_scalar outbuf_size(stream, mr); - parser.Transduce(indata.data(), + parser.Transduce(reinterpret_cast(indata.data()), static_cast(indata.size()), - outbuf.data(), + static_cast(outbuf.data()), thrust::make_discard_iterator(), outbuf_size.data(), normalize_quotes::start_state, stream); outbuf.resize(outbuf_size.value(stream), stream); - datasource::owning_buffer> outdata(std::move(outbuf)); + datasource::owning_buffer outdata(std::move(outbuf)); std::swap(indata, outdata); } -void normalize_whitespace(datasource::owning_buffer>& indata, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + normalize_whitespace(device_span d_input, + device_span col_offsets, + device_span col_lengths, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { - static constexpr std::int32_t min_out = 0; - static constexpr std::int32_t max_out = 2; + /* + * Algorithm: + 1. 
Create a single buffer by concatenating the rows of the string column. Create segment offsets + and lengths array for concatenated buffer + 2. Run a whitespace normalization FST that performs NOP for non-whitespace and quoted + whitespace characters, and outputs indices of unquoted whitespace characters + 3. Update segment lengths based on the number of output indices between segment offsets + 4. Remove characters at output indices from concatenated buffer. + 5. Return updated buffer, segment lengths and updated segment offsets + */ + auto inbuf_lengths = cudf::detail::make_device_uvector_async( + col_lengths, stream, cudf::get_current_device_resource_ref()); + size_t inbuf_lengths_size = inbuf_lengths.size(); + size_type inbuf_size = + thrust::reduce(rmm::exec_policy_nosync(stream), inbuf_lengths.begin(), inbuf_lengths.end()); + rmm::device_uvector inbuf(inbuf_size, stream); + rmm::device_uvector inbuf_offsets(inbuf_lengths_size, stream); + thrust::exclusive_scan(rmm::exec_policy_nosync(stream), + inbuf_lengths.begin(), + inbuf_lengths.end(), + inbuf_offsets.begin(), + 0); + + auto input_it = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type( + [d_input = d_input.begin(), col_offsets = col_offsets.begin()] __device__( + size_t i) -> char const* { return &d_input[col_offsets[i]]; })); + auto output_it = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + cuda::proclaim_return_type( + [inbuf = inbuf.begin(), inbuf_offsets = inbuf_offsets.cbegin()] __device__( + size_t i) -> char* { return &inbuf[inbuf_offsets[i]]; })); + + { + // cub device batched copy + size_t temp_storage_bytes = 0; + cub::DeviceCopy::Batched(nullptr, + temp_storage_bytes, + input_it, + output_it, + inbuf_lengths.begin(), + inbuf_lengths_size, + stream.value()); + rmm::device_buffer temp_storage(temp_storage_bytes, stream); + cub::DeviceCopy::Batched(temp_storage.data(), + temp_storage_bytes, + input_it, + output_it, + 
inbuf_lengths.begin(), + inbuf_lengths_size, + stream.value()); + } + + // whitespace normalization : get the indices of the unquoted whitespace characters auto parser = fst::detail::make_fst(fst::detail::make_symbol_group_lut(normalize_whitespace::wna_sgs), fst::detail::make_transition_table(normalize_whitespace::wna_state_tt), - fst::detail::make_translation_functor( + fst::detail::make_translation_functor( normalize_whitespace::TransduceToNormalizedWS{}), stream); - rmm::device_uvector outbuf(indata.size(), stream, mr); - rmm::device_scalar outbuf_size(stream, mr); - parser.Transduce(indata.data(), - static_cast(indata.size()), - outbuf.data(), + rmm::device_uvector outbuf_indices(inbuf.size(), stream, mr); + rmm::device_scalar outbuf_indices_size(stream, mr); + parser.Transduce(inbuf.data(), + static_cast(inbuf.size()), thrust::make_discard_iterator(), - outbuf_size.data(), + outbuf_indices.data(), + outbuf_indices_size.data(), normalize_whitespace::start_state, stream); - outbuf.resize(outbuf_size.value(stream), stream); - datasource::owning_buffer> outdata(std::move(outbuf)); - std::swap(indata, outdata); + auto const num_deletions = outbuf_indices_size.value(stream); + outbuf_indices.resize(num_deletions, stream); + + // now these indices need to be removed + // TODO: is there a better way to do this? 
+ thrust::for_each( + rmm::exec_policy_nosync(stream), + outbuf_indices.begin(), + outbuf_indices.end(), + [inbuf_offsets_begin = inbuf_offsets.begin(), + inbuf_offsets_end = inbuf_offsets.end(), + inbuf_lengths = inbuf_lengths.begin()] __device__(size_type idx) { + auto it = thrust::upper_bound(thrust::seq, inbuf_offsets_begin, inbuf_offsets_end, idx); + auto pos = thrust::distance(inbuf_offsets_begin, it) - 1; + cuda::atomic_ref ref{*(inbuf_lengths + pos)}; + ref.fetch_add(-1, cuda::std::memory_order_relaxed); + }); + + auto stencil = cudf::detail::make_zeroed_device_uvector_async( + static_cast(inbuf_size), stream, cudf::get_current_device_resource_ref()); + thrust::scatter(rmm::exec_policy_nosync(stream), + thrust::make_constant_iterator(true), + thrust::make_constant_iterator(true) + num_deletions, + outbuf_indices.begin(), + stencil.begin()); + thrust::remove_if(rmm::exec_policy_nosync(stream), + inbuf.begin(), + inbuf.end(), + stencil.begin(), + thrust::identity()); + inbuf.resize(inbuf_size - num_deletions, stream); + + thrust::exclusive_scan(rmm::exec_policy_nosync(stream), + inbuf_lengths.begin(), + inbuf_lengths.end(), + inbuf_offsets.begin(), + 0); + + stream.synchronize(); + return std::tuple{std::move(inbuf), std::move(inbuf_offsets), std::move(inbuf_lengths)}; } } // namespace detail diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index ad807b57766..d949635c1cc 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -14,32 +14,35 @@ * limitations under the License. 
*/ -#include "io/utilities/hostdevice_vector.hpp" +#include "io/utilities/parsing_utils.cuh" +#include "io/utilities/string_parsing.hpp" #include "nested_json.hpp" #include #include -#include #include #include #include #include #include +#include #include +#include #include #include #include #include -#include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -492,6 +495,85 @@ tree_meta_t get_tree_representation(device_span tokens, std::move(node_range_end)}; } +// Return field node ids after unicode decoding of field names and matching them to same field names +std::pair> remapped_field_nodes_after_unicode_decode( + device_span d_input, + tree_meta_t const& d_tree, + device_span keys, + rmm::cuda_stream_view stream) +{ + size_t num_keys = keys.size(); + if (num_keys == 0) { return {num_keys, rmm::device_uvector(num_keys, stream)}; } + rmm::device_uvector offsets(num_keys, stream); + rmm::device_uvector lengths(num_keys, stream); + auto offset_length_it = thrust::make_zip_iterator(offsets.begin(), lengths.begin()); + thrust::transform(rmm::exec_policy_nosync(stream), + keys.begin(), + keys.end(), + offset_length_it, + [node_range_begin = d_tree.node_range_begin.data(), + node_range_end = d_tree.node_range_end.data()] __device__(auto key) { + return thrust::make_tuple(node_range_begin[key], + node_range_end[key] - node_range_begin[key]); + }); + cudf::io::parse_options_view opt{',', '\n', '\0', '.'}; + opt.keepquotes = true; + + auto utf8_decoded_fields = parse_data(d_input.data(), + offset_length_it, + num_keys, + data_type{type_id::STRING}, + rmm::device_buffer{}, + 0, + opt, + stream, + cudf::get_current_device_resource_ref()); + // hash using iter, create a hashmap for 0-num_keys. + // insert and find. 
-> array + // store to static_map with keys as field key[index], and values as key[array[index]] + + auto str_view = strings_column_view{utf8_decoded_fields->view()}; + auto const char_ptr = str_view.chars_begin(stream); + auto const offset_ptr = str_view.offsets().begin(); + + // String hasher + auto const d_hasher = cuda::proclaim_return_type< + typename cudf::hashing::detail::default_hash::result_type>( + [char_ptr, offset_ptr] __device__(auto node_id) { + auto const field_name = cudf::string_view(char_ptr + offset_ptr[node_id], + offset_ptr[node_id + 1] - offset_ptr[node_id]); + return cudf::hashing::detail::default_hash{}(field_name); + }); + auto const d_equal = [char_ptr, offset_ptr] __device__(auto node_id1, auto node_id2) { + auto const field_name1 = cudf::string_view(char_ptr + offset_ptr[node_id1], + offset_ptr[node_id1 + 1] - offset_ptr[node_id1]); + auto const field_name2 = cudf::string_view(char_ptr + offset_ptr[node_id2], + offset_ptr[node_id2 + 1] - offset_ptr[node_id2]); + return field_name1 == field_name2; + }; + + using hasher_type = decltype(d_hasher); + constexpr size_type empty_node_index_sentinel = -1; + auto key_set = cuco::static_set{ + cuco::extent{compute_hash_table_size(num_keys)}, + cuco::empty_key{empty_node_index_sentinel}, + d_equal, + cuco::linear_probing<1, hasher_type>{d_hasher}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; + auto const counting_iter = thrust::make_counting_iterator(0); + rmm::device_uvector found_keys(num_keys, stream); + key_set.insert_and_find_async(counting_iter, + counting_iter + num_keys, + found_keys.begin(), + thrust::make_discard_iterator(), + stream.value()); + // set.size will synchronize the stream before return. + return {key_set.size(stream), std::move(found_keys)}; +} + /** * @brief Generates unique node_type id for each node. * Field nodes with the same name are assigned the same node_type id. 
@@ -500,11 +582,14 @@ tree_meta_t get_tree_representation(device_span tokens, * All inputs and outputs are in node_id order. * @param d_input JSON string in device memory * @param d_tree Tree representation of the JSON + * @param is_enabled_experimental Whether to enable experimental features such as + * utf8 field name support * @param stream CUDA stream used for device memory operations and kernel launches. * @return Vector of node_type ids */ rmm::device_uvector hash_node_type_with_field_name(device_span d_input, tree_meta_t const& d_tree, + bool is_enabled_experimental, rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); @@ -536,7 +621,7 @@ rmm::device_uvector hash_node_type_with_field_name(device_span(0); + auto const counting_iter = thrust::make_counting_iterator(0); auto const is_field_name_node = [node_categories = d_tree.node_categories.data()] __device__(auto node_id) { @@ -545,24 +630,70 @@ rmm::device_uvector hash_node_type_with_field_name(device_span{d_hasher}, - {}, - {}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; - key_set.insert_if_async(iter, - iter + num_nodes, + auto key_set = cuco::static_set{ + cuco::extent{compute_hash_table_size(num_fields, 40)}, // 40% occupancy + cuco::empty_key{empty_node_index_sentinel}, + d_equal, + cuco::linear_probing<1, hasher_type>{d_hasher}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; + key_set.insert_if_async(counting_iter, + counting_iter + num_nodes, thrust::counting_iterator(0), // stencil is_field_name_node, stream.value()); + // experimental feature: utf8 field name support + // parse_data on field names, + // rehash it using another map, + // reassign the reverse map values to new matched node indices. 
+ auto get_utf8_matched_field_nodes = [&]() { + auto make_map = [&stream](auto num_keys) { + using hasher_type3 = cudf::hashing::detail::default_hash; + return cuco::static_map{ + cuco::extent{compute_hash_table_size(num_keys, 100)}, // 100% occupancy + cuco::empty_key{empty_node_index_sentinel}, + cuco::empty_value{empty_node_index_sentinel}, + {}, + cuco::linear_probing<1, hasher_type3>{hasher_type3{}}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; + }; + if (!is_enabled_experimental) { return std::pair{false, make_map(0)}; } + // get all unique field node ids for utf8 decoding + auto num_keys = key_set.size(stream); + rmm::device_uvector keys(num_keys, stream); + key_set.retrieve_all(keys.data(), stream.value()); + + auto [num_unique_fields, found_keys] = + remapped_field_nodes_after_unicode_decode(d_input, d_tree, keys, stream); + + auto is_need_remap = num_unique_fields != num_keys; + if (!is_need_remap) { return std::pair{false, make_map(0)}; } + + // store to static_map with keys as field keys[index], and values as keys[found_keys[index]] + auto reverse_map = make_map(num_keys); + auto matching_keys_iter = thrust::make_permutation_iterator(keys.begin(), found_keys.begin()); + auto pair_iter = + thrust::make_zip_iterator(thrust::make_tuple(keys.begin(), matching_keys_iter)); + reverse_map.insert_async(pair_iter, pair_iter + num_keys, stream); + return std::pair{is_need_remap, std::move(reverse_map)}; + }; + auto [is_need_remap, reverse_map] = get_utf8_matched_field_nodes(); + auto const get_hash_value = - [key_set = key_set.ref(cuco::op::find)] __device__(auto node_id) -> size_type { + [key_set = key_set.ref(cuco::op::find), + is_need_remap = is_need_remap, + rm = reverse_map.ref(cuco::op::find)] __device__(auto node_id) -> size_type { auto const it = key_set.find(node_id); + if (it != key_set.end() and is_need_remap) { + auto const it2 = rm.find(*it); + return (it2 == rm.end()) ? 
size_type{0} : it2->second; + } return (it == key_set.end()) ? size_type{0} : *it; }; @@ -734,14 +865,15 @@ std::pair, rmm::device_uvector> hash_n constexpr size_type empty_node_index_sentinel = -1; using hasher_type = decltype(d_hashed_cache); - auto key_set = cuco::static_set{cuco::extent{compute_hash_table_size(num_nodes)}, - cuco::empty_key{empty_node_index_sentinel}, - d_equal, - cuco::linear_probing<1, hasher_type>{d_hashed_cache}, - {}, - {}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto key_set = cuco::static_set{ + cuco::extent{compute_hash_table_size(num_nodes)}, + cuco::empty_key{empty_node_index_sentinel}, + d_equal, + cuco::linear_probing<1, hasher_type>{d_hashed_cache}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; // insert and convert node ids to unique set ids auto nodes_itr = thrust::make_counting_iterator(0); @@ -770,6 +902,8 @@ std::pair, rmm::device_uvector> hash_n * @param d_tree Tree representation of the JSON * @param is_array_of_arrays Whether the tree is an array of arrays * @param is_enabled_lines Whether the input is a line-delimited JSON + * @param is_enabled_experimental Whether the experimental feature is enabled such as + * utf8 field name support * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return column_id, parent_column_id @@ -779,6 +913,7 @@ std::pair, rmm::device_uvector> gene tree_meta_t const& d_tree, bool is_array_of_arrays, bool is_enabled_lines, + bool is_enabled_experimental, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -792,7 +927,7 @@ std::pair, rmm::device_uvector> gene auto [col_id, unique_keys] = [&]() { // Convert node_category + field_name to node_type. 
rmm::device_uvector node_type = - hash_node_type_with_field_name(d_input, d_tree, stream); + hash_node_type_with_field_name(d_input, d_tree, is_enabled_experimental, stream); // hash entire path from node to root. return hash_node_path(d_tree.node_levels, @@ -947,12 +1082,13 @@ records_orient_tree_traversal(device_span d_input, tree_meta_t const& d_tree, bool is_array_of_arrays, bool is_enabled_lines, + bool is_enabled_experimental, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - auto [new_col_id, new_parent_col_id] = - generate_column_id(d_input, d_tree, is_array_of_arrays, is_enabled_lines, stream, mr); + auto [new_col_id, new_parent_col_id] = generate_column_id( + d_input, d_tree, is_array_of_arrays, is_enabled_lines, is_enabled_experimental, stream, mr); auto row_offsets = compute_row_offsets( std::move(new_parent_col_id), d_tree, is_array_of_arrays, is_enabled_lines, stream, mr); diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 20c143f66c7..3d9a51833e0 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -22,8 +22,7 @@ #include #include #include - -#include +#include #include #include @@ -186,6 +185,55 @@ struct device_json_column { } }; +namespace experimental { +/* + * @brief Sparse graph adjacency matrix stored in Compressed Sparse Row (CSR) format. + */ +struct compressed_sparse_row { + rmm::device_uvector row_idx; + rmm::device_uvector col_idx; +}; + +/* + * @brief Auxiliary column tree properties that are required to construct the device json + * column subtree, but not required for the final cudf column construction. + */ +struct column_tree_properties { + rmm::device_uvector categories; + rmm::device_uvector max_row_offsets; + rmm::device_uvector mapped_ids; +}; + +namespace detail { +/** + * @brief Reduce node tree into column tree by aggregating each property of column. 
+ * + * @param node_tree Node tree representation of JSON string + * @param original_col_ids Column ids of nodes + * @param sorted_col_ids Sorted column ids of nodes + * @param ordered_node_ids Node ids of nodes sorted by column ids + * @param row_offsets Row offsets of nodes + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true + * @param stream CUDA stream used for device memory operations and kernel launches + * @return Tuple of compressed_sparse_row struct storing adjacency information of the column tree, + * and column_tree_properties struct storing properties of each node i.e. column category, max + * number of rows in the column, and column id + */ +CUDF_EXPORT +std::tuple reduce_to_column_tree( + tree_meta_t& node_tree, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT row_array_parent_col_id, + rmm::cuda_stream_view stream); + +} // namespace detail +} // namespace experimental + namespace detail { // TODO: return device_uvector instead of passing pre-allocated memory @@ -226,6 +274,21 @@ std::pair, rmm::device_uvector> pr device_span token_indices, rmm::cuda_stream_view stream); +/** + * @brief Validate the tokens conforming to behavior given in options. + * + * @param d_input The string of input characters + * @param tokens The tokens to be post-processed + * @param token_indices The tokens' corresponding indices that are post-processed + * @param options Parsing options specifying the parsing behaviour + * @param stream The cuda stream to dispatch GPU kernels to + */ +void validate_token_stream(device_span d_input, + device_span tokens, + device_span token_indices, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream); + /** * @brief Parses the given JSON string and generates a tree representation of the given input. 
* @@ -253,6 +316,8 @@ tree_meta_t get_tree_representation(device_span tokens, * index, level, begin index, and end index in the input JSON string * @param is_array_of_arrays Whether the tree is an array of arrays * @param is_enabled_lines Whether the input is a line-delimited JSON + * @param is_enabled_experimental Whether to enable experimental features such as utf-8 field name + * support * @param stream The CUDA stream to which kernels are dispatched * @param mr Optional, resource with which to allocate * @return A tuple of the output column indices and the row offsets within each column for each node @@ -263,6 +328,7 @@ records_orient_tree_traversal(device_span d_input, tree_meta_t const& d_tree, bool is_array_of_arrays, bool is_enabled_lines, + bool is_enabled_experimental, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); @@ -285,21 +351,74 @@ get_array_children_indices(TreeDepthT row_array_children_level, device_span node_levels, device_span parent_node_ids, rmm::cuda_stream_view stream); + /** - * @brief Reduce node tree into column tree by aggregating each property of column. + * @brief Reduces node tree representation to column tree representation. 
* - * @param tree json node tree to reduce (modified in-place, but restored to original state) - * @param col_ids column ids of each node (modified in-place, but restored to original state) - * @param row_offsets row offsets of each node (modified in-place, but restored to original state) - * @param stream The CUDA stream to which kernels are dispatched - * @return A tuple containing the column tree, identifier for each column and the maximum row index - * in each column + * @param tree Node tree representation of JSON string + * @param original_col_ids Column ids of nodes + * @param sorted_col_ids Sorted column ids of nodes + * @param ordered_node_ids Node ids of nodes sorted by column ids + * @param row_offsets Row offsets of nodes + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param row_array_parent_col_id Column id of row array, if is_array_of_arrays is true + * @param stream CUDA stream used for device memory operations and kernel launches + * @return A tuple of column tree representation of JSON string, column ids of columns, and + * max row offsets of columns */ +CUDF_EXPORT std::tuple, rmm::device_uvector> -reduce_to_column_tree(tree_meta_t& tree, - device_span col_ids, - device_span row_offsets, +reduce_to_column_tree(tree_meta_t const& tree, + device_span original_col_ids, + device_span sorted_col_ids, + device_span ordered_node_ids, + device_span row_offsets, + bool is_array_of_arrays, + NodeIndexT const row_array_parent_col_id, rmm::cuda_stream_view stream); +/** + * @brief Constructs `d_json_column` from node tree representation + * Newly constructed columns are insert into `root`'s children. + * `root` must be a list type. 
+ * + * @param input Input JSON string device data + * @param tree Node tree representation of the JSON string + * @param col_ids Column ids of the nodes in the tree + * @param row_offsets Row offsets of the nodes in the tree + * @param root Root node of the `d_json_column` tree + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param options Parsing options specifying the parsing behaviour + * options affecting behaviour are + * is_enabled_lines: Whether the input is a line-delimited JSON + * is_enabled_mixed_types_as_string: Whether to enable reading mixed types as string + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the device memory + * of child_offets and validity members of `d_json_column` + */ +void make_device_json_column(device_span input, + tree_meta_t const& tree, + device_span col_ids, + device_span row_offsets, + device_json_column& root, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +namespace experimental { +/** + * @copydoc cudf::io::json::detail::make_device_json_column + */ +void make_device_json_column(device_span input, + tree_meta_t const& tree, + device_span col_ids, + device_span row_offsets, + device_json_column& root, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); +} // namespace experimental /** * @brief Retrieves the parse_options to be used for type inference and type casting diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 1e484d74679..76816071d8c 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -31,12 +31,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -618,12 +618,14 @@ struct 
PdaSymbolToSymbolGroupId { constexpr auto pda_sgid_lookup_size = static_cast(sizeof(tos_sg_to_pda_sgid) / sizeof(tos_sg_to_pda_sgid[0])); // We map the delimiter character to LINE_BREAK symbol group id, and the newline character - // to OTHER. Note that delimiter cannot be any of opening(closing) brace, bracket, quote, + // to WHITE_SPACE. Note that delimiter cannot be any of opening(closing) brace, bracket, quote, // escape, comma, colon or whitespace characters. + auto constexpr newline = '\n'; + auto constexpr whitespace = ' '; auto const symbol_position = symbol == delimiter - ? static_cast('\n') - : (symbol == '\n' ? static_cast(delimiter) : static_cast(symbol)); + ? static_cast(newline) + : (symbol == newline ? static_cast(whitespace) : static_cast(symbol)); PdaSymbolGroupIdT symbol_gid = tos_sg_to_pda_sgid[min(symbol_position, pda_sgid_lookup_size - 1)]; return stack_idx * static_cast(symbol_group_id::NUM_PDA_INPUT_SGS) + @@ -1517,7 +1519,7 @@ std::pair, rmm::device_uvector> pr fst::detail::make_translation_functor(token_filter::TransduceToken{}), stream); - auto const mr = rmm::mr::get_current_device_resource(); + auto const mr = cudf::get_current_device_resource_ref(); rmm::device_scalar d_num_selected_tokens(stream, mr); rmm::device_uvector filtered_tokens_out{tokens.size(), stream, mr}; rmm::device_uvector filtered_token_indices_out{tokens.size(), stream, mr}; @@ -1660,6 +1662,7 @@ std::pair, rmm::device_uvector> ge if (delimiter_offset == 1) { tokens.set_element(0, token_t::LineEnd, stream); + validate_token_stream(json_in, tokens, tokens_indices, options, stream); auto [filtered_tokens, filtered_tokens_indices] = process_token_stream(tokens, tokens_indices, stream); tokens = std::move(filtered_tokens); @@ -2078,11 +2081,15 @@ cudf::io::parse_options parsing_options(cudf::io::json_reader_options const& opt { auto parse_opts = cudf::io::parse_options{',', '\n', '\"', '.'}; - parse_opts.dayfirst = options.is_enabled_dayfirst(); - parse_opts.keepquotes = 
options.is_enabled_keep_quotes(); - parse_opts.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); - parse_opts.trie_false = cudf::detail::create_serialized_trie({"false"}, stream); - parse_opts.trie_na = cudf::detail::create_serialized_trie({"", "null"}, stream); + parse_opts.dayfirst = options.is_enabled_dayfirst(); + parse_opts.keepquotes = options.is_enabled_keep_quotes(); + parse_opts.normalize_whitespace = options.is_enabled_normalize_whitespace(); + parse_opts.mixed_types_as_string = options.is_enabled_mixed_types_as_string(); + parse_opts.trie_true = cudf::detail::create_serialized_trie({"true"}, stream); + parse_opts.trie_false = cudf::detail::create_serialized_trie({"false"}, stream); + std::vector na_values{"", "null"}; + na_values.insert(na_values.end(), options.get_na_values().begin(), options.get_na_values().end()); + parse_opts.trie_na = cudf::detail::create_serialized_trie(na_values, stream); return parse_opts; } @@ -2125,10 +2132,10 @@ std::pair, std::vector> json_column_to // Move string_offsets and string_lengths to GPU rmm::device_uvector d_string_offsets = cudf::detail::make_device_uvector_async( - json_col.string_offsets, stream, rmm::mr::get_current_device_resource()); + json_col.string_offsets, stream, cudf::get_current_device_resource_ref()); rmm::device_uvector d_string_lengths = cudf::detail::make_device_uvector_async( - json_col.string_lengths, stream, rmm::mr::get_current_device_resource()); + json_col.string_lengths, stream, cudf::get_current_device_resource_ref()); // Prepare iterator that returns (string_offset, string_length)-tuples auto offset_length_it = diff --git a/cpp/src/io/json/process_tokens.cu b/cpp/src/io/json/process_tokens.cu new file mode 100644 index 00000000000..83c7b663980 --- /dev/null +++ b/cpp/src/io/json/process_tokens.cu @@ -0,0 +1,310 @@ + +/* + * Copyright (c) 2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "io/utilities/trie.cuh" +#include "nested_json.hpp" +#include "tabulate_output_iterator.cuh" + +#include +#include +#include + +#include +#include + +#include +#include + +namespace cudf::io::json { +namespace detail { + +struct write_if { + using token_t = cudf::io::json::token_t; + using scan_type = thrust::pair; + PdaTokenT* tokens; + size_t n; + // Index, value + __device__ void operator()(size_type i, scan_type x) + { + if (i == n - 1 or tokens[i + 1] == token_t::LineEnd) { + if (x.first == token_t::ErrorBegin and tokens[i] != token_t::ErrorBegin) { + tokens[i] = token_t::ErrorBegin; + } + } + } +}; + +enum class number_state { + START = 0, + SAW_NEG, // not a complete state + LEADING_ZERO, + WHOLE, + SAW_RADIX, // not a complete state + FRACTION, + START_EXPONENT, // not a complete state + AFTER_SIGN_EXPONENT, // not a complete state + EXPONENT +}; + +enum class string_state { + NORMAL = 0, + ESCAPED, // not a complete state + ESCAPED_U // not a complete state +}; + +__device__ inline bool substr_eq(const char* data, + SymbolOffsetT const start, + SymbolOffsetT const end, + SymbolOffsetT const expected_len, + const char* expected) +{ + if (end - start != expected_len) { return false; } + for (auto idx = 0; idx < expected_len; idx++) { + if (data[start + idx] != expected[idx]) { return false; } + } + return true; +} + +void validate_token_stream(device_span d_input, + device_span tokens, 
+ device_span token_indices, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + if (!options.is_strict_validation()) { return; } + using token_t = cudf::io::json::token_t; + cudf::detail::optional_trie trie_na = + cudf::detail::create_serialized_trie(options.get_na_values(), stream); + auto trie_na_view = cudf::detail::make_trie_view(trie_na); + auto validate_values = cuda::proclaim_return_type( + [data = d_input.data(), + trie_na = trie_na_view, + allow_numeric_leading_zeros = options.is_allowed_numeric_leading_zeros(), + allow_nonnumeric = + options.is_allowed_nonnumeric_numbers()] __device__(SymbolOffsetT start, + SymbolOffsetT end) -> bool { + // This validates an unquoted value. A value must match https://www.json.org/json-en.html + // but the leading and training whitespace should already have been removed, and is not + // a string + auto c = data[start]; + auto is_null_literal = serialized_trie_contains(trie_na, {data + start, end - start}); + if (is_null_literal) { + return true; + } else if ('n' == c) { + return substr_eq(data, start, end, 4, "null"); + } else if ('t' == c) { + return substr_eq(data, start, end, 4, "true"); + } else if ('f' == c) { + return substr_eq(data, start, end, 5, "false"); + } else if (allow_nonnumeric && c == 'N') { + return substr_eq(data, start, end, 3, "NaN"); + } else if (allow_nonnumeric && c == 'I') { + return substr_eq(data, start, end, 8, "Infinity"); + } else if (allow_nonnumeric && c == '+') { + return substr_eq(data, start, end, 4, "+INF") || + substr_eq(data, start, end, 9, "+Infinity"); + } else if ('-' == c || c <= '9' && 'c' >= '0') { + // number + auto num_state = number_state::START; + for (auto at = start; at < end; at++) { + c = data[at]; + switch (num_state) { + case number_state::START: + if ('-' == c) { + num_state = number_state::SAW_NEG; + } else if ('0' == c) { + num_state = number_state::LEADING_ZERO; + } else if (c >= '1' && c <= '9') { + num_state = 
number_state::WHOLE; + } else { + return false; + } + break; + case number_state::SAW_NEG: + if ('0' == c) { + num_state = number_state::LEADING_ZERO; + } else if (c >= '1' && c <= '9') { + num_state = number_state::WHOLE; + } else if (allow_nonnumeric && 'I' == c) { + return substr_eq(data, start, end, 4, "-INF") || + substr_eq(data, start, end, 9, "-Infinity"); + } else { + return false; + } + break; + case number_state::LEADING_ZERO: + if (allow_numeric_leading_zeros && c >= '0' && c <= '9') { + num_state = number_state::WHOLE; + } else if ('.' == c) { + num_state = number_state::SAW_RADIX; + } else if ('e' == c || 'E' == c) { + num_state = number_state::START_EXPONENT; + } else { + return false; + } + break; + case number_state::WHOLE: + if (c >= '0' && c <= '9') { + num_state = number_state::WHOLE; + } else if ('.' == c) { + num_state = number_state::SAW_RADIX; + } else if ('e' == c || 'E' == c) { + num_state = number_state::START_EXPONENT; + } else { + return false; + } + break; + case number_state::SAW_RADIX: + if (c >= '0' && c <= '9') { + num_state = number_state::FRACTION; + } else if ('e' == c || 'E' == c) { + num_state = number_state::START_EXPONENT; + } else { + return false; + } + break; + case number_state::FRACTION: + if (c >= '0' && c <= '9') { + num_state = number_state::FRACTION; + } else if ('e' == c || 'E' == c) { + num_state = number_state::START_EXPONENT; + } else { + return false; + } + break; + case number_state::START_EXPONENT: + if ('+' == c || '-' == c) { + num_state = number_state::AFTER_SIGN_EXPONENT; + } else if (c >= '0' && c <= '9') { + num_state = number_state::EXPONENT; + } else { + return false; + } + break; + case number_state::AFTER_SIGN_EXPONENT: + if (c >= '0' && c <= '9') { + num_state = number_state::EXPONENT; + } else { + return false; + } + break; + case number_state::EXPONENT: + if (c >= '0' && c <= '9') { + num_state = number_state::EXPONENT; + } else { + return false; + } + break; + } + } + return num_state != 
number_state::AFTER_SIGN_EXPONENT && + num_state != number_state::START_EXPONENT && num_state != number_state::SAW_NEG && + num_state != number_state::SAW_RADIX; + } else { + return false; + } + }); + + auto validate_strings = cuda::proclaim_return_type( + [data = d_input.data(), + allow_unquoted_control_chars = + options.is_allowed_unquoted_control_chars()] __device__(SymbolOffsetT start, + SymbolOffsetT end) -> bool { + // This validates a quoted string. A string must match https://www.json.org/json-en.html + // but we already know that it has a starting and ending " and all white space has been + // stripped out. Also the base CUDF validation makes sure escaped chars are correct + // so we only need to worry about unquoted control chars + + auto state = string_state::NORMAL; + auto u_count = 0; + for (SymbolOffsetT idx = start + 1; idx < end; idx++) { + auto c = data[idx]; + if (!allow_unquoted_control_chars && static_cast(c) >= 0 && static_cast(c) < 32) { + return false; + } + + switch (state) { + case string_state::NORMAL: + if (c == '\\') { state = string_state::ESCAPED; } + break; + case string_state::ESCAPED: + // in Spark you can allow any char to be escaped, but CUDF + // validates it in some cases so we need to also validate it. 
+ if (c == 'u') { + state = string_state::ESCAPED_U; + u_count = 0; + } else if (c == '"' || c == '\\' || c == '/' || c == 'b' || c == 'f' || c == 'n' || + c == 'r' || c == 't') { + state = string_state::NORMAL; + } else { + return false; + } + break; + case string_state::ESCAPED_U: + if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { + u_count++; + if (u_count == 4) { + state = string_state::NORMAL; + u_count = 0; + } + } else { + return false; + } + break; + } + } + return string_state::NORMAL == state; + }); + + auto num_tokens = tokens.size(); + auto count_it = thrust::make_counting_iterator(0); + auto predicate = [tokens = tokens.begin(), + token_indices = token_indices.begin(), + validate_values, + validate_strings] __device__(auto i) -> bool { + if (tokens[i] == token_t::ValueEnd) { + return !validate_values(token_indices[i - 1], token_indices[i]); + } else if (tokens[i] == token_t::FieldNameEnd || tokens[i] == token_t::StringEnd) { + return !validate_strings(token_indices[i - 1], token_indices[i]); + } + return false; + }; + + using scan_type = write_if::scan_type; + auto conditional_write = write_if{tokens.begin(), num_tokens}; + auto conditional_output_it = cudf::detail::make_tabulate_output_iterator(conditional_write); + auto transform_op = cuda::proclaim_return_type( + [predicate, tokens = tokens.begin()] __device__(auto i) -> scan_type { + if (predicate(i)) return {token_t::ErrorBegin, tokens[i] == token_t::LineEnd}; + return {static_cast(tokens[i]), tokens[i] == token_t::LineEnd}; + }); + auto binary_op = cuda::proclaim_return_type( + [] __device__(scan_type prev, scan_type curr) -> scan_type { + auto op_result = (prev.first == token_t::ErrorBegin ? prev.first : curr.first); + return scan_type((curr.second ? 
curr.first : op_result), prev.second | curr.second); + }); + + thrust::transform_inclusive_scan(rmm::exec_policy(stream), + count_it, + count_it + num_tokens, + conditional_output_it, + transform_op, + binary_op); // in-place scan +} +} // namespace detail +} // namespace cudf::io::json diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu index 590f70864b1..99a5b17bce8 100644 --- a/cpp/src/io/json/read_json.cu +++ b/cpp/src/io/json/read_json.cu @@ -25,12 +25,13 @@ #include #include #include +#include #include #include #include -#include +#include #include #include @@ -38,11 +39,14 @@ namespace cudf::io::json::detail { -size_t sources_size(host_span> const sources, - size_t range_offset, - size_t range_size) +namespace { + +// Return total size of sources enclosing the passed range +std::size_t sources_size(host_span> const sources, + std::size_t range_offset, + std::size_t range_size) { - return std::accumulate(sources.begin(), sources.end(), 0ul, [=](size_t sum, auto& source) { + return std::accumulate(sources.begin(), sources.end(), 0ul, [=](std::size_t sum, auto& source) { auto const size = source->size(); // TODO take care of 0, 0, or *, 0 case. return sum + @@ -50,109 +54,55 @@ size_t sources_size(host_span> const sources, }); } +// Return estimated size of subchunk using a heuristic involving the byte range size and the minimum +// subchunk size +std::size_t estimate_size_per_subchunk(std::size_t chunk_size) +{ + auto geometric_mean = [](double a, double b) { return std::sqrt(a * b); }; + // NOTE: heuristic for choosing subchunk size: geometric mean of minimum subchunk size (set to + // 10kb) and the byte range size + return geometric_mean(std::ceil(static_cast(chunk_size) / num_subchunks), + min_subchunk_size); +} + /** - * @brief Read from array of data sources into RMM buffer. 
The size of the returned device span - can be larger than the number of bytes requested from the list of sources when - the range to be read spans across multiple sources. This is due to the delimiter - characters inserted after the end of each accessed source. + * @brief Return the upper bound on the batch size for the JSON reader. * - * @param buffer Device span buffer to which data is read - * @param sources Array of data sources - * @param compression Compression format of source - * @param range_offset Number of bytes to skip from source start - * @param range_size Number of bytes to read from source - * @param stream CUDA stream used for device memory operations and kernel launches - * @returns A subspan of the input device span containing data read + * The datasources passed to the JSON reader are split into batches demarcated by byte range + * offsets and read iteratively. The batch size is capped at INT_MAX bytes, which is the + * default value returned by the function. This value can be overridden at runtime using the + * environment variable LIBCUDF_JSON_BATCH_SIZE + * + * @return size in bytes */ -device_span ingest_raw_input(device_span buffer, - host_span> sources, - compression_type compression, - size_t range_offset, - size_t range_size, - rmm::cuda_stream_view stream) +std::size_t get_batch_size_upper_bound() { - CUDF_FUNC_RANGE(); - // We append a line delimiter between two files to make sure the last line of file i and the first - // line of file i+1 don't end up on the same JSON line, if file i does not already end with a line - // delimiter. 
- auto constexpr num_delimiter_chars = 1; - - if (compression == compression_type::NONE) { - auto delimiter_map = cudf::detail::make_empty_host_vector(sources.size(), stream); - std::vector prefsum_source_sizes(sources.size()); - std::vector> h_buffers; - size_t bytes_read = 0; - std::transform_inclusive_scan(sources.begin(), - sources.end(), - prefsum_source_sizes.begin(), - std::plus{}, - [](std::unique_ptr const& s) { return s->size(); }); - auto upper = - std::upper_bound(prefsum_source_sizes.begin(), prefsum_source_sizes.end(), range_offset); - size_t start_source = std::distance(prefsum_source_sizes.begin(), upper); - - auto const total_bytes_to_read = - std::min(range_size, prefsum_source_sizes.back() - range_offset); - range_offset -= start_source ? prefsum_source_sizes[start_source - 1] : 0; - for (size_t i = start_source; i < sources.size() && bytes_read < total_bytes_to_read; i++) { - if (sources[i]->is_empty()) continue; - auto data_size = - std::min(sources[i]->size() - range_offset, total_bytes_to_read - bytes_read); - auto destination = reinterpret_cast(buffer.data()) + bytes_read + - (num_delimiter_chars * delimiter_map.size()); - if (sources[i]->is_device_read_preferred(data_size)) { - bytes_read += sources[i]->device_read(range_offset, data_size, destination, stream); - } else { - h_buffers.emplace_back(sources[i]->host_read(range_offset, data_size)); - auto const& h_buffer = h_buffers.back(); - CUDF_CUDA_TRY(cudaMemcpyAsync( - destination, h_buffer->data(), h_buffer->size(), cudaMemcpyHostToDevice, stream.value())); - bytes_read += h_buffer->size(); - } - range_offset = 0; - delimiter_map.push_back(bytes_read + (num_delimiter_chars * delimiter_map.size())); - } - // Removing delimiter inserted after last non-empty source is read - if (!delimiter_map.empty()) { delimiter_map.pop_back(); } - - // If this is a multi-file source, we scatter the JSON line delimiters between files - if (sources.size() > 1) { - static_assert(num_delimiter_chars == 1, - 
"Currently only single-character delimiters are supported"); - auto const delimiter_source = thrust::make_constant_iterator('\n'); - auto const d_delimiter_map = cudf::detail::make_device_uvector_async( - delimiter_map, stream, rmm::mr::get_current_device_resource()); - thrust::scatter(rmm::exec_policy_nosync(stream), - delimiter_source, - delimiter_source + d_delimiter_map.size(), - d_delimiter_map.data(), - buffer.data()); - } - stream.synchronize(); - return buffer.first(bytes_read + (delimiter_map.size() * num_delimiter_chars)); - } - // TODO: allow byte range reading from multiple compressed files. - auto remaining_bytes_to_read = std::min(range_size, sources[0]->size() - range_offset); - auto hbuffer = std::vector(remaining_bytes_to_read); - // Single read because only a single compressed source is supported - // Reading to host because decompression of a single block is much faster on the CPU - sources[0]->host_read(range_offset, remaining_bytes_to_read, hbuffer.data()); - auto uncomp_data = decompress(compression, hbuffer); - CUDF_CUDA_TRY(cudaMemcpyAsync(buffer.data(), - reinterpret_cast(uncomp_data.data()), - uncomp_data.size() * sizeof(char), - cudaMemcpyHostToDevice, - stream.value())); - stream.synchronize(); - return buffer.first(uncomp_data.size()); + auto const batch_size_str = std::getenv("LIBCUDF_JSON_BATCH_SIZE"); + int64_t const batch_size = batch_size_str != nullptr ? std::atol(batch_size_str) : 0L; + auto const batch_limit = static_cast(std::numeric_limits::max()); + auto const batch_size_upper_bound = static_cast( + (batch_size > 0 && batch_size < batch_limit) ? 
batch_size : batch_limit); + return batch_size_upper_bound; } -size_t estimate_size_per_subchunk(size_t chunk_size) +/** + * @brief Extract the first delimiter character position in the string + * + * @param d_data Device span in which to search for delimiter character + * @param delimiter Delimiter character to search for + * @param stream CUDA stream used for device memory operations and kernel launches + * + * @return Position of first delimiter character in device array + */ +size_type find_first_delimiter(device_span d_data, + char const delimiter, + rmm::cuda_stream_view stream) { - auto geometric_mean = [](double a, double b) { return std::sqrt(a * b); }; - // NOTE: heuristic for choosing subchunk size: geometric mean of minimum subchunk size (set to - // 10kb) and the byte range size - return geometric_mean(std::ceil((double)chunk_size / num_subchunks), min_subchunk_size); + auto const first_delimiter_position = + thrust::find(rmm::exec_policy(stream), d_data.begin(), d_data.end(), delimiter); + return first_delimiter_position != d_data.end() + ? 
static_cast(thrust::distance(d_data.begin(), first_delimiter_position)) + : -1; } /** @@ -168,19 +118,19 @@ size_t estimate_size_per_subchunk(size_t chunk_size) * @param stream CUDA stream used for device memory operations and kernel launches * @returns Data source owning buffer enclosing the bytes read */ -datasource::owning_buffer> get_record_range_raw_input( +datasource::owning_buffer get_record_range_raw_input( host_span> sources, json_reader_options const& reader_opts, rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - size_t const total_source_size = sources_size(sources, 0, 0); + std::size_t const total_source_size = sources_size(sources, 0, 0); auto constexpr num_delimiter_chars = 1; auto const num_extra_delimiters = num_delimiter_chars * (sources.size() - 1); compression_type const reader_compression = reader_opts.get_compression(); - size_t const chunk_offset = reader_opts.get_byte_range_offset(); - size_t chunk_size = reader_opts.get_byte_range_size(); + std::size_t const chunk_offset = reader_opts.get_byte_range_offset(); + std::size_t chunk_size = reader_opts.get_byte_range_size(); CUDF_EXPECTS(total_source_size ? chunk_offset < total_source_size : !chunk_offset, "Invalid offsetting", @@ -188,20 +138,20 @@ datasource::owning_buffer> get_record_range_raw_input( auto should_load_all_sources = !chunk_size || chunk_size >= total_source_size - chunk_offset; chunk_size = should_load_all_sources ? total_source_size - chunk_offset : chunk_size; - int const num_subchunks_prealloced = should_load_all_sources ? 0 : max_subchunks_prealloced; - size_t const size_per_subchunk = estimate_size_per_subchunk(chunk_size); + int num_subchunks_prealloced = should_load_all_sources ? 0 : max_subchunks_prealloced; + std::size_t const size_per_subchunk = estimate_size_per_subchunk(chunk_size); // The allocation for single source compressed input is estimated by assuming a ~4:1 // compression ratio. 
For uncompressed inputs, we can getter a better estimate using the idea // of subchunks. auto constexpr header_size = 4096; - size_t const buffer_size = + std::size_t buffer_size = reader_compression != compression_type::NONE ? total_source_size * estimated_compression_ratio + header_size : std::min(total_source_size, chunk_size + num_subchunks_prealloced * size_per_subchunk) + num_extra_delimiters; - rmm::device_uvector buffer(buffer_size, stream); - device_span bufspan(buffer); + rmm::device_buffer buffer(buffer_size, stream); + device_span bufspan(reinterpret_cast(buffer.data()), buffer.size()); // Offset within buffer indicating first read position std::int64_t buffer_offset = 0; @@ -213,55 +163,73 @@ datasource::owning_buffer> get_record_range_raw_input( chunk_offset == 0 ? 0 : find_first_delimiter(readbufspan, '\n', stream); if (first_delim_pos == -1) { // return empty owning datasource buffer - auto empty_buf = rmm::device_uvector(0, stream); - return datasource::owning_buffer>(std::move(empty_buf)); + auto empty_buf = rmm::device_buffer(0, stream); + return datasource::owning_buffer(std::move(empty_buf)); } else if (!should_load_all_sources) { // Find next delimiter - std::int64_t next_delim_pos = -1; - size_t next_subchunk_start = chunk_offset + chunk_size; - while (next_subchunk_start < total_source_size && next_delim_pos < buffer_offset) { - buffer_offset += readbufspan.size(); - readbufspan = ingest_raw_input(bufspan.last(buffer_size - buffer_offset), - sources, - reader_compression, - next_subchunk_start, - size_per_subchunk, - stream); - next_delim_pos = find_first_delimiter(readbufspan, '\n', stream) + buffer_offset; - if (next_delim_pos < buffer_offset) { next_subchunk_start += size_per_subchunk; } + std::int64_t next_delim_pos = -1; + std::size_t next_subchunk_start = chunk_offset + chunk_size; + while (next_delim_pos < buffer_offset) { + for (int subchunk = 0; + subchunk < num_subchunks_prealloced && next_delim_pos < buffer_offset && + 
next_subchunk_start < total_source_size; + subchunk++) { + buffer_offset += readbufspan.size(); + readbufspan = ingest_raw_input(bufspan.last(buffer_size - buffer_offset), + sources, + reader_compression, + next_subchunk_start, + size_per_subchunk, + stream); + next_delim_pos = find_first_delimiter(readbufspan, '\n', stream) + buffer_offset; + next_subchunk_start += size_per_subchunk; + } + if (next_delim_pos < buffer_offset) { + if (next_subchunk_start >= total_source_size) { + // If we have reached the end of source list but the source does not terminate with a + // newline character + next_delim_pos = buffer_offset + readbufspan.size(); + } else { + // Our buffer_size estimate is insufficient to read until the end of the line! We need to + // allocate more memory and try again! + num_subchunks_prealloced *= 2; + buffer_size = reader_compression != compression_type::NONE + ? 2 * buffer_size + : std::min(total_source_size, + buffer_size + num_subchunks_prealloced * size_per_subchunk) + + num_extra_delimiters; + buffer.resize(buffer_size, stream); + bufspan = device_span(reinterpret_cast(buffer.data()), buffer.size()); + } + } } - if (next_delim_pos < buffer_offset) next_delim_pos = buffer_offset + readbufspan.size(); - return datasource::owning_buffer>( + return datasource::owning_buffer( std::move(buffer), reinterpret_cast(buffer.data()) + first_delim_pos + shift_for_nonzero_offset, next_delim_pos - first_delim_pos - shift_for_nonzero_offset); } - return datasource::owning_buffer>( + return datasource::owning_buffer( std::move(buffer), reinterpret_cast(buffer.data()) + first_delim_pos + shift_for_nonzero_offset, readbufspan.size() - first_delim_pos - shift_for_nonzero_offset); } +// Helper function to read the current batch using byte range offsets and size +// passed table_with_metadata read_batch(host_span> sources, json_reader_options const& reader_opts, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - 
datasource::owning_buffer> bufview = + datasource::owning_buffer bufview = get_record_range_raw_input(sources, reader_opts, stream); // If input JSON buffer has single quotes and option to normalize single quotes is enabled, // invoke pre-processing FST if (reader_opts.is_enabled_normalize_single_quotes()) { - normalize_single_quotes(bufview, stream, rmm::mr::get_current_device_resource()); - } - - // If input JSON buffer has unquoted spaces and tabs and option to normalize whitespaces is - // enabled, invoke pre-processing FST - if (reader_opts.is_enabled_normalize_whitespace()) { - normalize_whitespace(bufview, stream, rmm::mr::get_current_device_resource()); + normalize_single_quotes(bufview, stream, cudf::get_current_device_resource_ref()); } auto buffer = @@ -270,6 +238,92 @@ table_with_metadata read_batch(host_span> sources, return device_parse_nested_json(buffer, reader_opts, stream, mr); } +} // anonymous namespace + +device_span ingest_raw_input(device_span buffer, + host_span> sources, + compression_type compression, + std::size_t range_offset, + std::size_t range_size, + rmm::cuda_stream_view stream) +{ + CUDF_FUNC_RANGE(); + // We append a line delimiter between two files to make sure the last line of file i and the first + // line of file i+1 don't end up on the same JSON line, if file i does not already end with a line + // delimiter. 
+ auto constexpr num_delimiter_chars = 1; + + if (compression == compression_type::NONE) { + auto delimiter_map = cudf::detail::make_empty_host_vector(sources.size(), stream); + std::vector prefsum_source_sizes(sources.size()); + std::vector> h_buffers; + std::size_t bytes_read = 0; + std::transform_inclusive_scan(sources.begin(), + sources.end(), + prefsum_source_sizes.begin(), + std::plus{}, + [](std::unique_ptr const& s) { return s->size(); }); + auto upper = + std::upper_bound(prefsum_source_sizes.begin(), prefsum_source_sizes.end(), range_offset); + std::size_t start_source = std::distance(prefsum_source_sizes.begin(), upper); + + auto const total_bytes_to_read = + std::min(range_size, prefsum_source_sizes.back() - range_offset); + range_offset -= start_source ? prefsum_source_sizes[start_source - 1] : 0; + for (std::size_t i = start_source; i < sources.size() && bytes_read < total_bytes_to_read; + i++) { + if (sources[i]->is_empty()) continue; + auto data_size = + std::min(sources[i]->size() - range_offset, total_bytes_to_read - bytes_read); + auto destination = reinterpret_cast(buffer.data()) + bytes_read + + (num_delimiter_chars * delimiter_map.size()); + if (sources[i]->is_device_read_preferred(data_size)) { + bytes_read += sources[i]->device_read(range_offset, data_size, destination, stream); + } else { + h_buffers.emplace_back(sources[i]->host_read(range_offset, data_size)); + auto const& h_buffer = h_buffers.back(); + CUDF_CUDA_TRY(cudaMemcpyAsync( + destination, h_buffer->data(), h_buffer->size(), cudaMemcpyHostToDevice, stream.value())); + bytes_read += h_buffer->size(); + } + range_offset = 0; + delimiter_map.push_back(bytes_read + (num_delimiter_chars * delimiter_map.size())); + } + // Removing delimiter inserted after last non-empty source is read + if (!delimiter_map.empty()) { delimiter_map.pop_back(); } + + // If this is a multi-file source, we scatter the JSON line delimiters between files + if (sources.size() > 1) { + 
static_assert(num_delimiter_chars == 1, + "Currently only single-character delimiters are supported"); + auto const delimiter_source = thrust::make_constant_iterator('\n'); + auto const d_delimiter_map = cudf::detail::make_device_uvector_async( + delimiter_map, stream, cudf::get_current_device_resource_ref()); + thrust::scatter(rmm::exec_policy_nosync(stream), + delimiter_source, + delimiter_source + d_delimiter_map.size(), + d_delimiter_map.data(), + buffer.data()); + } + stream.synchronize(); + return buffer.first(bytes_read + (delimiter_map.size() * num_delimiter_chars)); + } + // TODO: allow byte range reading from multiple compressed files. + auto remaining_bytes_to_read = std::min(range_size, sources[0]->size() - range_offset); + auto hbuffer = std::vector(remaining_bytes_to_read); + // Single read because only a single compressed source is supported + // Reading to host because decompression of a single block is much faster on the CPU + sources[0]->host_read(range_offset, remaining_bytes_to_read, hbuffer.data()); + auto uncomp_data = decompress(compression, hbuffer); + CUDF_CUDA_TRY(cudaMemcpyAsync(buffer.data(), + reinterpret_cast(uncomp_data.data()), + uncomp_data.size() * sizeof(char), + cudaMemcpyHostToDevice, + stream.value())); + stream.synchronize(); + return buffer.first(uncomp_data.size()); +} + table_with_metadata read_json(host_span> sources, json_reader_options const& reader_opts, rmm::cuda_stream_view stream, @@ -296,15 +350,16 @@ table_with_metadata read_json(host_span> sources, * Note that the batched reader does not work for compressed inputs or for regular * JSON inputs. */ - size_t const total_source_size = sources_size(sources, 0, 0); - size_t chunk_offset = reader_opts.get_byte_range_offset(); - size_t chunk_size = reader_opts.get_byte_range_size(); - chunk_size = !chunk_size ? 
total_source_size - chunk_offset - : std::min(chunk_size, total_source_size - chunk_offset); + std::size_t const total_source_size = sources_size(sources, 0, 0); + std::size_t chunk_offset = reader_opts.get_byte_range_offset(); + std::size_t chunk_size = reader_opts.get_byte_range_size(); + chunk_size = !chunk_size ? total_source_size - chunk_offset + : std::min(chunk_size, total_source_size - chunk_offset); - size_t const size_per_subchunk = estimate_size_per_subchunk(chunk_size); - size_t const batch_size_ub = - std::numeric_limits::max() - (max_subchunks_prealloced * size_per_subchunk); + std::size_t const size_per_subchunk = estimate_size_per_subchunk(chunk_size); + std::size_t const batch_size_upper_bound = get_batch_size_upper_bound(); + std::size_t const batch_size = + batch_size_upper_bound - (max_subchunks_prealloced * size_per_subchunk); /* * Identify the position (zero-indexed) of starting source file from which to begin @@ -314,10 +369,10 @@ table_with_metadata read_json(host_span> sources, */ // Prefix sum of source file sizes - size_t pref_source_size = 0; + std::size_t pref_source_size = 0; // Starting source file from which to being batching evaluated using byte range offset - size_t const start_source = [chunk_offset, &sources, &pref_source_size]() { - for (size_t src_idx = 0; src_idx < sources.size(); ++src_idx) { + std::size_t const start_source = [chunk_offset, &sources, &pref_source_size]() { + for (std::size_t src_idx = 0; src_idx < sources.size(); ++src_idx) { if (pref_source_size + sources[src_idx]->size() > chunk_offset) { return src_idx; } pref_source_size += sources[src_idx]->size(); } @@ -329,16 +384,16 @@ table_with_metadata read_json(host_span> sources, * batch begins, and `end_bytes_size` gives the terminal bytes position after which reading * stops. 
*/ - size_t pref_bytes_size = chunk_offset; - size_t end_bytes_size = chunk_offset + chunk_size; - std::vector batch_offsets{pref_bytes_size}; - for (size_t i = start_source; i < sources.size() && pref_bytes_size < end_bytes_size;) { + std::size_t pref_bytes_size = chunk_offset; + std::size_t end_bytes_size = chunk_offset + chunk_size; + std::vector batch_offsets{pref_bytes_size}; + for (std::size_t i = start_source; i < sources.size() && pref_bytes_size < end_bytes_size;) { pref_source_size += sources[i]->size(); // If the current source file can subsume multiple batches, we split the file until the // boundary of the last batch exceeds the end of the file (indexed by `pref_source_size`) while (pref_bytes_size < end_bytes_size && - pref_source_size >= std::min(pref_bytes_size + batch_size_ub, end_bytes_size)) { - auto next_batch_size = std::min(batch_size_ub, end_bytes_size - pref_bytes_size); + pref_source_size >= std::min(pref_bytes_size + batch_size, end_bytes_size)) { + auto next_batch_size = std::min(batch_size, end_bytes_size - pref_bytes_size); batch_offsets.push_back(batch_offsets.back() + next_batch_size); pref_bytes_size += next_batch_size; } @@ -356,11 +411,11 @@ table_with_metadata read_json(host_span> sources, // Dispatch individual batches to read_batch and push the resulting table into // partial_tables array. Note that the reader options need to be updated for each // batch to adjust byte range offset and byte range size. 
- for (size_t i = 0; i < batch_offsets.size() - 1; i++) { + for (std::size_t i = 0; i < batch_offsets.size() - 1; i++) { batched_reader_opts.set_byte_range_offset(batch_offsets[i]); batched_reader_opts.set_byte_range_size(batch_offsets[i + 1] - batch_offsets[i]); partial_tables.emplace_back( - read_batch(sources, batched_reader_opts, stream, rmm::mr::get_current_device_resource())); + read_batch(sources, batched_reader_opts, stream, cudf::get_current_device_resource_ref())); } auto expects_schema_equality = diff --git a/cpp/src/io/json/read_json.hpp b/cpp/src/io/json/read_json.hpp index 32de4ebabfa..982190eecb5 100644 --- a/cpp/src/io/json/read_json.hpp +++ b/cpp/src/io/json/read_json.hpp @@ -20,11 +20,11 @@ #include #include #include +#include #include #include #include -#include #include @@ -37,6 +37,20 @@ constexpr size_t min_subchunk_size = 10000; constexpr int estimated_compression_ratio = 4; constexpr int max_subchunks_prealloced = 3; +/** + * @brief Read from array of data sources into RMM buffer. The size of the returned device span + can be larger than the number of bytes requested from the list of sources when + the range to be read spans across multiple sources. This is due to the delimiter + characters inserted after the end of each accessed source. 
+ * + * @param buffer Device span buffer to which data is read + * @param sources Array of data sources + * @param compression Compression format of source + * @param range_offset Number of bytes to skip from source start + * @param range_size Number of bytes to read from source + * @param stream CUDA stream used for device memory operations and kernel launches + * @returns A subspan of the input device span containing data read + */ device_span ingest_raw_input(device_span buffer, host_span> sources, compression_type compression, @@ -44,14 +58,20 @@ device_span ingest_raw_input(device_span buffer, size_t range_size, rmm::cuda_stream_view stream); +/** + * @brief Reads and returns the entire data set in batches. + * + * @param sources Input `datasource` objects to read the dataset from + * @param reader_opts Settings for controlling reading behavior + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource to use for device memory allocation + * + * @return cudf::table object that contains the array of cudf::column. + */ table_with_metadata read_json(host_span> sources, json_reader_options const& reader_opts, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); -size_type find_first_delimiter(device_span d_data, - char const delimiter, - rmm::cuda_stream_view stream); - } // namespace io::json::detail } // namespace CUDF_EXPORT cudf diff --git a/cpp/src/io/json/tabulate_output_iterator.cuh b/cpp/src/io/json/tabulate_output_iterator.cuh new file mode 100644 index 00000000000..7cf3655e259 --- /dev/null +++ b/cpp/src/io/json/tabulate_output_iterator.cuh @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cudf { +namespace detail { + +// Proxy reference that calls BinaryFunction with index value and the rhs of assignment operator +template +class tabulate_output_iterator_proxy { + public: + __host__ __device__ tabulate_output_iterator_proxy(const IndexT index, BinaryFunction fun) + : index(index), fun(fun) + { + } + template + __host__ __device__ tabulate_output_iterator_proxy operator=(const T& rhs_value) + { + fun(index, rhs_value); + return *this; + } + + private: + IndexT index; + BinaryFunction fun; +}; + +/** + * @brief Tabulate output iterator with custom binary function which takes index and value. 
+ * + * @code {.cpp} + * #include "tabulate_output_iterator.cuh" + * #include + * #include + * #include + * + * struct set_bits_field { + * int* bitfield; + * __device__ inline void set_bit(size_t bit_index) + * { + * atomicOr(&bitfield[bit_index/32], (int{1} << (bit_index % 32))); + * } + * __device__ inline void clear_bit(size_t bit_index) + * { + * atomicAnd(&bitfield[bit_index / 32], ~(int{1} << (bit_index % 32))); + * } + * // Index, value + * __device__ void operator()(size_t i, bool x) + * { + * if (x) + * set_bit(i); + * else + * clear_bit(i); + * } + * }; + * + * thrust::device_vector v(1, 0x00000000); + * auto result_begin = thrust::make_tabulate_output_iterator(set_bits_field{v.data().get()}); + * auto value = thrust::make_transform_iterator(thrust::make_counting_iterator(0), + * [] __device__ (int x) { return x%2; }); + * thrust::copy(thrust::device, value, value+32, result_begin); + * assert(v[0] == 0xaaaaaaaa); + * @endcode + * + * + * @tparam BinaryFunction Binary function to be called with the Iterator value and the rhs of + * assignment operator. + * @tparam Iterator iterator type that acts as index of the output. + */ +template +class tabulate_output_iterator + : public thrust::iterator_adaptor, + thrust::counting_iterator, + thrust::use_default, + thrust::use_default, + thrust::use_default, + tabulate_output_iterator_proxy> { + public: + // parent class. 
+ using super_t = thrust::iterator_adaptor, + thrust::counting_iterator, + thrust::use_default, + thrust::use_default, + thrust::use_default, + tabulate_output_iterator_proxy>; + // friend thrust::iterator_core_access to allow it access to the private interface dereference() + friend class thrust::iterator_core_access; + __host__ __device__ tabulate_output_iterator(BinaryFunction fun) : fun(fun) {} + + private: + BinaryFunction fun; + + // thrust::iterator_core_access accesses this function + __host__ __device__ typename super_t::reference dereference() const + { + return tabulate_output_iterator_proxy(*this->base(), fun); + } +}; + +template +tabulate_output_iterator __host__ __device__ +make_tabulate_output_iterator(BinaryFunction fun) +{ + return tabulate_output_iterator(fun); +} // end make_tabulate_output_iterator + +} // namespace detail +} // namespace cudf + +// Register tabulate_output_iterator_proxy with 'is_proxy_reference' from +// type_traits to enable its use with algorithms. 
+template +struct thrust::detail::is_proxy_reference< + cudf::detail::tabulate_output_iterator_proxy> + : public thrust::detail::true_type {}; diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu index c688c809e04..dc7199d7ab1 100644 --- a/cpp/src/io/json/write_json.cu +++ b/cpp/src/io/json/write_json.cu @@ -42,12 +42,11 @@ #include #include #include +#include #include #include #include -#include -#include #include #include @@ -437,7 +436,7 @@ std::unique_ptr join_list_of_strings(lists_column_view const& lists_stri // scatter string and separator auto labels = cudf::lists::detail::generate_labels( - lists_strings, num_strings, stream, rmm::mr::get_current_device_resource()); + lists_strings, num_strings, stream, cudf::get_current_device_resource_ref()); auto d_strings_children = cudf::column_device_view::create(strings_children, stream); thrust::for_each(rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -645,13 +644,13 @@ struct column_to_strings_fn { } }; auto new_offsets = cudf::lists::detail::get_normalized_offsets( - lists_column_view(column), stream_, rmm::mr::get_current_device_resource()); + lists_column_view(column), stream_, cudf::get_current_device_resource_ref()); auto const list_child_string = make_lists_column( column.size(), std::move(new_offsets), - std::move(child_string_with_null()), + child_string_with_null(), column.null_count(), - cudf::detail::copy_bitmask(column, stream_, rmm::mr::get_current_device_resource()), + cudf::detail::copy_bitmask(column, stream_, cudf::get_current_device_resource_ref()), stream_); return join_list_of_strings(lists_column_view(*list_child_string), list_row_begin_wrap.value(stream_), @@ -736,7 +735,7 @@ struct column_to_strings_fn { narep, options_.is_enabled_include_nulls(), stream_, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); } private: @@ -765,17 +764,18 @@ std::unique_ptr make_strings_column_from_host(host_span 
offsets(host_strings.size() + 1, 0); std::transform_inclusive_scan(host_strings.begin(), host_strings.end(), offsets.begin() + 1, std::plus{}, [](auto& str) { return str.size(); }); - auto d_offsets = std::make_unique( - cudf::detail::make_device_uvector_sync(offsets, stream, rmm::mr::get_current_device_resource()), - rmm::device_buffer{}, - 0); + auto d_offsets = + std::make_unique(cudf::detail::make_device_uvector_sync( + offsets, stream, cudf::get_current_device_resource_ref()), + rmm::device_buffer{}, + 0); return cudf::make_strings_column( host_strings.size(), std::move(d_offsets), d_chars.release(), 0, {}); } @@ -798,7 +798,7 @@ std::unique_ptr make_column_names_column(host_span #include -#include +#include #include #include @@ -692,11 +692,12 @@ class metadata { * @brief `column_device_view` and additional, ORC specific, information on the column. */ struct orc_column_device_view : public column_device_view { - __device__ orc_column_device_view(column_device_view col, thrust::optional parent_idx) + __device__ orc_column_device_view(column_device_view col, + cuda::std::optional parent_idx) : column_device_view{col}, parent_index{parent_idx} { } - thrust::optional parent_index; + cuda::std::optional parent_index; bitmask_type const* pushdown_mask = nullptr; }; diff --git a/cpp/src/io/orc/reader_impl.hpp b/cpp/src/io/orc/reader_impl.hpp index 94b294087b8..bb2d6dbcc9f 100644 --- a/cpp/src/io/orc/reader_impl.hpp +++ b/cpp/src/io/orc/reader_impl.hpp @@ -22,9 +22,9 @@ #include #include #include +#include #include -#include #include diff --git a/cpp/src/io/orc/reader_impl_decode.cu b/cpp/src/io/orc/reader_impl_decode.cu index e3b9a048be8..d628e936cb1 100644 --- a/cpp/src/io/orc/reader_impl_decode.cu +++ b/cpp/src/io/orc/reader_impl_decode.cu @@ -28,13 +28,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include @@ -506,7 +506,7 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector const& } } auto const 
d_prefix_sums_to_update = cudf::detail::make_device_uvector_async( - prefix_sums_to_update, stream, rmm::mr::get_current_device_resource()); + prefix_sums_to_update, stream, cudf::get_current_device_resource_ref()); thrust::for_each( rmm::exec_policy_nosync(stream), @@ -683,7 +683,7 @@ std::vector find_table_splits(table_view const& input, segment_length = std::min(segment_length, input.num_rows()); auto const d_segmented_sizes = cudf::detail::segmented_row_bit_count( - input, segment_length, stream, rmm::mr::get_current_device_resource()); + input, segment_length, stream, cudf::get_current_device_resource_ref()); auto segmented_sizes = cudf::detail::hostdevice_vector(d_segmented_sizes->size(), stream); @@ -777,7 +777,7 @@ void reader_impl::decompress_and_decode_stripes(read_mode mode) [](auto const& sum, auto const& cols_level) { return sum + cols_level.size(); }); return cudf::detail::make_zeroed_device_uvector_async( - num_total_cols * stripe_count, _stream, rmm::mr::get_current_device_resource()); + num_total_cols * stripe_count, _stream, cudf::get_current_device_resource_ref()); }(); std::size_t num_processed_lvl_columns = 0; std::size_t num_processed_prev_lvl_columns = 0; diff --git a/cpp/src/io/orc/reader_impl_helpers.cpp b/cpp/src/io/orc/reader_impl_helpers.cpp index c943ae17d97..4c1079cffe8 100644 --- a/cpp/src/io/orc/reader_impl_helpers.cpp +++ b/cpp/src/io/orc/reader_impl_helpers.cpp @@ -16,7 +16,7 @@ #include "reader_impl_helpers.hpp" -#include +#include namespace cudf::io::orc::detail { diff --git a/cpp/src/io/orc/reader_impl_helpers.hpp b/cpp/src/io/orc/reader_impl_helpers.hpp index a563fb19e15..5528b2ee763 100644 --- a/cpp/src/io/orc/reader_impl_helpers.hpp +++ b/cpp/src/io/orc/reader_impl_helpers.hpp @@ -21,9 +21,9 @@ #include "io/utilities/column_buffer.hpp" #include +#include #include -#include #include #include diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu index 80f32512b98..5c70e35fd2e 100644 --- 
a/cpp/src/io/orc/stripe_enc.cu +++ b/cpp/src/io/orc/stripe_enc.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -1425,7 +1426,7 @@ void decimal_sizes_to_offsets(device_2dspan rg_bounds, // Copy the vector of views to the device so that we can pass it to the kernel auto d_sizes = cudf::detail::make_device_uvector_async( - h_sizes, stream, rmm::mr::get_current_device_resource()); + h_sizes, stream, cudf::get_current_device_resource_ref()); constexpr int block_size = 256; dim3 const grid_size{static_cast(elem_sizes.size()), // num decimal columns diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index f3b8cfbc836..60a64fb0ee6 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -50,7 +52,6 @@ #include #include #include -#include #include #include #include @@ -532,20 +533,20 @@ auto uncomp_block_alignment(CompressionKind compression_kind) { if (compression_kind == NONE or nvcomp::is_compression_disabled(to_nvcomp_compression_type(compression_kind))) { - return 1u; + return 1ul; } - return 1u << nvcomp::compress_input_alignment_bits(to_nvcomp_compression_type(compression_kind)); + return nvcomp::required_alignment(to_nvcomp_compression_type(compression_kind)); } auto comp_block_alignment(CompressionKind compression_kind) { if (compression_kind == NONE or nvcomp::is_compression_disabled(to_nvcomp_compression_type(compression_kind))) { - return 1u; + return 1ul; } - return 1u << nvcomp::compress_output_alignment_bits(to_nvcomp_compression_type(compression_kind)); + return nvcomp::required_alignment(to_nvcomp_compression_type(compression_kind)); } /** @@ -728,7 +729,7 @@ std::vector> calculate_aligned_rowgroup_bounds( cudaMemcpyDefault, stream.value())); auto const d_stripes = cudf::detail::make_device_uvector_async( - 
segmentation.stripes, stream, rmm::mr::get_current_device_resource()); + segmentation.stripes, stream, cudf::get_current_device_resource_ref()); // One thread per column, per stripe thrust::for_each_n( @@ -1354,7 +1355,7 @@ encoded_footer_statistics finish_statistic_blobs(Footer const& footer, } // Copy to device auto const d_stat_chunks = cudf::detail::make_device_uvector_async( - h_stat_chunks, stream, rmm::mr::get_current_device_resource()); + h_stat_chunks, stream, cudf::get_current_device_resource_ref()); stats_merge.host_to_device_async(stream); // Encode and return @@ -1738,7 +1739,7 @@ pushdown_null_masks init_pushdown_null_masks(orc_table_view& orc_table, // Attach null masks to device column views (async) auto const d_mask_ptrs = cudf::detail::make_device_uvector_async( - mask_ptrs, stream, rmm::mr::get_current_device_resource()); + mask_ptrs, stream, cudf::get_current_device_resource_ref()); thrust::for_each_n( rmm::exec_policy(stream), thrust::make_counting_iterator(0ul), @@ -1828,10 +1829,10 @@ orc_table_view make_orc_table_view(table_view const& table, return orc_column.orc_kind(); }); auto const d_type_kinds = cudf::detail::make_device_uvector_async( - type_kinds, stream, rmm::mr::get_current_device_resource()); + type_kinds, stream, cudf::get_current_device_resource_ref()); rmm::device_uvector d_orc_columns(orc_columns.size(), stream); - using stack_value_type = thrust::pair>; + using stack_value_type = thrust::pair>; rmm::device_uvector stack_storage(orc_columns.size(), stream); // pre-order append ORC device columns @@ -1847,7 +1848,7 @@ orc_table_view make_orc_table_view(table_view const& table, thrust::make_reverse_iterator(d_table.end()), thrust::make_reverse_iterator(d_table.begin()), [&stack](column_device_view const& c) { - stack.push({&c, thrust::nullopt}); + stack.push({&c, cuda::std::nullopt}); }); uint32_t idx = 0; @@ -1879,7 +1880,7 @@ orc_table_view make_orc_table_view(table_view const& table, std::move(d_orc_columns), str_col_indexes, 
cudf::detail::make_device_uvector_sync( - str_col_indexes, stream, rmm::mr::get_current_device_resource())}; + str_col_indexes, stream, cudf::get_current_device_resource_ref())}; } hostdevice_2dvector calculate_rowgroup_bounds(orc_table_view const& orc_table, @@ -1978,7 +1979,7 @@ encoder_decimal_info decimal_chunk_sizes(orc_table_view& orc_table, // Gather the row group sizes and copy to host auto d_tmp_rowgroup_sizes = rmm::device_uvector(segmentation.num_rowgroups(), stream); - std::map> rg_sizes; + std::map> rg_sizes; for (auto const& [col_idx, esizes] : elem_sizes) { // Copy last elem in each row group - equal to row group size thrust::tabulate(rmm::exec_policy(stream), @@ -1991,14 +1992,14 @@ encoder_decimal_info decimal_chunk_sizes(orc_table_view& orc_table, return src[rg_bounds[idx][col_idx].end - 1]; }); - rg_sizes[col_idx] = cudf::detail::make_std_vector_async(d_tmp_rowgroup_sizes, stream); + rg_sizes.emplace(col_idx, cudf::detail::make_host_vector_async(d_tmp_rowgroup_sizes, stream)); } return {std::move(elem_sizes), std::move(rg_sizes)}; } std::map decimal_column_sizes( - std::map> const& chunk_sizes) + std::map> const& chunk_sizes) { std::map column_sizes; std::transform(chunk_sizes.cbegin(), @@ -2056,7 +2057,7 @@ auto set_rowgroup_char_counts(orc_table_view& orc_table, orc_table.d_string_column_indices, stream); - auto const h_counts = cudf::detail::make_std_vector_sync(counts, stream); + auto const h_counts = cudf::detail::make_host_vector_sync(counts, stream); for (auto col_idx : orc_table.string_column_indices) { auto& str_column = orc_table.column(col_idx); @@ -2239,7 +2240,7 @@ stripe_dictionaries build_dictionaries(orc_table_view& orc_table, // Create the inverse permutation - i.e. 
the mapping from the original order to the sorted auto order_copy = cudf::detail::make_device_uvector_async( - sd.data_order, current_stream, rmm::mr::get_current_device_resource()); + sd.data_order, current_stream, cudf::get_current_device_resource_ref()); thrust::scatter(rmm::exec_policy_nosync(current_stream), thrust::counting_iterator(0), thrust::counting_iterator(sd.data_order.size()), diff --git a/cpp/src/io/orc/writer_impl.hpp b/cpp/src/io/orc/writer_impl.hpp index f5f8b3cfed9..cae849ee315 100644 --- a/cpp/src/io/orc/writer_impl.hpp +++ b/cpp/src/io/orc/writer_impl.hpp @@ -90,8 +90,9 @@ struct stripe_rowgroups { */ struct encoder_decimal_info { std::map> - elem_sizes; ///< Column index -> per-element size map - std::map> rg_sizes; ///< Column index -> per-rowgroup size map + elem_sizes; ///< Column index -> per-element size map + std::map> + rg_sizes; ///< Column index -> per-rowgroup size map }; /** diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu index a43c6d4cbb6..17ccb73c0a8 100644 --- a/cpp/src/io/parquet/chunk_dict.cu +++ b/cpp/src/io/parquet/chunk_dict.cu @@ -22,6 +22,7 @@ #include +#include #include namespace cudf::io::parquet::detail { @@ -30,28 +31,14 @@ namespace { constexpr int DEFAULT_BLOCK_SIZE = 256; } -template -CUDF_KERNEL void __launch_bounds__(block_size) - initialize_chunk_hash_maps_kernel(device_span chunks) -{ - auto const chunk = chunks[blockIdx.x]; - auto const t = threadIdx.x; - // fut: Now that per-chunk dict is same size as ck.num_values, try to not use one block per chunk - for (thread_index_type i = 0; i < chunk.dict_map_size; i += block_size) { - if (t + i < chunk.dict_map_size) { - new (&chunk.dict_map_slots[t + i].first) map_type::atomic_key_type{KEY_SENTINEL}; - new (&chunk.dict_map_slots[t + i].second) map_type::atomic_mapped_type{VALUE_SENTINEL}; - } - } -} - template struct equality_functor { column_device_view const& col; - __device__ bool operator()(size_type lhs_idx, size_type rhs_idx) + 
__device__ bool operator()(key_type lhs_idx, key_type rhs_idx) const { - // We don't call this for nulls so this is fine - auto const equal = cudf::experimental::row::equality::nan_equal_physical_equality_comparator{}; + // We don't call this for nulls so this is fine. + auto constexpr equal = + cudf::experimental::row::equality::nan_equal_physical_equality_comparator{}; return equal(col.element(lhs_idx), col.element(rhs_idx)); } }; @@ -59,38 +46,167 @@ struct equality_functor { template struct hash_functor { column_device_view const& col; - __device__ auto operator()(size_type idx) const + uint32_t const seed = 0; + __device__ auto operator()(key_type idx) const { - return cudf::hashing::detail::MurmurHash3_x86_32{}(col.element(idx)); + return cudf::hashing::detail::MurmurHash3_x86_32{seed}(col.element(idx)); } }; +template struct map_insert_fn { - map_type::device_mutable_view& map; + storage_ref_type const& storage_ref; + EncColumnChunk* const& chunk; template - __device__ bool operator()(column_device_view const& col, size_type i) + __device__ void operator()(size_type const s_start_value_idx, size_type const end_value_idx) { if constexpr (column_device_view::has_element_accessor()) { - auto hash_fn = hash_functor{col}; - auto equality_fn = equality_functor{col}; - return map.insert(std::pair(i, i), hash_fn, equality_fn); + using block_reduce = cub::BlockReduce; + __shared__ typename block_reduce::TempStorage reduce_storage; + + auto const col = chunk->col_desc; + column_device_view const& data_col = *col->leaf_column; + __shared__ size_type total_num_dict_entries; + + using equality_fn_type = equality_functor; + using hash_fn_type = hash_functor; + // Choosing `linear_probing` over `double_hashing` for slighhhtly better performance seen in + // benchmarks. + using probing_scheme_type = cuco::linear_probing; + + // Make a view of the hash map. 
+ auto hash_map_ref = cuco::static_map_ref{cuco::empty_key{KEY_SENTINEL}, + cuco::empty_value{VALUE_SENTINEL}, + equality_fn_type{data_col}, + probing_scheme_type{hash_fn_type{data_col}}, + cuco::thread_scope_block, + storage_ref}; + + // Create a map ref with `cuco::insert` operator + auto map_insert_ref = hash_map_ref.with_operators(cuco::insert); + auto const t = threadIdx.x; + + // Create atomic refs to the current chunk's num_dict_entries and uniq_data_size + cuda::atomic_ref const chunk_num_dict_entries{chunk->num_dict_entries}; + cuda::atomic_ref const chunk_uniq_data_size{chunk->uniq_data_size}; + + // Note: Adjust the following loop to use `cg::tile` if needed in the future. + for (thread_index_type val_idx = s_start_value_idx + t; val_idx - t < end_value_idx; + val_idx += block_size) { + size_type is_unique = 0; + size_type uniq_elem_size = 0; + + // Check if this index is valid. + auto const is_valid = + val_idx < end_value_idx and val_idx < data_col.size() and data_col.is_valid(val_idx); + + // Insert tile_val_idx to hash map and count successful insertions. + if (is_valid) { + // Insert the keys using a single thread for best performance for now. 
+ is_unique = map_insert_ref.insert(cuco::pair{val_idx, val_idx}); + uniq_elem_size = [&]() -> size_type { + if (not is_unique) { return 0; } + switch (col->physical_type) { + case Type::INT32: return 4; + case Type::INT64: return 8; + case Type::INT96: return 12; + case Type::FLOAT: return 4; + case Type::DOUBLE: return 8; + case Type::BYTE_ARRAY: { + auto const col_type = data_col.type().id(); + if (col_type == type_id::STRING) { + // Strings are stored as 4 byte length + string bytes + return 4 + data_col.element(val_idx).size_bytes(); + } else if (col_type == type_id::LIST) { + // Binary is stored as 4 byte length + bytes + return 4 + + get_element(data_col, val_idx).size_bytes(); + } + CUDF_UNREACHABLE( + "Byte array only supports string and list column types for dictionary " + "encoding!"); + } + case Type::FIXED_LEN_BYTE_ARRAY: + if (data_col.type().id() == type_id::DECIMAL128) { return sizeof(__int128_t); } + CUDF_UNREACHABLE( + "Fixed length byte array only supports decimal 128 column types for dictionary " + "encoding!"); + default: CUDF_UNREACHABLE("Unsupported type for dictionary encoding"); + } + }(); + } + // Reduce num_unique and uniq_data_size from all tiles. 
+ auto num_unique = block_reduce(reduce_storage).Sum(is_unique); + __syncthreads(); + auto uniq_data_size = block_reduce(reduce_storage).Sum(uniq_elem_size); + // The first thread in the block atomically updates total num_unique and uniq_data_size + if (t == 0) { + total_num_dict_entries = + chunk_num_dict_entries.fetch_add(num_unique, cuda::std::memory_order_relaxed); + total_num_dict_entries += num_unique; + chunk_uniq_data_size.fetch_add(uniq_data_size, cuda::std::memory_order_relaxed); + } + __syncthreads(); + + // Check if the num unique values in chunk has already exceeded max dict size and early exit + if (total_num_dict_entries > MAX_DICT_SIZE) { return; } + } // for loop } else { CUDF_UNREACHABLE("Unsupported type to insert in map"); } } }; +template struct map_find_fn { - map_type::device_view& map; - + storage_ref_type const& storage_ref; + EncColumnChunk* const& chunk; template - __device__ map_type::device_view::iterator operator()(column_device_view const& col, size_type i) + __device__ void operator()(size_type const s_start_value_idx, + size_type const end_value_idx, + size_type const s_ck_start_val_idx) { if constexpr (column_device_view::has_element_accessor()) { - auto hash_fn = hash_functor{col}; - auto equality_fn = equality_functor{col}; - return map.find(i, hash_fn, equality_fn); + auto const col = chunk->col_desc; + column_device_view const& data_col = *col->leaf_column; + + using equality_fn_type = equality_functor; + using hash_fn_type = hash_functor; + // Choosing `linear_probing` over `double_hashing` for slighhhtly better performance seen in + // benchmarks. + using probing_scheme_type = cuco::linear_probing; + + // Make a view of the hash map. 
+ auto hash_map_ref = cuco::static_map_ref{cuco::empty_key{KEY_SENTINEL}, + cuco::empty_value{VALUE_SENTINEL}, + equality_fn_type{data_col}, + probing_scheme_type{hash_fn_type{data_col}}, + cuco::thread_scope_block, + storage_ref}; + + // Create a map ref with `cuco::find` operator + auto const map_find_ref = hash_map_ref.with_operators(cuco::find); + auto const t = threadIdx.x; + + // Note: Adjust the following loop to use `cg::tiles` if needed in the future. + for (thread_index_type val_idx = s_start_value_idx + t; val_idx < end_value_idx; + val_idx += block_size) { + // Find the key using a single thread for best performance for now. + if (data_col.is_valid(val_idx)) { + // No need for atomic as this is not going to be modified by any other thread. + chunk->dict_index[val_idx - s_ck_start_val_idx] = [&]() { + auto const found_slot = map_find_ref.find(val_idx); + + // Fail if we didn't find the previously inserted key. + cudf_assert(found_slot != map_find_ref.end() && + "Unable to find value in map in dictionary index construction"); + + // Return the found value. 
+ return found_slot->second; + }(); + } + } } else { CUDF_UNREACHABLE("Unsupported type to find in map"); } @@ -99,124 +215,61 @@ struct map_find_fn { template CUDF_KERNEL void __launch_bounds__(block_size) - populate_chunk_hash_maps_kernel(cudf::detail::device_2dspan frags) + populate_chunk_hash_maps_kernel(device_span const map_storage, + cudf::detail::device_2dspan frags) { - auto col_idx = blockIdx.y; - auto block_x = blockIdx.x; - auto t = threadIdx.x; - auto frag = frags[col_idx][block_x]; - auto chunk = frag.chunk; - auto col = chunk->col_desc; + auto const col_idx = blockIdx.y; + auto const block_x = blockIdx.x; + auto const frag = frags[col_idx][block_x]; + auto chunk = frag.chunk; + auto col = chunk->col_desc; if (not chunk->use_dictionary) { return; } - using block_reduce = cub::BlockReduce; - __shared__ typename block_reduce::TempStorage reduce_storage; - size_type start_row = frag.start_row; size_type end_row = frag.start_row + frag.num_rows; - // Find the bounds of values in leaf column to be inserted into the map for current chunk + // Find the bounds of values in leaf column to be inserted into the map for current chunk. 
size_type const s_start_value_idx = row_to_value_idx(start_row, *col); size_type const end_value_idx = row_to_value_idx(end_row, *col); column_device_view const& data_col = *col->leaf_column; - - // Make a view of the hash map - auto hash_map_mutable = map_type::device_mutable_view(chunk->dict_map_slots, - chunk->dict_map_size, - cuco::empty_key{KEY_SENTINEL}, - cuco::empty_value{VALUE_SENTINEL}); - - __shared__ size_type total_num_dict_entries; - thread_index_type val_idx = s_start_value_idx + t; - while (val_idx - block_size < end_value_idx) { - auto const is_valid = - val_idx < end_value_idx and val_idx < data_col.size() and data_col.is_valid(val_idx); - - // insert element at val_idx to hash map and count successful insertions - size_type is_unique = 0; - size_type uniq_elem_size = 0; - if (is_valid) { - is_unique = - type_dispatcher(data_col.type(), map_insert_fn{hash_map_mutable}, data_col, val_idx); - uniq_elem_size = [&]() -> size_type { - if (not is_unique) { return 0; } - switch (col->physical_type) { - case Type::INT32: return 4; - case Type::INT64: return 8; - case Type::INT96: return 12; - case Type::FLOAT: return 4; - case Type::DOUBLE: return 8; - case Type::BYTE_ARRAY: { - auto const col_type = data_col.type().id(); - if (col_type == type_id::STRING) { - // Strings are stored as 4 byte length + string bytes - return 4 + data_col.element(val_idx).size_bytes(); - } else if (col_type == type_id::LIST) { - // Binary is stored as 4 byte length + bytes - return 4 + get_element(data_col, val_idx).size_bytes(); - } - CUDF_UNREACHABLE( - "Byte array only supports string and list column types for dictionary " - "encoding!"); - } - case Type::FIXED_LEN_BYTE_ARRAY: - if (data_col.type().id() == type_id::DECIMAL128) { return sizeof(__int128_t); } - CUDF_UNREACHABLE( - "Fixed length byte array only supports decimal 128 column types for dictionary " - "encoding!"); - default: CUDF_UNREACHABLE("Unsupported type for dictionary encoding"); - } - }(); - } - - auto 
num_unique = block_reduce(reduce_storage).Sum(is_unique); - __syncthreads(); - auto uniq_data_size = block_reduce(reduce_storage).Sum(uniq_elem_size); - if (t == 0) { - total_num_dict_entries = atomicAdd(&chunk->num_dict_entries, num_unique); - total_num_dict_entries += num_unique; - atomicAdd(&chunk->uniq_data_size, uniq_data_size); - } - __syncthreads(); - - // Check if the num unique values in chunk has already exceeded max dict size and early exit - if (total_num_dict_entries > MAX_DICT_SIZE) { return; } - - val_idx += block_size; - } // while + storage_ref_type const storage_ref{chunk->dict_map_size, + map_storage.data() + chunk->dict_map_offset}; + type_dispatcher(data_col.type(), + map_insert_fn{storage_ref, chunk}, + s_start_value_idx, + end_value_idx); } template CUDF_KERNEL void __launch_bounds__(block_size) - collect_map_entries_kernel(device_span chunks) + collect_map_entries_kernel(device_span const map_storage, + device_span chunks) { auto& chunk = chunks[blockIdx.x]; if (not chunk.use_dictionary) { return; } - auto t = threadIdx.x; - auto map = map_type::device_view(chunk.dict_map_slots, - chunk.dict_map_size, - cuco::empty_key{KEY_SENTINEL}, - cuco::empty_value{VALUE_SENTINEL}); - - __shared__ cuda::atomic counter; + auto t = threadIdx.x; + __shared__ cuda::atomic counter; using cuda::std::memory_order_relaxed; - if (t == 0) { new (&counter) cuda::atomic{0}; } + if (t == 0) { new (&counter) cuda::atomic{0}; } __syncthreads(); - for (size_type i = 0; i < chunk.dict_map_size; i += block_size) { - if (t + i < chunk.dict_map_size) { - auto* slot = reinterpret_cast(map.begin_slot() + t + i); - auto key = slot->first; + + // Iterate over all windows in the map. + for (; t < chunk.dict_map_size; t += block_size) { + auto window = map_storage.data() + chunk.dict_map_offset + t; + // Collect all slots from each window. 
+ for (auto& slot : *window) { + auto const key = slot.first; if (key != KEY_SENTINEL) { - auto loc = counter.fetch_add(1, memory_order_relaxed); + auto const loc = counter.fetch_add(1, memory_order_relaxed); cudf_assert(loc < MAX_DICT_SIZE && "Number of filled slots exceeds max dict size"); chunk.dict_data[loc] = key; - // If sorting dict page ever becomes a hard requirement, enable the following statement and - // add a dict sorting step before storing into the slot's second field. - // chunk.dict_data_idx[loc] = t + i; - slot->second = loc; + // If sorting dict page ever becomes a hard requirement, enable the following statement + // and add a dict sorting step before storing into the slot's second field. + // chunk.dict_data_idx[loc] = idx; + slot.second = loc; } } } @@ -224,75 +277,60 @@ CUDF_KERNEL void __launch_bounds__(block_size) template CUDF_KERNEL void __launch_bounds__(block_size) - get_dictionary_indices_kernel(cudf::detail::device_2dspan frags) + get_dictionary_indices_kernel(device_span const map_storage, + cudf::detail::device_2dspan frags) { - auto col_idx = blockIdx.y; - auto block_x = blockIdx.x; - auto t = threadIdx.x; - auto frag = frags[col_idx][block_x]; - auto chunk = frag.chunk; - auto col = chunk->col_desc; + auto const col_idx = blockIdx.y; + auto const block_x = blockIdx.x; + auto const frag = frags[col_idx][block_x]; + auto chunk = frag.chunk; if (not chunk->use_dictionary) { return; } size_type start_row = frag.start_row; size_type end_row = frag.start_row + frag.num_rows; + auto const col = chunk->col_desc; // Find the bounds of values in leaf column to be searched in the map for current chunk auto const s_start_value_idx = row_to_value_idx(start_row, *col); auto const s_ck_start_val_idx = row_to_value_idx(chunk->start_row, *col); auto const end_value_idx = row_to_value_idx(end_row, *col); column_device_view const& data_col = *col->leaf_column; - - auto map = map_type::device_view(chunk->dict_map_slots, - chunk->dict_map_size, - 
cuco::empty_key{KEY_SENTINEL}, - cuco::empty_value{VALUE_SENTINEL}); - - thread_index_type val_idx = s_start_value_idx + t; - while (val_idx < end_value_idx) { - if (data_col.is_valid(val_idx)) { - auto found_slot = type_dispatcher(data_col.type(), map_find_fn{map}, data_col, val_idx); - cudf_assert(found_slot != map.end() && - "Unable to find value in map in dictionary index construction"); - if (found_slot != map.end()) { - // No need for atomic as this is not going to be modified by any other thread - auto* val_ptr = reinterpret_cast(&found_slot->second); - chunk->dict_index[val_idx - s_ck_start_val_idx] = *val_ptr; - } - } - - val_idx += block_size; - } -} - -void initialize_chunk_hash_maps(device_span chunks, rmm::cuda_stream_view stream) -{ - constexpr int block_size = 1024; - initialize_chunk_hash_maps_kernel - <<>>(chunks); + storage_ref_type const storage_ref{chunk->dict_map_size, + map_storage.data() + chunk->dict_map_offset}; + + type_dispatcher(data_col.type(), + map_find_fn{storage_ref, chunk}, + s_start_value_idx, + end_value_idx, + s_ck_start_val_idx); } -void populate_chunk_hash_maps(cudf::detail::device_2dspan frags, +void populate_chunk_hash_maps(device_span const map_storage, + cudf::detail::device_2dspan frags, rmm::cuda_stream_view stream) { dim3 const dim_grid(frags.size().second, frags.size().first); populate_chunk_hash_maps_kernel - <<>>(frags); + <<>>(map_storage, frags); } -void collect_map_entries(device_span chunks, rmm::cuda_stream_view stream) +void collect_map_entries(device_span const map_storage, + device_span chunks, + rmm::cuda_stream_view stream) { constexpr int block_size = 1024; - collect_map_entries_kernel<<>>(chunks); + collect_map_entries_kernel + <<>>(map_storage, chunks); } -void get_dictionary_indices(cudf::detail::device_2dspan frags, +void get_dictionary_indices(device_span const map_storage, + cudf::detail::device_2dspan frags, rmm::cuda_stream_view stream) { dim3 const dim_grid(frags.size().second, 
frags.size().first); get_dictionary_indices_kernel - <<>>(frags); + <<>>(map_storage, frags); } } // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index e13ed5e85e5..b978799b8bc 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -140,6 +140,7 @@ struct parquet_field_bool_list : public parquet_field_listgetb(); assert_bool_field_type(current_byte); + CUDF_EXPECTS(i < val.size(), "Index out of bounds"); val[i] = current_byte == static_cast(FieldType::BOOLEAN_TRUE); }; bind_read_func(read_value); @@ -189,6 +190,7 @@ struct parquet_field_int_list : public parquet_field_list { parquet_field_int_list(int f, std::vector& v) : parquet_field_list(f, v) { auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { + CUDF_EXPECTS(i < val.size(), "Index out of bounds"); val[i] = cpr->get_zigzag(); }; this->bind_read_func(read_value); @@ -233,6 +235,7 @@ struct parquet_field_string_list : public parquet_field_listget_u32(); CUDF_EXPECTS(l < static_cast(cpr->m_end - cpr->m_cur), "string length mismatch"); + CUDF_EXPECTS(i < val.size(), "Index out of bounds"); val[i].assign(reinterpret_cast(cpr->m_cur), l); cpr->m_cur += l; }; @@ -270,6 +273,7 @@ struct parquet_field_enum_list : public parquet_field_list : parquet_field_list(f, v) { auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { + CUDF_EXPECTS(i < val.size(), "Index out of bounds"); val[i] = static_cast(cpr->get_i32()); }; this->bind_read_func(read_value); @@ -304,10 +308,10 @@ class parquet_field_struct : public parquet_field { template class parquet_field_union_struct : public parquet_field { E& enum_val; - thrust::optional& val; // union structs are always wrapped in std::optional + cuda::std::optional& val; // union structs are always wrapped in std::optional public: - parquet_field_union_struct(int f, E& ev, 
thrust::optional& v) + parquet_field_union_struct(int f, E& ev, cuda::std::optional& v) : parquet_field(f), enum_val(ev), val(v) { } @@ -355,6 +359,7 @@ struct parquet_field_struct_list : public parquet_field_list(f, v) { auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { + CUDF_EXPECTS(i < val.size(), "Index out of bounds"); cpr->read(&val[i]); }; this->bind_read_func(read_value); @@ -399,6 +404,7 @@ struct parquet_field_binary_list auto const l = cpr->get_u32(); CUDF_EXPECTS(l <= static_cast(cpr->m_end - cpr->m_cur), "binary length mismatch"); + CUDF_EXPECTS(i < val.size(), "Index out of bounds"); val[i].resize(l); val[i].assign(cpr->m_cur, cpr->m_cur + l); cpr->m_cur += l; @@ -431,10 +437,10 @@ class parquet_field_struct_blob : public parquet_field { */ template class parquet_field_optional : public parquet_field { - thrust::optional& val; + cuda::std::optional& val; public: - parquet_field_optional(int f, thrust::optional& v) : parquet_field(f), val(v) {} + parquet_field_optional(int f, cuda::std::optional& v) : parquet_field(f), val(v) {} inline void operator()(CompactProtocolReader* cpr, int field_type) { diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index a3f91f6859b..9ed2929a70e 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -893,7 +893,7 @@ __device__ void gpuDecodeLevels(page_state_s* s, { bool has_repetition = s->col.max_level[level_type::REPETITION] > 0; - constexpr int batch_size = 32; + constexpr int batch_size = cudf::detail::warp_size; int cur_leaf_count = target_leaf_count; while (s->error == 0 && s->nz_count < target_leaf_count && s->input_value_count < s->num_input_values) { diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index 8ee4c175e09..7c985643887 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -20,7 +20,7 @@ #include -#include +#include #include #include @@ -94,10 
+94,10 @@ struct LogicalType { BSON }; Type type; - thrust::optional decimal_type; - thrust::optional time_type; - thrust::optional timestamp_type; - thrust::optional int_type; + cuda::std::optional decimal_type; + cuda::std::optional time_type; + cuda::std::optional timestamp_type; + cuda::std::optional int_type; LogicalType(Type tp = UNDEFINED) : type(tp) {} LogicalType(DecimalType&& dt) : type(DECIMAL), decimal_type(dt) {} @@ -178,21 +178,21 @@ struct SchemaElement { // 5: nested fields int32_t num_children = 0; // 6: DEPRECATED: record the original type before conversion to parquet type - thrust::optional converted_type; + cuda::std::optional converted_type; // 7: DEPRECATED: record the scale for DECIMAL converted type int32_t decimal_scale = 0; // 8: DEPRECATED: record the precision for DECIMAL converted type int32_t decimal_precision = 0; // 9: save field_id from original schema - thrust::optional field_id; + cuda::std::optional field_id; // 10: replaces converted type - thrust::optional logical_type; + cuda::std::optional logical_type; // extra cudf specific fields bool output_as_byte_array = false; // cudf type determined from arrow:schema - thrust::optional arrow_type; + cuda::std::optional arrow_type; // The following fields are filled in later during schema initialization int max_definition_level = 0; @@ -203,10 +203,9 @@ struct SchemaElement { bool operator==(SchemaElement const& other) const { return type == other.type && converted_type == other.converted_type && - type_length == other.type_length && repetition_type == other.repetition_type && - name == other.name && num_children == other.num_children && - decimal_scale == other.decimal_scale && decimal_precision == other.decimal_precision && - field_id == other.field_id; + type_length == other.type_length && name == other.name && + num_children == other.num_children && decimal_scale == other.decimal_scale && + decimal_precision == other.decimal_precision && field_id == other.field_id; } // the parquet 
format is a little squishy when it comes to interpreting @@ -259,21 +258,21 @@ struct SchemaElement { */ struct Statistics { // deprecated max value in signed comparison order - thrust::optional> max; + cuda::std::optional> max; // deprecated min value in signed comparison order - thrust::optional> min; + cuda::std::optional> min; // count of null values in the column - thrust::optional null_count; + cuda::std::optional null_count; // count of distinct values occurring - thrust::optional distinct_count; + cuda::std::optional distinct_count; // max value for column determined by ColumnOrder - thrust::optional> max_value; + cuda::std::optional> max_value; // min value for column determined by ColumnOrder - thrust::optional> min_value; + cuda::std::optional> min_value; // If true, max_value is the actual maximum value for a column - thrust::optional is_max_value_exact; + cuda::std::optional is_max_value_exact; // If true, min_value is the actual minimum value for a column - thrust::optional is_min_value_exact; + cuda::std::optional is_min_value_exact; }; /** @@ -282,7 +281,7 @@ struct Statistics { struct SizeStatistics { // Number of variable-width bytes stored for the page/chunk. Should not be set for anything // but the BYTE_ARRAY physical type. - thrust::optional unencoded_byte_array_data_bytes; + cuda::std::optional unencoded_byte_array_data_bytes; /** * When present, there is expected to be one element corresponding to each * repetition (i.e. size=max repetition_level+1) where each element @@ -291,14 +290,14 @@ struct SizeStatistics { * * This value should not be written if max_repetition_level is 0. */ - thrust::optional> repetition_level_histogram; + cuda::std::optional> repetition_level_histogram; /** * Same as repetition_level_histogram except for definition levels. * * This value should not be written if max_definition_level is 0 or 1. 
*/ - thrust::optional> definition_level_histogram; + cuda::std::optional> definition_level_histogram; }; /** @@ -319,7 +318,7 @@ struct OffsetIndex { std::vector page_locations; // per-page size info. see description of the same field in SizeStatistics. only present for // columns with a BYTE_ARRAY physical type. - thrust::optional> unencoded_byte_array_data_bytes; + cuda::std::optional> unencoded_byte_array_data_bytes; }; /** @@ -331,10 +330,10 @@ struct ColumnIndex { std::vector> max_values; // upper bound for values in each page BoundaryOrder boundary_order = BoundaryOrder::UNORDERED; // Indicates if min and max values are ordered - thrust::optional> null_counts; // Optional count of null values per page + cuda::std::optional> null_counts; // Optional count of null values per page // Repetition/definition level histograms for the column chunk - thrust::optional> repetition_level_histogram; - thrust::optional> definition_level_histogram; + cuda::std::optional> repetition_level_histogram; + cuda::std::optional> definition_level_histogram; }; /** @@ -384,11 +383,11 @@ struct ColumnChunkMetaData { Statistics statistics; // Set of all encodings used for pages in this column chunk. This information can be used to // determine if all data pages are dictionary encoded for example. - thrust::optional> encoding_stats; + cuda::std::optional> encoding_stats; // Optional statistics to help estimate total memory when converted to in-memory representations. // The histograms contained in these statistics can also be useful in some cases for more // fine-grained nullability/list length filter pushdown. - thrust::optional size_statistics; + cuda::std::optional size_statistics; }; /** @@ -430,13 +429,13 @@ struct RowGroup { int64_t num_rows = 0; // If set, specifies a sort ordering of the rows in this RowGroup. // The sorting columns can be a subset of all the columns. 
- thrust::optional> sorting_columns; + cuda::std::optional> sorting_columns; // Byte offset from beginning of file to first page (data or dictionary) in this row group - thrust::optional file_offset; + cuda::std::optional file_offset; // Total byte size of all compressed (and potentially encrypted) column data in this row group - thrust::optional total_compressed_size; + cuda::std::optional total_compressed_size; // Row group ordinal in the file - thrust::optional ordinal; + cuda::std::optional ordinal; }; /** @@ -461,7 +460,7 @@ struct FileMetaData { std::vector row_groups; std::vector key_value_metadata; std::string created_by = ""; - thrust::optional> column_orders; + cuda::std::optional> column_orders; }; /** diff --git a/cpp/src/io/parquet/parquet_gpu.cuh b/cpp/src/io/parquet/parquet_gpu.cuh index e3c44c78898..7c09764da2d 100644 --- a/cpp/src/io/parquet/parquet_gpu.cuh +++ b/cpp/src/io/parquet/parquet_gpu.cuh @@ -18,25 +18,37 @@ #include "parquet_gpu.hpp" +#include #include #include -#include +#include +#include namespace cudf::io::parquet::detail { -auto constexpr KEY_SENTINEL = size_type{-1}; -auto constexpr VALUE_SENTINEL = size_type{-1}; +using key_type = size_type; +using mapped_type = size_type; +using slot_type = cuco::pair; -using map_type = cuco::legacy::static_map; +auto constexpr map_cg_size = + 1; ///< A CUDA Cooperative Group of 1 thread (set for best performance) to handle each subset. + ///< Note: Adjust insert and find loops to use `cg::tile` if increasing this. +auto constexpr window_size = + 1; ///< Number of concurrent slots (set for best performance) handled by each thread. +auto constexpr occupancy_factor = 1.43f; ///< cuCollections suggests using a hash map of size + ///< N * (1/0.7) = 1.43 to target a 70% occupancy factor. -/** - * @brief The alias of `map_type::pair_atomic_type` class. - * - * Declare this struct by trivial subclassing instead of type aliasing so we can have forward - * declaration of this struct somewhere else. 
- */ -struct slot_type : public map_type::pair_atomic_type {}; +auto constexpr KEY_SENTINEL = key_type{-1}; +auto constexpr VALUE_SENTINEL = mapped_type{-1}; +auto constexpr SCOPE = cuda::thread_scope_block; + +using storage_type = cuco::aow_storage, + cudf::detail::cuco_allocator>; +using storage_ref_type = typename storage_type::ref_type; +using window_type = typename storage_type::window_type; /** * @brief Return the byte length of parquet dtypes that are physically represented by INT32 @@ -81,4 +93,43 @@ inline size_type __device__ row_to_value_idx(size_type idx, return idx; } +/** + * @brief Insert chunk values into their respective hash maps + * + * @param map_storage Bulk hashmap storage + * @param frags Column fragments + * @param stream CUDA stream to use + */ +void populate_chunk_hash_maps(device_span const map_storage, + cudf::detail::device_2dspan frags, + rmm::cuda_stream_view stream); + +/** + * @brief Compact dictionary hash map entries into chunk.dict_data + * + * @param map_storage Bulk hashmap storage + * @param chunks Flat span of chunks to compact hash maps for + * @param stream CUDA stream to use + */ +void collect_map_entries(device_span const map_storage, + device_span chunks, + rmm::cuda_stream_view stream); + +/** + * @brief Get the Dictionary Indices for each row + * + * For each row of a chunk, gets the indices into chunk.dict_data which contains the value otherwise + * stored in input column [row]. Stores these indices into chunk.dict_index. 
+ * + * Since dict_data itself contains indices into the original cudf column, this means that + * col[row] == col[dict_data[dict_index[row - chunk.start_row]]] + * + * @param map_storage Bulk hashmap storage + * @param frags Column fragments + * @param stream CUDA stream to use + */ +void get_dictionary_indices(device_span const map_storage, + cudf::detail::device_2dspan frags, + rmm::cuda_stream_view stream); + } // namespace cudf::io::parquet::detail diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index efc1f5ebab1..1390339c1ae 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -394,13 +394,14 @@ struct ColumnChunkDesc { uint8_t def_level_bits_, uint8_t rep_level_bits_, Compression codec_, - thrust::optional logical_type_, + cuda::std::optional logical_type_, int32_t ts_clock_rate_, int32_t src_col_index_, int32_t src_col_schema_, column_chunk_info const* chunk_info_, float list_bytes_per_row_est_, - bool strings_to_categorical_) + bool strings_to_categorical_, + int32_t src_file_idx_) : compressed_data(compressed_data_), compressed_size(compressed_size_), num_values(num_values_), @@ -419,7 +420,8 @@ struct ColumnChunkDesc { src_col_schema(src_col_schema_), h_chunk_info(chunk_info_), list_bytes_per_row_est(list_bytes_per_row_est_), - is_strings_to_cat(strings_to_categorical_) + is_strings_to_cat(strings_to_categorical_), + src_file_idx(src_file_idx_) { } @@ -438,12 +440,12 @@ struct ColumnChunkDesc { int32_t num_data_pages{}; // number of data pages int32_t num_dict_pages{}; // number of dictionary pages PageInfo const* dict_page{}; - string_index_pair* str_dict_index{}; // index for string dictionary - bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column - void** column_data_base{}; // base pointers of column data - void** column_string_base{}; // base pointers of column string data - Compression codec{}; // compressed codec enum - thrust::optional 
logical_type{}; // logical type + string_index_pair* str_dict_index{}; // index for string dictionary + bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column + void** column_data_base{}; // base pointers of column data + void** column_string_base{}; // base pointers of column string data + Compression codec{}; // compressed codec enum + cuda::std::optional logical_type{}; // logical type int32_t ts_clock_rate{}; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) int32_t src_col_index{}; // my input column index @@ -456,6 +458,7 @@ struct ColumnChunkDesc { bool is_strings_to_cat{}; // convert strings to hashes bool is_large_string_col{}; // `true` if string data uses 64-bit offsets + int32_t src_file_idx{}; // source file index }; /** @@ -514,7 +517,6 @@ constexpr unsigned int kDictHashBits = 16; constexpr size_t kDictScratchSize = (1 << kDictHashBits) * sizeof(uint32_t); struct EncPage; -struct slot_type; // convert Encoding to a mask value constexpr uint32_t encoding_to_mask(Encoding encoding) @@ -560,7 +562,8 @@ struct EncColumnChunk { uint8_t is_compressed; //!< Nonzero if the chunk uses compression uint32_t dictionary_size; //!< Size of dictionary page including header uint32_t ck_stat_size; //!< Size of chunk-level statistics (included in 1st page header) - slot_type* dict_map_slots; //!< Hash map storage for calculating dict encoding for this chunk + uint32_t dict_map_offset; //!< Offset of the hash map storage for calculating dict encoding for + //!< this chunk size_type dict_map_size; //!< Size of dict_map_slots size_type num_dict_entries; //!< Total number of entries in dictionary size_type @@ -1001,46 +1004,6 @@ void InitFragmentStatistics(device_span groups, device_span fragments, rmm::cuda_stream_view stream); -/** - * @brief Initialize per-chunk hash maps used for dictionary with sentinel values - * - * @param chunks Flat span of chunks to initialize hash maps for - * @param stream CUDA stream to use - 
*/ -void initialize_chunk_hash_maps(device_span chunks, rmm::cuda_stream_view stream); - -/** - * @brief Insert chunk values into their respective hash maps - * - * @param frags Column fragments - * @param stream CUDA stream to use - */ -void populate_chunk_hash_maps(cudf::detail::device_2dspan frags, - rmm::cuda_stream_view stream); - -/** - * @brief Compact dictionary hash map entries into chunk.dict_data - * - * @param chunks Flat span of chunks to compact hash maps for - * @param stream CUDA stream to use - */ -void collect_map_entries(device_span chunks, rmm::cuda_stream_view stream); - -/** - * @brief Get the Dictionary Indices for each row - * - * For each row of a chunk, gets the indices into chunk.dict_data which contains the value otherwise - * stored in input column [row]. Stores these indices into chunk.dict_index. - * - * Since dict_data itself contains indices into the original cudf column, this means that - * col[row] == col[dict_data[dict_index[row - chunk.start_row]]] - * - * @param frags Column fragments - * @param stream CUDA stream to use - */ -void get_dictionary_indices(cudf::detail::device_2dspan frags, - rmm::cuda_stream_view stream); - /** * @brief Launches kernel for initializing encoder data pages * diff --git a/cpp/src/io/parquet/predicate_pushdown.cpp b/cpp/src/io/parquet/predicate_pushdown.cpp index 481c1e9fcdd..b90ca36c8c7 100644 --- a/cpp/src/io/parquet/predicate_pushdown.cpp +++ b/cpp/src/io/parquet/predicate_pushdown.cpp @@ -25,12 +25,10 @@ #include #include #include +#include #include #include -#include -#include - #include #include @@ -154,7 +152,7 @@ struct stats_caster { } void set_index(size_type index, - thrust::optional> const& binary_value, + cuda::std::optional> const& binary_value, Type const type) { if (binary_value.has_value()) { @@ -236,8 +234,8 @@ struct stats_caster { max.set_index(stats_idx, max_value, colchunk.meta_data.type); } else { // Marking it null, if column present in row group - min.set_index(stats_idx, 
thrust::nullopt, {}); - max.set_index(stats_idx, thrust::nullopt, {}); + min.set_index(stats_idx, cuda::std::nullopt, {}); + max.set_index(stats_idx, cuda::std::nullopt, {}); } stats_idx++; } @@ -399,7 +397,7 @@ std::optional>> aggregate_reader_metadata::fi std::reference_wrapper filter, rmm::cuda_stream_view stream) const { - auto mr = rmm::mr::get_current_device_resource(); + auto mr = cudf::get_current_device_resource_ref(); // Create row group indices. std::vector> filtered_row_group_indices; std::vector> all_row_group_indices; @@ -468,7 +466,7 @@ std::optional>> aggregate_reader_metadata::fi auto validity_it = cudf::detail::make_counting_transform_iterator( 0, [bitmask = host_bitmask.data()](auto bit_index) { return bit_is_set(bitmask, bit_index); }); - auto is_row_group_required = cudf::detail::make_std_vector_sync( + auto const is_row_group_required = cudf::detail::make_host_vector_sync( device_span(predicate.data(), predicate.size()), stream); // Return only filtered row groups based on predicate diff --git a/cpp/src/io/parquet/reader.cpp b/cpp/src/io/parquet/reader.cpp index 65dafb568c0..dd354b905f3 100644 --- a/cpp/src/io/parquet/reader.cpp +++ b/cpp/src/io/parquet/reader.cpp @@ -16,7 +16,7 @@ #include "reader_impl.hpp" -#include +#include namespace cudf::io::parquet::detail { diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 68ec61ead0a..7d817bde7af 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -23,8 +23,7 @@ #include #include #include - -#include +#include #include #include @@ -39,7 +38,7 @@ namespace { // be treated as a string. Currently the only logical type that has special handling is DECIMAL. // Other valid types in the future would be UUID (still treated as string) and FLOAT16 (which // for now would also be treated as a string). 
-inline bool is_treat_fixed_length_as_string(thrust::optional const& logical_type) +inline bool is_treat_fixed_length_as_string(cuda::std::optional const& logical_type) { if (!logical_type.has_value()) { return true; } return logical_type->type != LogicalType::DECIMAL; @@ -470,8 +469,10 @@ reader::impl::impl(std::size_t chunk_read_limit, _input_pass_read_limit{pass_read_limit} { // Open and parse the source dataset metadata - _metadata = - std::make_unique(_sources, options.is_enabled_use_arrow_schema()); + _metadata = std::make_unique( + _sources, + options.is_enabled_use_arrow_schema(), + options.get_columns().has_value() and options.is_enabled_allow_mismatched_pq_schemas()); // Strings may be returned as either string or categorical columns _strings_to_categorical = options.is_enabled_convert_strings_to_categories(); @@ -704,7 +705,7 @@ table_with_metadata reader::impl::finalize_output(read_mode mode, auto predicate = cudf::detail::compute_column(*read_table, _expr_conv.get_converted_expr().value().get(), _stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_EXPECTS(predicate->view().type().id() == type_id::BOOL8, "Predicate filter should return a boolean"); // Exclude columns present in filter only in output @@ -769,11 +770,14 @@ parquet_column_schema walk_schema(aggregate_reader_metadata const* mt, int idx) parquet_metadata read_parquet_metadata(host_span const> sources) { - // do not use arrow schema when reading information from parquet metadata. + // Do not use arrow schema when reading information from parquet metadata. static constexpr auto use_arrow_schema = false; + // Do not select any columns when only reading the parquet metadata. 
+ static constexpr auto has_column_projection = false; + // Open and parse the source dataset metadata - auto metadata = aggregate_reader_metadata(sources, use_arrow_schema); + auto metadata = aggregate_reader_metadata(sources, use_arrow_schema, has_column_projection); return parquet_metadata{parquet_schema{walk_schema(&metadata, 0)}, metadata.get_num_rows(), diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 5e3cc4301f9..2d46da14bec 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -28,11 +28,10 @@ #include #include #include +#include #include #include -#include -#include #include #include @@ -369,7 +368,7 @@ class reader::impl { size_t chunk_num_rows); rmm::cuda_stream_view _stream; - rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref _mr{cudf::get_current_device_resource_ref()}; // Reader configs. struct { diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index 794750ab6d2..c588fedb85c 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -77,9 +78,9 @@ void print_cumulative_page_info(device_span d_pages, device_span d_c_info, rmm::cuda_stream_view stream) { - std::vector pages = cudf::detail::make_std_vector_sync(d_pages, stream); - std::vector chunks = cudf::detail::make_std_vector_sync(d_chunks, stream); - std::vector c_info = cudf::detail::make_std_vector_sync(d_c_info, stream); + auto const pages = cudf::detail::make_host_vector_sync(d_pages, stream); + auto const chunks = cudf::detail::make_host_vector_sync(d_chunks, stream); + auto const c_info = cudf::detail::make_host_vector_sync(d_c_info, stream); printf("------------\nCumulative sizes by page\n"); @@ -370,11 +371,11 @@ int64_t find_next_split(int64_t cur_pos, * * @return A tuple of Parquet clock 
rate and Parquet decimal type. */ -[[nodiscard]] std::tuple> conversion_info( +[[nodiscard]] std::tuple> conversion_info( type_id column_type_id, type_id timestamp_type_id, Type physical, - thrust::optional logical_type) + cuda::std::optional logical_type) { int32_t const clock_rate = is_chrono(data_type{column_type_id}) ? to_clockrate(timestamp_type_id) : 0; @@ -385,7 +386,7 @@ int64_t find_next_split(int64_t cur_pos, // if decimal but not outputting as float or decimal, then convert to no logical type if (column_type_id != type_id::FLOAT64 and not cudf::is_fixed_point(data_type{column_type_id})) { - return std::make_tuple(clock_rate, thrust::nullopt); + return std::make_tuple(clock_rate, cuda::std::nullopt); } } @@ -441,7 +442,7 @@ adjust_cumulative_sizes(device_span c_info, { // sort by row count rmm::device_uvector c_info_sorted = - make_device_uvector_async(c_info, stream, rmm::mr::get_current_device_resource()); + make_device_uvector_async(c_info, stream, cudf::get_current_device_resource_ref()); thrust::sort( rmm::exec_policy_nosync(stream), c_info_sorted.begin(), c_info_sorted.end(), row_count_less{}); @@ -647,7 +648,7 @@ std::tuple, size_t, size_t> compute_next_subpass( auto [aggregated_info, page_keys_by_split] = adjust_cumulative_sizes(c_info, pages, stream); // bring back to the cpu - auto const h_aggregated_info = cudf::detail::make_std_vector_sync(aggregated_info, stream); + auto const h_aggregated_info = cudf::detail::make_host_vector_sync(aggregated_info, stream); // print_cumulative_row_info(h_aggregated_info, "adjusted"); // TODO: if the user has explicitly specified skip_rows/num_rows we could be more intelligent @@ -694,8 +695,7 @@ std::vector compute_page_splits_by_row(device_span h_aggregated_info = - cudf::detail::make_std_vector_sync(aggregated_info, stream); + auto const h_aggregated_info = cudf::detail::make_host_vector_sync(aggregated_info, stream); // print_cumulative_row_info(h_aggregated_info, "adjusted"); std::vector splits; @@ -847,9 
+847,9 @@ std::vector compute_page_splits_by_row(device_span compute_page_splits_by_row(device_span compute_page_splits_by_row(device_span chunk // add to the cumulative_page_info data rmm::device_uvector d_temp_cost = cudf::detail::make_device_uvector_async( - temp_cost, stream, rmm::mr::get_current_device_resource()); + temp_cost, stream, cudf::get_current_device_resource_ref()); auto iter = thrust::make_counting_iterator(size_t{0}); thrust::for_each(rmm::exec_policy_nosync(stream), iter, @@ -1304,9 +1314,8 @@ void reader::impl::setup_next_pass(read_mode mode) printf("\tskip_rows: %'lu\n", pass.skip_rows); printf("\tnum_rows: %'lu\n", pass.num_rows); printf("\tbase mem usage: %'lu\n", pass.base_mem_size); - auto const num_columns = _input_columns.size(); - std::vector h_page_offsets = - cudf::detail::make_std_vector_sync(pass.page_offsets, _stream); + auto const num_columns = _input_columns.size(); + auto const h_page_offsets = cudf::detail::make_host_vector_sync(pass.page_offsets, _stream); for (size_t c_idx = 0; c_idx < num_columns; c_idx++) { printf("\t\tColumn %'lu: num_pages(%'d)\n", c_idx, @@ -1348,7 +1357,7 @@ void reader::impl::setup_next_subpass(read_mode mode) [&]() -> std::tuple, size_t, size_t> { if (!pass.has_compressed_data || _input_pass_read_limit == 0) { rmm::device_uvector page_indices( - num_columns, _stream, rmm::mr::get_current_device_resource()); + num_columns, _stream, cudf::get_current_device_resource_ref()); auto iter = thrust::make_counting_iterator(0); thrust::transform(rmm::exec_policy_nosync(_stream), iter, @@ -1426,7 +1435,7 @@ void reader::impl::setup_next_subpass(read_mode mode) subpass.pages = subpass.page_buf; } - std::vector h_spans = cudf::detail::make_std_vector_async(page_indices, _stream); + auto const h_spans = cudf::detail::make_host_vector_async(page_indices, _stream); subpass.pages.device_to_host_async(_stream); _stream.synchronize(); @@ -1464,7 +1473,7 @@ void reader::impl::setup_next_subpass(read_mode mode) 
printf("\t\tTotal expected usage: %'lu\n", total_expected_size == 0 ? subpass.decomp_page_data.size() + pass.base_mem_size : total_expected_size + pass.base_mem_size); - std::vector h_page_indices = cudf::detail::make_std_vector_sync(page_indices, _stream); + auto const h_page_indices = cudf::detail::make_host_vector_sync(page_indices, _stream); for (size_t c_idx = 0; c_idx < num_columns; c_idx++) { printf("\t\tColumn %'lu: pages(%'lu - %'lu)\n", c_idx, @@ -1502,10 +1511,13 @@ void reader::impl::create_global_chunk_info() std::transform( _input_columns.begin(), _input_columns.end(), column_mapping.begin(), [&](auto const& col) { // translate schema_idx into something we can use for the page indexes - if (auto it = std::find_if( - columns.begin(), - columns.end(), - [&col](auto const& col_chunk) { return col_chunk.schema_idx == col.schema_idx; }); + if (auto it = std::find_if(columns.begin(), + columns.end(), + [&](auto const& col_chunk) { + return col_chunk.schema_idx == + _metadata->map_schema_index(col.schema_idx, + rg.source_index); + }); it != columns.end()) { return std::distance(columns.begin(), it); } @@ -1526,7 +1538,8 @@ void reader::impl::create_global_chunk_info() auto col = _input_columns[i]; // look up metadata auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx); - auto& schema = _metadata->get_schema(col.schema_idx); + auto& schema = _metadata->get_schema( + _metadata->map_schema_index(col.schema_idx, rg.source_index), rg.source_index); auto [clock_rate, logical_type] = conversion_info(to_type_id(schema, _strings_to_categorical, _options.timestamp_type.id()), @@ -1565,9 +1578,9 @@ void reader::impl::create_global_chunk_info() col.schema_idx, chunk_info, list_bytes_per_row_est, - schema.type == BYTE_ARRAY and _strings_to_categorical)); + schema.type == BYTE_ARRAY and _strings_to_categorical, + rg.source_index)); } - // Adjust for skip_rows when updating the remaining rows after the first group remaining_rows -= 
(skip_rows) ? std::min(rg.start_row + row_group.num_rows - skip_rows, remaining_rows) diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index 581c44d024b..6d566b5815e 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -38,7 +38,7 @@ namespace flatbuf = cudf::io::parquet::flatbuf; namespace { -thrust::optional converted_to_logical_type(SchemaElement const& schema) +cuda::std::optional converted_to_logical_type(SchemaElement const& schema) { if (schema.converted_type.has_value()) { switch (schema.converted_type.value()) { @@ -66,7 +66,7 @@ thrust::optional converted_to_logical_type(SchemaElement const& sch default: return LogicalType{LogicalType::UNDEFINED}; } } - return thrust::nullopt; + return cuda::std::nullopt; } } // namespace @@ -246,7 +246,7 @@ void metadata::sanitize_schema() struct_elem.repetition_type = REQUIRED; struct_elem.num_children = schema_elem.num_children; struct_elem.type = UNDEFINED_TYPE; - struct_elem.converted_type = thrust::nullopt; + struct_elem.converted_type = cuda::std::nullopt; // swap children struct_elem.children_idx = std::move(schema_elem.children_idx); @@ -380,6 +380,17 @@ aggregate_reader_metadata::collect_keyval_metadata() const return kv_maps; } +std::vector> aggregate_reader_metadata::init_schema_idx_maps( + bool const has_cols_from_mismatched_srcs) const +{ + // Only initialize if more than 1 data sources and has select columns from mismatched data sources + if (has_cols_from_mismatched_srcs and per_file_metadata.size() > 1) { + return std::vector>{per_file_metadata.size() - 1}; + } + + return {}; +} + int64_t aggregate_reader_metadata::calc_num_rows() const { return std::accumulate( @@ -412,8 +423,13 @@ void aggregate_reader_metadata::column_info_for_row_group(row_group_info& rg_inf std::vector chunks(rg.columns.size()); for (size_t col_idx = 0; col_idx < rg.columns.size(); col_idx++) { - auto const& col_chunk = 
rg.columns[col_idx]; - auto& schema = get_schema(col_chunk.schema_idx); + auto const& col_chunk = rg.columns[col_idx]; + auto const is_schema_idx_mapped = + is_schema_index_mapped(col_chunk.schema_idx, rg_info.source_index); + auto const mapped_schema_idx = is_schema_idx_mapped + ? map_schema_index(col_chunk.schema_idx, rg_info.source_index) + : col_chunk.schema_idx; + auto& schema = get_schema(mapped_schema_idx, is_schema_idx_mapped ? rg_info.source_index : 0); auto const max_def_level = schema.max_definition_level; auto const max_rep_level = schema.max_repetition_level; @@ -539,26 +555,49 @@ void aggregate_reader_metadata::column_info_for_row_group(row_group_info& rg_inf } aggregate_reader_metadata::aggregate_reader_metadata( - host_span const> sources, bool use_arrow_schema) + host_span const> sources, + bool use_arrow_schema, + bool has_cols_from_mismatched_srcs) : per_file_metadata(metadatas_from_sources(sources)), keyval_maps(collect_keyval_metadata()), + schema_idx_maps(init_schema_idx_maps(has_cols_from_mismatched_srcs)), num_rows(calc_num_rows()), num_row_groups(calc_num_row_groups()) { - if (per_file_metadata.size() > 0) { - auto const& first_meta = per_file_metadata.front(); + if (per_file_metadata.size() > 1) { + auto& first_meta = per_file_metadata.front(); auto const num_cols = first_meta.row_groups.size() > 0 ? first_meta.row_groups.front().columns.size() : 0; - auto const& schema = first_meta.schema; - - // Verify that the input files have matching numbers of columns and schema. - for (auto const& pfm : per_file_metadata) { - if (pfm.row_groups.size() > 0) { - CUDF_EXPECTS(num_cols == pfm.row_groups.front().columns.size(), - "All sources must have the same number of columns"); + auto& schema = first_meta.schema; + + // Validate that all sources have the same schema unless we are reading select columns + // from mismatched sources, in which case, we will only check the projected columns later. 
+ if (not has_cols_from_mismatched_srcs) { + // Verify that the input files have matching numbers of columns and schema. + for (auto const& pfm : per_file_metadata) { + if (pfm.row_groups.size() > 0) { + CUDF_EXPECTS(num_cols == pfm.row_groups.front().columns.size(), + "All sources must have the same number of columns"); + } + CUDF_EXPECTS(schema == pfm.schema, "All sources must have the same schema"); } - CUDF_EXPECTS(schema == pfm.schema, "All sources must have the same schema"); } + + // Mark the column schema in the first (default) source as nullable if it is nullable in any of + // the input sources. This avoids recomputing this within build_column() and + // populate_metadata(). + std::for_each( + thrust::make_counting_iterator(static_cast(1)), + thrust::make_counting_iterator(schema.size()), + [&](auto const schema_idx) { + if (schema[schema_idx].repetition_type == REQUIRED and + std::any_of( + per_file_metadata.begin() + 1, per_file_metadata.end(), [&](auto const& pfm) { + return pfm.schema[schema_idx].repetition_type != REQUIRED; + })) { + schema[schema_idx].repetition_type = OPTIONAL; + } + }); } // Collect and apply arrow:schema from Parquet's key value metadata section @@ -632,7 +671,7 @@ arrow_schema_data_types aggregate_reader_metadata::collect_arrow_schema() const if (field->type_type() == flatbuf::Type::Type_Duration) { auto type_data = field->type_as_Duration(); if (type_data != nullptr) { - auto name = (field->name()) ? field->name()->str() : ""; + auto name = field->name() ? 
field->name()->str() : ""; // set the schema_elem type to duration type schema_elem.type = duration_from_flatbuffer(type_data); arrow_type_col_seen |= (schema_elem.type.id() != type_id::EMPTY); @@ -868,12 +907,16 @@ ColumnChunkMetaData const& aggregate_reader_metadata::get_column_metadata(size_t size_type src_idx, int schema_idx) const { + // Map schema index to the provided source file index + schema_idx = map_schema_index(schema_idx, src_idx); + auto col = std::find_if(per_file_metadata[src_idx].row_groups[row_group_index].columns.begin(), per_file_metadata[src_idx].row_groups[row_group_index].columns.end(), [schema_idx](ColumnChunk const& col) { return col.schema_idx == schema_idx; }); CUDF_EXPECTS(col != std::end(per_file_metadata[src_idx].row_groups[row_group_index].columns), - "Found no metadata for schema index"); + "Found no metadata for schema index", + std::range_error); return col->meta_data; } @@ -897,6 +940,46 @@ aggregate_reader_metadata::get_rowgroup_metadata() const return rg_metadata; } +bool aggregate_reader_metadata::is_schema_index_mapped(int schema_idx, int pfm_idx) const +{ + // Check if schema_idx or pfm_idx is invalid + CUDF_EXPECTS( + schema_idx >= 0 and pfm_idx >= 0 and pfm_idx < static_cast(per_file_metadata.size()), + "Parquet reader encountered an invalid schema_idx or pfm_idx", + std::out_of_range); + + // True if root index requested or zeroth file index or schema_idx maps doesn't exist. (i.e. + // schemas are identical). 
+ if (schema_idx == 0 or pfm_idx == 0 or schema_idx_maps.empty()) { return true; } + + // Check if mapped + auto const& schema_idx_map = schema_idx_maps[pfm_idx - 1]; + return schema_idx_map.find(schema_idx) != schema_idx_map.end(); +} + +int aggregate_reader_metadata::map_schema_index(int schema_idx, int pfm_idx) const +{ + // Check if schema_idx or pfm_idx is invalid + CUDF_EXPECTS( + schema_idx >= 0 and pfm_idx >= 0 and pfm_idx < static_cast(per_file_metadata.size()), + "Parquet reader encountered an invalid schema_idx or pfm_idx", + std::out_of_range); + + // Check if pfm_idx is zero or root index requested or schema_idx_maps doesn't exist (i.e. + // schemas are identical). + if (schema_idx == 0 or pfm_idx == 0 or schema_idx_maps.empty()) { return schema_idx; } + + // schema_idx_maps will only have > 0 size when we are reading matching column projection from + // mismatched Parquet sources. + auto const& schema_idx_map = schema_idx_maps[pfm_idx - 1]; + CUDF_EXPECTS(schema_idx_map.find(schema_idx) != schema_idx_map.end(), + "Unmapped schema index encountered in the specified source tree", + std::out_of_range); + + // Return the mapped schema idx. + return schema_idx_map.at(schema_idx); +} + std::string aggregate_reader_metadata::get_pandas_index() const { // Assumes that all input files have the same metadata @@ -1041,18 +1124,19 @@ aggregate_reader_metadata::select_columns( std::optional> const& filter_columns_names, bool include_index, bool strings_to_categorical, - type_id timestamp_type_id) const + type_id timestamp_type_id) { - auto find_schema_child = [&](SchemaElement const& schema_elem, std::string const& name) { - auto const& col_schema_idx = - std::find_if(schema_elem.children_idx.cbegin(), - schema_elem.children_idx.cend(), - [&](size_t col_schema_idx) { return get_schema(col_schema_idx).name == name; }); - - return (col_schema_idx != schema_elem.children_idx.end()) - ? 
static_cast(*col_schema_idx) - : -1; - }; + auto const find_schema_child = + [&](SchemaElement const& schema_elem, std::string const& name, int const pfm_idx = 0) { + auto const& col_schema_idx = std::find_if( + schema_elem.children_idx.cbegin(), + schema_elem.children_idx.cend(), + [&](size_t col_schema_idx) { return get_schema(col_schema_idx, pfm_idx).name == name; }); + + return (col_schema_idx != schema_elem.children_idx.end()) + ? static_cast(*col_schema_idx) + : -1; + }; std::vector output_columns; std::vector input_columns; @@ -1074,7 +1158,7 @@ aggregate_reader_metadata::select_columns( if (schema_elem.is_stub()) { // is this legit? CUDF_EXPECTS(schema_elem.num_children == 1, "Unexpected number of children for stub"); - auto child_col_name_info = (col_name_info) ? &col_name_info->children[0] : nullptr; + auto const child_col_name_info = col_name_info ? &col_name_info->children[0] : nullptr; return build_column( child_col_name_info, schema_elem.children_idx[0], out_col_array, has_list_parent); } @@ -1154,6 +1238,93 @@ aggregate_reader_metadata::select_columns( return path_is_valid; }; + // Compares two schema elements to be equal except their number of children + auto const equal_to_except_num_children = [](SchemaElement const& lhs, SchemaElement const& rhs) { + return lhs.type == rhs.type and lhs.converted_type == rhs.converted_type and + lhs.type_length == rhs.type_length and lhs.name == rhs.name and + lhs.decimal_scale == rhs.decimal_scale and + lhs.decimal_precision == rhs.decimal_precision and lhs.field_id == rhs.field_id; + }; + + // Maps a projected column's schema_idx in the zeroth per_file_metadata (source) to the + // corresponding schema_idx in pfm_idx'th per_file_metadata (destination). The projected + // column's path must match across sources, else an appropriate exception is thrown. 
+ std::function map_column = + [&](column_name_info const* col_name_info, + int const src_schema_idx, + int const dst_schema_idx, + int const pfm_idx) { + auto const& src_schema_elem = get_schema(src_schema_idx); + auto const& dst_schema_elem = get_schema(dst_schema_idx, pfm_idx); + + // Check the schema elements to be equal except their number of children as we only care about + // the specific column paths in the schema trees. Raise an invalid_argument error if the + // schema elements don't match. + CUDF_EXPECTS(equal_to_except_num_children(src_schema_elem, dst_schema_elem), + "Encountered mismatching SchemaElement properties for a column in " + "the selected path", + std::invalid_argument); + + // Get the schema_idx_map for this data source (pfm) + auto& schema_idx_map = schema_idx_maps[pfm_idx - 1]; + // Map the schema index from 0th tree (src) to the one in the current (dst) tree. + schema_idx_map[src_schema_idx] = dst_schema_idx; + + // If src_schema_elem is a stub, it does not exist in the column_name_info and column_buffer + // hierarchy. So continue on with mapping. + if (src_schema_elem.is_stub()) { + // Check if dst_schema_elem is also a stub i.e. has num_children == 1 that we didn't + // previously check. Raise an invalid_argument error if dst_schema_elem is not a stub. + CUDF_EXPECTS(dst_schema_elem.is_stub(), + "Encountered mismatching schemas for stub.", + std::invalid_argument); + auto const child_col_name_info = col_name_info ? &col_name_info->children[0] : nullptr; + return map_column(child_col_name_info, + src_schema_elem.children_idx[0], + dst_schema_elem.children_idx[0], + pfm_idx); + } + + // The path ends here. If this is a list/struct col (has children), then map all its children + // which must be identical. + if (col_name_info == nullptr or col_name_info->children.empty()) { + // Check the number of children to be equal to be mapped. An out_of_range error if the + // number of children isn't equal. 
+ CUDF_EXPECTS(src_schema_elem.num_children == dst_schema_elem.num_children, + "Encountered mismatching number of children for a " + "column in the selected path", + std::out_of_range); + + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(src_schema_elem.num_children), + [&](auto const child_idx) { + map_column(nullptr, + src_schema_elem.children_idx[child_idx], + dst_schema_elem.children_idx[child_idx], + pfm_idx); + }); + } + // The path goes further down to specific child(ren) of this column so map only those + // children. + else { + std::for_each( + col_name_info->children.cbegin(), + col_name_info->children.cend(), + [&](auto const& child_col_name_info) { + // Ensure that each named child column exists in the destination schema tree for the + // paths to align up. An out_of_range error otherwise. + CUDF_EXPECTS( + find_schema_child(dst_schema_elem, child_col_name_info.name, pfm_idx) != -1, + "Encountered mismatching schema tree depths across data sources", + std::out_of_range); + map_column(&child_col_name_info, + find_schema_child(src_schema_elem, child_col_name_info.name), + find_schema_child(dst_schema_elem, child_col_name_info.name, pfm_idx), + pfm_idx); + }); + } + }; + std::vector output_column_schemas; // @@ -1287,7 +1458,28 @@ aggregate_reader_metadata::select_columns( for (auto& col : selected_columns) { auto const& top_level_col_schema_idx = find_schema_child(root, col.name); bool valid_column = build_column(&col, top_level_col_schema_idx, output_columns, false); - if (valid_column) output_column_schemas.push_back(top_level_col_schema_idx); + if (valid_column) { + output_column_schemas.push_back(top_level_col_schema_idx); + + // Map the column's schema_idx across the rest of the data sources if required. 
+ if (per_file_metadata.size() > 1 and not schema_idx_maps.empty()) { + std::for_each(thrust::make_counting_iterator(static_cast(1)), + thrust::make_counting_iterator(per_file_metadata.size()), + [&](auto const pfm_idx) { + auto const& dst_root = get_schema(0, pfm_idx); + // Ensure that each top level column exists in the destination schema + // tree. An out_of_range error is thrown otherwise. + CUDF_EXPECTS( + find_schema_child(dst_root, col.name, pfm_idx) != -1, + "Encountered mismatching schema tree depths across data sources", + std::out_of_range); + map_column(&col, + top_level_col_schema_idx, + find_schema_child(dst_root, col.name, pfm_idx), + pfm_idx); + }); + } + } } } diff --git a/cpp/src/io/parquet/reader_impl_helpers.hpp b/cpp/src/io/parquet/reader_impl_helpers.hpp index 309132a5347..6487c92f48f 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.hpp +++ b/cpp/src/io/parquet/reader_impl_helpers.hpp @@ -128,6 +128,7 @@ struct arrow_schema_data_types { class aggregate_reader_metadata { std::vector per_file_metadata; std::vector> keyval_maps; + std::vector> schema_idx_maps; int64_t num_rows; size_type num_row_groups; @@ -144,6 +145,19 @@ class aggregate_reader_metadata { [[nodiscard]] std::vector> collect_keyval_metadata() const; + /** + * @brief Initialize the vector of schema_idx maps. + * + * Initializes a vector of hash maps that will store the one-to-one mappings between the + * schema_idx'es of the selected columns in the zeroth per_file_metadata (source) and each + * kth per_file_metadata (destination) for k in range: [1, per_file_metadata.size()-1]. + * + * @param has_cols_from_mismatched_srcs True if we are reading select cols from mismatched + * parquet schemas. 
+ */ + [[nodiscard]] std::vector> init_schema_idx_maps( + bool has_cols_from_mismatched_srcs) const; + /** * @brief Decodes and constructs the arrow schema from the ARROW_SCHEMA_KEY IPC message * in key value metadata section of Parquet file footer @@ -183,10 +197,28 @@ class aggregate_reader_metadata { public: aggregate_reader_metadata(host_span const> sources, - bool use_arrow_schema); + bool use_arrow_schema, + bool has_cols_from_mismatched_srcs); [[nodiscard]] RowGroup const& get_row_group(size_type row_group_index, size_type src_idx) const; + /** + * @brief Extracts the schema_idx'th column chunk metadata from row_group_index'th row group of + * the src_idx'th file. + * + * Extracts the schema_idx'th column chunk metadata from the specified row group index of the + * src_idx'th file. Note that the schema_idx is actually the index in the zeroth file which may + * not be the same in all files, in which case, the schema_idx is mapped to the corresponding + * index in the src_idx'th file and returned. A range_error error is thrown if schema_idx + * doesn't exist or isn't mapped to the src_idx file. + * + * @param row_group_index The row group index in the file to extract column chunk metadata from. + * @param src_idx The per_file_metadata index to extract extract column chunk metadata from. + * @param schema_idx The schema_idx of the column chunk to be extracted + * + * @return The requested column chunk metadata or a range_error error if the schema index isn't + * valid. + */ [[nodiscard]] ColumnChunkMetaData const& get_column_metadata(size_type row_group_index, size_type src_idx, int schema_idx) const; @@ -202,16 +234,52 @@ class aggregate_reader_metadata { [[nodiscard]] auto get_num_row_groups() const { return num_row_groups; } - [[nodiscard]] auto const& get_schema(int schema_idx) const + /** + * @brief Checks if a schema index from 0th source is mapped to the specified file index + * + * @param schema_idx The index of the SchemaElement in the zeroth file. 
+ * @param pfm_idx The index of the file (per_file_metadata) to check mappings for. + * + * @return True if schema index is mapped + */ + [[nodiscard]] bool is_schema_index_mapped(int schema_idx, int pfm_idx) const; + + /** + * @brief Maps schema index from 0th source file to the specified file index + * + * @param schema_idx The index of the SchemaElement in the zeroth file. + * @param pfm_idx The index of the file (per_file_metadata) to map the schema_idx to. + * + * @return Mapped schema index + */ + [[nodiscard]] int map_schema_index(int schema_idx, int pfm_idx) const; + + /** + * @brief Extracts the schema_idx'th SchemaElement from the pfm_idx'th file + * + * @param schema_idx The index of the SchemaElement to be extracted. + * @param pfm_idx The index of the per_file_metadata to extract SchemaElement from, default = 0 if + * not specified. + * + * @return The requested SchemaElement or an error if invalid schema_idx or pfm_idx. + */ + [[nodiscard]] auto const& get_schema(int schema_idx, int pfm_idx = 0) const { - return per_file_metadata[0].schema[schema_idx]; + CUDF_EXPECTS( + schema_idx >= 0 and pfm_idx >= 0 and pfm_idx < static_cast(per_file_metadata.size()), + "Parquet reader encountered an invalid schema_idx or pfm_idx", + std::out_of_range); + return per_file_metadata[pfm_idx].schema[schema_idx]; } [[nodiscard]] auto const& get_key_value_metadata() const& { return keyval_maps; } [[nodiscard]] auto&& get_key_value_metadata() && { return std::move(keyval_maps); } /** - * @brief Gets the concrete nesting depth of output cudf columns + * @brief Gets the concrete nesting depth of output cudf columns. + * + * Gets the nesting depth of the output cudf column for the given schema. + * The nesting depth must be equal for the given schema_index across all sources. 
* * @param schema_index Schema index of the input column * @@ -314,7 +382,7 @@ class aggregate_reader_metadata { std::optional> const& filter_columns_names, bool include_index, bool strings_to_categorical, - type_id timestamp_type_id) const; + type_id timestamp_type_id); }; /** diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index e006cc7d714..8e67f233213 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include @@ -77,23 +79,30 @@ void print_pages(cudf::detail::hostdevice_vector& pages, rmm::cuda_str * is indicated when adding new values. This function generates the mappings of * the R/D levels to those start/end bounds * - * @param remap Maps column schema index to the R/D remapping vectors for that column - * @param src_col_schema The column schema to generate the new mapping for + * @param remap Maps column schema index to the R/D remapping vectors for that column for a + * particular input source file + * @param src_col_schema The source column schema to generate the new mapping for + * @param mapped_src_col_schema Mapped column schema for src_file_idx'th file + * @param src_file_idx The input source file index for the column schema * @param md File metadata information */ -void generate_depth_remappings(std::map, std::vector>>& remap, - int src_col_schema, - aggregate_reader_metadata const& md) +void generate_depth_remappings( + std::map, std::pair, std::vector>>& remap, + int const src_col_schema, + int const mapped_src_col_schema, + int const src_file_idx, + aggregate_reader_metadata const& md) { // already generated for this level - if (remap.find(src_col_schema) != remap.end()) { return; } - auto schema = md.get_schema(src_col_schema); - int max_depth = md.get_output_nesting_depth(src_col_schema); + if (remap.find({src_col_schema, src_file_idx}) != remap.end()) { return; } + 
auto const& schema = md.get_schema(mapped_src_col_schema, src_file_idx); + auto const max_depth = md.get_output_nesting_depth(src_col_schema); - CUDF_EXPECTS(remap.find(src_col_schema) == remap.end(), + CUDF_EXPECTS(remap.find({src_col_schema, src_file_idx}) == remap.end(), "Attempting to remap a schema more than once"); auto inserted = - remap.insert(std::pair, std::vector>>{src_col_schema, {}}); + remap.insert(std::pair, std::pair, std::vector>>{ + {src_col_schema, src_file_idx}, {}}); auto& depth_remap = inserted.first->second; std::vector& rep_depth_remap = (depth_remap.first); @@ -134,15 +143,15 @@ void generate_depth_remappings(std::map, std::ve auto find_shallowest = [&](int r) { int shallowest = -1; int cur_depth = max_depth - 1; - int schema_idx = src_col_schema; + int schema_idx = mapped_src_col_schema; while (schema_idx > 0) { - auto cur_schema = md.get_schema(schema_idx); + auto& cur_schema = md.get_schema(schema_idx, src_file_idx); if (cur_schema.max_repetition_level == r) { // if this is a repeated field, map it one level deeper shallowest = cur_schema.is_stub() ? cur_depth + 1 : cur_depth; } // if it's one-level encoding list - else if (cur_schema.is_one_level_list(md.get_schema(cur_schema.parent_idx))) { + else if (cur_schema.is_one_level_list(md.get_schema(cur_schema.parent_idx, src_file_idx))) { shallowest = cur_depth - 1; } if (!cur_schema.is_stub()) { cur_depth--; } @@ -157,10 +166,10 @@ void generate_depth_remappings(std::map, std::ve for (int s_idx = schema.max_definition_level; s_idx >= 0; s_idx--) { auto find_deepest = [&](int d) { SchemaElement prev_schema; - int schema_idx = src_col_schema; + int schema_idx = mapped_src_col_schema; int r1 = 0; while (schema_idx > 0) { - SchemaElement cur_schema = md.get_schema(schema_idx); + SchemaElement cur_schema = md.get_schema(schema_idx, src_file_idx); if (cur_schema.max_definition_level == d) { // if this is a repeated field, map it one level deeper r1 = cur_schema.is_stub() ? 
prev_schema.max_repetition_level @@ -173,10 +182,10 @@ void generate_depth_remappings(std::map, std::ve // we now know R1 from above. return the deepest nesting level that has the // same repetition level - schema_idx = src_col_schema; + schema_idx = mapped_src_col_schema; int depth = max_depth - 1; while (schema_idx > 0) { - SchemaElement cur_schema = md.get_schema(schema_idx); + SchemaElement cur_schema = md.get_schema(schema_idx, src_file_idx); if (cur_schema.max_repetition_level == r1) { // if this is a repeated field, map it one level deeper depth = cur_schema.is_stub() ? depth + 1 : depth; @@ -391,7 +400,7 @@ void fill_in_page_info(host_span chunks, } auto d_page_indexes = cudf::detail::make_device_uvector_async( - page_indexes, stream, rmm::mr::get_current_device_resource()); + page_indexes, stream, cudf::get_current_device_resource_ref()); auto iter = thrust::make_counting_iterator(0); thrust::for_each( @@ -753,7 +762,7 @@ void reader::impl::build_string_dict_indices() // allocate and distribute pointers pass.str_dict_index = cudf::detail::make_zeroed_device_uvector_async( - total_str_dict_indexes, _stream, rmm::mr::get_current_device_resource()); + total_str_dict_indexes, _stream, cudf::get_current_device_resource_ref()); auto iter = thrust::make_counting_iterator(0); thrust::for_each( @@ -781,9 +790,20 @@ void reader::impl::allocate_nesting_info() std::vector per_page_nesting_info_size(num_columns); auto iter = thrust::make_counting_iterator(size_type{0}); std::transform(iter, iter + num_columns, per_page_nesting_info_size.begin(), [&](size_type i) { + // Schema index of the current input column auto const schema_idx = _input_columns[i].schema_idx; - auto const& schema = _metadata->get_schema(schema_idx); - return max(schema.max_definition_level + 1, _metadata->get_output_nesting_depth(schema_idx)); + // Get the max_definition_level of this column across all sources. 
+ auto max_definition_level = _metadata->get_schema(schema_idx).max_definition_level + 1; + std::for_each(thrust::make_counting_iterator(static_cast(1)), + thrust::make_counting_iterator(_sources.size()), + [&](auto const src_file_idx) { + auto const& schema = _metadata->get_schema( + _metadata->map_schema_index(schema_idx, src_file_idx), src_file_idx); + max_definition_level = + std::max(max_definition_level, schema.max_definition_level + 1); + }); + + return std::max(max_definition_level, _metadata->get_output_nesting_depth(schema_idx)); }); // compute total # of page_nesting infos needed and allocate space. doing this in one @@ -811,6 +831,8 @@ void reader::impl::allocate_nesting_info() page_nesting_decode_info.device_ptr() + src_info_index; pages[target_page_index + p_idx].nesting_info_size = per_page_nesting_info_size[idx]; + // Set the number of output nesting levels from the zeroth source as nesting must be + // identical across sources. pages[target_page_index + p_idx].num_output_nesting_levels = _metadata->get_output_nesting_depth(src_col_schema); @@ -819,25 +841,36 @@ void reader::impl::allocate_nesting_info() target_page_index += subpass.column_page_count[idx]; } + // Reset the target_page_index + target_page_index = 0; + // fill in int nesting_info_index = 0; - std::map, std::vector>> depth_remapping; for (size_t idx = 0; idx < _input_columns.size(); idx++) { auto const src_col_schema = _input_columns[idx].schema_idx; - // schema of the input column - auto& schema = _metadata->get_schema(src_col_schema); // real depth of the output cudf column hierarchy (1 == no nesting, 2 == 1 level, etc) + // nesting depth must be same across sources so getting it from the zeroth source is ok int const max_output_depth = _metadata->get_output_nesting_depth(src_col_schema); + // Map to store depths if this column has lists + std::map, std::pair, std::vector>> depth_remapping; // if this column has lists, generate depth remapping - std::map, std::vector>> 
depth_remapping; - if (schema.max_repetition_level > 0) { - generate_depth_remappings(depth_remapping, src_col_schema, *_metadata); - } + std::for_each( + thrust::make_counting_iterator(static_cast(0)), + thrust::make_counting_iterator(_sources.size()), + [&](auto const src_file_idx) { + auto const mapped_schema_idx = _metadata->map_schema_index(src_col_schema, src_file_idx); + if (_metadata->get_schema(mapped_schema_idx, src_file_idx).max_repetition_level > 0) { + generate_depth_remappings( + depth_remapping, src_col_schema, mapped_schema_idx, src_file_idx, *_metadata); + } + }); // fill in host-side nesting info - int schema_idx = src_col_schema; + int schema_idx = src_col_schema; + // This is okay as we only use this to check stubness of cur_schema and + // to get its parent's indices, both of which are one to one mapped. auto cur_schema = _metadata->get_schema(schema_idx); int cur_depth = max_output_depth - 1; while (schema_idx > 0) { @@ -846,6 +879,9 @@ void reader::impl::allocate_nesting_info() if (!cur_schema.is_stub()) { // initialize each page within the chunk for (size_t p_idx = 0; p_idx < subpass.column_page_count[idx]; p_idx++) { + // Source file index for the current page. 
+ auto const src_file_idx = + pass.chunks[pages[target_page_index + p_idx].chunk_idx].src_file_idx; PageNestingInfo* pni = &page_nesting_info[nesting_info_index + (p_idx * per_page_nesting_info_size[idx])]; @@ -853,9 +889,11 @@ void reader::impl::allocate_nesting_info() &page_nesting_decode_info[nesting_info_index + (p_idx * per_page_nesting_info_size[idx])]; + auto const mapped_src_col_schema = + _metadata->map_schema_index(src_col_schema, src_file_idx); // if we have lists, set our start and end depth remappings - if (schema.max_repetition_level > 0) { - auto remap = depth_remapping.find(src_col_schema); + if (_metadata->get_schema(mapped_src_col_schema, src_file_idx).max_repetition_level > 0) { + auto remap = depth_remapping.find({src_col_schema, src_file_idx}); CUDF_EXPECTS(remap != depth_remapping.end(), "Could not find depth remapping for schema"); std::vector const& rep_depth_remap = (remap->second.first); @@ -869,11 +907,15 @@ void reader::impl::allocate_nesting_info() } } + // Get the schema from the current input source. 
+ auto& actual_cur_schema = _metadata->get_schema( + _metadata->map_schema_index(schema_idx, src_file_idx), src_file_idx); + // values indexed by output column index - nesting_info[cur_depth].max_def_level = cur_schema.max_definition_level; + nesting_info[cur_depth].max_def_level = actual_cur_schema.max_definition_level; pni[cur_depth].size = 0; pni[cur_depth].type = - to_type_id(cur_schema, _strings_to_categorical, _options.timestamp_type.id()); + to_type_id(actual_cur_schema, _strings_to_categorical, _options.timestamp_type.id()); pni[cur_depth].nullable = cur_schema.repetition_type == OPTIONAL; } @@ -886,6 +928,8 @@ void reader::impl::allocate_nesting_info() cur_schema = _metadata->get_schema(schema_idx); } + // Offset the page and nesting info indices + target_page_index += subpass.column_page_count[idx]; nesting_info_index += (per_page_nesting_info_size[idx] * subpass.column_page_count[idx]); } @@ -906,7 +950,7 @@ void reader::impl::allocate_level_decode_space() size_t const per_page_decode_buf_size = LEVEL_DECODE_BUF_SIZE * 2 * pass.level_type_size; auto const decode_buf_size = per_page_decode_buf_size * pages.size(); subpass.level_decode_data = - rmm::device_buffer(decode_buf_size, _stream, rmm::mr::get_current_device_resource()); + rmm::device_buffer(decode_buf_size, _stream, cudf::get_current_device_resource_ref()); // distribute the buffers uint8_t* buf = static_cast(subpass.level_decode_data.data()); @@ -1494,6 +1538,11 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num // buffers if they are not part of a list hierarchy. mark down // if we have any list columns that need further processing. 
bool has_lists = false; + // Casting to std::byte since data buffer pointer is void * + std::vector> memset_bufs; + // Validity Buffer is a uint32_t pointer + std::vector> nullmask_bufs; + for (size_t idx = 0; idx < _input_columns.size(); idx++) { auto const& input_col = _input_columns[idx]; size_t const max_depth = input_col.nesting_depth(); @@ -1514,13 +1563,19 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num // we're going to start null mask as all valid and then turn bits off if necessary out_buf.create_with_mask( out_buf.type.id() == type_id::LIST && l_idx < max_depth ? num_rows + 1 : num_rows, - cudf::mask_state::ALL_VALID, + cudf::mask_state::UNINITIALIZED, + false, _stream, _mr); + memset_bufs.push_back(cudf::device_span(static_cast(out_buf.data()), + out_buf.data_size())); + nullmask_bufs.push_back(cudf::device_span( + out_buf.null_mask(), + cudf::util::round_up_safe(out_buf.null_mask_size(), sizeof(cudf::bitmask_type)) / + sizeof(cudf::bitmask_type))); } } } - // compute output column sizes by examining the pages of the -input- columns if (has_lists) { auto h_cols_info = @@ -1539,7 +1594,7 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num .nesting_depth; auto const d_cols_info = cudf::detail::make_device_uvector_async( - h_cols_info, _stream, rmm::mr::get_current_device_resource()); + h_cols_info, _stream, cudf::get_current_device_resource_ref()); auto const num_keys = _input_columns.size() * max_depth * subpass.pages.size(); // size iterator. 
indexes pages by sorted order @@ -1593,11 +1648,22 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num // allocate // we're going to start null mask as all valid and then turn bits off if necessary - out_buf.create_with_mask(size, cudf::mask_state::ALL_VALID, _stream, _mr); + out_buf.create_with_mask(size, cudf::mask_state::UNINITIALIZED, false, _stream, _mr); + memset_bufs.push_back(cudf::device_span( + static_cast(out_buf.data()), out_buf.data_size())); + nullmask_bufs.push_back(cudf::device_span( + out_buf.null_mask(), + cudf::util::round_up_safe(out_buf.null_mask_size(), sizeof(cudf::bitmask_type)) / + sizeof(cudf::bitmask_type))); } } } } + + cudf::io::detail::batched_memset(memset_bufs, static_cast(0), _stream); + // Need to set null mask bufs to all high bits + cudf::io::detail::batched_memset( + nullmask_bufs, std::numeric_limits::max(), _stream); } std::vector reader::impl::calculate_page_string_offsets() diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index 2df71b77301..ec05f35d405 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -22,6 +22,7 @@ #include "arrow_schema_writer.hpp" #include "compact_protocol_reader.hpp" #include "compact_protocol_writer.hpp" +#include "interop/decimal_conversion_utilities.cuh" #include "io/comp/nvcomp_adapter.hpp" #include "io/parquet/parquet.hpp" #include "io/parquet/parquet_gpu.hpp" @@ -42,6 +43,7 @@ #include #include #include +#include #include #include @@ -184,7 +186,7 @@ struct aggregate_writer_metadata { std::vector> column_indexes; }; std::vector files; - thrust::optional> column_orders = thrust::nullopt; + cuda::std::optional> column_orders = cuda::std::nullopt; }; namespace { @@ -470,7 +472,7 @@ struct leaf_schema_fn { std::enable_if_t, void> operator()() { col_schema.type = (timestamp_is_int96) ? 
Type::INT96 : Type::INT64; - col_schema.converted_type = thrust::nullopt; + col_schema.converted_type = cuda::std::nullopt; col_schema.stats_dtype = statistics_dtype::dtype_timestamp64; if (timestamp_is_int96) { col_schema.ts_scale = -1000; // negative value indicates division by absolute value @@ -748,7 +750,7 @@ std::vector construct_parquet_schema_tree( col_schema.type = Type::BYTE_ARRAY; } - col_schema.converted_type = thrust::nullopt; + col_schema.converted_type = cuda::std::nullopt; col_schema.stats_dtype = statistics_dtype::dtype_byte_array; col_schema.repetition_type = col_nullable ? OPTIONAL : REQUIRED; col_schema.name = (schema[parent_idx].name == "list") ? "element" : col_meta.get_name(); @@ -1047,7 +1049,7 @@ parquet_column_view::parquet_column_view(schema_tree_node const& schema_node, // TODO(cp): Explore doing this for all columns in a single go outside this ctor. Maybe using // hostdevice_vector. Currently this involves a cudaMemcpyAsync for each column. _d_nullability = cudf::detail::make_device_uvector_async( - _nullability, stream, rmm::mr::get_current_device_resource()); + _nullability, stream, cudf::get_current_device_resource_ref()); _is_list = (_max_rep_level > 0); @@ -1119,7 +1121,7 @@ void init_row_group_fragments(cudf::detail::hostdevice_2dvector& f rmm::cuda_stream_view stream) { auto d_partitions = cudf::detail::make_device_uvector_async( - partitions, stream, rmm::mr::get_current_device_resource()); + partitions, stream, cudf::get_current_device_resource_ref()); InitRowGroupFragments(frag, col_desc, d_partitions, part_frag_offset, fragment_size, stream); frag.device_to_host_sync(stream); } @@ -1139,7 +1141,7 @@ void calculate_page_fragments(device_span frag, rmm::cuda_stream_view stream) { auto d_frag_sz = cudf::detail::make_device_uvector_async( - frag_sizes, stream, rmm::mr::get_current_device_resource()); + frag_sizes, stream, cudf::get_current_device_resource_ref()); CalculatePageFragments(frag, d_frag_sz, stream); } @@ -1284,10 
+1286,10 @@ build_chunk_dictionaries(hostdevice_2dvector& chunks, return std::pair(std::move(dict_data), std::move(dict_index)); } - // Allocate slots for each chunk - std::vector> hash_maps_storage; - hash_maps_storage.reserve(h_chunks.size()); - for (auto& chunk : h_chunks) { + // Variable to keep track of the current total map storage size + size_t total_map_storage_size = 0; + // Populate dict offsets and sizes for each chunk that need to build a dictionary. + std::for_each(h_chunks.begin(), h_chunks.end(), [&](auto& chunk) { auto const& chunk_col_desc = col_desc[chunk.col_desc_id]; auto const is_requested_non_dict = chunk_col_desc.requested_encoding != column_encoding::USE_DEFAULT && @@ -1299,19 +1301,31 @@ build_chunk_dictionaries(hostdevice_2dvector& chunks, chunk.use_dictionary = false; } else { chunk.use_dictionary = true; - // cuCollections suggests using a hash map of size N * (1/0.7) = num_values * 1.43 - // https://github.com/NVIDIA/cuCollections/blob/3a49fc71/include/cuco/static_map.cuh#L190-L193 - auto& inserted_map = hash_maps_storage.emplace_back(chunk.num_values * 1.43, stream); - chunk.dict_map_slots = inserted_map.data(); - chunk.dict_map_size = inserted_map.size(); + chunk.dict_map_size = + static_cast(cuco::make_window_extent( + static_cast(occupancy_factor * chunk.num_values))); + chunk.dict_map_offset = total_map_storage_size; + total_map_storage_size += chunk.dict_map_size; } - } + }); - chunks.host_to_device_async(stream); + // No chunk needs to create a dictionary, exit early + if (total_map_storage_size == 0) { return {std::move(dict_data), std::move(dict_index)}; } - initialize_chunk_hash_maps(chunks.device_view().flat_view(), stream); - populate_chunk_hash_maps(frags, stream); + // Create a single bulk storage used by all sub-dictionaries + auto map_storage = storage_type{ + total_map_storage_size, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}}; + // Create a span of non-const map_storage as map_storage_ref 
takes in a non-const pointer. + device_span const map_storage_data{map_storage.data(), total_map_storage_size}; + // Synchronize + chunks.host_to_device_async(stream); + // Initialize storage with the given sentinel + map_storage.initialize_async({KEY_SENTINEL, VALUE_SENTINEL}, {stream.value()}); + // Populate the hash map for each chunk + populate_chunk_hash_maps(map_storage_data, frags, stream); + // Synchronize again chunks.device_to_host_sync(stream); // Make decision about which chunks have dictionary @@ -1371,8 +1385,8 @@ build_chunk_dictionaries(hostdevice_2dvector& chunks, chunk.dict_index = inserted_dict_index.data(); } chunks.host_to_device_async(stream); - collect_map_entries(chunks.device_view().flat_view(), stream); - get_dictionary_indices(frags, stream); + collect_map_entries(map_storage_data, chunks.device_view().flat_view(), stream); + get_dictionary_indices(map_storage_data, frags, stream); return std::pair(std::move(dict_data), std::move(dict_index)); } @@ -1601,50 +1615,12 @@ size_t column_index_buffer_size(EncColumnChunk* ck, return ck->ck_stat_size * num_pages + column_index_truncate_length + padding + size_struct_size; } -/** - * @brief Convert decimal32 and decimal64 data to decimal128 and return the device vector - * - * @tparam DecimalType to convert from - * - * @param column A view of the input columns - * @param stream CUDA stream used for device memory operations and kernel launches - * - * @return A device vector containing the converted decimal128 data - */ -template -rmm::device_uvector<__int128_t> convert_data_to_decimal128(column_view const& column, - rmm::cuda_stream_view stream) -{ - size_type constexpr BIT_WIDTH_RATIO = sizeof(__int128_t) / sizeof(DecimalType); - - rmm::device_uvector<__int128_t> d128_buffer(column.size(), stream); - - thrust::for_each(rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(column.size()), - [in = column.begin(), - out = 
reinterpret_cast(d128_buffer.data()), - BIT_WIDTH_RATIO] __device__(auto in_idx) { - auto const out_idx = in_idx * BIT_WIDTH_RATIO; - // The lowest order bits are the value, the remainder - // simply matches the sign bit to satisfy the two's - // complement integer representation of negative numbers. - out[out_idx] = in[in_idx]; -#pragma unroll BIT_WIDTH_RATIO - 1 - for (auto i = 1; i < BIT_WIDTH_RATIO; ++i) { - out[out_idx + i] = in[in_idx] < 0 ? -1 : 0; - } - }); - - return d128_buffer; -} - /** * @brief Function to convert decimal32 and decimal64 columns to decimal128 data, * update the input table metadata, and return a new vector of column views. * * @param[in,out] table_meta The table metadata - * @param[in,out] d128_vectors Vector containing the computed decimal128 data buffers. + * @param[in,out] d128_buffers Buffers containing the converted decimal128 data. * @param input The input table * @param stream CUDA stream used for device memory operations and kernel launches * @@ -1652,7 +1628,7 @@ rmm::device_uvector<__int128_t> convert_data_to_decimal128(column_view const& co */ std::vector convert_decimal_columns_and_metadata( table_input_metadata& table_meta, - std::vector>& d128_vectors, + std::vector>& d128_buffers, table_view const& table, rmm::cuda_stream_view stream) { @@ -1673,28 +1649,30 @@ std::vector convert_decimal_columns_and_metadata( switch (column.type().id()) { case type_id::DECIMAL32: // Convert data to decimal128 type - d128_vectors.emplace_back(convert_data_to_decimal128(column, stream)); + d128_buffers.emplace_back(cudf::detail::convert_decimals_to_decimal128( + column, stream, cudf::get_current_device_resource_ref())); // Update metadata metadata.set_decimal_precision(MAX_DECIMAL32_PRECISION); metadata.set_type_length(size_of(data_type{type_id::DECIMAL128, column.type().scale()})); // Create a new column view from the d128 data vector return {data_type{type_id::DECIMAL128, column.type().scale()}, column.size(), - 
d128_vectors.back().data(), + d128_buffers.back()->data(), column.null_mask(), column.null_count(), column.offset(), converted_children}; case type_id::DECIMAL64: // Convert data to decimal128 type - d128_vectors.emplace_back(convert_data_to_decimal128(column, stream)); + d128_buffers.emplace_back(cudf::detail::convert_decimals_to_decimal128( + column, stream, cudf::get_current_device_resource_ref())); // Update metadata metadata.set_decimal_precision(MAX_DECIMAL64_PRECISION); metadata.set_type_length(size_of(data_type{type_id::DECIMAL128, column.type().scale()})); // Create a new column view from the d128 data vector return {data_type{type_id::DECIMAL128, column.type().scale()}, column.size(), - d128_vectors.back().data(), + d128_buffers.back()->data(), column.null_mask(), column.null_count(), column.offset(), @@ -1722,6 +1700,9 @@ std::vector convert_decimal_columns_and_metadata( std::back_inserter(converted_column_views), [&](auto elem) { return convert_column(thrust::get<0>(elem), thrust::get<1>(elem)); }); + // Synchronize stream here to ensure all decimal128 buffers are ready. + stream.synchronize(); + return converted_column_views; } @@ -1780,13 +1761,13 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, rmm::cuda_stream_view stream) { // Container to store decimal128 converted data if needed - std::vector> d128_vectors; + std::vector> d128_buffers; // Convert decimal32/decimal64 data to decimal128 if writing arrow schema // and initialize LinkedColVector auto vec = table_to_linked_columns( (write_arrow_schema) - ? table_view({convert_decimal_columns_and_metadata(table_meta, d128_vectors, input, stream)}) + ? 
table_view({convert_decimal_columns_and_metadata(table_meta, d128_buffers, input, stream)}) : input); auto schema_tree = construct_parquet_schema_tree( @@ -1838,8 +1819,14 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, auto const table_size = std::reduce(column_sizes.begin(), column_sizes.end()); auto const avg_row_len = util::div_rounding_up_safe(table_size, input.num_rows()); if (avg_row_len > 0) { - auto const rg_frag_size = util::div_rounding_up_safe(max_row_group_size, avg_row_len); - max_page_fragment_size = std::min(rg_frag_size, max_page_fragment_size); + // Ensure `rg_frag_size` is not bigger than size_type::max for default max_row_group_size + // value (=uint64::max) to avoid a sign overflow when comparing + auto const rg_frag_size = + std::min(std::numeric_limits::max(), + util::div_rounding_up_safe(max_row_group_size, avg_row_len)); + // Safe comparison as rg_frag_size fits in size_type + max_page_fragment_size = + std::min(static_cast(rg_frag_size), max_page_fragment_size); } // dividing page size by average row length will tend to overshoot the desired @@ -1889,7 +1876,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, part_frag_offset.push_back(part_frag_offset.back() + num_frag_in_part.back()); auto d_part_frag_offset = cudf::detail::make_device_uvector_async( - part_frag_offset, stream, rmm::mr::get_current_device_resource()); + part_frag_offset, stream, cudf::get_current_device_resource_ref()); cudf::detail::hostdevice_2dvector row_group_fragments( num_columns, num_fragments, stream); @@ -2262,20 +2249,20 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, bool need_sync{false}; // need to fetch the histogram data from the device - std::vector h_def_histogram; - std::vector h_rep_histogram; - if (stats_granularity == statistics_freq::STATISTICS_COLUMN) { - if (def_histogram_bfr_size > 0) { - h_def_histogram = - std::move(cudf::detail::make_std_vector_async(def_level_histogram, 
stream)); + auto const h_def_histogram = [&]() { + if (stats_granularity == statistics_freq::STATISTICS_COLUMN && def_histogram_bfr_size > 0) { need_sync = true; + return cudf::detail::make_host_vector_async(def_level_histogram, stream); } - if (rep_histogram_bfr_size > 0) { - h_rep_histogram = - std::move(cudf::detail::make_std_vector_async(rep_level_histogram, stream)); + return cudf::detail::make_host_vector(0, stream); + }(); + auto const h_rep_histogram = [&]() { + if (stats_granularity == statistics_freq::STATISTICS_COLUMN && rep_histogram_bfr_size > 0) { need_sync = true; + return cudf::detail::make_host_vector_async(rep_level_histogram, stream); } - } + return cudf::detail::make_host_vector(0, stream); + }(); for (int r = 0; r < num_rowgroups; r++) { int p = rg_to_part[r]; @@ -2297,7 +2284,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta, update_chunk_encoding_stats(column_chunk_meta, ck, write_v2_headers); if (ck.ck_stat_size != 0) { - std::vector const stats_blob = cudf::detail::make_std_vector_sync( + auto const stats_blob = cudf::detail::make_host_vector_sync( device_span(dev_bfr, ck.ck_stat_size), stream); CompactProtocolReader cp(stats_blob.data(), stats_blob.size()); cp.read(&column_chunk_meta.statistics); @@ -2808,7 +2795,7 @@ std::unique_ptr> writer::merge_row_group_metadata( // See https://github.com/rapidsai/cudf/pull/14264#issuecomment-1778311615 for (auto& se : md.schema) { if (se.logical_type.has_value() && se.logical_type.value().type == LogicalType::UNKNOWN) { - se.logical_type = thrust::nullopt; + se.logical_type = cuda::std::nullopt; } } diff --git a/cpp/src/io/parquet/writer_impl_helpers.cpp b/cpp/src/io/parquet/writer_impl_helpers.cpp index e2f09f872d3..396d44c0763 100644 --- a/cpp/src/io/parquet/writer_impl_helpers.cpp +++ b/cpp/src/io/parquet/writer_impl_helpers.cpp @@ -62,7 +62,7 @@ uint32_t page_alignment(Compression codec) return 1u; } - return 1u << 
nvcomp::compress_input_alignment_bits(to_nvcomp_compression_type(codec)); + return nvcomp::required_alignment(to_nvcomp_compression_type(codec)); } size_t max_compression_output_size(Compression codec, uint32_t compression_blocksize) diff --git a/cpp/src/io/text/byte_range_info.cpp b/cpp/src/io/text/byte_range_info.cpp index 6a7836ed4e1..fe811739b97 100644 --- a/cpp/src/io/text/byte_range_info.cpp +++ b/cpp/src/io/text/byte_range_info.cpp @@ -16,6 +16,7 @@ #include #include +#include #include @@ -23,6 +24,12 @@ namespace cudf { namespace io { namespace text { +byte_range_info::byte_range_info(int64_t offset, int64_t size) : _offset(offset), _size(size) +{ + CUDF_EXPECTS(offset >= 0, "offset must be non-negative"); + CUDF_EXPECTS(size >= 0, "size must be non-negative"); +} + byte_range_info create_byte_range_info_max() { return {0, std::numeric_limits::max()}; } std::vector create_byte_range_infos_consecutive(int64_t total_bytes, diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu index be2e2b9a79c..028f922bec3 100644 --- a/cpp/src/io/text/multibyte_split.cu +++ b/cpp/src/io/text/multibyte_split.cu @@ -33,13 +33,12 @@ #include #include #include +#include #include #include #include #include -#include -#include #include #include @@ -310,7 +309,7 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source { CUDF_FUNC_RANGE(); - if (byte_range.empty()) { return make_empty_column(type_id::STRING); } + if (byte_range.is_empty()) { return make_empty_column(type_id::STRING); } auto device_delim = cudf::string_scalar(delimiter, true, stream, mr); @@ -345,9 +344,9 @@ std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source auto const concurrency = 2; auto num_tile_states = std::max(32, TILES_PER_CHUNK * concurrency + 32); auto tile_multistates = - scan_tile_state(num_tile_states, stream, rmm::mr::get_current_device_resource()); + scan_tile_state(num_tile_states, stream, cudf::get_current_device_resource_ref()); auto 
tile_offsets = scan_tile_state( - num_tile_states, stream, rmm::mr::get_current_device_resource()); + num_tile_states, stream, cudf::get_current_device_resource_ref()); multibyte_split_init_kernel<< multibyte_split(cudf::io::text::data_chunk_source } // namespace detail -// deprecated in 24.08 -std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source const& source, - std::string const& delimiter, - std::optional byte_range, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - return multibyte_split(source, - delimiter, - parse_options{byte_range.value_or(create_byte_range_info_max())}, - stream, - mr); -} - std::unique_ptr multibyte_split(cudf::io::text::data_chunk_source const& source, std::string const& delimiter, parse_options options, diff --git a/cpp/src/io/utilities/arrow_io_source.cpp b/cpp/src/io/utilities/arrow_io_source.cpp deleted file mode 100644 index 157240b8b08..00000000000 --- a/cpp/src/io/utilities/arrow_io_source.cpp +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include -#include - -#include -#include -#include - -namespace cudf::io { - -/** - * @brief Implementation for an owning buffer where `arrow::Buffer` holds the data. 
- */ -class arrow_io_buffer : public datasource::buffer { - std::shared_ptr arrow_buffer; - - public: - explicit arrow_io_buffer(std::shared_ptr arrow_buffer) - : arrow_buffer(std::move(arrow_buffer)) - { - } - [[nodiscard]] size_t size() const override { return arrow_buffer->size(); } - [[nodiscard]] uint8_t const* data() const override { return arrow_buffer->data(); } -}; - -arrow_io_source::arrow_io_source(std::string const& arrow_uri) -{ - std::string const uri_start_delimiter = "//"; - std::string const uri_end_delimiter = "?"; - - auto const result = arrow::fs::FileSystemFromUri(arrow_uri); - CUDF_EXPECTS(result.ok(), "Failed to generate Arrow Filesystem instance from URI."); - filesystem = result.ValueOrDie(); - - // Parse the path from the URI - auto const start = [&]() { - auto const delim_start = arrow_uri.find(uri_start_delimiter); - return delim_start == std::string::npos ? 0 : delim_start + uri_start_delimiter.size(); - }(); - auto const end = arrow_uri.find(uri_end_delimiter) - start; - auto const path = arrow_uri.substr(start, end); - - auto const in_stream = filesystem->OpenInputFile(path); - CUDF_EXPECTS(in_stream.ok(), "Failed to open Arrow RandomAccessFile"); - arrow_file = in_stream.ValueOrDie(); -} - -std::unique_ptr arrow_io_source::host_read(size_t offset, size_t size) -{ - auto const result = arrow_file->ReadAt(offset, size); - CUDF_EXPECTS(result.ok(), "Cannot read file data"); - return std::make_unique(result.ValueOrDie()); -} - -size_t arrow_io_source::host_read(size_t offset, size_t size, uint8_t* dst) -{ - auto const result = arrow_file->ReadAt(offset, size, dst); - CUDF_EXPECTS(result.ok(), "Cannot read file data"); - return result.ValueOrDie(); -} - -[[nodiscard]] size_t arrow_io_source::size() const -{ - auto const result = arrow_file->GetSize(); - CUDF_EXPECTS(result.ok(), "Cannot get file size"); - return result.ValueOrDie(); -} - -} // namespace cudf::io diff --git a/cpp/src/io/utilities/column_buffer.cpp 
b/cpp/src/io/utilities/column_buffer.cpp index 2f4272b0367..249dc3b5875 100644 --- a/cpp/src/io/utilities/column_buffer.cpp +++ b/cpp/src/io/utilities/column_buffer.cpp @@ -24,16 +24,14 @@ #include #include #include - -#include -#include +#include #include #include namespace cudf::io::detail { -void gather_column_buffer::allocate_strings_data(rmm::cuda_stream_view stream) +void gather_column_buffer::allocate_strings_data(bool memset_data, rmm::cuda_stream_view stream) { CUDF_EXPECTS(type.id() == type_id::STRING, "allocate_strings_data called for non-string column"); // The contents of _strings will never be directly returned to the user. @@ -44,7 +42,7 @@ void gather_column_buffer::allocate_strings_data(rmm::cuda_stream_view stream) // default rmm memory resource. _strings = std::make_unique>( cudf::detail::make_zeroed_device_uvector_async( - size, stream, rmm::mr::get_current_device_resource())); + size, stream, cudf::get_current_device_resource_ref())); } std::unique_ptr gather_column_buffer::make_string_column_impl(rmm::cuda_stream_view stream) @@ -56,11 +54,12 @@ std::unique_ptr gather_column_buffer::make_string_column_impl(rmm::cuda_ return make_strings_column(*_strings, stream, _mr); } -void cudf::io::detail::inline_column_buffer::allocate_strings_data(rmm::cuda_stream_view stream) +void cudf::io::detail::inline_column_buffer::allocate_strings_data(bool memset_data, + rmm::cuda_stream_view stream) { CUDF_EXPECTS(type.id() == type_id::STRING, "allocate_strings_data called for non-string column"); // size + 1 for final offset. _string_data will be initialized later. 
- _data = create_data(data_type{type_id::INT32}, size + 1, stream, _mr); + _data = create_data(data_type{type_to_id()}, size + 1, memset_data, stream, _mr); } void cudf::io::detail::inline_column_buffer::create_string_data(size_t num_bytes, @@ -93,6 +92,7 @@ void copy_buffer_data(string_policy const& buff, string_policy& new_buff) template void column_buffer_base::create_with_mask(size_type _size, cudf::mask_state null_mask_state, + bool memset_data, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -100,16 +100,20 @@ void column_buffer_base::create_with_mask(size_type _size, _mr = mr; switch (type.id()) { - case type_id::STRING: static_cast(this)->allocate_strings_data(stream); break; + case type_id::STRING: + static_cast(this)->allocate_strings_data(memset_data, stream); + break; // list columns store a buffer of int32's as offsets to represent // their individual rows - case type_id::LIST: _data = create_data(data_type{type_id::INT32}, size, stream, _mr); break; + case type_id::LIST: + _data = create_data(data_type{type_to_id()}, size, memset_data, stream, _mr); + break; // struct columns store no data themselves. just validity and children. 
case type_id::STRUCT: break; - default: _data = create_data(type, size, stream, _mr); break; + default: _data = create_data(type, size, memset_data, stream, _mr); break; } if (is_nullable) { _null_mask = @@ -117,12 +121,21 @@ void column_buffer_base::create_with_mask(size_type _size, } } +template +void column_buffer_base::create(size_type _size, + bool memset_data, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + create_with_mask(_size, mask_state::ALL_NULL, memset_data, stream, mr); +} + template void column_buffer_base::create(size_type _size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - create_with_mask(_size, mask_state::ALL_NULL, stream, mr); + create_with_mask(_size, mask_state::ALL_NULL, true, stream, mr); } template diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp index ed6bb8bbdca..e73b2bc88de 100644 --- a/cpp/src/io/utilities/column_buffer.hpp +++ b/cpp/src/io/utilities/column_buffer.hpp @@ -26,13 +26,12 @@ #include #include #include +#include #include #include #include #include -#include -#include #include @@ -44,6 +43,7 @@ namespace detail { * * @param type The intended data type to populate * @param size The number of elements to be represented by the mask + * @param memset_data Defines whether data should be memset to 0 * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned device_buffer * @@ -51,17 +51,25 @@ namespace detail { */ inline rmm::device_buffer create_data(data_type type, size_type size, + bool memset_data, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { std::size_t data_size = size_of(type) * size; rmm::device_buffer data(data_size, stream, mr); - CUDF_CUDA_TRY(cudaMemsetAsync(data.data(), 0, data_size, stream.value())); - + if (memset_data) { CUDF_CUDA_TRY(cudaMemsetAsync(data.data(), 0, data_size, stream.value())); } return data; } 
+inline rmm::device_buffer create_data(data_type type, + size_type size, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + return create_data(type, size, true, stream, mr); +} + using string_index_pair = thrust::pair; // forward declare friend functions @@ -113,12 +121,18 @@ class column_buffer_base { // instantiate a column of known type with a specified size. Allows deferred creation for // preprocessing steps such as in the Parquet reader + void create(size_type _size, + bool memset_data, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + void create(size_type _size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); // like create(), but also takes a `cudf::mask_state` to allow initializing the null mask as // something other than `ALL_NULL` void create_with_mask(size_type _size, cudf::mask_state null_mask_state, + bool memset_data, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); @@ -152,7 +166,7 @@ class column_buffer_base { rmm::device_buffer _data{}; rmm::device_buffer _null_mask{}; size_type _null_count{0}; - rmm::device_async_resource_ref _mr{rmm::mr::get_current_device_resource()}; + rmm::device_async_resource_ref _mr{cudf::get_current_device_resource_ref()}; public: data_type type{type_id::EMPTY}; @@ -192,7 +206,7 @@ class gather_column_buffer : public column_buffer_base { create(_size, stream, mr); } - void allocate_strings_data(rmm::cuda_stream_view stream); + void allocate_strings_data(bool memset_data, rmm::cuda_stream_view stream); [[nodiscard]] void* data_impl() { return _strings ? _strings->data() : _data.data(); } [[nodiscard]] void const* data_impl() const { return _strings ? 
_strings->data() : _data.data(); } @@ -226,7 +240,7 @@ class inline_column_buffer : public column_buffer_base { create(_size, stream, mr); } - void allocate_strings_data(rmm::cuda_stream_view stream); + void allocate_strings_data(bool memset_data, rmm::cuda_stream_view stream); void* data_impl() { return _data.data(); } [[nodiscard]] void const* data_impl() const { return _data.data(); } diff --git a/cpp/src/io/utilities/data_casting.cu b/cpp/src/io/utilities/data_casting.cu index 73362334e26..f70171eef68 100644 --- a/cpp/src/io/utilities/data_casting.cu +++ b/cpp/src/io/utilities/data_casting.cu @@ -28,11 +28,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index 91be154e09d..e4313eba454 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -297,10 +297,10 @@ class device_buffer_source final : public datasource { { auto const count = std::min(size, this->size() - offset); auto const stream = cudf::get_default_stream(); - auto h_data = cudf::detail::make_std_vector_async( + auto h_data = cudf::detail::make_host_vector_async( cudf::device_span{_d_buffer.data() + offset, count}, stream); stream.synchronize(); - return std::make_unique>>(std::move(h_data)); + return std::make_unique>>(std::move(h_data)); } [[nodiscard]] bool supports_device_read() const override { return true; } diff --git a/cpp/src/io/utilities/output_builder.cuh b/cpp/src/io/utilities/output_builder.cuh index 3bc5ccf41ef..f7e6de03354 100644 --- a/cpp/src/io/utilities/output_builder.cuh +++ b/cpp/src/io/utilities/output_builder.cuh @@ -16,12 +16,12 @@ #include #include +#include #include #include #include #include -#include #include @@ -207,7 +207,7 @@ class output_builder { output_builder(size_type max_write_size, size_type max_growth, rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr = 
rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) : _max_write_size{max_write_size}, _max_growth{max_growth} { CUDF_EXPECTS(max_write_size > 0, "Internal error"); diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh index bc2722441d0..734067582f7 100644 --- a/cpp/src/io/utilities/parsing_utils.cuh +++ b/cpp/src/io/utilities/parsing_utils.cuh @@ -67,6 +67,8 @@ struct parse_options_view { bool doublequote; bool dayfirst; bool skipblanklines; + bool normalize_whitespace; + bool mixed_types_as_string; cudf::detail::trie_view trie_true; cudf::detail::trie_view trie_false; cudf::detail::trie_view trie_na; @@ -85,6 +87,8 @@ struct parse_options { bool doublequote; bool dayfirst; bool skipblanklines; + bool normalize_whitespace; + bool mixed_types_as_string; cudf::detail::optional_trie trie_true; cudf::detail::optional_trie trie_false; cudf::detail::optional_trie trie_na; @@ -111,6 +115,8 @@ struct parse_options { doublequote, dayfirst, skipblanklines, + normalize_whitespace, + mixed_types_as_string, cudf::detail::make_trie_view(trie_true), cudf::detail::make_trie_view(trie_false), cudf::detail::make_trie_view(trie_na), diff --git a/cpp/src/io/utilities/string_parsing.hpp b/cpp/src/io/utilities/string_parsing.hpp index 0d9e7e40e4e..1d6d5a0a570 100644 --- a/cpp/src/io/utilities/string_parsing.hpp +++ b/cpp/src/io/utilities/string_parsing.hpp @@ -19,10 +19,10 @@ #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/io/utilities/trie.cu b/cpp/src/io/utilities/trie.cu index 3be1a8332ca..504e72147e5 100644 --- a/cpp/src/io/utilities/trie.cu +++ b/cpp/src/io/utilities/trie.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include "trie.cuh" #include +#include #include #include @@ -104,7 +105,7 @@ rmm::device_uvector create_serialized_trie(std::vector #include #include -#include +#include #include -#include #include @@ -377,16 +376,12 @@ conditional_inner_join(table_view const& left, table_view const& right, ast::expression const& binary_predicate, std::optional output_size, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::conditional_join(left, - right, - binary_predicate, - detail::join_kind::INNER_JOIN, - output_size, - cudf::get_default_stream(), - mr); + return detail::conditional_join( + left, right, binary_predicate, detail::join_kind::INNER_JOIN, output_size, stream, mr); } std::pair>, @@ -395,16 +390,12 @@ conditional_left_join(table_view const& left, table_view const& right, ast::expression const& binary_predicate, std::optional output_size, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::conditional_join(left, - right, - binary_predicate, - detail::join_kind::LEFT_JOIN, - output_size, - cudf::get_default_stream(), - mr); + return detail::conditional_join( + left, right, binary_predicate, detail::join_kind::LEFT_JOIN, output_size, stream, mr); } std::pair>, @@ -412,16 +403,12 @@ std::pair>, conditional_full_join(table_view const& left, table_view const& right, ast::expression const& binary_predicate, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::conditional_join(left, - right, - binary_predicate, - detail::join_kind::FULL_JOIN, - {}, - cudf::get_default_stream(), - mr); + return detail::conditional_join( + left, right, binary_predicate, detail::join_kind::FULL_JOIN, {}, stream, mr); } std::unique_ptr> conditional_left_semi_join( @@ -429,16 +416,12 @@ std::unique_ptr> conditional_left_semi_join( table_view const& right, ast::expression const& binary_predicate, std::optional output_size, 
+ rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return std::move(detail::conditional_join_anti_semi(left, - right, - binary_predicate, - detail::join_kind::LEFT_SEMI_JOIN, - output_size, - cudf::get_default_stream(), - mr)); + return detail::conditional_join_anti_semi( + left, right, binary_predicate, detail::join_kind::LEFT_SEMI_JOIN, output_size, stream, mr); } std::unique_ptr> conditional_left_anti_join( @@ -446,64 +429,56 @@ std::unique_ptr> conditional_left_anti_join( table_view const& right, ast::expression const& binary_predicate, std::optional output_size, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return std::move(detail::conditional_join_anti_semi(left, - right, - binary_predicate, - detail::join_kind::LEFT_ANTI_JOIN, - output_size, - cudf::get_default_stream(), - mr)); + return detail::conditional_join_anti_semi( + left, right, binary_predicate, detail::join_kind::LEFT_ANTI_JOIN, output_size, stream, mr); } std::size_t conditional_inner_join_size(table_view const& left, table_view const& right, ast::expression const& binary_predicate, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::compute_conditional_join_output_size( - left, right, binary_predicate, detail::join_kind::INNER_JOIN, cudf::get_default_stream(), mr); + left, right, binary_predicate, detail::join_kind::INNER_JOIN, stream, mr); } std::size_t conditional_left_join_size(table_view const& left, table_view const& right, ast::expression const& binary_predicate, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::compute_conditional_join_output_size( - left, right, binary_predicate, detail::join_kind::LEFT_JOIN, cudf::get_default_stream(), mr); + left, right, binary_predicate, detail::join_kind::LEFT_JOIN, stream, mr); } std::size_t conditional_left_semi_join_size(table_view const& left, table_view const& 
right, ast::expression const& binary_predicate, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return std::move(detail::compute_conditional_join_output_size(left, - right, - binary_predicate, - detail::join_kind::LEFT_SEMI_JOIN, - cudf::get_default_stream(), - mr)); + return detail::compute_conditional_join_output_size( + left, right, binary_predicate, detail::join_kind::LEFT_SEMI_JOIN, stream, mr); } std::size_t conditional_left_anti_join_size(table_view const& left, table_view const& right, ast::expression const& binary_predicate, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return std::move(detail::compute_conditional_join_output_size(left, - right, - binary_predicate, - detail::join_kind::LEFT_ANTI_JOIN, - cudf::get_default_stream(), - mr)); + return detail::compute_conditional_join_output_size( + left, right, binary_predicate, detail::join_kind::LEFT_ANTI_JOIN, stream, mr); } } // namespace cudf diff --git a/cpp/src/join/conditional_join.hpp b/cpp/src/join/conditional_join.hpp index 06eb83d6ba8..303442e79ef 100644 --- a/cpp/src/join/conditional_join.hpp +++ b/cpp/src/join/conditional_join.hpp @@ -19,11 +19,9 @@ #include #include -#include +#include #include -#include -#include #include diff --git a/cpp/src/join/cross_join.cu b/cpp/src/join/cross_join.cu index a2ee3a7796b..15594fb60e3 100644 --- a/cpp/src/join/cross_join.cu +++ b/cpp/src/join/cross_join.cu @@ -25,11 +25,10 @@ #include #include #include -#include #include +#include #include -#include namespace cudf { namespace detail { @@ -75,10 +74,11 @@ std::unique_ptr cross_join(cudf::table_view const& left, std::unique_ptr cross_join(cudf::table_view const& left, cudf::table_view const& right, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::cross_join(left, right, cudf::get_default_stream(), mr); + return detail::cross_join(left, right, stream, mr); } } // namespace 
cudf diff --git a/cpp/src/join/distinct_hash_join.cu b/cpp/src/join/distinct_hash_join.cu index daa1bf17c0d..c7294152982 100644 --- a/cpp/src/join/distinct_hash_join.cu +++ b/cpp/src/join/distinct_hash_join.cu @@ -24,11 +24,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -119,7 +119,7 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, {}, cuco::thread_scope_device, cuco_storage_type{}, - cudf::detail::cuco_allocator{stream}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, stream.value()} { CUDF_FUNC_RANGE(); @@ -139,7 +139,8 @@ distinct_hash_join::distinct_hash_join(cudf::table_view const& build, } else { auto stencil = thrust::counting_iterator{0}; auto const row_bitmask = - cudf::detail::bitmask_and(this->_build, stream, rmm::mr::get_current_device_resource()).first; + cudf::detail::bitmask_and(this->_build, stream, cudf::get_current_device_resource_ref()) + .first; auto const pred = cudf::detail::row_is_valid{reinterpret_cast(row_bitmask.data())}; diff --git a/cpp/src/join/hash_join.cu b/cpp/src/join/hash_join.cu index eb9b687630b..beeaabfdaab 100644 --- a/cpp/src/join/hash_join.cu +++ b/cpp/src/join/hash_join.cu @@ -22,13 +22,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include @@ -374,7 +374,7 @@ hash_join::hash_join(cudf::table_view const& build, cuco::empty_key{std::numeric_limits::max()}, cuco::empty_value{cudf::detail::JoinNoneValue}, stream.value(), - cudf::detail::cuco_allocator{stream}}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}}, _build{build}, _preprocessed_build{ cudf::experimental::row::equality::preprocessed_table::create(_build, stream)} @@ -385,7 +385,7 @@ hash_join::hash_join(cudf::table_view const& build, if (_is_empty) { return; } auto const row_bitmask = - cudf::detail::bitmask_and(build, stream, rmm::mr::get_current_device_resource()).first; + 
cudf::detail::bitmask_and(build, stream, cudf::get_current_device_resource_ref()).first; cudf::detail::build_join_hash_table(_build, _preprocessed_build, _hash_table, diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu index bc7f09763ec..7b13c260364 100644 --- a/cpp/src/join/join.cu +++ b/cpp/src/join/join.cu @@ -20,10 +20,9 @@ #include #include #include -#include +#include #include -#include namespace cudf { namespace detail { @@ -41,7 +40,7 @@ inner_join(table_view const& left_input, auto matched = cudf::dictionary::detail::match_dictionaries( {left_input, right_input}, stream, - rmm::mr::get_current_device_resource()); // temporary objects returned + cudf::get_current_device_resource_ref()); // temporary objects returned // now rebuild the table views with the updated ones auto const left = matched.second.front(); @@ -76,7 +75,7 @@ left_join(table_view const& left_input, auto matched = cudf::dictionary::detail::match_dictionaries( {left_input, right_input}, // these should match stream, - rmm::mr::get_current_device_resource()); // temporary objects returned + cudf::get_current_device_resource_ref()); // temporary objects returned // now rebuild the table views with the updated ones table_view const left = matched.second.front(); table_view const right = matched.second.back(); @@ -101,7 +100,7 @@ full_join(table_view const& left_input, auto matched = cudf::dictionary::detail::match_dictionaries( {left_input, right_input}, // these should match stream, - rmm::mr::get_current_device_resource()); // temporary objects returned + cudf::get_current_device_resource_ref()); // temporary objects returned // now rebuild the table views with the updated ones table_view const left = matched.second.front(); table_view const right = matched.second.back(); @@ -120,10 +119,11 @@ std::pair>, inner_join(table_view const& left, table_view const& right, null_equality compare_nulls, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return 
detail::inner_join(left, right, compare_nulls, cudf::get_default_stream(), mr); + return detail::inner_join(left, right, compare_nulls, stream, mr); } std::pair>, @@ -131,10 +131,11 @@ std::pair>, left_join(table_view const& left, table_view const& right, null_equality compare_nulls, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::left_join(left, right, compare_nulls, cudf::get_default_stream(), mr); + return detail::left_join(left, right, compare_nulls, stream, mr); } std::pair>, @@ -142,10 +143,11 @@ std::pair>, full_join(table_view const& left, table_view const& right, null_equality compare_nulls, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::full_join(left, right, compare_nulls, cudf::get_default_stream(), mr); + return detail::full_join(left, right, compare_nulls, stream, mr); } } // namespace cudf diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index 3d0f3e4340d..4f75908fe72 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index 4157100b67e..86402a0e7de 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -48,11 +48,13 @@ using mixed_multimap_type = cuco::static_multimap, cuco::legacy::double_hashing<1, hash_type, hash_type>>; -using semi_map_type = cuco::legacy:: - static_map; +using semi_map_type = cuco::legacy::static_map>; using row_hash_legacy = cudf::row_hasher; diff --git a/cpp/src/join/join_utils.cu b/cpp/src/join/join_utils.cu index 8d916da9f2c..16302657ac2 100644 --- a/cpp/src/join/join_utils.cu +++ b/cpp/src/join/join_utils.cu @@ -16,8 +16,9 @@ #include "join_common_utils.cuh" +#include + #include -#include #include #include diff --git 
a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu index 90748e6f322..820b81ee309 100644 --- a/cpp/src/join/mixed_join.cu +++ b/cpp/src/join/mixed_join.cu @@ -16,7 +16,8 @@ #include "join_common_utils.cuh" #include "join_common_utils.hpp" -#include "mixed_join_kernels.cuh" +#include "mixed_join_kernel.hpp" +#include "mixed_join_size_kernel.hpp" #include #include @@ -27,12 +28,11 @@ #include #include #include -#include +#include #include #include #include -#include #include #include @@ -126,17 +126,18 @@ mixed_join( auto build_view = table_device_view::create(build, stream); // Don't use multimap_type because we want a CG size of 1. - mixed_multimap_type hash_table{compute_hash_table_size(build.num_rows()), - cuco::empty_key{std::numeric_limits::max()}, - cuco::empty_value{cudf::detail::JoinNoneValue}, - stream.value(), - cudf::detail::cuco_allocator{stream}}; + mixed_multimap_type hash_table{ + compute_hash_table_size(build.num_rows()), + cuco::empty_key{std::numeric_limits::max()}, + cuco::empty_value{cudf::detail::JoinNoneValue}, + stream.value(), + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}}; // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. 
auto const row_bitmask = - cudf::detail::bitmask_and(build, stream, rmm::mr::get_current_device_resource()).first; + cudf::detail::bitmask_and(build, stream, cudf::get_current_device_resource_ref()).first; auto const preprocessed_build = experimental::row::equality::preprocessed_table::create(build, stream); build_join_hash_table(build, @@ -177,9 +178,6 @@ mixed_join( join_size = output_size_data->first; matches_per_row_span = output_size_data->second; } else { - // Allocate storage for the counter used to get the size of the join output - rmm::device_scalar size(0, stream, mr); - matches_per_row = rmm::device_uvector{static_cast(outer_num_rows), stream, mr}; // Note that the view goes out of scope after this else statement, but the @@ -189,37 +187,38 @@ mixed_join( matches_per_row_span = cudf::device_span{ matches_per_row->begin(), static_cast(outer_num_rows)}; if (has_nulls) { - compute_mixed_join_output_size - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - kernel_join_type, - hash_table_view, - parser.device_expression_data, - swap_tables, - size.data(), - mutable_matches_per_row_span); + join_size = launch_compute_mixed_join_output_size(*left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + kernel_join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + mutable_matches_per_row_span, + config, + shmem_size_per_block, + stream, + mr); } else { - compute_mixed_join_output_size - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - kernel_join_type, - hash_table_view, - parser.device_expression_data, - swap_tables, - size.data(), - mutable_matches_per_row_span); + join_size = launch_compute_mixed_join_output_size(*left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + 
kernel_join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + mutable_matches_per_row_span, + config, + shmem_size_per_block, + stream, + mr); } - join_size = size.value(stream); } // The initial early exit clauses guarantee that we will not reach this point @@ -248,37 +247,39 @@ mixed_join( auto const& join_output_r = right_indices->data(); if (has_nulls) { - mixed_join - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - kernel_join_type, - hash_table_view, - join_output_l, - join_output_r, - parser.device_expression_data, - join_result_offsets.data(), - swap_tables); + launch_mixed_join(*left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + kernel_join_type, + hash_table_view, + join_output_l, + join_output_r, + parser.device_expression_data, + join_result_offsets.data(), + swap_tables, + config, + shmem_size_per_block, + stream); } else { - mixed_join - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - kernel_join_type, - hash_table_view, - join_output_l, - join_output_r, - parser.device_expression_data, - join_result_offsets.data(), - swap_tables); + launch_mixed_join(*left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + kernel_join_type, + hash_table_view, + join_output_l, + join_output_r, + parser.device_expression_data, + join_result_offsets.data(), + swap_tables, + config, + shmem_size_per_block, + stream); } auto join_indices = std::pair(std::move(left_indices), std::move(right_indices)); @@ -391,17 +392,18 @@ compute_mixed_join_output_size(table_view const& left_equality, auto build_view = table_device_view::create(build, stream); // Don't use multimap_type because we want a CG size of 1. 
- mixed_multimap_type hash_table{compute_hash_table_size(build.num_rows()), - cuco::empty_key{std::numeric_limits::max()}, - cuco::empty_value{cudf::detail::JoinNoneValue}, - stream.value(), - cudf::detail::cuco_allocator{stream}}; + mixed_multimap_type hash_table{ + compute_hash_table_size(build.num_rows()), + cuco::empty_key{std::numeric_limits::max()}, + cuco::empty_value{cudf::detail::JoinNoneValue}, + stream.value(), + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}}; // TODO: To add support for nested columns we will need to flatten in many // places. However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. auto const row_bitmask = - cudf::detail::bitmask_and(build, stream, rmm::mr::get_current_device_resource()).first; + cudf::detail::bitmask_and(build, stream, cudf::get_current_device_resource_ref()).first; auto const preprocessed_build = experimental::row::equality::preprocessed_table::create(build, stream); build_join_hash_table(build, @@ -421,9 +423,6 @@ compute_mixed_join_output_size(table_view const& left_equality, detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE); auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block; - // Allocate storage for the counter used to get the size of the join output - rmm::device_scalar size(0, stream, mr); - auto const preprocessed_probe = experimental::row::equality::preprocessed_table::create(probe, stream); auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; @@ -434,39 +433,42 @@ compute_mixed_join_output_size(table_view const& left_equality, // Determine number of output rows without actually building the output to simply // find what the size of the output will be. 
+ std::size_t size = 0; if (has_nulls) { - compute_mixed_join_output_size - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - join_type, - hash_table_view, - parser.device_expression_data, - swap_tables, - size.data(), - matches_per_row_span); + size = launch_compute_mixed_join_output_size(*left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + matches_per_row_span, + config, + shmem_size_per_block, + stream, + mr); } else { - compute_mixed_join_output_size - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - join_type, - hash_table_view, - parser.device_expression_data, - swap_tables, - size.data(), - matches_per_row_span); + size = launch_compute_mixed_join_output_size(*left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + join_type, + hash_table_view, + parser.device_expression_data, + swap_tables, + matches_per_row_span, + config, + shmem_size_per_block, + stream, + mr); } - return {size.value(stream), std::move(matches_per_row)}; + return {size, std::move(matches_per_row)}; } } // namespace detail @@ -481,6 +483,7 @@ mixed_inner_join( ast::expression const& binary_predicate, null_equality compare_nulls, std::optional>> const output_size_data, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -492,7 +495,7 @@ mixed_inner_join( compare_nulls, detail::join_kind::INNER_JOIN, output_size_data, - cudf::get_default_stream(), + stream, mr); } @@ -503,6 +506,7 @@ std::pair>> mixed_in table_view const& right_conditional, ast::expression const& binary_predicate, null_equality compare_nulls, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -513,7 +517,7 
@@ std::pair>> mixed_in binary_predicate, compare_nulls, detail::join_kind::INNER_JOIN, - cudf::get_default_stream(), + stream, mr); } @@ -527,6 +531,7 @@ mixed_left_join( ast::expression const& binary_predicate, null_equality compare_nulls, std::optional>> const output_size_data, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -538,7 +543,7 @@ mixed_left_join( compare_nulls, detail::join_kind::LEFT_JOIN, output_size_data, - cudf::get_default_stream(), + stream, mr); } @@ -549,6 +554,7 @@ std::pair>> mixed_le table_view const& right_conditional, ast::expression const& binary_predicate, null_equality compare_nulls, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -559,7 +565,7 @@ std::pair>> mixed_le binary_predicate, compare_nulls, detail::join_kind::LEFT_JOIN, - cudf::get_default_stream(), + stream, mr); } @@ -573,6 +579,7 @@ mixed_full_join( ast::expression const& binary_predicate, null_equality compare_nulls, std::optional>> const output_size_data, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -584,7 +591,7 @@ mixed_full_join( compare_nulls, detail::join_kind::FULL_JOIN, output_size_data, - cudf::get_default_stream(), + stream, mr); } diff --git a/cpp/src/join/mixed_join_kernel.cu b/cpp/src/join/mixed_join_kernel.cu index 61cfa168b03..cd4016837cc 100644 --- a/cpp/src/join/mixed_join_kernel.cu +++ b/cpp/src/join/mixed_join_kernel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,11 +15,12 @@ */ #include "mixed_join_kernel.cuh" +#include "mixed_join_kernel.hpp" namespace cudf { namespace detail { -template __global__ void mixed_join( +template void launch_mixed_join( table_device_view left_table, table_device_view right_table, table_device_view probe, @@ -32,7 +33,10 @@ template __global__ void mixed_join( size_type* join_output_r, cudf::ast::detail::expression_device_view device_expression_data, cudf::size_type const* join_result_offsets, - bool const swap_tables); + bool const swap_tables, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream); } // namespace detail diff --git a/cpp/src/join/mixed_join_kernel.cuh b/cpp/src/join/mixed_join_kernel.cuh index ea59f23c77f..368b1fba870 100644 --- a/cpp/src/join/mixed_join_kernel.cuh +++ b/cpp/src/join/mixed_join_kernel.cuh @@ -19,6 +19,7 @@ #include "join_common_utils.cuh" #include "join_common_utils.hpp" #include "mixed_join_common_utils.cuh" +#include "mixed_join_kernel.hpp" #include #include @@ -39,20 +40,20 @@ namespace cg = cooperative_groups; #pragma GCC diagnostic ignored "-Wattributes" template -CUDF_HIDDEN __launch_bounds__(block_size) __global__ - void mixed_join(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - size_type* join_output_l, - size_type* join_output_r, - cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables) +CUDF_KERNEL void __launch_bounds__(block_size) + mixed_join(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + 
cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables) { // Normally the casting of a shared memory array is used to create multiple // arrays of different types from the shared memory buffer, but here it is @@ -111,6 +112,41 @@ CUDF_HIDDEN __launch_bounds__(block_size) __global__ } } +template +void launch_mixed_join(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream) +{ + mixed_join + <<>>( + left_table, + right_table, + probe, + build, + hash_probe, + equality_probe, + join_type, + hash_table_view, + join_output_l, + join_output_r, + device_expression_data, + join_result_offsets, + swap_tables); +} + } // namespace detail } // namespace cudf diff --git a/cpp/src/join/mixed_join_kernel.hpp b/cpp/src/join/mixed_join_kernel.hpp new file mode 100644 index 00000000000..cc92e9d8ba4 --- /dev/null +++ b/cpp/src/join/mixed_join_kernel.hpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "join/join_common_utils.hpp" +#include "join/mixed_join_common_utils.cuh" + +#include +#include +#include + +namespace CUDF_EXPORT cudf { +namespace detail { + +/** + * @brief Performs a join using the combination of a hash lookup to identify + * equal rows between one pair of tables and the evaluation of an expression + * containing an arbitrary expression. + * + * This method probes the hash table with each row in the probe table using a + * custom equality comparator that also checks that the conditional expression + * evaluates to true between the left/right tables when a match is found + * between probe and build rows. + * + * @tparam block_size The number of threads per block for this kernel + * @tparam has_nulls Whether or not the inputs may contain nulls. + * + * @param[in] left_table The left table + * @param[in] right_table The right table + * @param[in] probe The table with which to probe the hash table for matches. + * @param[in] build The table with which the hash table was built. + * @param[in] hash_probe The hasher used for the probe table. + * @param[in] equality_probe The equality comparator used when probing the hash table. + * @param[in] join_type The type of join to be performed + * @param[in] hash_table_view The hash table built from `build`. + * @param[out] join_output_l The left result of the join operation + * @param[out] join_output_r The right result of the join operation + * @param[in] device_expression_data Container of device data required to evaluate the desired + * expression. 
+ * @param[in] join_result_offsets The starting indices in join_output[l|r] + * where the matches for each row begin. Equivalent to a prefix sum of + * matches_per_row. + * @param[in] swap_tables If true, the kernel was launched with one thread per right row and + * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. + */ +template +void launch_mixed_join(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + size_type* join_output_l, + size_type* join_output_r, + cudf::ast::detail::expression_device_view device_expression_data, + cudf::size_type const* join_result_offsets, + bool const swap_tables, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream); + +} // namespace detail + +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/join/mixed_join_kernel_nulls.cu b/cpp/src/join/mixed_join_kernel_nulls.cu index 518f8ed8555..185aa133f2d 100644 --- a/cpp/src/join/mixed_join_kernel_nulls.cu +++ b/cpp/src/join/mixed_join_kernel_nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,11 +15,12 @@ */ #include "mixed_join_kernel.cuh" +#include "mixed_join_kernel.hpp" namespace cudf { namespace detail { -template __global__ void mixed_join( +template void launch_mixed_join( table_device_view left_table, table_device_view right_table, table_device_view probe, @@ -32,7 +33,10 @@ template __global__ void mixed_join( size_type* join_output_r, cudf::ast::detail::expression_device_view device_expression_data, cudf::size_type const* join_result_offsets, - bool const swap_tables); + bool const swap_tables, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream); } // namespace detail diff --git a/cpp/src/join/mixed_join_kernels.cuh b/cpp/src/join/mixed_join_kernels.cuh deleted file mode 100644 index 037c02666d4..00000000000 --- a/cpp/src/join/mixed_join_kernels.cuh +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "join/join_common_utils.hpp" -#include "join/mixed_join_common_utils.cuh" - -#include -#include -#include - -namespace cudf { -namespace detail { - -/** - * @brief Computes the output size of joining the left table to the right table. 
- * - * This method probes the hash table with each row in the probe table using a - * custom equality comparator that also checks that the conditional expression - * evaluates to true between the left/right tables when a match is found - * between probe and build rows. - * - * @tparam block_size The number of threads per block for this kernel - * @tparam has_nulls Whether or not the inputs may contain nulls. - * - * @param[in] left_table The left table - * @param[in] right_table The right table - * @param[in] probe The table with which to probe the hash table for matches. - * @param[in] build The table with which the hash table was built. - * @param[in] hash_probe The hasher used for the probe table. - * @param[in] equality_probe The equality comparator used when probing the hash table. - * @param[in] join_type The type of join to be performed - * @param[in] hash_table_view The hash table built from `build`. - * @param[in] device_expression_data Container of device data required to evaluate the desired - * expression. - * @param[in] swap_tables If true, the kernel was launched with one thread per right row and - * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. - * @param[out] output_size The resulting output size - * @param[out] matches_per_row The number of matches in one pair of - * equality/conditional tables for each row in the other pair of tables. If - * swap_tables is true, matches_per_row corresponds to the right_table, - * otherwise it corresponds to the left_table. Note that corresponding swap of - * left/right tables to determine which is the build table and which is the - * probe table has already happened on the host. 
- */ - -template -__global__ void compute_mixed_join_output_size( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - std::size_t* output_size, - cudf::device_span matches_per_row); - -/** - * @brief Performs a join using the combination of a hash lookup to identify - * equal rows between one pair of tables and the evaluation of an expression - * containing an arbitrary expression. - * - * This method probes the hash table with each row in the probe table using a - * custom equality comparator that also checks that the conditional expression - * evaluates to true between the left/right tables when a match is found - * between probe and build rows. - * - * @tparam block_size The number of threads per block for this kernel - * @tparam has_nulls Whether or not the inputs may contain nulls. - * - * @param[in] left_table The left table - * @param[in] right_table The right table - * @param[in] probe The table with which to probe the hash table for matches. - * @param[in] build The table with which the hash table was built. - * @param[in] hash_probe The hasher used for the probe table. - * @param[in] equality_probe The equality comparator used when probing the hash table. - * @param[in] join_type The type of join to be performed - * @param[in] hash_table_view The hash table built from `build`. - * @param[out] join_output_l The left result of the join operation - * @param[out] join_output_r The right result of the join operation - * @param[in] device_expression_data Container of device data required to evaluate the desired - * expression. - * @param[in] join_result_offsets The starting indices in join_output[l|r] - * where the matches for each row begin. 
Equivalent to a prefix sum of - * matches_per_row. - * @param[in] swap_tables If true, the kernel was launched with one thread per right row and - * the kernel needs to internally loop over left rows. Otherwise, loop over right rows. - */ -template -__global__ void mixed_join(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - size_type* join_output_l, - size_type* join_output_r, - cudf::ast::detail::expression_device_view device_expression_data, - cudf::size_type const* join_result_offsets, - bool const swap_tables); - -} // namespace detail - -} // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu index 1f31eaa7878..7459ac3e99c 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cu +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -14,9 +14,7 @@ * limitations under the License. 
*/ -#include "join/join_common_utils.cuh" -#include "join/join_common_utils.hpp" -#include "join/mixed_join_common_utils.cuh" +#include "join/mixed_join_kernels_semi.cuh" #include #include @@ -35,16 +33,16 @@ namespace cg = cooperative_groups; #pragma GCC diagnostic ignored "-Wattributes" template -CUDF_HIDDEN __launch_bounds__(block_size) __global__ - void mixed_join_semi(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - cudf::detail::semi_map_type::device_view hash_table_view, - cudf::device_span left_table_keep_mask, - cudf::ast::detail::expression_device_view device_expression_data) +CUDF_KERNEL void __launch_bounds__(block_size) + mixed_join_semi(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + cudf::detail::semi_map_type::device_view hash_table_view, + cudf::device_span left_table_keep_mask, + cudf::ast::detail::expression_device_view device_expression_data) { // Normally the casting of a shared memory array is used to create multiple // arrays of different types from the shared memory buffer, but here it is @@ -75,28 +73,46 @@ CUDF_HIDDEN __launch_bounds__(block_size) __global__ } } -template __global__ void mixed_join_semi( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - cudf::detail::semi_map_type::device_view hash_table_view, - cudf::device_span left_table_keep_mask, - cudf::ast::detail::expression_device_view device_expression_data); - -template __global__ void mixed_join_semi( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const 
equality_probe, - cudf::detail::semi_map_type::device_view hash_table_view, - cudf::device_span left_table_keep_mask, - cudf::ast::detail::expression_device_view device_expression_data); +void launch_mixed_join_semi(bool has_nulls, + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + cudf::detail::semi_map_type::device_view hash_table_view, + cudf::device_span left_table_keep_mask, + cudf::ast::detail::expression_device_view device_expression_data, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream) +{ + if (has_nulls) { + mixed_join_semi + <<>>( + left_table, + right_table, + probe, + build, + hash_probe, + equality_probe, + hash_table_view, + left_table_keep_mask, + device_expression_data); + } else { + mixed_join_semi + <<>>( + left_table, + right_table, + probe, + build, + hash_probe, + equality_probe, + hash_table_view, + left_table_keep_mask, + device_expression_data); + } +} } // namespace detail - } // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels_semi.cuh b/cpp/src/join/mixed_join_kernels_semi.cuh index 4ea404d451c..43714ffb36a 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cuh +++ b/cpp/src/join/mixed_join_kernels_semi.cuh @@ -16,8 +16,9 @@ #pragma once -#include "join/join_common_utils.hpp" -#include "join/mixed_join_common_utils.cuh" +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_common_utils.cuh" #include #include @@ -39,6 +40,7 @@ namespace detail { * @tparam block_size The number of threads per block for this kernel * @tparam has_nulls Whether or not the inputs may contain nulls. * + * @param[in] has_nulls If the input has nulls * @param[in] left_table The left table * @param[in] right_table The right table * @param[in] probe The table with which to probe the hash table for matches. 
@@ -51,16 +53,19 @@ namespace detail { * @param[in] device_expression_data Container of device data required to evaluate the desired * expression. */ -template -__global__ void mixed_join_semi(table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - cudf::detail::semi_map_type::device_view hash_table_view, - cudf::device_span left_table_keep_mask, - cudf::ast::detail::expression_device_view device_expression_data); +void launch_mixed_join_semi(bool has_nulls, + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + cudf::detail::semi_map_type::device_view hash_table_view, + cudf::device_span left_table_keep_mask, + cudf::ast::detail::expression_device_view device_expression_data, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream); } // namespace detail diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index c147ea3c253..aa4fa281159 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -29,12 +29,11 @@ #include #include #include -#include +#include #include #include #include -#include #include #include @@ -163,11 +162,12 @@ std::unique_ptr> mixed_join_semi( cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; auto const equality_probe = row_comparator.equal_to(has_nulls, compare_nulls); - semi_map_type hash_table{compute_hash_table_size(build.num_rows()), - cuco::empty_key{std::numeric_limits::max()}, - cuco::empty_value{cudf::detail::JoinNoneValue}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + semi_map_type hash_table{ + compute_hash_table_size(build.num_rows()), + cuco::empty_key{std::numeric_limits::max()}, + 
cuco::empty_value{cudf::detail::JoinNoneValue}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; // Create hash table containing all keys found in right table // TODO: To add support for nested columns we will need to flatten in many @@ -207,7 +207,7 @@ std::unique_ptr> mixed_join_semi( } else { thrust::counting_iterator stencil(0); auto const [row_bitmask, _] = - cudf::detail::bitmask_and(build, stream, rmm::mr::get_current_device_resource()); + cudf::detail::bitmask_and(build, stream, cudf::get_current_device_resource_ref()); row_is_valid pred{static_cast(row_bitmask.data())}; // insert valid rows @@ -226,31 +226,19 @@ std::unique_ptr> mixed_join_semi( // Vector used to indicate indices from left/probe table which are present in output auto left_table_keep_mask = rmm::device_uvector(probe.num_rows(), stream); - if (has_nulls) { - mixed_join_semi - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - hash_table_view, - cudf::device_span(left_table_keep_mask), - parser.device_expression_data); - } else { - mixed_join_semi - <<>>( - *left_conditional_view, - *right_conditional_view, - *probe_view, - *build_view, - hash_probe, - equality_probe, - hash_table_view, - cudf::device_span(left_table_keep_mask), - parser.device_expression_data); - } + launch_mixed_join_semi(has_nulls, + *left_conditional_view, + *right_conditional_view, + *probe_view, + *build_view, + hash_probe, + equality_probe, + hash_table_view, + cudf::device_span(left_table_keep_mask), + parser.device_expression_data, + config, + shmem_size_per_block, + stream); auto gather_map = std::make_unique>(probe.num_rows(), stream, mr); @@ -278,6 +266,7 @@ std::unique_ptr> mixed_left_semi_join( table_view const& right_conditional, ast::expression const& binary_predicate, null_equality compare_nulls, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -288,7 
+277,7 @@ std::unique_ptr> mixed_left_semi_join( binary_predicate, compare_nulls, detail::join_kind::LEFT_SEMI_JOIN, - cudf::get_default_stream(), + stream, mr); } @@ -299,6 +288,7 @@ std::unique_ptr> mixed_left_anti_join( table_view const& right_conditional, ast::expression const& binary_predicate, null_equality compare_nulls, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -309,7 +299,7 @@ std::unique_ptr> mixed_left_anti_join( binary_predicate, compare_nulls, detail::join_kind::LEFT_ANTI_JOIN, - cudf::get_default_stream(), + stream, mr); } diff --git a/cpp/src/join/mixed_join_size_kernel.cu b/cpp/src/join/mixed_join_size_kernel.cu index 4011acb65d6..4882c8769e6 100644 --- a/cpp/src/join/mixed_join_size_kernel.cu +++ b/cpp/src/join/mixed_join_size_kernel.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,11 +15,12 @@ */ #include "mixed_join_size_kernel.cuh" +#include "mixed_join_size_kernel.hpp" namespace cudf { namespace detail { -template __global__ void compute_mixed_join_output_size( +template std::size_t launch_compute_mixed_join_output_size( table_device_view left_table, table_device_view right_table, table_device_view probe, @@ -30,8 +31,11 @@ template __global__ void compute_mixed_join_output_size matches_per_row); + cudf::device_span matches_per_row, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/src/join/mixed_join_size_kernel.cuh b/cpp/src/join/mixed_join_size_kernel.cuh index 00a90f8273f..84e9be45030 100644 --- a/cpp/src/join/mixed_join_size_kernel.cuh +++ b/cpp/src/join/mixed_join_size_kernel.cuh @@ -36,19 +36,19 @@ namespace cg = cooperative_groups; #pragma GCC diagnostic ignored "-Wattributes" template -CUDF_HIDDEN __launch_bounds__(block_size) __global__ void compute_mixed_join_output_size( - table_device_view left_table, - table_device_view right_table, - table_device_view probe, - table_device_view build, - row_hash const hash_probe, - row_equality const equality_probe, - join_kind const join_type, - cudf::detail::mixed_multimap_type::device_view hash_table_view, - ast::detail::expression_device_view device_expression_data, - bool const swap_tables, - std::size_t* output_size, - cudf::device_span matches_per_row) +CUDF_KERNEL void __launch_bounds__(block_size) + compute_mixed_join_output_size(table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + std::size_t* output_size, + 
cudf::device_span matches_per_row) { // The (required) extern storage of the shared memory array leads to // conflicting declarations between different templates. The easiest @@ -103,5 +103,43 @@ CUDF_HIDDEN __launch_bounds__(block_size) __global__ void compute_mixed_join_out } } +template +std::size_t launch_compute_mixed_join_output_size( + table_device_view left_table, + table_device_view right_table, + table_device_view probe, + table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + cudf::device_span matches_per_row, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + // Allocate storage for the counter used to get the size of the join output + rmm::device_scalar size(0, stream, mr); + + compute_mixed_join_output_size + <<>>( + left_table, + right_table, + probe, + build, + hash_probe, + equality_probe, + join_type, + hash_table_view, + device_expression_data, + swap_tables, + size.data(), + matches_per_row); + return size.value(stream); +} + } // namespace detail } // namespace cudf diff --git a/cpp/src/join/mixed_join_size_kernel.hpp b/cpp/src/join/mixed_join_size_kernel.hpp new file mode 100644 index 00000000000..0f570c601d7 --- /dev/null +++ b/cpp/src/join/mixed_join_size_kernel.hpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "join_common_utils.cuh" +#include "join_common_utils.hpp" +#include "mixed_join_common_utils.cuh" + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +namespace CUDF_EXPORT cudf { +namespace detail { + +/** + * @brief Computes the output size of joining the left table to the right table. + * + * This method probes the hash table with each row in the probe table using a + * custom equality comparator that also checks that the conditional expression + * evaluates to true between the left/right tables when a match is found + * between probe and build rows. + * + * @tparam block_size The number of threads per block for this kernel + * @tparam has_nulls Whether or not the inputs may contain nulls. + * + * @param[in] left_table The left table + * @param[in] right_table The right table + * @param[in] probe The table with which to probe the hash table for matches. + * @param[in] build The table with which the hash table was built. + * @param[in] hash_probe The hasher used for the probe table. + * @param[in] equality_probe The equality comparator used when probing the hash table. + * @param[in] join_type The type of join to be performed + * @param[in] hash_table_view The hash table built from `build`. + * @param[in] device_expression_data Container of device data required to evaluate the desired + * expression. + * @param[in] swap_tables If true, the kernel was launched with one thread per right row and + * the kernel needs to internally loop over left rows. 
Otherwise, loop over right rows. + * @param[out] output_size The resulting output size + * @param[out] matches_per_row The number of matches in one pair of + * equality/conditional tables for each row in the other pair of tables. If + * swap_tables is true, matches_per_row corresponds to the right_table, + * otherwise it corresponds to the left_table. Note that corresponding swap of + * left/right tables to determine which is the build table and which is the + * probe table has already happened on the host. + */ + +template +std::size_t launch_compute_mixed_join_output_size( + cudf::table_device_view left_table, + cudf::table_device_view right_table, + cudf::table_device_view probe, + cudf::table_device_view build, + row_hash const hash_probe, + row_equality const equality_probe, + join_kind const join_type, + cudf::detail::mixed_multimap_type::device_view hash_table_view, + ast::detail::expression_device_view device_expression_data, + bool const swap_tables, + cudf::device_span matches_per_row, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); +} // namespace detail +} // namespace CUDF_EXPORT cudf diff --git a/cpp/src/join/mixed_join_size_kernel_nulls.cu b/cpp/src/join/mixed_join_size_kernel_nulls.cu index 2868113bf33..11f9103da4d 100644 --- a/cpp/src/join/mixed_join_size_kernel_nulls.cu +++ b/cpp/src/join/mixed_join_size_kernel_nulls.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -19,7 +19,7 @@ namespace cudf { namespace detail { -template __global__ void compute_mixed_join_output_size( +template std::size_t launch_compute_mixed_join_output_size( table_device_view left_table, table_device_view right_table, table_device_view probe, @@ -30,8 +30,10 @@ template __global__ void compute_mixed_join_output_size matches_per_row); - + cudf::device_span matches_per_row, + detail::grid_1d const config, + int64_t shmem_size_per_block, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); } // namespace detail } // namespace cudf diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index 91d98d5e8d3..d2ab2122c75 100644 --- a/cpp/src/join/semi_join.cu +++ b/cpp/src/join/semi_join.cu @@ -23,13 +23,12 @@ #include #include #include -#include #include +#include #include #include #include -#include #include #include @@ -72,7 +71,7 @@ std::unique_ptr> left_semi_anti_join( compare_nulls, nan_equality::ALL_EQUAL, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto const left_num_rows = left_keys.num_rows(); auto gather_map = @@ -98,22 +97,24 @@ std::unique_ptr> left_semi_join( cudf::table_view const& left, cudf::table_view const& right, null_equality compare_nulls, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::left_semi_anti_join( - detail::join_kind::LEFT_SEMI_JOIN, left, right, compare_nulls, cudf::get_default_stream(), mr); + detail::join_kind::LEFT_SEMI_JOIN, left, right, compare_nulls, stream, mr); } std::unique_ptr> left_anti_join( cudf::table_view const& left, cudf::table_view const& right, null_equality compare_nulls, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::left_semi_anti_join( - detail::join_kind::LEFT_ANTI_JOIN, left, right, compare_nulls, cudf::get_default_stream(), mr); + detail::join_kind::LEFT_ANTI_JOIN, left, right, compare_nulls, stream, mr); } } // 
namespace cudf diff --git a/cpp/src/json/json_path.cu b/cpp/src/json/json_path.cu index d1a1097de35..59fdbedf089 100644 --- a/cpp/src/json/json_path.cu +++ b/cpp/src/json/json_path.cu @@ -34,12 +34,12 @@ #include #include #include +#include #include #include -#include -#include +#include #include #include #include @@ -207,7 +207,7 @@ class parser { struct json_output { size_t output_max_len; char* output; - thrust::optional output_len; + cuda::std::optional output_len; __device__ void add_output(char const* str, size_t len) { @@ -656,7 +656,7 @@ class path_state : private parser { * @param stream Cuda stream to perform any gpu actions on * @returns A pair containing the command buffer, and maximum stack depth required. */ -std::pair>, int> build_command_buffer( +std::pair>, int> build_command_buffer( cudf::string_scalar const& json_path, rmm::cuda_stream_view stream) { std::string h_json_path = json_path.to_string(stream); @@ -690,9 +690,9 @@ std::pair>, int> build_comma } while (op.type != path_operator_type::END); auto const is_empty = h_operators.size() == 1 && h_operators[0].type == path_operator_type::END; - return is_empty ? std::pair(thrust::nullopt, 0) - : std::pair(thrust::make_optional(cudf::detail::make_device_uvector_sync( - h_operators, stream, rmm::mr::get_current_device_resource())), + return is_empty ? 
std::pair(cuda::std::nullopt, 0) + : std::pair(cuda::std::make_optional(cudf::detail::make_device_uvector_sync( + h_operators, stream, cudf::get_current_device_resource_ref())), max_stack_depth); } @@ -920,9 +920,9 @@ __launch_bounds__(block_size) CUDF_KERNEL path_operator const* const commands, size_type* d_sizes, cudf::detail::input_offsetalator output_offsets, - thrust::optional out_buf, - thrust::optional out_validity, - thrust::optional out_valid_count, + cuda::std::optional out_buf, + cuda::std::optional out_validity, + cuda::std::optional out_valid_count, get_json_object_options options) { auto tid = cudf::detail::grid_1d::global_thread_id(); @@ -999,7 +999,7 @@ std::unique_ptr get_json_object(cudf::strings_column_view const& c // compute output sizes auto sizes = - rmm::device_uvector(col.size(), stream, rmm::mr::get_current_device_resource()); + rmm::device_uvector(col.size(), stream, cudf::get_current_device_resource_ref()); auto d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(col.offsets()); constexpr int block_size = 512; @@ -1012,9 +1012,9 @@ std::unique_ptr get_json_object(cudf::strings_column_view const& c std::get<0>(preprocess).value().data(), sizes.data(), d_offsets, - thrust::nullopt, - thrust::nullopt, - thrust::nullopt, + cuda::std::nullopt, + cuda::std::nullopt, + cuda::std::nullopt, options); // convert sizes to offsets diff --git a/cpp/src/labeling/label_bins.cu b/cpp/src/labeling/label_bins.cu index 7ee1d540831..18a500069ad 100644 --- a/cpp/src/labeling/label_bins.cu +++ b/cpp/src/labeling/label_bins.cu @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -32,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu index 58ec053712d..7ae5db3e84b 100644 --- a/cpp/src/lists/combine/concatenate_list_elements.cu +++ b/cpp/src/lists/combine/concatenate_list_elements.cu 
@@ -27,10 +27,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu index bc1b48b11cd..790c99c494d 100644 --- a/cpp/src/lists/combine/concatenate_rows.cu +++ b/cpp/src/lists/combine/concatenate_rows.cu @@ -23,11 +23,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -219,7 +219,7 @@ std::unique_ptr concatenate_rows(table_view const& input, // concatenate the input table into one column. std::vector cols(input.num_columns()); std::copy(input.begin(), input.end(), cols.begin()); - auto concat = cudf::detail::concatenate(cols, stream, rmm::mr::get_current_device_resource()); + auto concat = cudf::detail::concatenate(cols, stream, cudf::get_current_device_resource_ref()); // whether or not we should be generating a null mask at all auto const build_null_mask = concat->has_nulls(); @@ -251,7 +251,7 @@ std::unique_ptr concatenate_rows(table_view const& input, return row_null_counts[row_index] != num_columns; }), stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); } // NULLIFY_OUTPUT_ROW. 
Output row is nullfied if any input row is null return cudf::detail::valid_if( @@ -264,7 +264,7 @@ std::unique_ptr concatenate_rows(table_view const& input, return row_null_counts[row_index] == 0; }), stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); }(); concat->set_null_mask(std::move(null_mask), null_count); } diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 30c03a8cd68..9556ef23784 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -28,11 +28,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -317,7 +316,7 @@ std::unique_ptr contains(lists_column_view const& lists, search_key, duplicate_find_option::FIND_FIRST, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); return to_contains(std::move(key_indices), stream, mr); } @@ -333,7 +332,7 @@ std::unique_ptr contains(lists_column_view const& lists, search_keys, duplicate_find_option::FIND_FIRST, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); return to_contains(std::move(key_indices), stream, mr); } diff --git a/cpp/src/lists/copying/concatenate.cu b/cpp/src/lists/copying/concatenate.cu index 8cd58e7eff2..c8bc4799688 100644 --- a/cpp/src/lists/copying/concatenate.cu +++ b/cpp/src/lists/copying/concatenate.cu @@ -25,10 +25,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/src/lists/copying/copying.cu b/cpp/src/lists/copying/copying.cu index 162c6140656..b4c0fb12b8e 100644 --- a/cpp/src/lists/copying/copying.cu +++ b/cpp/src/lists/copying/copying.cu @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/lists/copying/gather.cu b/cpp/src/lists/copying/gather.cu index cadeb273a65..0df1801b99b 
100644 --- a/cpp/src/lists/copying/gather.cu +++ b/cpp/src/lists/copying/gather.cu @@ -16,9 +16,9 @@ #include #include +#include #include -#include #include #include diff --git a/cpp/src/lists/copying/scatter_helper.cu b/cpp/src/lists/copying/scatter_helper.cu index b754fef24e5..9cbb3c59510 100644 --- a/cpp/src/lists/copying/scatter_helper.cu +++ b/cpp/src/lists/copying/scatter_helper.cu @@ -21,10 +21,9 @@ #include #include #include +#include #include -#include - #include #include #include diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu index 90f7994b21d..f6e48f141e1 100644 --- a/cpp/src/lists/copying/segmented_gather.cu +++ b/cpp/src/lists/copying/segmented_gather.cu @@ -22,9 +22,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/src/lists/count_elements.cu b/cpp/src/lists/count_elements.cu index 19c434d10e1..78f78ff6246 100644 --- a/cpp/src/lists/count_elements.cu +++ b/cpp/src/lists/count_elements.cu @@ -24,10 +24,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/lists/dremel.cu b/cpp/src/lists/dremel.cu index 50f40924478..469442d46d4 100644 --- a/cpp/src/lists/dremel.cu +++ b/cpp/src/lists/dremel.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -267,7 +268,7 @@ dremel_data get_encoding(column_view h_col, } auto d_nullability = cudf::detail::make_device_uvector_async( - nullability, stream, rmm::mr::get_current_device_resource()); + nullability, stream, cudf::get_current_device_resource_ref()); rmm::device_uvector rep_level(max_vals_size, stream); rmm::device_uvector def_level(max_vals_size, stream); diff --git a/cpp/src/lists/explode.cu b/cpp/src/lists/explode.cu index 46c4fc78a6f..00e19e2e2cb 100644 --- a/cpp/src/lists/explode.cu +++ b/cpp/src/lists/explode.cu @@ -21,14 +21,15 @@ #include #include #include +#include #include #include #include #include -#include #include +#include 
#include #include #include @@ -36,7 +37,6 @@ #include #include #include -#include #include #include @@ -57,8 +57,8 @@ std::unique_ptr
build_table( size_type const explode_column_idx, column_view const& sliced_child, cudf::device_span gather_map, - thrust::optional> explode_col_gather_map, - thrust::optional> position_array, + cuda::std::optional> explode_col_gather_map, + cuda::std::optional> position_array, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -143,8 +143,8 @@ std::unique_ptr
explode(table_view const& input_table, explode_column_idx, sliced_child, gather_map, - thrust::nullopt, - thrust::nullopt, + cuda::std::nullopt, + cuda::std::nullopt, stream, mr); } @@ -193,7 +193,7 @@ std::unique_ptr
explode_position(table_view const& input_table, explode_column_idx, sliced_child, gather_map, - thrust::nullopt, + cuda::std::nullopt, std::move(pos), stream, mr); @@ -292,7 +292,7 @@ std::unique_ptr
explode_outer(table_view const& input_table, sliced_child, gather_map, explode_col_gather_map, - include_position ? std::move(pos) : thrust::optional>{}, + include_position ? std::move(pos) : cuda::std::optional>{}, stream, mr); } diff --git a/cpp/src/lists/extract.cu b/cpp/src/lists/extract.cu index c0ce86fb56e..b6d22955e67 100644 --- a/cpp/src/lists/extract.cu +++ b/cpp/src/lists/extract.cu @@ -26,10 +26,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -105,7 +105,7 @@ std::unique_ptr make_index_offsets(size_type num_lists, rmm::cuda_ return cudf::detail::sequence(num_lists + 1, cudf::scalar_type_t(0, true, stream), stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); } } // namespace diff --git a/cpp/src/lists/interleave_columns.cu b/cpp/src/lists/interleave_columns.cu index 45ae3671d4e..3d6fdda957b 100644 --- a/cpp/src/lists/interleave_columns.cu +++ b/cpp/src/lists/interleave_columns.cu @@ -24,12 +24,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -104,7 +104,7 @@ std::unique_ptr concatenate_and_gather_lists(host_span #include #include +#include #include #include -#include #include #include @@ -48,7 +48,7 @@ std::unique_ptr make_lists_column_from_scalar(list_scalar const& v stream, mr); } - auto mr_final = size == 1 ? mr : rmm::mr::get_current_device_resource(); + auto mr_final = size == 1 ? 
mr : cudf::get_current_device_resource_ref(); // Handcraft a 1-row column auto sizes_itr = thrust::constant_iterator(value.view().size()); diff --git a/cpp/src/lists/reverse.cu b/cpp/src/lists/reverse.cu index d913ce070ae..b80f6c882c8 100644 --- a/cpp/src/lists/reverse.cu +++ b/cpp/src/lists/reverse.cu @@ -23,11 +23,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -45,7 +45,7 @@ std::unique_ptr reverse(lists_column_view const& input, // The labels are also a map from each list element to its corresponding zero-based list index. auto const labels = - generate_labels(input, child.size(), stream, rmm::mr::get_current_device_resource()); + generate_labels(input, child.size(), stream, cudf::get_current_device_resource_ref()); // The offsets of the output lists column. auto out_offsets = get_normalized_offsets(input, stream, mr); diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu index f920fb916eb..c78b6d793d4 100644 --- a/cpp/src/lists/segmented_sort.cu +++ b/cpp/src/lists/segmented_sort.cu @@ -24,10 +24,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/src/lists/sequences.cu b/cpp/src/lists/sequences.cu index 7d57d8ddb60..4b50bf626f2 100644 --- a/cpp/src/lists/sequences.cu +++ b/cpp/src/lists/sequences.cu @@ -24,11 +24,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/lists/set_operations.cu b/cpp/src/lists/set_operations.cu index 5c7ab68d64b..c0bc10dd266 100644 --- a/cpp/src/lists/set_operations.cu +++ b/cpp/src/lists/set_operations.cu @@ -27,12 +27,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -78,15 +78,15 @@ std::unique_ptr have_overlap(lists_column_view const& lhs, auto const lhs_child = lhs.get_sliced_child(stream); auto const rhs_child = rhs.get_sliced_child(stream); auto const lhs_labels = - 
generate_labels(lhs, lhs_child.size(), stream, rmm::mr::get_current_device_resource()); + generate_labels(lhs, lhs_child.size(), stream, cudf::get_current_device_resource_ref()); auto const rhs_labels = - generate_labels(rhs, rhs_child.size(), stream, rmm::mr::get_current_device_resource()); + generate_labels(rhs, rhs_child.size(), stream, cudf::get_current_device_resource_ref()); auto const lhs_table = table_view{{lhs_labels->view(), lhs_child}}; auto const rhs_table = table_view{{rhs_labels->view(), rhs_child}}; // Check existence for each row of the rhs_table in lhs_table. auto const contained = cudf::detail::contains( - lhs_table, rhs_table, nulls_equal, nans_equal, stream, rmm::mr::get_current_device_resource()); + lhs_table, rhs_table, nulls_equal, nans_equal, stream, cudf::get_current_device_resource_ref()); auto const num_rows = lhs.size(); @@ -148,20 +148,20 @@ std::unique_ptr intersect_distinct(lists_column_view const& lhs, auto const lhs_child = lhs.get_sliced_child(stream); auto const rhs_child = rhs.get_sliced_child(stream); auto const lhs_labels = - generate_labels(lhs, lhs_child.size(), stream, rmm::mr::get_current_device_resource()); + generate_labels(lhs, lhs_child.size(), stream, cudf::get_current_device_resource_ref()); auto const rhs_labels = - generate_labels(rhs, rhs_child.size(), stream, rmm::mr::get_current_device_resource()); + generate_labels(rhs, rhs_child.size(), stream, cudf::get_current_device_resource_ref()); auto const lhs_table = table_view{{lhs_labels->view(), lhs_child}}; auto const rhs_table = table_view{{rhs_labels->view(), rhs_child}}; auto const contained = cudf::detail::contains( - lhs_table, rhs_table, nulls_equal, nans_equal, stream, rmm::mr::get_current_device_resource()); + lhs_table, rhs_table, nulls_equal, nans_equal, stream, cudf::get_current_device_resource_ref()); auto const intersect_table = cudf::detail::copy_if( rhs_table, [contained = contained.begin()] __device__(auto const idx) { return contained[idx]; }, 
stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // A stable algorithm is required to ensure that list labels remain contiguous. auto out_table = cudf::detail::stable_distinct(intersect_table->view(), @@ -205,7 +205,7 @@ std::unique_ptr union_distinct(lists_column_view const& lhs, lists::detail::concatenate_rows(table_view{{lhs.parent(), rhs.parent()}}, concatenate_null_policy::NULLIFY_OUTPUT_ROW, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); return cudf::lists::detail::distinct( lists_column_view{union_col->view()}, nulls_equal, nans_equal, stream, mr); @@ -231,20 +231,20 @@ std::unique_ptr difference_distinct(lists_column_view const& lhs, auto const lhs_child = lhs.get_sliced_child(stream); auto const rhs_child = rhs.get_sliced_child(stream); auto const lhs_labels = - generate_labels(lhs, lhs_child.size(), stream, rmm::mr::get_current_device_resource()); + generate_labels(lhs, lhs_child.size(), stream, cudf::get_current_device_resource_ref()); auto const rhs_labels = - generate_labels(rhs, rhs_child.size(), stream, rmm::mr::get_current_device_resource()); + generate_labels(rhs, rhs_child.size(), stream, cudf::get_current_device_resource_ref()); auto const lhs_table = table_view{{lhs_labels->view(), lhs_child}}; auto const rhs_table = table_view{{rhs_labels->view(), rhs_child}}; auto const contained = cudf::detail::contains( - rhs_table, lhs_table, nulls_equal, nans_equal, stream, rmm::mr::get_current_device_resource()); + rhs_table, lhs_table, nulls_equal, nans_equal, stream, cudf::get_current_device_resource_ref()); auto const difference_table = cudf::detail::copy_if( lhs_table, [contained = contained.begin()] __device__(auto const idx) { return !contained[idx]; }, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // A stable algorithm is required to ensure that list labels remain contiguous. 
auto out_table = cudf::detail::stable_distinct(difference_table->view(), diff --git a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu index 71aafa3ce12..c78e9c22e2a 100644 --- a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu +++ b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu @@ -27,9 +27,9 @@ #include #include #include +#include #include -#include #include #include @@ -73,7 +73,7 @@ std::unique_ptr apply_boolean_mask(lists_column_view const& input, null_policy::EXCLUDE, std::nullopt, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto const d_sizes = column_device_view::create(*sizes, stream); auto const sizes_begin = cudf::detail::make_null_replacement_iterator(*d_sizes, size_type{0}); auto const sizes_end = sizes_begin + sizes->size(); diff --git a/cpp/src/lists/stream_compaction/distinct.cu b/cpp/src/lists/stream_compaction/distinct.cu index cdcb4aa957f..ab750de9ef2 100644 --- a/cpp/src/lists/stream_compaction/distinct.cu +++ b/cpp/src/lists/stream_compaction/distinct.cu @@ -25,9 +25,9 @@ #include #include #include +#include #include -#include #include #include @@ -50,7 +50,7 @@ std::unique_ptr distinct(lists_column_view const& input, auto const child = input.get_sliced_child(stream); auto const labels = - generate_labels(input, child.size(), stream, rmm::mr::get_current_device_resource()); + generate_labels(input, child.size(), stream, cudf::get_current_device_resource_ref()); auto const distinct_table = cudf::detail::stable_distinct(table_view{{labels->view(), child}}, // input table diff --git a/cpp/src/lists/utilities.cu b/cpp/src/lists/utilities.cu index 7fb960f02ca..53ddc27a8a5 100644 --- a/cpp/src/lists/utilities.cu +++ b/cpp/src/lists/utilities.cu @@ -19,8 +19,7 @@ #include #include #include - -#include +#include namespace cudf::lists::detail { diff --git a/cpp/src/lists/utilities.hpp b/cpp/src/lists/utilities.hpp index 
218ad7872e9..c0fcf7b7182 100644 --- a/cpp/src/lists/utilities.hpp +++ b/cpp/src/lists/utilities.hpp @@ -18,10 +18,10 @@ #include #include +#include #include #include -#include namespace cudf::lists::detail { diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu index e2c8d49a4ab..b9e0da0a3fe 100644 --- a/cpp/src/merge/merge.cu +++ b/cpp/src/merge/merge.cu @@ -34,13 +34,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include @@ -247,7 +247,7 @@ index_vector generate_merged_indices(table_view const& left_table, auto rhs_device_view = table_device_view::create(right_table, stream); auto d_column_order = cudf::detail::make_device_uvector_async( - column_order, stream, rmm::mr::get_current_device_resource()); + column_order, stream, cudf::get_current_device_resource_ref()); if (has_nulls) { auto const new_null_precedence = [&]() { @@ -261,7 +261,7 @@ index_vector generate_merged_indices(table_view const& left_table, }(); auto d_null_precedence = cudf::detail::make_device_uvector_async( - new_null_precedence, stream, rmm::mr::get_current_device_resource()); + new_null_precedence, stream, cudf::get_current_device_resource_ref()); auto ineq_op = detail::row_lexicographic_tagged_comparator( *lhs_device_view, *rhs_device_view, d_column_order, d_null_precedence); @@ -307,7 +307,7 @@ index_vector generate_merged_indices_nested(table_view const& left_table, column_order, null_precedence, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto const left_indices = left_indices_col->view(); auto left_indices_mutable = left_indices_col->mutable_view(); auto const left_indices_begin = left_indices.begin(); @@ -647,7 +647,7 @@ table_ptr_type merge(std::vector const& tables_to_merge, // This utility will ensure all corresponding dictionary columns have matching keys. // It will return any new dictionary columns created as well as updated table_views. 
auto matched = cudf::dictionary::detail::match_dictionaries( - tables_to_merge, stream, rmm::mr::get_current_device_resource()); + tables_to_merge, stream, cudf::get_current_device_resource_ref()); auto merge_tables = matched.second; // A queue of (table view, table) pairs @@ -673,7 +673,7 @@ table_ptr_type merge(std::vector const& tables_to_merge, auto const right_table = top_and_pop(merge_queue); // Only use mr for the output table - auto const& new_tbl_mr = merge_queue.empty() ? mr : rmm::mr::get_current_device_resource(); + auto const& new_tbl_mr = merge_queue.empty() ? mr : cudf::get_current_device_resource_ref(); auto merged_table = merge(left_table.view, right_table.view, key_cols, diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index f10388794fc..17008e80e79 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -27,11 +27,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -501,10 +501,10 @@ std::pair, std::vector> hash_partition_table( // Holds the total number of rows in each partition auto global_partition_sizes = cudf::detail::make_zeroed_device_uvector_async( - num_partitions, stream, rmm::mr::get_current_device_resource()); + num_partitions, stream, cudf::get_current_device_resource_ref()); auto row_partition_offset = cudf::detail::make_zeroed_device_uvector_async( - num_rows, stream, rmm::mr::get_current_device_resource()); + num_rows, stream, cudf::get_current_device_resource_ref()); auto const row_hasher = experimental::row::hash::row_hasher(table_to_hash, stream); auto const hasher = diff --git a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu index 9810373b751..5a4c90a67a5 100644 --- a/cpp/src/partitioning/round_robin.cu +++ b/cpp/src/partitioning/round_robin.cu @@ -26,12 +26,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include diff --git 
a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index 5d748de0019..80fd72a3088 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -30,11 +30,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -89,7 +89,7 @@ struct quantile_functor { auto d_output = mutable_column_device_view::create(output->mutable_view(), stream); auto q_device = - cudf::detail::make_device_uvector_sync(q, stream, rmm::mr::get_current_device_resource()); + cudf::detail::make_device_uvector_sync(q, stream, cudf::get_current_device_resource_ref()); if (!cudf::is_dictionary(input.type())) { auto sorted_data = diff --git a/cpp/src/quantiles/quantiles.cu b/cpp/src/quantiles/quantiles.cu index 0b0e6701304..69421f3bfc4 100644 --- a/cpp/src/quantiles/quantiles.cu +++ b/cpp/src/quantiles/quantiles.cu @@ -26,9 +26,9 @@ #include #include #include +#include #include -#include #include #include @@ -55,7 +55,7 @@ std::unique_ptr
quantiles(table_view const& input, }); auto const q_device = - cudf::detail::make_device_uvector_async(q, stream, rmm::mr::get_current_device_resource()); + cudf::detail::make_device_uvector_async(q, stream, cudf::get_current_device_resource_ref()); auto quantile_idx_iter = thrust::make_transform_iterator(q_device.begin(), quantile_idx_lookup); @@ -90,7 +90,7 @@ std::unique_ptr
quantiles(table_view const& input, input, thrust::make_counting_iterator(0), q, interp, stream, mr); } else { auto sorted_idx = detail::sorted_order( - input, column_order, null_precedence, stream, rmm::mr::get_current_device_resource()); + input, column_order, null_precedence, stream, cudf::get_current_device_resource_ref()); return detail::quantiles(input, sorted_idx->view().data(), q, interp, stream, mr); } } diff --git a/cpp/src/quantiles/tdigest/tdigest.cu b/cpp/src/quantiles/tdigest/tdigest.cu index 421ed26e26d..0d017cf1f13 100644 --- a/cpp/src/quantiles/tdigest/tdigest.cu +++ b/cpp/src/quantiles/tdigest/tdigest.cu @@ -25,10 +25,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -199,7 +199,7 @@ std::unique_ptr compute_approx_percentiles(tdigest_column_view const& in weight.size(), mask_state::UNALLOCATED, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto keys = cudf::detail::make_counting_transform_iterator( 0, cuda::proclaim_return_type( diff --git a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu index 229af89fc46..e1c1d2e3002 100644 --- a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu +++ b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu @@ -29,11 +29,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -1021,6 +1021,76 @@ struct group_key_func { } }; +// merges all the tdigests within each group. returns a table containing 2 columns: +// the sorted means and weights. 
+template +std::pair, rmm::device_uvector> generate_merged_centroids( + tdigest_column_view const& tdv, + GroupOffsetIter group_offsets, + size_type num_groups, + rmm::cuda_stream_view stream) +{ + auto temp_mr = cudf::get_current_device_resource_ref(); + + auto const total_merged_centroids = tdv.means().size(); + + // output is the merged centroids (means, weights) + rmm::device_uvector output_means(total_merged_centroids, stream, temp_mr); + rmm::device_uvector output_weights(total_merged_centroids, stream, temp_mr); + + // each group represents a collection of tdigest columns. each row is 1 tdigest. + // within each group, we want to sort all the centroids within all the tdigests + // in that group, using the means as the key. the "outer offsets" represent the indices of the + // tdigests, and the "inner offsets" represents the list of centroids for a particular tdigest. + // + // rows + // ---- centroid 0 --------- + // tdigest 0 centroid 1 + // ---- centroid 2 group 0 + // tdigest 1 centroid 3 + // ---- centroid 4 --------- + // tdigest 2 centroid 5 + // ---- centroid 6 group 1 + // tdigest 3 centroid 7 + // centroid 8 + // ---- centroid 9 -------- + auto inner_offsets = tdv.centroids().offsets(); + auto centroid_offsets = cudf::detail::make_counting_transform_iterator( + 0, + cuda::proclaim_return_type( + [group_offsets, inner_offsets = tdv.centroids().offsets().begin()] __device__( + size_type i) { return inner_offsets[group_offsets[i]]; })); + + // perform the sort using the means as the key + size_t temp_size; + CUDF_CUDA_TRY(cub::DeviceSegmentedSort::SortPairs(nullptr, + temp_size, + tdv.means().begin(), + output_means.begin(), + tdv.weights().begin(), + output_weights.begin(), + total_merged_centroids, + num_groups, + centroid_offsets, + centroid_offsets + 1, + stream.value())); + + rmm::device_buffer temp_mem(temp_size, stream, temp_mr); + CUDF_CUDA_TRY(cub::DeviceSegmentedSort::SortPairs(temp_mem.data(), + temp_size, + tdv.means().begin(), + 
output_means.begin(), + tdv.weights().begin(), + output_weights.begin(), + total_merged_centroids, + num_groups, + centroid_offsets, + centroid_offsets + 1, + stream.value())); + + return {std::move(output_means), std::move(output_weights)}; +} + template std::unique_ptr merge_tdigests(tdigest_column_view const& tdv, HGroupOffsetIter h_outer_offsets, @@ -1032,59 +1102,6 @@ std::unique_ptr merge_tdigests(tdigest_column_view const& tdv, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - // thrust::merge and thrust::merge_by_key don't provide what we need. What we would need is an - // algorithm like a super-merge that takes two layers of keys: one which identifies the outer - // grouping of tdigests, and one which identifies the inner groupings of the tdigests within the - // outer groups. - // TODO: investigate replacing the iterative merge with a single stable_sort_by_key. - - // bring tdigest offsets back to the host - auto tdigest_offsets = tdv.centroids().offsets(); - std::vector h_inner_offsets(tdigest_offsets.size()); - cudaMemcpyAsync(h_inner_offsets.data(), - tdigest_offsets.begin(), - sizeof(size_type) * tdigest_offsets.size(), - cudaMemcpyDefault, - stream); - - stream.synchronize(); - - // extract all means and weights into a table - cudf::table_view tdigests_unsliced({tdv.means(), tdv.weights()}); - - // generate the merged (but not yet compressed) tdigests for each group. 
- std::vector> tdigests; - tdigests.reserve(num_groups); - std::transform(h_outer_offsets, - h_outer_offsets + num_groups, - std::next(h_outer_offsets), - std::back_inserter(tdigests), - [&](auto tdigest_start, auto tdigest_end) { - // the range of tdigests in this group - auto const num_tdigests = tdigest_end - tdigest_start; - - // slice each tdigest from the input - std::vector unmerged_tdigests; - unmerged_tdigests.reserve(num_tdigests); - auto offset_iter = std::next(h_inner_offsets.begin(), tdigest_start); - std::transform( - offset_iter, - offset_iter + num_tdigests, - std::next(offset_iter), - std::back_inserter(unmerged_tdigests), - [&](size_type start, size_type end) { - return cudf::detail::slice(tdigests_unsliced, {start, end}, stream); - }); - - // merge - return cudf::detail::merge(unmerged_tdigests, - {0}, - {order::ASCENDING}, - {}, - stream, - rmm::mr::get_current_device_resource()); - }); - // generate min and max values auto merged_min_col = cudf::make_numeric_column( data_type{type_id::FLOAT64}, num_groups, mask_state::UNALLOCATED, stream, mr); @@ -1121,7 +1138,7 @@ std::unique_ptr merge_tdigests(tdigest_column_view const& tdv, auto group_num_weights = cudf::detail::make_counting_transform_iterator( 0, group_num_weights_func{group_offsets, - tdigest_offsets.begin()}); + tdv.centroids().offsets().begin()}); thrust::replace_if(rmm::exec_policy(stream), merged_min_col->mutable_view().begin(), merged_min_col->mutable_view().end(), @@ -1135,29 +1152,33 @@ std::unique_ptr merge_tdigests(tdigest_column_view const& tdv, group_is_empty{}, 0); - // concatenate all the merged tdigests back into one table. - std::vector tdigest_views; - tdigest_views.reserve(num_groups); - std::transform(tdigests.begin(), - tdigests.end(), - std::back_inserter(tdigest_views), - [](std::unique_ptr
const& t) { return t->view(); }); - auto merged = - cudf::detail::concatenate(tdigest_views, stream, rmm::mr::get_current_device_resource()); + auto temp_mr = cudf::get_current_device_resource_ref(); + + // merge the centroids + auto [merged_means, merged_weights] = + generate_merged_centroids(tdv, group_offsets, num_groups, stream); + size_t const num_centroids = tdv.means().size(); + CUDF_EXPECTS(merged_means.size() == num_centroids, + "Unexpected number of centroids in merged result"); // generate cumulative weights - auto merged_weights = merged->get_column(1).view(); - auto cumulative_weights = cudf::make_numeric_column( - data_type{type_id::FLOAT64}, merged_weights.size(), mask_state::UNALLOCATED, stream); - auto keys = cudf::detail::make_counting_transform_iterator( - 0, - group_key_func{ - group_labels, tdigest_offsets.begin(), tdigest_offsets.size()}); + rmm::device_uvector cumulative_weights(merged_weights.size(), stream, temp_mr); + + // generate group keys for all centroids in the entire column + rmm::device_uvector group_keys(num_centroids, stream, temp_mr); + auto iter = thrust::make_counting_iterator(0); + auto inner_offsets = tdv.centroids().offsets(); + thrust::transform(rmm::exec_policy(stream), + iter, + iter + num_centroids, + group_keys.begin(), + group_key_func{ + group_labels, inner_offsets.begin(), inner_offsets.size()}); thrust::inclusive_scan_by_key(rmm::exec_policy(stream), - keys, - keys + cumulative_weights->size(), - merged_weights.begin(), - cumulative_weights->mutable_view().begin()); + group_keys.begin(), + group_keys.begin() + num_centroids, + merged_weights.begin(), + cumulative_weights.begin()); auto const delta = max_centroids; @@ -1166,37 +1187,32 @@ std::unique_ptr merge_tdigests(tdigest_column_view const& tdv, delta, num_groups, nearest_value_centroid_weights{ - cumulative_weights->view().begin(), - group_offsets, - tdigest_offsets.begin()}, - centroid_group_info{cumulative_weights->view().begin(), - group_offsets, - 
tdigest_offsets.begin()}, + cumulative_weights.begin(), group_offsets, inner_offsets.begin()}, + centroid_group_info{ + cumulative_weights.begin(), group_offsets, inner_offsets.begin()}, cumulative_centroid_weight{ - cumulative_weights->view().begin(), + cumulative_weights.begin(), group_labels, group_offsets, - {tdigest_offsets.begin(), static_cast(tdigest_offsets.size())}}, + {inner_offsets.begin(), static_cast(inner_offsets.size())}}, false, stream, mr); // input centroid values auto centroids = cudf::detail::make_counting_transform_iterator( - 0, - make_weighted_centroid{merged->get_column(0).view().begin(), - merged_weights.begin()}); + 0, make_weighted_centroid{merged_means.begin(), merged_weights.begin()}); // compute the tdigest return compute_tdigests( delta, centroids, - centroids + merged->num_rows(), + centroids + merged_means.size(), cumulative_centroid_weight{ - cumulative_weights->view().begin(), + cumulative_weights.begin(), group_labels, group_offsets, - {tdigest_offsets.begin(), static_cast(tdigest_offsets.size())}}, + {inner_offsets.begin(), static_cast(inner_offsets.size())}}, std::move(merged_min_col), std::move(merged_max_col), group_cluster_wl, @@ -1220,7 +1236,7 @@ std::unique_ptr reduce_tdigest(column_view const& col, // order with nulls at the end. 
table_view t({col}); auto sorted = cudf::detail::sort( - t, {order::ASCENDING}, {null_order::AFTER}, stream, rmm::mr::get_current_device_resource()); + t, {order::ASCENDING}, {null_order::AFTER}, stream, cudf::get_current_device_resource_ref()); auto const delta = max_centroids; return cudf::type_dispatcher( diff --git a/cpp/src/reductions/all.cu b/cpp/src/reductions/all.cu index 11b0e2732fe..67ea29a2cb1 100644 --- a/cpp/src/reductions/all.cu +++ b/cpp/src/reductions/all.cu @@ -18,8 +18,7 @@ #include #include - -#include +#include #include #include @@ -66,7 +65,7 @@ struct all_fn { cudf::dictionary::detail::make_dictionary_pair_iterator(*d_dict, input.has_nulls()); return thrust::make_transform_iterator(pair_iter, null_iter); }(); - auto d_result = rmm::device_scalar(1, stream, rmm::mr::get_current_device_resource()); + auto d_result = rmm::device_scalar(1, stream, cudf::get_current_device_resource_ref()); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), input.size(), diff --git a/cpp/src/reductions/any.cu b/cpp/src/reductions/any.cu index 0ebeb7a48b9..057f038c622 100644 --- a/cpp/src/reductions/any.cu +++ b/cpp/src/reductions/any.cu @@ -18,8 +18,7 @@ #include #include - -#include +#include #include #include @@ -66,7 +65,7 @@ struct any_fn { cudf::dictionary::detail::make_dictionary_pair_iterator(*d_dict, input.has_nulls()); return thrust::make_transform_iterator(pair_iter, null_iter); }(); - auto d_result = rmm::device_scalar(0, stream, rmm::mr::get_current_device_resource()); + auto d_result = rmm::device_scalar(0, stream, cudf::get_current_device_resource_ref()); thrust::for_each_n(rmm::exec_policy(stream), thrust::make_counting_iterator(0), input.size(), diff --git a/cpp/src/reductions/collect_ops.cu b/cpp/src/reductions/collect_ops.cu index c1a1f117ee1..01dfb8f2c7d 100644 --- a/cpp/src/reductions/collect_ops.cu +++ b/cpp/src/reductions/collect_ops.cu @@ -22,8 +22,7 @@ #include #include #include - -#include +#include namespace 
cudf { namespace reduction { diff --git a/cpp/src/reductions/compound.cuh b/cpp/src/reductions/compound.cuh index aa71546f049..6bc8b48832f 100644 --- a/cpp/src/reductions/compound.cuh +++ b/cpp/src/reductions/compound.cuh @@ -19,11 +19,10 @@ #include #include #include +#include #include #include -#include - #include namespace cudf { diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index bebb9d14923..362b5f74c46 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -20,8 +20,7 @@ #include #include #include - -#include +#include #include #include @@ -164,11 +163,13 @@ compute_row_frequencies(table_view const& input, "Nested types are not yet supported in histogram aggregation.", std::invalid_argument); - auto map = cudf::detail::hash_map_type{compute_hash_table_size(input.num_rows()), - cuco::empty_key{-1}, - cuco::empty_value{std::numeric_limits::min()}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto map = cudf::detail::hash_map_type{ + compute_hash_table_size(input.num_rows()), + cuco::empty_key{-1}, + cuco::empty_value{std::numeric_limits::min()}, + + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; auto const preprocessed_input = cudf::experimental::row::hash::preprocessed_table::create(input, stream); @@ -221,7 +222,7 @@ compute_row_frequencies(table_view const& input, partial_counts ? 
partial_counts.value().begin() : nullptr}, histogram_count_type{0}, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto const input_it = thrust::make_zip_iterator( thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin())); diff --git a/cpp/src/reductions/max.cu b/cpp/src/reductions/max.cu index 682889f0fee..0434d043240 100644 --- a/cpp/src/reductions/max.cu +++ b/cpp/src/reductions/max.cu @@ -18,9 +18,9 @@ #include #include +#include #include -#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/mean.cu b/cpp/src/reductions/mean.cu index e8a10f02cc1..c5ab501f607 100644 --- a/cpp/src/reductions/mean.cu +++ b/cpp/src/reductions/mean.cu @@ -18,9 +18,9 @@ #include #include +#include #include -#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/min.cu b/cpp/src/reductions/min.cu index 7986bda5751..26b91ebe868 100644 --- a/cpp/src/reductions/min.cu +++ b/cpp/src/reductions/min.cu @@ -18,8 +18,7 @@ #include #include - -#include +#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/minmax.cu b/cpp/src/reductions/minmax.cu index 2c1181972c5..139de068050 100644 --- a/cpp/src/reductions/minmax.cu +++ b/cpp/src/reductions/minmax.cu @@ -24,9 +24,9 @@ #include #include #include +#include #include -#include #include #include @@ -107,8 +107,7 @@ rmm::device_scalar reduce_device(InputIterator d_in, * respectively of the minimums and maximums of the input pairs. 
*/ template -struct minmax_binary_op - : public thrust::binary_function, minmax_pair, minmax_pair> { +struct minmax_binary_op { __device__ minmax_pair operator()(minmax_pair const& lhs, minmax_pair const& rhs) const { return minmax_pair{thrust::min(lhs.min_val, rhs.min_val), diff --git a/cpp/src/reductions/nested_type_minmax_util.cuh b/cpp/src/reductions/nested_type_minmax_util.cuh index 3cf390d3574..6a2c4c44553 100644 --- a/cpp/src/reductions/nested_type_minmax_util.cuh +++ b/cpp/src/reductions/nested_type_minmax_util.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,6 +22,7 @@ #include #include #include +#include namespace cudf { namespace reduction { @@ -104,7 +105,7 @@ class comparison_binop_generator { std::vector{DEFAULT_NULL_ORDER}, cudf::structs::detail::column_nullability::MATCH_INCOMING, stream, - rmm::mr::get_current_device_resource())}, + cudf::get_current_device_resource_ref())}, row_comparator{[&input_, &input_tview = input_tview, &flattened_input = flattened_input, diff --git a/cpp/src/reductions/nth_element.cu b/cpp/src/reductions/nth_element.cu index e266f477c5d..4f6198696bd 100644 --- a/cpp/src/reductions/nth_element.cu +++ b/cpp/src/reductions/nth_element.cu @@ -19,11 +19,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/reductions/product.cu b/cpp/src/reductions/product.cu index 28ff8db3708..f5fd735a9f4 100644 --- a/cpp/src/reductions/product.cu +++ b/cpp/src/reductions/product.cu @@ -18,9 +18,9 @@ #include #include +#include #include -#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index d4ea84742c7..d187375b69f 100644 --- a/cpp/src/reductions/reductions.cpp +++ 
b/cpp/src/reductions/reductions.cpp @@ -29,10 +29,10 @@ #include #include #include +#include #include #include -#include #include @@ -78,7 +78,7 @@ struct reduce_dispatch_functor { return standard_deviation(col, output_dtype, var_agg._ddof, stream, mr); } case aggregation::MEDIAN: { - auto current_mr = rmm::mr::get_current_device_resource(); + auto current_mr = cudf::get_current_device_resource_ref(); auto sorted_indices = cudf::detail::sorted_order( table_view{{col}}, {}, {null_order::AFTER}, stream, current_mr); auto valid_sorted_indices = @@ -91,7 +91,7 @@ struct reduce_dispatch_functor { auto quantile_agg = static_cast(agg); CUDF_EXPECTS(quantile_agg._quantiles.size() == 1, "Reduction quantile accepts only one quantile value"); - auto current_mr = rmm::mr::get_current_device_resource(); + auto current_mr = cudf::get_current_device_resource_ref(); auto sorted_indices = cudf::detail::sorted_order( table_view{{col}}, {}, {null_order::AFTER}, stream, current_mr); auto valid_sorted_indices = diff --git a/cpp/src/reductions/scan/rank_scan.cu b/cpp/src/reductions/scan/rank_scan.cu index 0dbfc271a25..6d0adc83359 100644 --- a/cpp/src/reductions/scan/rank_scan.cu +++ b/cpp/src/reductions/scan/rank_scan.cu @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -135,7 +135,7 @@ std::unique_ptr inclusive_one_normalized_percent_rank_scan( column_view const& order_by, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { auto const rank_column = - inclusive_rank_scan(order_by, stream, rmm::mr::get_current_device_resource()); + inclusive_rank_scan(order_by, stream, cudf::get_current_device_resource_ref()); auto const rank_view = rank_column->view(); // Result type for min 0-index percent rank is independent of input type. 
diff --git a/cpp/src/reductions/scan/scan.cpp b/cpp/src/reductions/scan/scan.cpp index de4dcf1de52..d3c0b54f286 100644 --- a/cpp/src/reductions/scan/scan.cpp +++ b/cpp/src/reductions/scan/scan.cpp @@ -20,8 +20,7 @@ #include #include #include - -#include +#include namespace cudf { diff --git a/cpp/src/reductions/scan/scan.cuh b/cpp/src/reductions/scan/scan.cuh index 6c237741ac3..76f98fe9a28 100644 --- a/cpp/src/reductions/scan/scan.cuh +++ b/cpp/src/reductions/scan/scan.cuh @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/src/reductions/scan/scan_exclusive.cu b/cpp/src/reductions/scan/scan_exclusive.cu index 7224bf47390..38ed0a68901 100644 --- a/cpp/src/reductions/scan/scan_exclusive.cu +++ b/cpp/src/reductions/scan/scan_exclusive.cu @@ -23,10 +23,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index ee35d716d6e..a876d54d45f 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -28,10 +28,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/reductions/segmented/all.cu b/cpp/src/reductions/segmented/all.cu index 489fc6a283c..e59e6a6896b 100644 --- a/cpp/src/reductions/segmented/all.cu +++ b/cpp/src/reductions/segmented/all.cu @@ -17,8 +17,7 @@ #include "simple.cuh" #include - -#include +#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/segmented/any.cu b/cpp/src/reductions/segmented/any.cu index a9a8528548a..444ab689c39 100644 --- a/cpp/src/reductions/segmented/any.cu +++ b/cpp/src/reductions/segmented/any.cu @@ -17,8 +17,7 @@ #include "simple.cuh" #include - -#include +#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/segmented/compound.cuh b/cpp/src/reductions/segmented/compound.cuh index 
035a8bdcd75..77fabbe485f 100644 --- a/cpp/src/reductions/segmented/compound.cuh +++ b/cpp/src/reductions/segmented/compound.cuh @@ -22,11 +22,10 @@ #include #include #include +#include #include #include -#include - #include #include @@ -73,7 +72,7 @@ std::unique_ptr compound_segmented_reduction(column_view const& col, offsets, null_handling, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // Run segmented reduction if (col.has_nulls()) { diff --git a/cpp/src/reductions/segmented/counts.cu b/cpp/src/reductions/segmented/counts.cu index 79737828678..5a072d6ca0a 100644 --- a/cpp/src/reductions/segmented/counts.cu +++ b/cpp/src/reductions/segmented/counts.cu @@ -17,8 +17,7 @@ #include "counts.hpp" #include - -#include +#include #include diff --git a/cpp/src/reductions/segmented/counts.hpp b/cpp/src/reductions/segmented/counts.hpp index f249644e564..c3f3e935f9a 100644 --- a/cpp/src/reductions/segmented/counts.hpp +++ b/cpp/src/reductions/segmented/counts.hpp @@ -17,11 +17,11 @@ #pragma once #include +#include #include #include #include -#include namespace cudf { class column_device_view; diff --git a/cpp/src/reductions/segmented/max.cu b/cpp/src/reductions/segmented/max.cu index 1c79edcc08c..49d0fe5f01c 100644 --- a/cpp/src/reductions/segmented/max.cu +++ b/cpp/src/reductions/segmented/max.cu @@ -17,8 +17,7 @@ #include "simple.cuh" #include - -#include +#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/segmented/mean.cu b/cpp/src/reductions/segmented/mean.cu index 8df6bee97e9..a9919086c8d 100644 --- a/cpp/src/reductions/segmented/mean.cu +++ b/cpp/src/reductions/segmented/mean.cu @@ -17,9 +17,9 @@ #include "compound.cuh" #include +#include #include -#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/segmented/min.cu b/cpp/src/reductions/segmented/min.cu index ae1d5ae42a4..052c81bc2c7 100644 --- a/cpp/src/reductions/segmented/min.cu +++ 
b/cpp/src/reductions/segmented/min.cu @@ -17,8 +17,7 @@ #include "simple.cuh" #include - -#include +#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/segmented/nunique.cu b/cpp/src/reductions/segmented/nunique.cu index d4fcf89e161..9b7e6f9fe57 100644 --- a/cpp/src/reductions/segmented/nunique.cu +++ b/cpp/src/reductions/segmented/nunique.cu @@ -24,9 +24,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/src/reductions/segmented/product.cu b/cpp/src/reductions/segmented/product.cu index 1b82e7e5aec..84e54ce6b6c 100644 --- a/cpp/src/reductions/segmented/product.cu +++ b/cpp/src/reductions/segmented/product.cu @@ -17,8 +17,7 @@ #include "simple.cuh" #include - -#include +#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index e6de065dabb..40d1d8a0a53 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -22,10 +22,10 @@ #include #include #include +#include #include #include -#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/segmented/simple.cuh b/cpp/src/reductions/segmented/simple.cuh index da59df6b314..6c35e750e6b 100644 --- a/cpp/src/reductions/segmented/simple.cuh +++ b/cpp/src/reductions/segmented/simple.cuh @@ -28,12 +28,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -243,7 +243,7 @@ std::unique_ptr fixed_point_segmented_reduction( offsets, null_policy::EXCLUDE, // do not count nulls stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto const max_count = thrust::reduce(rmm::exec_policy(stream), counts.begin(), diff --git a/cpp/src/reductions/segmented/std.cu b/cpp/src/reductions/segmented/std.cu index 0a7eb007f68..1d1a26e5176 100644 --- a/cpp/src/reductions/segmented/std.cu +++ 
b/cpp/src/reductions/segmented/std.cu @@ -17,9 +17,9 @@ #include "compound.cuh" #include +#include #include -#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/segmented/sum.cu b/cpp/src/reductions/segmented/sum.cu index bb06f6d7c8e..220148a7841 100644 --- a/cpp/src/reductions/segmented/sum.cu +++ b/cpp/src/reductions/segmented/sum.cu @@ -17,8 +17,7 @@ #include "simple.cuh" #include - -#include +#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/segmented/sum_of_squares.cu b/cpp/src/reductions/segmented/sum_of_squares.cu index 25d52f9bc79..6f3c1abd942 100644 --- a/cpp/src/reductions/segmented/sum_of_squares.cu +++ b/cpp/src/reductions/segmented/sum_of_squares.cu @@ -17,9 +17,9 @@ #include "simple.cuh" #include +#include #include -#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/segmented/update_validity.cu b/cpp/src/reductions/segmented/update_validity.cu index 92cfe5417ef..f0c3f0a0f0b 100644 --- a/cpp/src/reductions/segmented/update_validity.cu +++ b/cpp/src/reductions/segmented/update_validity.cu @@ -18,10 +18,9 @@ #include #include +#include #include -#include - namespace cudf { namespace reduction { namespace detail { diff --git a/cpp/src/reductions/segmented/update_validity.hpp b/cpp/src/reductions/segmented/update_validity.hpp index c143e1a4761..d60be8e92f4 100644 --- a/cpp/src/reductions/segmented/update_validity.hpp +++ b/cpp/src/reductions/segmented/update_validity.hpp @@ -19,10 +19,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/src/reductions/segmented/var.cu b/cpp/src/reductions/segmented/var.cu index 35f2771dfcf..f70943c19fc 100644 --- a/cpp/src/reductions/segmented/var.cu +++ b/cpp/src/reductions/segmented/var.cu @@ -17,9 +17,9 @@ #include "compound.cuh" #include +#include #include -#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index 
372ceccf60b..e897deee8a3 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -27,12 +27,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -344,7 +344,7 @@ struct same_element_type_dispatcher { dictionary_column_view(col).get_indices_annotated(), init, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); return resolve_key(dictionary_column_view(col).keys(), *index, stream, mr); } diff --git a/cpp/src/reductions/std.cu b/cpp/src/reductions/std.cu index 9c78b35313b..38076b52b14 100644 --- a/cpp/src/reductions/std.cu +++ b/cpp/src/reductions/std.cu @@ -18,9 +18,9 @@ #include #include +#include #include -#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/sum.cu b/cpp/src/reductions/sum.cu index 51b251a836e..898eadb8435 100644 --- a/cpp/src/reductions/sum.cu +++ b/cpp/src/reductions/sum.cu @@ -18,9 +18,9 @@ #include #include +#include #include -#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/sum_of_squares.cu b/cpp/src/reductions/sum_of_squares.cu index dc0eae56e98..49917f3009e 100644 --- a/cpp/src/reductions/sum_of_squares.cu +++ b/cpp/src/reductions/sum_of_squares.cu @@ -18,9 +18,9 @@ #include #include +#include #include -#include namespace cudf { namespace reduction { diff --git a/cpp/src/reductions/var.cu b/cpp/src/reductions/var.cu index aaab9dd4604..0e7b2fea9f8 100644 --- a/cpp/src/reductions/var.cu +++ b/cpp/src/reductions/var.cu @@ -18,9 +18,9 @@ #include #include +#include #include -#include namespace cudf { namespace reduction { diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu index cb3caf9d068..7f605f08d8d 100644 --- a/cpp/src/replace/clamp.cu +++ b/cpp/src/replace/clamp.cu @@ -34,11 +34,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -258,7 +258,7 @@ std::unique_ptr dispatch_clamp::operator()( return 
result; }(); auto matched_view = dictionary_column_view(matched_column->view()); - auto default_mr = rmm::mr::get_current_device_resource(); + auto default_mr = cudf::get_current_device_resource_ref(); // get the indexes for lo_replace and for hi_replace auto lo_replace_index = diff --git a/cpp/src/replace/nans.cu b/cpp/src/replace/nans.cu index eba6f6b436e..394c2a2de80 100644 --- a/cpp/src/replace/nans.cu +++ b/cpp/src/replace/nans.cu @@ -24,11 +24,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu index 13e130588c1..1df1549432f 100644 --- a/cpp/src/replace/nulls.cu +++ b/cpp/src/replace/nulls.cu @@ -37,13 +37,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu index c2cd03cd761..86ec8cfc91e 100644 --- a/cpp/src/replace/replace.cu +++ b/cpp/src/replace/replace.cu @@ -48,12 +48,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -262,14 +262,14 @@ std::unique_ptr replace_kernel_forwarder::operator()({values.keys(), replacements.keys()}), stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); return cudf::dictionary::detail::add_keys(input, new_keys->view(), stream, mr); }(); auto matched_view = cudf::dictionary_column_view(matched_input->view()); auto matched_values = cudf::dictionary::detail::set_keys( - values, matched_view.keys(), stream, rmm::mr::get_current_device_resource()); + values, matched_view.keys(), stream, cudf::get_current_device_resource_ref()); auto matched_replacements = cudf::dictionary::detail::set_keys( - replacements, matched_view.keys(), stream, rmm::mr::get_current_device_resource()); + replacements, matched_view.keys(), stream, cudf::get_current_device_resource_ref()); auto indices_type = 
matched_view.indices().type(); auto new_indices = cudf::type_dispatcher( diff --git a/cpp/src/reshape/byte_cast.cu b/cpp/src/reshape/byte_cast.cu index 3dfa0b65814..0526594cbef 100644 --- a/cpp/src/reshape/byte_cast.cu +++ b/cpp/src/reshape/byte_cast.cu @@ -23,11 +23,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -167,11 +167,6 @@ struct byte_list_conversion_fn byte_cast(column_view const& input, flip_endianness endian_configuration, rmm::cuda_stream_view stream, @@ -183,15 +178,13 @@ std::unique_ptr byte_cast(column_view const& input, } // namespace detail -/** - * @copydoc cudf::byte_cast(column_view const&, flip_endianness, rmm::device_async_resource_ref) - */ std::unique_ptr byte_cast(column_view const& input, flip_endianness endian_configuration, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::byte_cast(input, endian_configuration, cudf::get_default_stream(), mr); + return detail::byte_cast(input, endian_configuration, stream, mr); } } // namespace cudf diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu index 79124508b11..6c47d6f2216 100644 --- a/cpp/src/reshape/interleave_columns.cu +++ b/cpp/src/reshape/interleave_columns.cu @@ -29,10 +29,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -264,10 +264,11 @@ std::unique_ptr interleave_columns(table_view const& input, } // namespace detail std::unique_ptr interleave_columns(table_view const& input, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::interleave_columns(input, cudf::get_default_stream(), mr); + return detail::interleave_columns(input, stream, mr); } } // namespace cudf diff --git a/cpp/src/reshape/tile.cu b/cpp/src/reshape/tile.cu index 29996aa2152..45c40df3aeb 100644 --- a/cpp/src/reshape/tile.cu +++ b/cpp/src/reshape/tile.cu @@ -24,9 +24,9 @@ #include 
#include #include +#include #include -#include #include #include @@ -64,10 +64,11 @@ std::unique_ptr
tile(table_view const& in, std::unique_ptr
tile(table_view const& in, size_type count, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::tile(in, count, cudf::get_default_stream(), mr); + return detail::tile(in, count, stream, mr); } } // namespace cudf diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh index cfedcac8ae4..5d5fe9e4aa3 100644 --- a/cpp/src/rolling/detail/lead_lag_nested.cuh +++ b/cpp/src/rolling/detail/lead_lag_nested.cuh @@ -24,12 +24,11 @@ #include #include #include +#include #include #include #include -#include -#include #include #include @@ -200,7 +199,7 @@ std::unique_ptr compute_lead_lag_for_nested(aggregation::Kind op, out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // Scatter defaults into locations where LEAD/LAG computed nulls. auto scattered_results = cudf::detail::scatter( diff --git a/cpp/src/rolling/detail/nth_element.cuh b/cpp/src/rolling/detail/nth_element.cuh index 571f4c02cb5..ce1e666d5a0 100644 --- a/cpp/src/rolling/detail/nth_element.cuh +++ b/cpp/src/rolling/detail/nth_element.cuh @@ -21,9 +21,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/src/rolling/detail/optimized_unbounded_window.cpp b/cpp/src/rolling/detail/optimized_unbounded_window.cpp index 4175c6e34c1..72c23395a93 100644 --- a/cpp/src/rolling/detail/optimized_unbounded_window.cpp +++ b/cpp/src/rolling/detail/optimized_unbounded_window.cpp @@ -25,8 +25,7 @@ #include #include #include - -#include +#include namespace cudf::detail { @@ -143,7 +142,7 @@ std::unique_ptr reduction_based_rolling_window(column_view const& input, return_dtype, std::nullopt, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); } }(); // Blow up results into separate column. 
diff --git a/cpp/src/rolling/detail/optimized_unbounded_window.hpp b/cpp/src/rolling/detail/optimized_unbounded_window.hpp index 153586b187f..5adba764e9d 100644 --- a/cpp/src/rolling/detail/optimized_unbounded_window.hpp +++ b/cpp/src/rolling/detail/optimized_unbounded_window.hpp @@ -16,9 +16,9 @@ #include #include +#include #include -#include namespace rmm::mr { class device_memory_resource; diff --git a/cpp/src/rolling/detail/rolling.cuh b/cpp/src/rolling/detail/rolling.cuh index c18bb9d9885..528700137bf 100644 --- a/cpp/src/rolling/detail/rolling.cuh +++ b/cpp/src/rolling/detail/rolling.cuh @@ -44,13 +44,13 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include @@ -928,7 +928,7 @@ class rolling_aggregation_postprocessor final : public cudf::detail::aggregation min_periods, agg._null_handling, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); result = lists::detail::distinct( lists_column_view{collected_list->view()}, agg._nulls_equal, agg._nans_equal, stream, mr); diff --git a/cpp/src/rolling/detail/rolling.hpp b/cpp/src/rolling/detail/rolling.hpp index 2624d982712..8820a6264e7 100644 --- a/cpp/src/rolling/detail/rolling.hpp +++ b/cpp/src/rolling/detail/rolling.hpp @@ -18,10 +18,9 @@ #include #include +#include #include -#include - namespace cudf { // helper functions - used in the rolling window implementation and tests diff --git a/cpp/src/rolling/detail/rolling_collect_list.cu b/cpp/src/rolling/detail/rolling_collect_list.cu index b259bd51fc4..8a98b65b406 100644 --- a/cpp/src/rolling/detail/rolling_collect_list.cu +++ b/cpp/src/rolling/detail/rolling_collect_list.cu @@ -18,9 +18,9 @@ #include #include +#include #include -#include #include #include diff --git a/cpp/src/rolling/detail/rolling_collect_list.cuh b/cpp/src/rolling/detail/rolling_collect_list.cuh index 7630898f820..f3eff6b0689 100644 --- a/cpp/src/rolling/detail/rolling_collect_list.cuh +++ 
b/cpp/src/rolling/detail/rolling_collect_list.cuh @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/rolling/detail/rolling_fixed_window.cu b/cpp/src/rolling/detail/rolling_fixed_window.cu index df0e72748ce..23424da13cd 100644 --- a/cpp/src/rolling/detail/rolling_fixed_window.cu +++ b/cpp/src/rolling/detail/rolling_fixed_window.cu @@ -20,8 +20,7 @@ #include #include - -#include +#include #include #include diff --git a/cpp/src/rolling/detail/rolling_variable_window.cu b/cpp/src/rolling/detail/rolling_variable_window.cu index 83e8faec291..c2324947ef6 100644 --- a/cpp/src/rolling/detail/rolling_variable_window.cu +++ b/cpp/src/rolling/detail/rolling_variable_window.cu @@ -18,8 +18,7 @@ #include #include - -#include +#include #include #include diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu index 1158bf22494..ac6c7b11ef5 100644 --- a/cpp/src/rolling/grouped_rolling.cu +++ b/cpp/src/rolling/grouped_rolling.cu @@ -28,8 +28,7 @@ #include #include #include - -#include +#include #include #include @@ -605,9 +604,9 @@ get_null_bounds_for_orderby_column(column_view const& orderby_column, // When there are no nulls, just copy the input group offsets to the output. return std::make_tuple(cudf::detail::make_device_uvector_async( - group_offsets_span, stream, rmm::mr::get_current_device_resource()), + group_offsets_span, stream, cudf::get_current_device_resource_ref()), cudf::detail::make_device_uvector_async( - group_offsets_span, stream, rmm::mr::get_current_device_resource())); + group_offsets_span, stream, cudf::get_current_device_resource_ref())); } } diff --git a/cpp/src/rolling/jit/operation.hpp b/cpp/src/rolling/jit/operation.hpp index f8a52c03d4e..3be739ec5bf 100644 --- a/cpp/src/rolling/jit/operation.hpp +++ b/cpp/src/rolling/jit/operation.hpp @@ -14,12 +14,12 @@ * limitations under the License. 
*/ +#pragma once + #include "rolling/jit/operation-udf.hpp" #include -#pragma once - struct rolling_udf_ptx { template static OutType operate(InType const* in_col, cudf::size_type start, cudf::size_type count) diff --git a/cpp/src/rolling/rolling.cu b/cpp/src/rolling/rolling.cu index 5dff40a3396..651bf26b8d9 100644 --- a/cpp/src/rolling/rolling.cu +++ b/cpp/src/rolling/rolling.cu @@ -20,8 +20,7 @@ #include #include #include - -#include +#include namespace cudf { diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu index 369ed039b66..8988d73fb02 100644 --- a/cpp/src/round/round.cu +++ b/cpp/src/round/round.cu @@ -30,11 +30,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp index 83209c55c8a..31535198c58 100644 --- a/cpp/src/scalar/scalar.cpp +++ b/cpp/src/scalar/scalar.cpp @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include #include @@ -591,7 +591,7 @@ table struct_scalar::init_data(table&& data, // push validity mask down auto const validity = cudf::detail::create_null_mask( - 1, mask_state::ALL_NULL, stream, rmm::mr::get_current_device_resource()); + 1, mask_state::ALL_NULL, stream, cudf::get_current_device_resource_ref()); for (auto& col : data_cols) { col = cudf::structs::detail::superimpose_nulls( static_cast(validity.data()), 1, std::move(col), stream, mr); diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index d59c5c9fc85..656fe61fbbe 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -19,11 +19,11 @@ #include #include #include +#include #include #include #include -#include namespace cudf { namespace { diff --git a/cpp/src/search/contains_column.cu b/cpp/src/search/contains_column.cu index 57f2c59de40..5d21e8f662c 100644 --- a/cpp/src/search/contains_column.cu +++ b/cpp/src/search/contains_column.cu @@ -21,9 +21,9 @@ #include 
#include #include +#include #include -#include namespace cudf { namespace detail { @@ -59,10 +59,10 @@ std::unique_ptr contains_column_dispatch::operator()( dictionary_column_view const needles(needles_in); // first combine keys so both dictionaries have the same set auto needles_matched = dictionary::detail::add_keys( - needles, haystack.keys(), stream, rmm::mr::get_current_device_resource()); + needles, haystack.keys(), stream, cudf::get_current_device_resource_ref()); auto const needles_view = dictionary_column_view(needles_matched->view()); auto haystack_matched = dictionary::detail::set_keys( - haystack, needles_view.keys(), stream, rmm::mr::get_current_device_resource()); + haystack, needles_view.keys(), stream, cudf::get_current_device_resource_ref()); auto const haystack_view = dictionary_column_view(haystack_matched->view()); // now just use the indices for the contains diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu index 2aa9e24174b..21f2d601d6b 100644 --- a/cpp/src/search/contains_scalar.cu +++ b/cpp/src/search/contains_scalar.cu @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -146,7 +147,7 @@ bool contains_scalar_dispatch::operator()(column_view const& auto const dict_col = cudf::dictionary_column_view(haystack); // first, find the needle in the dictionary's key set auto const index = cudf::dictionary::detail::get_index( - dict_col, needle, stream, rmm::mr::get_current_device_resource()); + dict_col, needle, stream, cudf::get_current_device_resource_ref()); // if found, check the index is actually in the indices column return index->is_valid(stream) && cudf::type_dispatcher(dict_col.indices().type(), contains_scalar_dispatch{}, diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu index 81227cb9a2d..2f6d23b7f7d 100644 --- a/cpp/src/search/contains_table.cu +++ b/cpp/src/search/contains_table.cu @@ -23,11 +23,11 @@ #include #include #include +#include #include 
#include #include -#include #include #include @@ -119,7 +119,7 @@ std::pair build_row_bitmask(table_view if (nullable_columns.size() > 1) { auto row_bitmask = cudf::detail::bitmask_and( - table_view{nullable_columns}, stream, rmm::mr::get_current_device_resource()) + table_view{nullable_columns}, stream, cudf::get_current_device_resource_ref()) .first; auto const row_bitmask_ptr = static_cast(row_bitmask.data()); return std::pair(std::move(row_bitmask), row_bitmask_ptr); @@ -229,14 +229,15 @@ rmm::device_uvector contains(table_view const& haystack, [&](auto const& d_self_equal, auto const& d_two_table_equal, auto const& probing_scheme) { auto const d_equal = comparator_adapter{d_self_equal, d_two_table_equal}; - auto set = cuco::static_set{cuco::extent{compute_hash_table_size(haystack.num_rows())}, - cuco::empty_key{rhs_index_type{-1}}, - d_equal, - probing_scheme, - {}, - {}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto set = cuco::static_set{ + cuco::extent{compute_hash_table_size(haystack.num_rows())}, + cuco::empty_key{rhs_index_type{-1}}, + d_equal, + probing_scheme, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { auto const bitmask_buffer_and_ptr = build_row_bitmask(haystack, stream); diff --git a/cpp/src/search/search_ordered.cu b/cpp/src/search/search_ordered.cu index 80651a4ec44..ac93e24b254 100644 --- a/cpp/src/search/search_ordered.cu +++ b/cpp/src/search/search_ordered.cu @@ -23,10 +23,10 @@ #include #include #include +#include #include #include -#include #include @@ -64,7 +64,7 @@ std::unique_ptr search_ordered(table_view const& haystack, // This utility will ensure all corresponding dictionary columns have matching keys. // It will return any new dictionary columns created as well as updated table_views. 
auto const matched = dictionary::detail::match_dictionaries( - {haystack, needles}, stream, rmm::mr::get_current_device_resource()); + {haystack, needles}, stream, cudf::get_current_device_resource_ref()); auto const& matched_haystack = matched.second.front(); auto const& matched_needles = matched.second.back(); diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu index c5dcc7c240d..cbde87198bd 100644 --- a/cpp/src/sort/rank.cu +++ b/cpp/src/sort/rank.cu @@ -27,10 +27,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu index 408ac29b8a9..5dc5c39f2bc 100644 --- a/cpp/src/sort/segmented_sort.cu +++ b/cpp/src/sort/segmented_sort.cu @@ -20,10 +20,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/sort/segmented_sort_impl.cuh b/cpp/src/sort/segmented_sort_impl.cuh index 6d472925b30..a397d4c6630 100644 --- a/cpp/src/sort/segmented_sort_impl.cuh +++ b/cpp/src/sort/segmented_sort_impl.cuh @@ -23,11 +23,10 @@ #include #include #include +#include #include #include -#include -#include #include @@ -77,8 +76,10 @@ struct column_fast_sort_fn { input.size(), mask_allocation_policy::NEVER, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); mutable_column_view output_view = temp_col->mutable_view(); + auto temp_indices = cudf::column( + cudf::column_view(indices.type(), indices.size(), indices.head(), nullptr, 0), stream); // DeviceSegmentedSort is faster than DeviceSegmentedRadixSort at this time auto fast_sort_impl = [stream](bool ascending, [[maybe_unused]] auto&&... args) { @@ -118,7 +119,7 @@ struct column_fast_sort_fn { fast_sort_impl(ascending, input.begin(), output_view.begin(), - indices.begin(), + temp_indices.view().begin(), indices.begin(), input.size(), segment_offsets.size() - 1, @@ -309,12 +310,13 @@ std::unique_ptr
segmented_sort_by_key_common(table_view const& values, { CUDF_EXPECTS(values.num_rows() == keys.num_rows(), "Mismatch in number of rows for values and keys"); - auto sorted_order = segmented_sorted_order_common(keys, - segment_offsets, - column_order, - null_precedence, - stream, - rmm::mr::get_current_device_resource()); + auto sorted_order = + segmented_sorted_order_common(keys, + segment_offsets, + column_order, + null_precedence, + stream, + cudf::get_current_device_resource_ref()); // Gather segmented sort of child value columns return detail::gather(values, sorted_order->view(), diff --git a/cpp/src/sort/sort.cu b/cpp/src/sort/sort.cu index 7216bc99e08..ac6fef17952 100644 --- a/cpp/src/sort/sort.cu +++ b/cpp/src/sort/sort.cu @@ -24,9 +24,9 @@ #include #include #include +#include #include -#include #include #include @@ -53,7 +53,7 @@ std::unique_ptr
sort_by_key(table_view const& values, "Mismatch in number of rows for values and keys"); auto sorted_order = detail::sorted_order( - keys, column_order, null_precedence, stream, rmm::mr::get_current_device_resource()); + keys, column_order, null_precedence, stream, cudf::get_current_device_resource_ref()); return detail::gather(values, sorted_order->view(), diff --git a/cpp/src/sort/sort_column.cu b/cpp/src/sort/sort_column.cu index 99a45bf91a3..212f4728c05 100644 --- a/cpp/src/sort/sort_column.cu +++ b/cpp/src/sort/sort_column.cu @@ -19,10 +19,9 @@ #include #include +#include #include -#include - #include namespace cudf { diff --git a/cpp/src/sort/sort_column_impl.cuh b/cpp/src/sort/sort_column_impl.cuh index 564791e0b49..906cfb23894 100644 --- a/cpp/src/sort/sort_column_impl.cuh +++ b/cpp/src/sort/sort_column_impl.cuh @@ -21,11 +21,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/sort/sort_impl.cuh b/cpp/src/sort/sort_impl.cuh index 20e977e9fd5..d5efebf26e2 100644 --- a/cpp/src/sort/sort_impl.cuh +++ b/cpp/src/sort/sort_impl.cuh @@ -20,8 +20,7 @@ #include "sort_column_impl.cuh" #include - -#include +#include namespace cudf { namespace detail { diff --git a/cpp/src/sort/stable_segmented_sort.cu b/cpp/src/sort/stable_segmented_sort.cu index 61e37205c98..e814386db66 100644 --- a/cpp/src/sort/stable_segmented_sort.cu +++ b/cpp/src/sort/stable_segmented_sort.cu @@ -20,8 +20,7 @@ #include #include #include - -#include +#include namespace cudf { namespace detail { diff --git a/cpp/src/sort/stable_sort.cu b/cpp/src/sort/stable_sort.cu index ce05a755756..6ce4dfbead8 100644 --- a/cpp/src/sort/stable_sort.cu +++ b/cpp/src/sort/stable_sort.cu @@ -24,9 +24,9 @@ #include #include #include +#include #include -#include namespace cudf { namespace detail { @@ -69,7 +69,7 @@ std::unique_ptr
stable_sort_by_key(table_view const& values, "Mismatch in number of rows for values and keys"); auto sorted_order = detail::stable_sorted_order( - keys, column_order, null_precedence, stream, rmm::mr::get_current_device_resource()); + keys, column_order, null_precedence, stream, cudf::get_current_device_resource_ref()); return detail::gather(values, sorted_order->view(), diff --git a/cpp/src/sort/stable_sort_column.cu b/cpp/src/sort/stable_sort_column.cu index bdb631a8154..e1aca9d9fe3 100644 --- a/cpp/src/sort/stable_sort_column.cu +++ b/cpp/src/sort/stable_sort_column.cu @@ -19,10 +19,9 @@ #include #include +#include #include -#include - #include namespace cudf { diff --git a/cpp/src/stream_compaction/apply_boolean_mask.cu b/cpp/src/stream_compaction/apply_boolean_mask.cu index cdca9517d94..2c60687b92c 100644 --- a/cpp/src/stream_compaction/apply_boolean_mask.cu +++ b/cpp/src/stream_compaction/apply_boolean_mask.cu @@ -24,10 +24,10 @@ #include #include #include +#include #include #include -#include #include @@ -91,9 +91,10 @@ std::unique_ptr
apply_boolean_mask(table_view const& input, */ std::unique_ptr
apply_boolean_mask(table_view const& input, column_view const& boolean_mask, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::apply_boolean_mask(input, boolean_mask, cudf::get_default_stream(), mr); + return detail::apply_boolean_mask(input, boolean_mask, stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu index e5cf29f3ebf..7d11b02d3e1 100644 --- a/cpp/src/stream_compaction/distinct.cu +++ b/cpp/src/stream_compaction/distinct.cu @@ -26,11 +26,10 @@ #include #include #include +#include #include #include -#include -#include #include #include @@ -51,7 +50,7 @@ namespace { * @param func The input functor to invoke */ template -rmm::device_uvector dipatch_row_equal( +rmm::device_uvector dispatch_row_equal( null_equality compare_nulls, nan_equality compare_nans, bool has_nulls, @@ -97,22 +96,23 @@ rmm::device_uvector distinct_indices(table_view const& input, auto const helper_func = [&](auto const& d_equal) { using RowHasher = std::decay_t; - auto set = hash_set_type{num_rows, - 0.5, // desired load factor - cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, - d_equal, - {row_hash.device_hasher(has_nulls)}, - {}, - {}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto set = hash_set_type{ + num_rows, + 0.5, // desired load factor + cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL}, + d_equal, + {row_hash.device_hasher(has_nulls)}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; return detail::reduce_by_row(set, num_rows, keep, stream, mr); }; if (cudf::detail::has_nested_columns(input)) { - return dipatch_row_equal(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); + return dispatch_row_equal(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); } else { - return dipatch_row_equal(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); + 
return dispatch_row_equal(nulls_equal, nans_equal, has_nulls, row_equal, helper_func); } } @@ -133,7 +133,7 @@ std::unique_ptr
distinct(table_view const& input, nulls_equal, nans_equal, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); return detail::gather(input, gather_map, out_of_bounds_policy::DONT_CHECK, @@ -149,11 +149,11 @@ std::unique_ptr
distinct(table_view const& input, duplicate_keep_option keep, null_equality nulls_equal, nan_equality nans_equal, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::distinct( - input, keys, keep, nulls_equal, nans_equal, cudf::get_default_stream(), mr); + return detail::distinct(input, keys, keep, nulls_equal, nans_equal, stream, mr); } std::unique_ptr distinct_indices(table_view const& input, diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu index 9843bb889f4..46a7f088298 100644 --- a/cpp/src/stream_compaction/distinct_count.cu +++ b/cpp/src/stream_compaction/distinct_count.cu @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -141,14 +142,15 @@ cudf::size_type distinct_count(table_view const& keys, auto const comparator_helper = [&](auto const row_equal) { using hasher_type = decltype(hash_key); - auto key_set = cuco::static_set{cuco::extent{compute_hash_table_size(num_rows)}, - cuco::empty_key{-1}, - row_equal, - cuco::linear_probing<1, hasher_type>{hash_key}, - {}, - {}, - cudf::detail::cuco_allocator{stream}, - stream.value()}; + auto key_set = cuco::static_set{ + cuco::extent{compute_hash_table_size(num_rows)}, + cuco::empty_key{-1}, + row_equal, + cuco::linear_probing<1, hasher_type>{hash_key}, + {}, + {}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; auto const iter = thrust::counting_iterator(0); // when nulls are equal, we skip hashing any row that has a null @@ -158,7 +160,7 @@ cudf::size_type distinct_count(table_view const& keys, // We must consider a row if any of its column entries is valid, // hence OR together the validities of the columns. 
auto const [row_bitmask, null_count] = - cudf::detail::bitmask_or(keys, stream, rmm::mr::get_current_device_resource()); + cudf::detail::bitmask_or(keys, stream, cudf::get_current_device_resource_ref()); // Unless all columns have a null mask, row_bitmask will be // null, and null_count will be zero. Equally, unless there is @@ -217,15 +219,18 @@ cudf::size_type distinct_count(column_view const& input, cudf::size_type distinct_count(column_view const& input, null_policy null_handling, - nan_policy nan_handling) + nan_policy nan_handling, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - return detail::distinct_count(input, null_handling, nan_handling, cudf::get_default_stream()); + return detail::distinct_count(input, null_handling, nan_handling, stream); } -cudf::size_type distinct_count(table_view const& input, null_equality nulls_equal) +cudf::size_type distinct_count(table_view const& input, + null_equality nulls_equal, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - return detail::distinct_count(input, nulls_equal, cudf::get_default_stream()); + return detail::distinct_count(input, nulls_equal, stream); } } // namespace cudf diff --git a/cpp/src/stream_compaction/distinct_helpers.hpp b/cpp/src/stream_compaction/distinct_helpers.hpp index fca67c98873..f15807c2434 100644 --- a/cpp/src/stream_compaction/distinct_helpers.hpp +++ b/cpp/src/stream_compaction/distinct_helpers.hpp @@ -18,10 +18,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -57,7 +57,7 @@ using hash_set_type = cudf::experimental::row::hash::device_row_hasher< cudf::hashing::detail::default_hash, cudf::nullate::DYNAMIC>>, - cudf::detail::cuco_allocator, + cudf::detail::cuco_allocator, cuco::storage<1>>; /** diff --git a/cpp/src/stream_compaction/drop_nans.cu b/cpp/src/stream_compaction/drop_nans.cu index b46381c8ff6..8a53a2e8360 100644 --- a/cpp/src/stream_compaction/drop_nans.cu +++ b/cpp/src/stream_compaction/drop_nans.cu @@ -22,10 +22,10 @@ 
#include #include #include +#include #include #include -#include #include #include @@ -117,20 +117,22 @@ std::unique_ptr
drop_nans(table_view const& input, std::unique_ptr
drop_nans(table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::drop_nans(input, keys, keep_threshold, cudf::get_default_stream(), mr); + return detail::drop_nans(input, keys, keep_threshold, stream, mr); } /* * Filters a table to remove nan elements. */ std::unique_ptr
drop_nans(table_view const& input, std::vector const& keys, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::drop_nans(input, keys, keys.size(), cudf::get_default_stream(), mr); + return detail::drop_nans(input, keys, keys.size(), stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/drop_nulls.cu b/cpp/src/stream_compaction/drop_nulls.cu index cb7cd61bf02..22da762a0dd 100644 --- a/cpp/src/stream_compaction/drop_nulls.cu +++ b/cpp/src/stream_compaction/drop_nulls.cu @@ -22,9 +22,9 @@ #include #include #include +#include #include -#include #include #include @@ -90,20 +90,22 @@ std::unique_ptr
drop_nulls(table_view const& input, std::unique_ptr
drop_nulls(table_view const& input, std::vector const& keys, cudf::size_type keep_threshold, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::drop_nulls(input, keys, keep_threshold, cudf::get_default_stream(), mr); + return detail::drop_nulls(input, keys, keep_threshold, stream, mr); } /* * Filters a table to remove null elements. */ std::unique_ptr
drop_nulls(table_view const& input, std::vector const& keys, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::drop_nulls(input, keys, keys.size(), cudf::get_default_stream(), mr); + return detail::drop_nulls(input, keys, keys.size(), stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/stable_distinct.cu b/cpp/src/stream_compaction/stable_distinct.cu index 074d4fd7d1a..2097b7bd3d2 100644 --- a/cpp/src/stream_compaction/stable_distinct.cu +++ b/cpp/src/stream_compaction/stable_distinct.cu @@ -19,10 +19,9 @@ #include #include #include +#include #include -#include - #include #include #include @@ -47,7 +46,7 @@ std::unique_ptr
stable_distinct(table_view const& input, nulls_equal, nans_equal, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // The only difference between this implementation and the unstable version // is that the stable implementation must retain the input order. The diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu index edb47984d13..eaabc6f1272 100644 --- a/cpp/src/stream_compaction/unique.cu +++ b/cpp/src/stream_compaction/unique.cu @@ -31,11 +31,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -119,10 +119,11 @@ std::unique_ptr
unique(table_view const& input, std::vector const& keys, duplicate_keep_option const keep, null_equality nulls_equal, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::unique(input, keys, keep, nulls_equal, cudf::get_default_stream(), mr); + return detail::unique(input, keys, keep, nulls_equal, stream, mr); } } // namespace cudf diff --git a/cpp/src/stream_compaction/unique_count.cu b/cpp/src/stream_compaction/unique_count.cu index 19607fe8105..d842f63cd7b 100644 --- a/cpp/src/stream_compaction/unique_count.cu +++ b/cpp/src/stream_compaction/unique_count.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -67,10 +67,12 @@ cudf::size_type unique_count(table_view const& keys, } // namespace detail -cudf::size_type unique_count(table_view const& input, null_equality nulls_equal) +cudf::size_type unique_count(table_view const& input, + null_equality nulls_equal, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - return detail::unique_count(input, nulls_equal, cudf::get_default_stream()); + return detail::unique_count(input, nulls_equal, stream); } } // namespace cudf diff --git a/cpp/src/stream_compaction/unique_count_column.cu b/cpp/src/stream_compaction/unique_count_column.cu index 16758b6e3a7..89ce2391a7b 100644 --- a/cpp/src/stream_compaction/unique_count_column.cu +++ b/cpp/src/stream_compaction/unique_count_column.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -101,10 +101,11 @@ cudf::size_type unique_count(column_view const& input, cudf::size_type unique_count(column_view const& input, null_policy null_handling, - nan_policy nan_handling) + nan_policy nan_handling, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); - return detail::unique_count(input, null_handling, nan_handling, cudf::get_default_stream()); + return detail::unique_count(input, null_handling, nan_handling, stream); } } // namespace cudf diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu index 778f546990d..c56d25fde2b 100644 --- a/cpp/src/strings/attributes.cu +++ b/cpp/src/strings/attributes.cu @@ -26,11 +26,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/strings/capitalize.cu b/cpp/src/strings/capitalize.cu index 3f7a98381b8..45e80cc780d 100644 --- a/cpp/src/strings/capitalize.cu +++ b/cpp/src/strings/capitalize.cu @@ -25,9 +25,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu index 27befdea209..4c015f3cbed 100644 --- a/cpp/src/strings/case.cu +++ b/cpp/src/strings/case.cu @@ -29,10 +29,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/strings/char_types/char_types.cu b/cpp/src/strings/char_types/char_types.cu index 58137aced0f..c3b4938da1a 100644 --- a/cpp/src/strings/char_types/char_types.cu +++ b/cpp/src/strings/char_types/char_types.cu @@ -27,9 +27,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/src/strings/combine/concatenate.cu b/cpp/src/strings/combine/concatenate.cu index a2c77c5e77f..617ff41a043 100644 --- a/cpp/src/strings/combine/concatenate.cu +++ b/cpp/src/strings/combine/concatenate.cu @@ -29,11 +29,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/strings/combine/join.cu 
b/cpp/src/strings/combine/join.cu index b534e9b2e5b..07e659e380e 100644 --- a/cpp/src/strings/combine/join.cu +++ b/cpp/src/strings/combine/join.cu @@ -29,11 +29,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/strings/combine/join_list_elements.cu b/cpp/src/strings/combine/join_list_elements.cu index f5dfc1a2012..663dc9dda73 100644 --- a/cpp/src/strings/combine/join_list_elements.cu +++ b/cpp/src/strings/combine/join_list_elements.cu @@ -27,10 +27,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu index 718ac41e36c..67531fea579 100644 --- a/cpp/src/strings/contains.cu +++ b/cpp/src/strings/contains.cu @@ -27,9 +27,9 @@ #include #include #include +#include #include -#include namespace cudf { namespace strings { @@ -112,7 +112,7 @@ std::unique_ptr count_re(strings_column_view const& input, auto const d_strings = column_device_view::create(input.parent(), stream); - auto result = count_matches(*d_strings, *d_prog, input.size(), stream, mr); + auto result = count_matches(*d_strings, *d_prog, stream, mr); if (input.has_nulls()) { result->set_null_mask(cudf::detail::copy_bitmask(input.parent(), stream, mr), input.null_count()); diff --git a/cpp/src/strings/convert/convert_booleans.cu b/cpp/src/strings/convert/convert_booleans.cu index d4ccb685061..3ba17fdb872 100644 --- a/cpp/src/strings/convert/convert_booleans.cu +++ b/cpp/src/strings/convert/convert_booleans.cu @@ -24,10 +24,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index 64a2107e17a..4c9eba5b526 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -28,19 +28,19 @@ #include #include #include +#include #include #include #include #include #include 
-#include +#include #include #include #include #include -#include #include #include @@ -161,7 +161,7 @@ struct format_compiler { // copy format_items to device memory d_items = cudf::detail::make_device_uvector_async( - items, stream, rmm::mr::get_current_device_resource()); + items, stream, cudf::get_current_device_resource_ref()); } device_span format_items() { return device_span(d_items); } @@ -519,7 +519,7 @@ struct check_datetime_format { * The checking here is a little more strict than the actual * parser used for conversion. */ - __device__ thrust::optional check_string(string_view const& d_string) + __device__ cuda::std::optional check_string(string_view const& d_string) { timestamp_components dateparts = {1970, 1, 1, 0}; // init to epoch time @@ -529,7 +529,7 @@ struct check_datetime_format { // eliminate static character values first if (item.item_type == format_char_type::literal) { // check static character matches - if (*ptr != item.value) return thrust::nullopt; + if (*ptr != item.value) return cuda::std::nullopt; ptr += item.length; length -= item.length; continue; @@ -645,7 +645,7 @@ struct check_datetime_format { case 'Z': result = true; // skip default: break; } - if (!result) return thrust::nullopt; + if (!result) return cuda::std::nullopt; ptr += bytes_read; length -= bytes_read; } @@ -821,7 +821,7 @@ struct datetime_formatter_fn { // We only dissect the timestamp into components if needed // by a specifier. And then we only do it once and reuse it. // This can improve performance when not using uncommon specifiers. 
- thrust::optional days; + cuda::std::optional days; auto days_from_timestamp = [tstamp]() { auto const count = tstamp.time_since_epoch().count(); diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu index 514ab965fc5..0db1adf1223 100644 --- a/cpp/src/strings/convert/convert_durations.cu +++ b/cpp/src/strings/convert/convert_durations.cu @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 73089ad407e..9848c1f605e 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -27,11 +27,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index bd7b411d3c3..d3d90104252 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -25,12 +25,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include diff --git a/cpp/src/strings/convert/convert_hex.cu b/cpp/src/strings/convert/convert_hex.cu index a34b148a951..fce83e87645 100644 --- a/cpp/src/strings/convert/convert_hex.cu +++ b/cpp/src/strings/convert/convert_hex.cu @@ -24,12 +24,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu index aeabc71d300..b4eead05ce5 100644 --- a/cpp/src/strings/convert/convert_integers.cu +++ b/cpp/src/strings/convert/convert_integers.cu @@ -27,11 +27,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/strings/convert/convert_ipv4.cu 
b/cpp/src/strings/convert/convert_ipv4.cu index 68a24e000ae..c0c890341ae 100644 --- a/cpp/src/strings/convert/convert_ipv4.cu +++ b/cpp/src/strings/convert/convert_ipv4.cu @@ -24,9 +24,9 @@ #include #include #include +#include #include -#include #include #include @@ -46,7 +46,7 @@ namespace { struct ipv4_to_integers_fn { column_device_view const d_strings; - __device__ int64_t operator()(size_type idx) + __device__ uint32_t operator()(size_type idx) { if (d_strings.is_null(idx)) return 0; string_view d_str = d_strings.element(idx); @@ -66,7 +66,7 @@ struct ipv4_to_integers_fn { } } uint32_t result = (ipvals[0] << 24) + (ipvals[1] << 16) + (ipvals[2] << 8) + ipvals[3]; - return static_cast(result); + return result; } }; @@ -79,18 +79,18 @@ std::unique_ptr ipv4_to_integers(strings_column_view const& input, { size_type strings_count = input.size(); if (strings_count == 0) { - return make_numeric_column(data_type{type_id::INT64}, 0, mask_state::UNALLOCATED, stream); + return make_numeric_column(data_type{type_id::UINT32}, 0, mask_state::UNALLOCATED, stream); } auto strings_column = column_device_view::create(input.parent(), stream); // create output column copying the strings' null-mask - auto results = make_numeric_column(data_type{type_id::INT64}, + auto results = make_numeric_column(data_type{type_id::UINT32}, strings_count, cudf::detail::copy_bitmask(input.parent(), stream, mr), input.null_count(), stream, mr); - auto d_results = results->mutable_view().data(); + auto d_results = results->mutable_view().data(); // fill output column with ipv4 integers thrust::transform(rmm::exec_policy(stream), thrust::make_counting_iterator(0), @@ -135,7 +135,7 @@ struct integers_to_ipv4_fn { return; } - auto const ip_number = d_column.element(idx); + auto const ip_number = d_column.element(idx); char* out_ptr = d_chars ? 
d_chars + d_offsets[idx] : nullptr; int shift_bits = 24; @@ -165,7 +165,7 @@ std::unique_ptr integers_to_ipv4(column_view const& integers, { if (integers.is_empty()) return make_empty_column(type_id::STRING); - CUDF_EXPECTS(integers.type().id() == type_id::INT64, "Input column must be type_id::INT64 type"); + CUDF_EXPECTS(integers.type().id() == type_id::UINT32, "Input column must be UINT32 type"); auto d_column = column_device_view::create(integers, stream); auto [offsets_column, chars] = diff --git a/cpp/src/strings/convert/convert_lists.cu b/cpp/src/strings/convert/convert_lists.cu index 604f928430b..f574f091ab5 100644 --- a/cpp/src/strings/convert/convert_lists.cu +++ b/cpp/src/strings/convert/convert_lists.cu @@ -21,9 +21,9 @@ #include #include #include +#include #include -#include namespace cudf { namespace strings { diff --git a/cpp/src/strings/convert/convert_urls.cu b/cpp/src/strings/convert/convert_urls.cu index 39907a38f2f..520f5897415 100644 --- a/cpp/src/strings/convert/convert_urls.cu +++ b/cpp/src/strings/convert/convert_urls.cu @@ -28,10 +28,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu index 352e0f9f41a..1d9d12686eb 100644 --- a/cpp/src/strings/copying/concatenate.cu +++ b/cpp/src/strings/copying/concatenate.cu @@ -24,11 +24,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -87,7 +87,7 @@ auto create_strings_device_views(host_span views, rmm::cuda_s }); thrust::inclusive_scan(thrust::host, offset_it, input_offsets.end(), offset_it); auto d_input_offsets = cudf::detail::make_device_uvector_async( - input_offsets, stream, rmm::mr::get_current_device_resource()); + input_offsets, stream, cudf::get_current_device_resource_ref()); auto const output_size = input_offsets.back(); // Compute the partition offsets and size of chars column diff --git 
a/cpp/src/strings/copying/copy_range.cu b/cpp/src/strings/copying/copy_range.cu index 9f8c47602f8..90865a4b73e 100644 --- a/cpp/src/strings/copying/copy_range.cu +++ b/cpp/src/strings/copying/copy_range.cu @@ -22,10 +22,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -40,20 +40,14 @@ struct compute_element_size { size_type source_begin; size_type target_begin; size_type target_end; - bool source_has_nulls; - bool target_has_nulls; __device__ cudf::size_type operator()(cudf::size_type idx) { if (idx >= target_begin && idx < target_end) { auto const str_idx = source_begin + (idx - target_begin); - return source_has_nulls && d_source.is_null_nocheck(str_idx) - ? 0 - : d_source.element(str_idx).size_bytes(); + return d_source.is_null(str_idx) ? 0 : d_source.element(str_idx).size_bytes(); } else { - return target_has_nulls && d_target.is_null_nocheck(idx) - ? 0 - : d_target.element(idx).size_bytes(); + return d_target.is_null(idx) ? 0 : d_target.element(idx).size_bytes(); } } }; @@ -97,20 +91,9 @@ std::unique_ptr copy_range(strings_column_view const& source, mr); }(); - auto [check_source, check_target] = [target, null_count = null_count] { - // check validities for both source & target - if (target.has_nulls()) { return std::make_pair(true, true); } - // check validities for source only - if (null_count > 0) { return std::make_pair(true, false); } - // no need to check validities - return std::make_pair(false, false); - }(); - // create offsets auto sizes_begin = cudf::detail::make_counting_transform_iterator( - 0, - compute_element_size{ - d_source, d_target, source_begin, target_begin, target_end, check_source, check_target}); + 0, compute_element_size{d_source, d_target, source_begin, target_begin, target_end}); auto [offsets_column, chars_bytes] = cudf::strings::detail::make_offsets_child_column( sizes_begin, sizes_begin + target.size(), stream, mr); auto d_offsets = 
cudf::detail::offsetalator_factory::make_input_iterator(offsets_column->view()); diff --git a/cpp/src/strings/copying/copying.cu b/cpp/src/strings/copying/copying.cu index e8b411d50a6..f923f99c131 100644 --- a/cpp/src/strings/copying/copying.cu +++ b/cpp/src/strings/copying/copying.cu @@ -21,11 +21,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/strings/copying/shift.cu b/cpp/src/strings/copying/shift.cu index b386c0860d1..e36d5f9f14e 100644 --- a/cpp/src/strings/copying/shift.cu +++ b/cpp/src/strings/copying/shift.cu @@ -22,10 +22,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/strings/count_matches.cu b/cpp/src/strings/count_matches.cu index e8672ea5335..ae4e623a9e8 100644 --- a/cpp/src/strings/count_matches.cu +++ b/cpp/src/strings/count_matches.cu @@ -20,8 +20,7 @@ #include #include #include - -#include +#include namespace cudf { namespace strings { @@ -60,18 +59,15 @@ struct count_fn { std::unique_ptr count_matches(column_device_view const& d_strings, reprog_device& d_prog, - size_type output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - assert(output_size >= d_strings.size() and "Unexpected output size"); - auto results = make_numeric_column( - data_type{type_to_id()}, output_size, mask_state::UNALLOCATED, stream, mr); + data_type{type_to_id()}, d_strings.size(), mask_state::UNALLOCATED, stream, mr); - if (d_strings.size() == 0) return results; + if (d_strings.size() == 0) { return results; } - auto d_results = results->mutable_view().data(); + auto d_results = results->mutable_view().data(); launch_transform_kernel(count_fn{d_strings}, d_prog, d_results, d_strings.size(), stream); diff --git a/cpp/src/strings/count_matches.hpp b/cpp/src/strings/count_matches.hpp index 4a5efac37fd..f46168a3389 100644 --- a/cpp/src/strings/count_matches.hpp +++ b/cpp/src/strings/count_matches.hpp @@ -17,9 +17,9 @@ 
#pragma once #include +#include #include -#include namespace cudf { @@ -37,14 +37,12 @@ class reprog_device; * * @param d_strings Device view of the input strings column. * @param d_prog Regex instance to evaluate on each string. - * @param output_size Number of rows for the output column. * @param stream CUDA stream used for device memory operations and kernel launches. * @param mr Device memory resource used to allocate the returned column's device memory. * @return Integer column of match counts */ std::unique_ptr count_matches(column_device_view const& d_strings, reprog_device& d_prog, - size_type output_size, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); diff --git a/cpp/src/strings/extract/extract.cu b/cpp/src/strings/extract/extract.cu index b18b50d1b43..7323918dcff 100644 --- a/cpp/src/strings/extract/extract.cu +++ b/cpp/src/strings/extract/extract.cu @@ -26,10 +26,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu index 27691068d5a..a9fbb375e37 100644 --- a/cpp/src/strings/extract/extract_all.cu +++ b/cpp/src/strings/extract/extract_all.cu @@ -27,10 +27,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -119,7 +119,7 @@ std::unique_ptr extract_all_record(strings_column_view const& input, // Get the match counts for each string. // This column will become the output lists child offsets column. 
- auto counts = count_matches(*d_strings, *d_prog, strings_count, stream, mr); + auto counts = count_matches(*d_strings, *d_prog, stream, mr); auto d_counts = counts->mutable_view().data(); // Compute null output rows diff --git a/cpp/src/strings/filling/fill.cu b/cpp/src/strings/filling/fill.cu index 878d0fe11ba..6a2da3542c7 100644 --- a/cpp/src/strings/filling/fill.cu +++ b/cpp/src/strings/filling/fill.cu @@ -20,9 +20,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/src/strings/filter_chars.cu b/cpp/src/strings/filter_chars.cu index 48620af8cad..3e8b5e2af57 100644 --- a/cpp/src/strings/filter_chars.cu +++ b/cpp/src/strings/filter_chars.cu @@ -28,10 +28,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -134,8 +134,8 @@ std::unique_ptr filter_characters( characters_to_filter.begin(), characters_to_filter.end(), htable.begin(), [](auto entry) { return char_range{entry.first, entry.second}; }); - rmm::device_uvector table = - cudf::detail::make_device_uvector_async(htable, stream, rmm::mr::get_current_device_resource()); + rmm::device_uvector table = cudf::detail::make_device_uvector_async( + htable, stream, cudf::get_current_device_resource_ref()); auto d_strings = column_device_view::create(strings.parent(), stream); diff --git a/cpp/src/strings/like.cu b/cpp/src/strings/like.cu index 4df1b9b4ffe..f8db66f998b 100644 --- a/cpp/src/strings/like.cu +++ b/cpp/src/strings/like.cu @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/strings/padding.cu b/cpp/src/strings/padding.cu index 0d146108436..fb2ce9a251a 100644 --- a/cpp/src/strings/padding.cu +++ b/cpp/src/strings/padding.cu @@ -24,9 +24,9 @@ #include #include #include +#include #include -#include namespace cudf { namespace strings { diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp index adf650a4f27..7c4c89bd3fb 100644 --- 
a/cpp/src/strings/regex/regcomp.cpp +++ b/cpp/src/strings/regex/regcomp.cpp @@ -539,15 +539,26 @@ class regex_parser { : static_cast(LBRA); case ')': return RBRA; case '^': { - _chr = is_multiline(_flags) ? chr : '\n'; + if (is_ext_newline(_flags)) { + _chr = is_multiline(_flags) ? 'S' : 'N'; + } else { + _chr = is_multiline(_flags) ? chr : '\n'; + } return BOL; } case '$': { - _chr = is_multiline(_flags) ? chr : '\n'; + if (is_ext_newline(_flags)) { + _chr = is_multiline(_flags) ? 'S' : 'N'; + } else { + _chr = is_multiline(_flags) ? chr : '\n'; + } return EOL; } case '[': return build_cclass(); - case '.': return dot_type; + case '.': { + _chr = is_ext_newline(_flags) ? 'N' : chr; + return dot_type; + } } if (std::find(quantifiers.begin(), quantifiers.end(), static_cast(chr)) == @@ -959,7 +970,7 @@ class regex_compiler { _prog.inst_at(inst_id).u1.cls_id = class_id; } else if (token == CHAR) { _prog.inst_at(inst_id).u1.c = yy; - } else if (token == BOL || token == EOL) { + } else if (token == BOL || token == EOL || token == ANY) { _prog.inst_at(inst_id).u1.c = yy; } push_and(inst_id, inst_id); @@ -1194,7 +1205,7 @@ void reprog::print(regex_flags const flags) case STAR: printf(" STAR next=%d", inst.u2.next_id); break; case PLUS: printf(" PLUS next=%d", inst.u2.next_id); break; case QUEST: printf(" QUEST next=%d", inst.u2.next_id); break; - case ANY: printf(" ANY next=%d", inst.u2.next_id); break; + case ANY: printf(" ANY '%c', next=%d", inst.u1.c, inst.u2.next_id); break; case ANYNL: printf(" ANYNL next=%d", inst.u2.next_id); break; case NOP: printf(" NOP next=%d", inst.u2.next_id); break; case BOL: { diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh index e6134296e45..2df404048f7 100644 --- a/cpp/src/strings/regex/regex.cuh +++ b/cpp/src/strings/regex/regex.cuh @@ -23,8 +23,8 @@ #include +#include #include -#include #include #include @@ -36,7 +36,7 @@ namespace detail { struct relist; using match_pair = thrust::pair; -using 
match_result = thrust::optional; +using match_result = cuda::std::optional; constexpr int32_t MAX_SHARED_MEM = 2048; ///< Memory size for storing prog instruction data constexpr std::size_t MAX_WORKING_MEM = 0x01'FFFF'FFFF; ///< Memory size for state data diff --git a/cpp/src/strings/regex/regex.inl b/cpp/src/strings/regex/regex.inl index 23e1944cda4..e34a1e12015 100644 --- a/cpp/src/strings/regex/regex.inl +++ b/cpp/src/strings/regex/regex.inl @@ -126,6 +126,16 @@ __device__ __forceinline__ void reprog_device::reljunk::swaplist() list2 = tmp; } +/** + * @brief Check for supported new-line characters + * + * '\n, \r, \u0085, \u2028, or \u2029' + */ +constexpr bool is_newline(char32_t const ch) +{ + return (ch == '\n' || ch == '\r' || ch == 0x00c285 || ch == 0x00e280a8 || ch == 0x00e280a9); +} + /** * @brief Utility to check a specific character against this class instance. * @@ -258,14 +268,17 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const if (checkstart) { auto startchar = static_cast(jnk.startchar); switch (jnk.starttype) { - case BOL: - if (pos == 0) break; - if (jnk.startchar != '^') { return thrust::nullopt; } + case BOL: { + if (pos == 0) { break; } + if (startchar != '^' && startchar != 'S') { return cuda::std::nullopt; } + if (startchar != '\n') { break; } --itr; startchar = static_cast('\n'); + [[fallthrough]]; + } case CHAR: { auto const find_itr = find_char(startchar, dstr, itr); - if (find_itr.byte_offset() >= dstr.size_bytes()) { return thrust::nullopt; } + if (find_itr.byte_offset() >= dstr.size_bytes()) { return cuda::std::nullopt; } itr = find_itr + (jnk.starttype == BOL); pos = itr.position(); break; @@ -312,26 +325,34 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const id_activate = inst.u2.next_id; expanded = true; break; - case BOL: - if ((pos == 0) || ((inst.u1.c == '^') && (dstr[pos - 1] == '\n'))) { + case BOL: { + auto titr = itr; + auto const prev_c = pos > 0 ? 
*(--titr) : 0; + if ((pos == 0) || ((inst.u1.c == '^') && (prev_c == '\n')) || + ((inst.u1.c == 'S') && (is_newline(prev_c)))) { id_activate = inst.u2.next_id; expanded = true; } break; - case EOL: + } + case EOL: { // after the last character OR: // - for MULTILINE, if current character is new-line // - for non-MULTILINE, the very last character of the string can also be a new-line + bool const nl = (inst.u1.c == 'S' || inst.u1.c == 'N') ? is_newline(c) : (c == '\n'); if (last_character || - ((c == '\n') && (inst.u1.c != 'Z') && - ((inst.u1.c == '$') || (itr.byte_offset() + 1 == dstr.size_bytes())))) { + (nl && (inst.u1.c != 'Z') && + ((inst.u1.c == '$' || inst.u1.c == 'S') || + (itr.byte_offset() + bytes_in_char_utf8(c) == dstr.size_bytes())))) { id_activate = inst.u2.next_id; expanded = true; } break; + } case BOW: case NBOW: { - auto const prev_c = pos > 0 ? dstr[pos - 1] : 0; + auto titr = itr; + auto const prev_c = pos > 0 ? *(--titr) : 0; auto const word_class = reclass_device{CCLASS_W}; bool const curr_is_word = word_class.is_match(c, _codepoint_flags); bool const prev_is_word = word_class.is_match(prev_c, _codepoint_flags); @@ -366,9 +387,10 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const case CHAR: if (inst.u1.c == c) id_activate = inst.u2.next_id; break; - case ANY: - if (c != '\n') id_activate = inst.u2.next_id; - break; + case ANY: { + if ((c == '\n') || ((inst.u1.c == 'N') && is_newline(c))) { break; } + [[fallthrough]]; + } case ANYNL: id_activate = inst.u2.next_id; break; case NCCLASS: case CCLASS: { @@ -396,7 +418,7 @@ __device__ __forceinline__ match_result reprog_device::regexec(string_view const checkstart = jnk.list1->get_size() == 0; } while (!last_character && (!checkstart || !match)); - return match ? match_result({begin, end}) : thrust::nullopt; + return match ? 
match_result({begin, end}) : cuda::std::nullopt; } __device__ __forceinline__ match_result reprog_device::find(int32_t const thread_idx, diff --git a/cpp/src/strings/regex/utilities.cuh b/cpp/src/strings/regex/utilities.cuh index afbfe9de049..679907788bb 100644 --- a/cpp/src/strings/regex/utilities.cuh +++ b/cpp/src/strings/regex/utilities.cuh @@ -24,10 +24,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu index 022f1eb3232..eae4839b3e4 100644 --- a/cpp/src/strings/repeat_strings.cu +++ b/cpp/src/strings/repeat_strings.cu @@ -27,9 +27,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu index 86afe4c8b9b..a46b5ebad4f 100644 --- a/cpp/src/strings/replace/backref_re.cu +++ b/cpp/src/strings/replace/backref_re.cu @@ -28,9 +28,9 @@ #include #include #include +#include #include -#include #include @@ -120,7 +120,7 @@ std::unique_ptr replace_with_backrefs(strings_column_view const& input, auto group_count = std::min(99, d_prog->group_counts()); // group count should NOT exceed 99 auto const parse_result = parse_backrefs(replacement, group_count); rmm::device_uvector backrefs = cudf::detail::make_device_uvector_async( - parse_result.second, stream, rmm::mr::get_current_device_resource()); + parse_result.second, stream, cudf::get_current_device_resource_ref()); string_scalar repl_scalar(parse_result.first, true, stream); string_view const d_repl_template = repl_scalar.value(stream); diff --git a/cpp/src/strings/replace/find_replace.cu b/cpp/src/strings/replace/find_replace.cu index 79bf6e3c910..8a8001dd81a 100644 --- a/cpp/src/strings/replace/find_replace.cu +++ b/cpp/src/strings/replace/find_replace.cu @@ -18,10 +18,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git 
a/cpp/src/strings/replace/multi.cu b/cpp/src/strings/replace/multi.cu index 2ca22f0e017..352d883bdc5 100644 --- a/cpp/src/strings/replace/multi.cu +++ b/cpp/src/strings/replace/multi.cu @@ -30,10 +30,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -321,9 +321,9 @@ std::unique_ptr replace_character_parallel(strings_column_view const& in get_offset_value(input.offsets(), input.offset(), stream); auto d_targets = - create_string_vector_from_column(targets, stream, rmm::mr::get_current_device_resource()); + create_string_vector_from_column(targets, stream, cudf::get_current_device_resource_ref()); auto d_replacements = - create_string_vector_from_column(repls, stream, rmm::mr::get_current_device_resource()); + create_string_vector_from_column(repls, stream, cudf::get_current_device_resource_ref()); replace_multi_parallel_fn fn{ *d_strings, @@ -361,7 +361,7 @@ std::unique_ptr replace_character_parallel(strings_column_view const& in // create a vector of offsets to each string's set of target positions auto const targets_offsets = create_offsets_from_positions( - input, targets_positions, stream, rmm::mr::get_current_device_resource()); + input, targets_positions, stream, cudf::get_current_device_resource_ref()); auto const d_targets_offsets = cudf::detail::offsetalator_factory::make_input_iterator(targets_offsets->view()); @@ -533,16 +533,5 @@ std::unique_ptr replace_multiple(strings_column_view const& strings, return detail::replace_multiple(strings, targets, repls, stream, mr); } -// deprecated in 24.08 -std::unique_ptr replace(strings_column_view const& strings, - strings_column_view const& targets, - strings_column_view const& repls, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - CUDF_FUNC_RANGE(); - return detail::replace_multiple(strings, targets, repls, stream, mr); -} - } // namespace strings } // namespace cudf diff --git a/cpp/src/strings/replace/multi_re.cu 
b/cpp/src/strings/replace/multi_re.cu index 31234ea42ec..0777253bb38 100644 --- a/cpp/src/strings/replace/multi_re.cu +++ b/cpp/src/strings/replace/multi_re.cu @@ -29,9 +29,9 @@ #include #include #include +#include #include -#include #include #include @@ -92,7 +92,7 @@ struct replace_multi_regex_fn { } reprog_device prog = progs[ptn_idx]; - auto const result = !prog.is_empty() ? prog.find(idx, d_str, itr) : thrust::nullopt; + auto const result = !prog.is_empty() ? prog.find(idx, d_str, itr) : cuda::std::nullopt; d_ranges[ptn_idx] = result ? found_range{result->first, result->second} : found_range{nchars, nchars}; } @@ -180,7 +180,7 @@ std::unique_ptr replace_re(strings_column_view const& input, return *prog; }); auto d_progs = - cudf::detail::make_device_uvector_async(progs, stream, rmm::mr::get_current_device_resource()); + cudf::detail::make_device_uvector_async(progs, stream, cudf::get_current_device_resource_ref()); auto const d_strings = column_device_view::create(input.parent(), stream); auto const d_repls = column_device_view::create(replacements.parent(), stream); diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu index f7a3a3aea5c..16df0dbabdf 100644 --- a/cpp/src/strings/replace/replace.cu +++ b/cpp/src/strings/replace/replace.cu @@ -29,10 +29,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -312,7 +312,7 @@ std::unique_ptr replace_character_parallel(strings_column_view const& in // create a vector of offsets to each string's set of target positions auto const targets_offsets = create_offsets_from_positions( - input, targets_positions, stream, rmm::mr::get_current_device_resource()); + input, targets_positions, stream, cudf::get_current_device_resource_ref()); auto const d_targets_offsets = cudf::detail::offsetalator_factory::make_input_iterator(targets_offsets->view()); diff --git a/cpp/src/strings/replace/replace_nulls.cu b/cpp/src/strings/replace/replace_nulls.cu index 
ffd9e6c2553..ff86501f02c 100644 --- a/cpp/src/strings/replace/replace_nulls.cu +++ b/cpp/src/strings/replace/replace_nulls.cu @@ -25,10 +25,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu index fd988855424..19d660e312e 100644 --- a/cpp/src/strings/replace/replace_re.cu +++ b/cpp/src/strings/replace/replace_re.cu @@ -27,9 +27,9 @@ #include #include #include +#include #include -#include namespace cudf { namespace strings { diff --git a/cpp/src/strings/replace/replace_slice.cu b/cpp/src/strings/replace/replace_slice.cu index 04d81218a16..938e3c0270b 100644 --- a/cpp/src/strings/replace/replace_slice.cu +++ b/cpp/src/strings/replace/replace_slice.cu @@ -25,9 +25,9 @@ #include #include #include +#include #include -#include #include diff --git a/cpp/src/strings/reverse.cu b/cpp/src/strings/reverse.cu index cbd231bc5f3..a207215523d 100644 --- a/cpp/src/strings/reverse.cu +++ b/cpp/src/strings/reverse.cu @@ -24,10 +24,10 @@ #include #include #include +#include #include #include -#include namespace cudf { namespace strings { diff --git a/cpp/src/strings/scan/scan_inclusive.cu b/cpp/src/strings/scan/scan_inclusive.cu index b3e45f65a21..84cc87bad3e 100644 --- a/cpp/src/strings/scan/scan_inclusive.cu +++ b/cpp/src/strings/scan/scan_inclusive.cu @@ -20,11 +20,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu index 45eba39f413..9bd1abb5542 100644 --- a/cpp/src/strings/search/find.cu +++ b/cpp/src/strings/search/find.cu @@ -27,10 +27,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/strings/search/find_multiple.cu b/cpp/src/strings/search/find_multiple.cu index 223a941a88a..ec7015878dd 100644 --- a/cpp/src/strings/search/find_multiple.cu +++ 
b/cpp/src/strings/search/find_multiple.cu @@ -24,10 +24,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu index 0d0962258cf..067a513af96 100644 --- a/cpp/src/strings/search/findall.cu +++ b/cpp/src/strings/search/findall.cu @@ -28,10 +28,10 @@ #include #include #include +#include #include #include -#include #include @@ -104,7 +104,7 @@ std::unique_ptr findall(strings_column_view const& input, auto d_prog = regex_device_builder::create_prog_device(prog, stream); // Create lists offsets column - auto const sizes = count_matches(*d_strings, *d_prog, strings_count, stream, mr); + auto const sizes = count_matches(*d_strings, *d_prog, stream, mr); auto [offsets, total_matches] = cudf::detail::make_offsets_child_column( sizes->view().begin(), sizes->view().end(), stream, mr); auto const d_offsets = offsets->view().data(); diff --git a/cpp/src/strings/slice.cu b/cpp/src/strings/slice.cu index cf82a837c51..4c39fc96397 100644 --- a/cpp/src/strings/slice.cu +++ b/cpp/src/strings/slice.cu @@ -22,16 +22,19 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include -#include +#include +#include #include #include #include @@ -40,6 +43,9 @@ namespace cudf { namespace strings { namespace detail { namespace { + +constexpr size_type AVG_CHAR_BYTES_THRESHOLD = 128; + /** * @brief Function logic for compute_substrings_from_fn API * @@ -51,17 +57,19 @@ struct substring_from_fn { IndexIterator const starts; IndexIterator const stops; - __device__ string_view operator()(size_type idx) const + __device__ string_index_pair operator()(size_type idx) const { - if (d_column.is_null(idx)) { return string_view{nullptr, 0}; } + if (d_column.is_null(idx)) { return string_index_pair{nullptr, 0}; } auto const d_str = d_column.template element(idx); auto const length = d_str.length(); auto const start = std::max(starts[idx], 
0); - if (start >= length) { return string_view{}; } + if (start >= length) { return string_index_pair{"", 0}; } - auto const stop = stops[idx]; - auto const end = (((stop < 0) || (stop > length)) ? length : stop); - return start < end ? d_str.substr(start, end - start) : string_view{}; + auto const stop = stops[idx]; + auto const end = (((stop < 0) || (stop > length)) ? length : stop); + auto const sub_str = start < end ? d_str.substr(start, end - start) : string_view{}; + return sub_str.empty() ? string_index_pair{"", 0} + : string_index_pair{sub_str.data(), sub_str.size_bytes()}; } substring_from_fn(column_device_view const& d_column, IndexIterator starts, IndexIterator stops) @@ -70,6 +78,84 @@ struct substring_from_fn { } }; +template +CUDF_KERNEL void substring_from_kernel(column_device_view const d_strings, + IndexIterator starts, + IndexIterator stops, + string_index_pair* d_output) +{ + auto const idx = cudf::detail::grid_1d::global_thread_id(); + auto const str_idx = idx / cudf::detail::warp_size; + if (str_idx >= d_strings.size()) { return; } + + namespace cg = cooperative_groups; + auto const warp = cg::tiled_partition(cg::this_thread_block()); + + if (d_strings.is_null(str_idx)) { + if (warp.thread_rank() == 0) { d_output[str_idx] = string_index_pair{nullptr, 0}; } + return; + } + auto const d_str = d_strings.element(str_idx); + if (d_str.empty()) { + if (warp.thread_rank() == 0) { d_output[str_idx] = string_index_pair{"", 0}; } + return; + } + + auto const start = max(starts[str_idx], 0); + auto stop = [stop = stops[str_idx]] { + return (stop < 0) ? 
std::numeric_limits::max() : stop; + }(); + auto const end = d_str.data() + d_str.size_bytes(); + + auto start_counts = thrust::make_pair(0, 0); + auto stop_counts = thrust::make_pair(0, 0); + + auto itr = d_str.data() + warp.thread_rank(); + + size_type char_count = 0; + size_type byte_count = 0; + while (byte_count < d_str.size_bytes()) { + if (char_count <= start) { start_counts = {char_count, byte_count}; } + if (char_count <= stop) { + stop_counts = {char_count, byte_count}; + } else { + break; + } + size_type const cc = (itr < end) && is_begin_utf8_char(*itr); + size_type const bc = (itr < end) ? bytes_in_utf8_byte(*itr) : 0; + char_count += cg::reduce(warp, cc, cg::plus()); + byte_count += cg::reduce(warp, bc, cg::plus()); + itr += cudf::detail::warp_size; + } + + __syncwarp(); + + if (warp.thread_rank() == 0) { + if (start >= char_count) { + d_output[str_idx] = string_index_pair{"", 0}; + return; + } + + // we are just below start/stop and must now increment up to them from here + auto first_byte = start_counts.second; + if (start_counts.first < start) { + auto const sub_str = string_view(d_str.data() + first_byte, d_str.size_bytes() - first_byte); + first_byte += std::get<0>(bytes_to_character_position(sub_str, start - start_counts.first)); + } + + stop = min(stop, char_count); + auto last_byte = stop_counts.second; + if (stop_counts.first < stop) { + auto const sub_str = string_view(d_str.data() + last_byte, d_str.size_bytes() - last_byte); + last_byte += std::get<0>(bytes_to_character_position(sub_str, stop - stop_counts.first)); + } + + d_output[str_idx] = (first_byte < last_byte) + ? string_index_pair{d_str.data() + first_byte, last_byte - first_byte} + : string_index_pair{"", 0}; + } +} + /** * @brief Function logic for the substring API. 
* @@ -149,54 +235,67 @@ struct substring_fn { * * @tparam IndexIterator Iterator type for character position values * - * @param d_column Input strings column to substring + * @param input Input strings column to substring * @param starts Start positions index iterator * @param stops Stop positions index iterator * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory */ template -std::unique_ptr compute_substrings_from_fn(column_device_view const& d_column, +std::unique_ptr compute_substrings_from_fn(strings_column_view const& input, IndexIterator starts, IndexIterator stops, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - auto results = rmm::device_uvector(d_column.size(), stream); - thrust::transform(rmm::exec_policy(stream), - thrust::counting_iterator(0), - thrust::counting_iterator(d_column.size()), - results.begin(), - substring_from_fn{d_column, starts, stops}); - return make_strings_column(results, string_view{nullptr, 0}, stream, mr); + auto results = rmm::device_uvector(input.size(), stream); + + auto const d_column = column_device_view::create(input.parent(), stream); + + if ((input.chars_size(stream) / (input.size() - input.null_count())) < AVG_CHAR_BYTES_THRESHOLD) { + thrust::transform(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input.size()), + results.begin(), + substring_from_fn{*d_column, starts, stops}); + } else { + constexpr thread_index_type block_size = 512; + auto const threads = + static_cast(input.size()) * cudf::detail::warp_size; + auto const num_blocks = util::div_rounding_up_safe(threads, block_size); + substring_from_kernel + <<>>(*d_column, starts, stops, results.data()); + } + return make_strings_column(results.begin(), results.end(), stream, mr); } } // namespace // -std::unique_ptr slice_strings(strings_column_view const& strings, +std::unique_ptr 
slice_strings(strings_column_view const& input, numeric_scalar const& start, numeric_scalar const& stop, numeric_scalar const& step, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - if (strings.is_empty()) return make_empty_column(type_id::STRING); + if (input.size() == input.null_count()) { + return std::make_unique(input.parent(), stream, mr); + } auto const step_valid = step.is_valid(stream); - auto const step_value = step_valid ? step.value(stream) : 0; + auto const step_value = step_valid ? step.value(stream) : 1; if (step_valid) { CUDF_EXPECTS(step_value != 0, "Step parameter must not be 0"); } - auto const d_column = column_device_view::create(strings.parent(), stream); - // optimization for (step==1 and start < stop) -- expect this to be most common - if (step_value == 1 and start.is_valid(stream) and stop.is_valid(stream)) { - auto const start_value = start.value(stream); - auto const stop_value = stop.value(stream); + if (step_value == 1) { + auto const start_value = start.is_valid(stream) ? start.value(stream) : 0; + auto const stop_value = + stop.is_valid(stream) ? 
stop.value(stream) : std::numeric_limits::max(); // note that any negative values here must use the alternate function below if ((start_value >= 0) && (start_value < stop_value)) { // this is about 2x faster on long strings for this common case - return compute_substrings_from_fn(*d_column, + return compute_substrings_from_fn(input, thrust::constant_iterator(start_value), thrust::constant_iterator(stop_value), stream, @@ -204,31 +303,35 @@ std::unique_ptr slice_strings(strings_column_view const& strings, } } + auto const d_column = column_device_view::create(input.parent(), stream); + auto const d_start = get_scalar_device_view(const_cast&>(start)); auto const d_stop = get_scalar_device_view(const_cast&>(stop)); auto const d_step = get_scalar_device_view(const_cast&>(step)); auto [offsets, chars] = make_strings_children( - substring_fn{*d_column, d_start, d_stop, d_step}, strings.size(), stream, mr); + substring_fn{*d_column, d_start, d_stop, d_step}, input.size(), stream, mr); - return make_strings_column(strings.size(), + return make_strings_column(input.size(), std::move(offsets), chars.release(), - strings.null_count(), - cudf::detail::copy_bitmask(strings.parent(), stream, mr)); + input.null_count(), + cudf::detail::copy_bitmask(input.parent(), stream, mr)); } -std::unique_ptr slice_strings(strings_column_view const& strings, +std::unique_ptr slice_strings(strings_column_view const& input, column_view const& starts_column, column_view const& stops_column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - size_type strings_count = strings.size(); - if (strings_count == 0) return make_empty_column(type_id::STRING); - CUDF_EXPECTS(starts_column.size() == strings_count, + if (input.size() == input.null_count()) { + return std::make_unique(input.parent(), stream, mr); + } + + CUDF_EXPECTS(starts_column.size() == input.size(), "Parameter starts must have the same number of rows as strings."); - CUDF_EXPECTS(stops_column.size() == strings_count, + 
CUDF_EXPECTS(stops_column.size() == input.size(), "Parameter stops must have the same number of rows as strings."); CUDF_EXPECTS(cudf::have_same_types(starts_column, stops_column), "Parameters starts and stops must be of the same type.", @@ -242,17 +345,16 @@ std::unique_ptr slice_strings(strings_column_view const& strings, "Positions values must be fixed width type.", cudf::data_type_error); - auto strings_column = column_device_view::create(strings.parent(), stream); - auto starts_iter = cudf::detail::indexalator_factory::make_input_iterator(starts_column); - auto stops_iter = cudf::detail::indexalator_factory::make_input_iterator(stops_column); - return compute_substrings_from_fn(*strings_column, starts_iter, stops_iter, stream, mr); + auto starts_iter = cudf::detail::indexalator_factory::make_input_iterator(starts_column); + auto stops_iter = cudf::detail::indexalator_factory::make_input_iterator(stops_column); + return compute_substrings_from_fn(input, starts_iter, stops_iter, stream, mr); } } // namespace detail // external API -std::unique_ptr slice_strings(strings_column_view const& strings, +std::unique_ptr slice_strings(strings_column_view const& input, numeric_scalar const& start, numeric_scalar const& stop, numeric_scalar const& step, @@ -260,17 +362,17 @@ std::unique_ptr slice_strings(strings_column_view const& strings, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::slice_strings(strings, start, stop, step, stream, mr); + return detail::slice_strings(input, start, stop, step, stream, mr); } -std::unique_ptr slice_strings(strings_column_view const& strings, +std::unique_ptr slice_strings(strings_column_view const& input, column_view const& starts_column, column_view const& stops_column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::slice_strings(strings, starts_column, stops_column, stream, mr); + return detail::slice_strings(input, starts_column, stops_column, stream, 
mr); } } // namespace strings diff --git a/cpp/src/strings/split/partition.cu b/cpp/src/strings/split/partition.cu index 93d55c494fe..df1cdcc9d79 100644 --- a/cpp/src/strings/split/partition.cu +++ b/cpp/src/strings/split/partition.cu @@ -24,10 +24,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/strings/split/split.cu b/cpp/src/strings/split/split.cu index bc01a46ca6d..352ca83c8b2 100644 --- a/cpp/src/strings/split/split.cu +++ b/cpp/src/strings/split/split.cu @@ -28,10 +28,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/strings/split/split.cuh b/cpp/src/strings/split/split.cuh index 4d7096c02ca..81aca001d53 100644 --- a/cpp/src/strings/split/split.cuh +++ b/cpp/src/strings/split/split.cuh @@ -24,10 +24,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -142,7 +142,7 @@ struct base_split_tokenizer { // max_tokens already included in token counts if (d_tokens.size() == 1) { - d_tokens[0] = string_index_pair{d_str.data(), d_str.size_bytes()}; + d_tokens[0] = string_index_pair{(d_str.empty() ? 
"" : d_str.data()), d_str.size_bytes()}; return; } @@ -357,24 +357,20 @@ std::pair, rmm::device_uvector> split auto const chars_bytes = get_offset_value(input.offsets(), input.offset() + strings_count, stream) - get_offset_value(input.offsets(), input.offset(), stream); - if (chars_bytes == 0) { - auto offsets = cudf::make_column_from_scalar( - numeric_scalar(0, true, stream), strings_count + 1, stream, mr); - auto tokens = rmm::device_uvector(0, stream); - return std::pair{std::move(offsets), std::move(tokens)}; - } auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(input.offsets(), input.offset()); // count the number of delimiters in the entire column rmm::device_scalar d_count(0, stream); - constexpr int64_t block_size = 512; - constexpr size_type bytes_per_thread = 4; - auto const num_blocks = util::div_rounding_up_safe( - util::div_rounding_up_safe(chars_bytes, static_cast(bytes_per_thread)), block_size); - count_delimiters_kernel - <<>>( - tokenizer, d_offsets, chars_bytes, d_count.data()); + if (chars_bytes > 0) { + constexpr int64_t block_size = 512; + constexpr size_type bytes_per_thread = 4; + auto const num_blocks = util::div_rounding_up_safe( + util::div_rounding_up_safe(chars_bytes, static_cast(bytes_per_thread)), block_size); + count_delimiters_kernel + <<>>( + tokenizer, d_offsets, chars_bytes, d_count.data()); + } // Create a vector of every delimiter position in the chars column. 
// These may include overlapping or otherwise out-of-bounds delimiters which diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu index d72ec1085b5..ef96b9d3f36 100644 --- a/cpp/src/strings/split/split_re.cu +++ b/cpp/src/strings/split/split_re.cu @@ -27,9 +27,9 @@ #include #include #include +#include #include -#include #include #include @@ -71,6 +71,10 @@ struct token_reader_fn { auto const token_offset = d_token_offsets[idx]; auto const token_count = d_token_offsets[idx + 1] - token_offset; auto const d_result = d_tokens + token_offset; // store tokens here + if (nchars == 0) { + d_result[0] = string_index_pair{"", 0}; + return; + } int64_t token_idx = 0; auto itr = d_str.begin(); @@ -148,7 +152,7 @@ std::pair, std::unique_ptr> gener auto const end = begin + strings_count; auto [offsets, total_tokens] = cudf::detail::make_offsets_child_column( - begin, end, stream, rmm::mr::get_current_device_resource()); + begin, end, stream, cudf::get_current_device_resource_ref()); auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets->view()); // build a vector of tokens @@ -206,8 +210,8 @@ std::unique_ptr
split_re(strings_column_view const& input, auto d_strings = column_device_view::create(input.parent(), stream); // count the number of delimiters matched in each string - auto const counts = count_matches( - *d_strings, *d_prog, strings_count, stream, rmm::mr::get_current_device_resource()); + auto const counts = + count_matches(*d_strings, *d_prog, stream, cudf::get_current_device_resource_ref()); // get the split tokens from the input column; this also converts the counts into offsets auto [tokens, offsets] = @@ -271,7 +275,7 @@ std::unique_ptr split_record_re(strings_column_view const& input, auto d_strings = column_device_view::create(input.parent(), stream); // count the number of delimiters matched in each string - auto counts = count_matches(*d_strings, *d_prog, strings_count, stream, mr); + auto counts = count_matches(*d_strings, *d_prog, stream, mr); // get the split tokens from the input column; this also converts the counts into offsets auto [tokens, offsets] = diff --git a/cpp/src/strings/split/split_record.cu b/cpp/src/strings/split/split_record.cu index 3e8be750b9e..6f14462faf1 100644 --- a/cpp/src/strings/split/split_record.cu +++ b/cpp/src/strings/split/split_record.cu @@ -27,9 +27,9 @@ #include #include #include +#include #include -#include #include #include diff --git a/cpp/src/strings/strings_column_factories.cu b/cpp/src/strings/strings_column_factories.cu index a298285f841..07516f91dcf 100644 --- a/cpp/src/strings/strings_column_factories.cu +++ b/cpp/src/strings/strings_column_factories.cu @@ -21,11 +21,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/strings/strings_scalar_factories.cpp b/cpp/src/strings/strings_scalar_factories.cpp index cf973638cc4..219d1174d42 100644 --- a/cpp/src/strings/strings_scalar_factories.cpp +++ b/cpp/src/strings/strings_scalar_factories.cpp @@ -16,9 +16,9 @@ #include #include +#include #include -#include namespace cudf { // Create a 
strings-type column from array of pointer/size pairs diff --git a/cpp/src/strings/strip.cu b/cpp/src/strings/strip.cu index 639097abe63..0dc4c038a02 100644 --- a/cpp/src/strings/strip.cu +++ b/cpp/src/strings/strip.cu @@ -23,10 +23,10 @@ #include #include #include +#include #include #include -#include namespace cudf { namespace strings { diff --git a/cpp/src/strings/translate.cu b/cpp/src/strings/translate.cu index a242b008a54..22ab5d4fe81 100644 --- a/cpp/src/strings/translate.cu +++ b/cpp/src/strings/translate.cu @@ -25,10 +25,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -107,8 +107,8 @@ std::unique_ptr translate(strings_column_view const& strings, return lhs.first < rhs.first; }); // copy translate table to device memory - rmm::device_uvector table = - cudf::detail::make_device_uvector_async(htable, stream, rmm::mr::get_current_device_resource()); + rmm::device_uvector table = cudf::detail::make_device_uvector_async( + htable, stream, cudf::get_current_device_resource_ref()); auto d_strings = column_device_view::create(strings.parent(), stream); diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index 068d89a52dc..45bd4615435 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -26,11 +26,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/strings/wrap.cu b/cpp/src/strings/wrap.cu index dff1891c3cc..38a18aff98d 100644 --- a/cpp/src/strings/wrap.cu +++ b/cpp/src/strings/wrap.cu @@ -25,11 +25,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/structs/copying/concatenate.cu b/cpp/src/structs/copying/concatenate.cu index 2ccf071711a..2120b4f08c4 100644 --- a/cpp/src/structs/copying/concatenate.cu +++ b/cpp/src/structs/copying/concatenate.cu @@ -23,9 +23,9 @@ #include #include #include +#include #include -#include #include #include diff 
--git a/cpp/src/structs/scan/scan_inclusive.cu b/cpp/src/structs/scan/scan_inclusive.cu index a6ccea5fca1..28756b25c89 100644 --- a/cpp/src/structs/scan/scan_inclusive.cu +++ b/cpp/src/structs/scan/scan_inclusive.cu @@ -20,11 +20,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/structs/structs_column_factories.cu b/cpp/src/structs/structs_column_factories.cu index bbe2bb96fde..86b30d0ccbd 100644 --- a/cpp/src/structs/structs_column_factories.cu +++ b/cpp/src/structs/structs_column_factories.cu @@ -17,9 +17,9 @@ #include #include #include +#include #include -#include #include diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp index 81806c92e23..5df9943303d 100644 --- a/cpp/src/structs/utilities.cpp +++ b/cpp/src/structs/utilities.cpp @@ -25,11 +25,10 @@ #include #include #include +#include #include #include -#include - #include #include diff --git a/cpp/src/table/row_operators.cu b/cpp/src/table/row_operators.cu index 2969557c78f..990c4855a14 100644 --- a/cpp/src/table/row_operators.cu +++ b/cpp/src/table/row_operators.cu @@ -27,12 +27,10 @@ #include #include #include +#include #include #include -#include -#include - #include namespace cudf { @@ -319,7 +317,7 @@ auto list_lex_preprocess(table_view const& table, rmm::cuda_stream_view stream) } } auto d_dremel_device_views = detail::make_device_uvector_sync( - dremel_device_views, stream, rmm::mr::get_current_device_resource()); + dremel_device_views, stream, cudf::get_current_device_resource_ref()); return std::make_tuple(std::move(dremel_data), std::move(d_dremel_device_views)); } @@ -588,12 +586,12 @@ transform_lists_of_structs(column_view const& lhs, auto const concatenated_children = cudf::detail::concatenate(std::vector{child_lhs, child_rhs}, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto const ranks = compute_ranks(concatenated_children->view(), column_null_order, 
stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto const ranks_slices = cudf::detail::slice( ranks->view(), {0, child_lhs.size(), child_lhs.size(), child_lhs.size() + child_rhs.size()}, @@ -647,13 +645,13 @@ std::shared_ptr preprocessed_table::create( { check_lex_compatibility(preprocessed_input); - auto d_table = table_device_view::create(preprocessed_input, stream); - auto d_column_order = - detail::make_device_uvector_async(column_order, stream, rmm::mr::get_current_device_resource()); + auto d_table = table_device_view::create(preprocessed_input, stream); + auto d_column_order = detail::make_device_uvector_async( + column_order, stream, cudf::get_current_device_resource_ref()); auto d_null_precedence = detail::make_device_uvector_async( - null_precedence, stream, rmm::mr::get_current_device_resource()); + null_precedence, stream, cudf::get_current_device_resource_ref()); auto d_depths = detail::make_device_uvector_async( - verticalized_col_depths, stream, rmm::mr::get_current_device_resource()); + verticalized_col_depths, stream, cudf::get_current_device_resource_ref()); if (detail::has_nested_columns(preprocessed_input)) { auto [dremel_data, d_dremel_device_view] = list_lex_preprocess(preprocessed_input, stream); @@ -699,7 +697,7 @@ std::shared_ptr preprocessed_table::create( lhs_col, null_precedence.empty() ? null_order::BEFORE : new_null_precedence[col_idx], stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); transformed_cvs.emplace_back(std::move(transformed)); transformed_columns.insert(transformed_columns.end(), @@ -761,7 +759,7 @@ preprocessed_table::create(table_view const& lhs, rhs_col, null_precedence.empty() ? 
null_order::BEFORE : null_precedence[col_idx], stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); transformed_lhs_cvs.emplace_back(std::move(transformed_lhs)); transformed_rhs_cvs.emplace_back(std::move(transformed_rhs)); @@ -854,7 +852,7 @@ std::shared_ptr preprocessed_table::create(table_view const& check_eq_compatibility(t); auto [null_pushed_table, nullable_data] = - structs::detail::push_down_nulls(t, stream, rmm::mr::get_current_device_resource()); + structs::detail::push_down_nulls(t, stream, cudf::get_current_device_resource_ref()); auto struct_offset_removed_table = remove_struct_child_offsets(null_pushed_table); auto verticalized_t = std::get<0>(decompose_structs(struct_offset_removed_table, decompose_lists_column::YES)); diff --git a/cpp/src/table/table.cpp b/cpp/src/table/table.cpp index 9dac7be5efe..cb707c94288 100644 --- a/cpp/src/table/table.cpp +++ b/cpp/src/table/table.cpp @@ -18,9 +18,9 @@ #include #include #include +#include #include -#include namespace cudf { diff --git a/cpp/src/text/bpe/byte_pair_encoding.cu b/cpp/src/text/bpe/byte_pair_encoding.cu index e196eee275f..f46f49ddc0e 100644 --- a/cpp/src/text/bpe/byte_pair_encoding.cu +++ b/cpp/src/text/bpe/byte_pair_encoding.cu @@ -29,12 +29,12 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/text/bpe/byte_pair_encoding.cuh b/cpp/src/text/bpe/byte_pair_encoding.cuh index a2e441c3284..69c77224eb7 100644 --- a/cpp/src/text/bpe/byte_pair_encoding.cuh +++ b/cpp/src/text/bpe/byte_pair_encoding.cuh @@ -106,7 +106,7 @@ using merge_pairs_map_type = cuco::static_map, cuco_storage>; /** @@ -164,7 +164,7 @@ using mp_table_map_type = cuco::static_map, cuco_storage>; } // namespace detail diff --git a/cpp/src/text/bpe/load_merge_pairs.cu b/cpp/src/text/bpe/load_merge_pairs.cu index f34c5c4f7f6..cd68566bdec 100644 --- a/cpp/src/text/bpe/load_merge_pairs.cu +++ b/cpp/src/text/bpe/load_merge_pairs.cu @@ 
-23,12 +23,12 @@ #include #include #include +#include #include #include #include -#include #include @@ -43,16 +43,16 @@ namespace { std::unique_ptr initialize_merge_pairs_map( cudf::column_device_view const& input, rmm::cuda_stream_view stream) { - auto merge_pairs_map = - std::make_unique(static_cast(input.size()), - cuco::empty_key{-1}, - cuco::empty_value{-1}, - bpe_equal{input}, - bpe_probe_scheme{bpe_hasher{input}}, - cuco::thread_scope_device, - cuco_storage{}, - cudf::detail::cuco_allocator{stream}, - stream.value()); + auto merge_pairs_map = std::make_unique( + static_cast(input.size()), + cuco::empty_key{-1}, + cuco::empty_value{-1}, + bpe_equal{input}, + bpe_probe_scheme{bpe_hasher{input}}, + cuco::thread_scope_device, + cuco_storage{}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()); auto iter = cudf::detail::make_counting_transform_iterator( 0, @@ -67,15 +67,16 @@ std::unique_ptr initialize_merge_pairs_map( std::unique_ptr initialize_mp_table_map( cudf::column_device_view const& input, rmm::cuda_stream_view stream) { - auto mp_table_map = std::make_unique(static_cast(input.size()), - cuco::empty_key{-1}, - cuco::empty_value{-1}, - mp_equal{input}, - mp_probe_scheme{mp_hasher{input}}, - cuco::thread_scope_device, - cuco_storage{}, - cudf::detail::cuco_allocator{stream}, - stream.value()); + auto mp_table_map = std::make_unique( + static_cast(input.size()), + cuco::empty_key{-1}, + cuco::empty_value{-1}, + mp_equal{input}, + mp_probe_scheme{mp_hasher{input}}, + cuco::thread_scope_device, + cuco_storage{}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()); auto iter = cudf::detail::make_counting_transform_iterator( 0, diff --git a/cpp/src/text/detokenize.cu b/cpp/src/text/detokenize.cu index 6635b61093e..15cb53c7c28 100644 --- a/cpp/src/text/detokenize.cu +++ b/cpp/src/text/detokenize.cu @@ -27,12 +27,12 @@ #include #include #include +#include #include #include #include 
-#include #include #include @@ -148,7 +148,7 @@ std::unique_ptr detokenize(cudf::strings_column_view const& string auto strings_column = cudf::column_device_view::create(strings.parent(), stream); // the indices may not be in order so we need to build a sorted map auto sorted_rows = cudf::detail::stable_sorted_order( - cudf::table_view({row_indices}), {}, {}, stream, rmm::mr::get_current_device_resource()); + cudf::table_view({row_indices}), {}, {}, stream, cudf::get_current_device_resource_ref()); auto const d_row_map = sorted_rows->view().data(); // create offsets for the tokens for each output string diff --git a/cpp/src/text/edit_distance.cu b/cpp/src/text/edit_distance.cu index 8d857175407..b04e9961e01 100644 --- a/cpp/src/text/edit_distance.cu +++ b/cpp/src/text/edit_distance.cu @@ -22,13 +22,13 @@ #include #include #include +#include #include #include #include #include -#include #include #include diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 724f3603f29..a87ecb81b9d 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -29,17 +29,19 @@ #include #include #include +#include #include #include #include -#include +#include +#include #include +#include #include #include -#include #include @@ -120,7 +122,7 @@ std::unique_ptr generate_ngrams(cudf::strings_column_view const& s return !d_strings.element(idx).empty(); }, stream, - rmm::mr::get_current_device_resource()) + cudf::get_current_device_resource_ref()) ->release(); strings_count = table_offsets.front()->size() - 1; auto result = std::move(table_offsets.front()); @@ -165,6 +167,47 @@ std::unique_ptr generate_ngrams(cudf::strings_column_view const& s namespace detail { namespace { +constexpr cudf::thread_index_type block_size = 256; +constexpr cudf::thread_index_type bytes_per_thread = 4; + +/** + * @brief Counts the number of ngrams in each row of the given strings column + * + * Each warp processes a single string. 
+ * Formula is `count = max(0,str.length() - ngrams + 1)` + * If a string has less than ngrams characters, its count is 0. + */ +CUDF_KERNEL void count_char_ngrams_kernel(cudf::column_device_view const d_strings, + cudf::size_type ngrams, + cudf::size_type* d_counts) +{ + auto const idx = cudf::detail::grid_1d::global_thread_id(); + + auto const str_idx = idx / cudf::detail::warp_size; + if (str_idx >= d_strings.size()) { return; } + if (d_strings.is_null(str_idx)) { + d_counts[str_idx] = 0; + return; + } + + namespace cg = cooperative_groups; + auto const warp = cg::tiled_partition(cg::this_thread_block()); + + auto const d_str = d_strings.element(str_idx); + auto const end = d_str.data() + d_str.size_bytes(); + + auto const lane_idx = warp.thread_rank(); + cudf::size_type count = 0; + for (auto itr = d_str.data() + (lane_idx * bytes_per_thread); itr < end; + itr += cudf::detail::warp_size * bytes_per_thread) { + for (auto s = itr; (s < (itr + bytes_per_thread)) && (s < end); ++s) { + count += static_cast(cudf::strings::detail::is_begin_utf8_char(*s)); + } + } + auto const char_count = cg::reduce(warp, count, cg::plus()); + if (lane_idx == 0) { d_counts[str_idx] = cuda::std::max(0, char_count - ngrams + 1); } +} + /** * @brief Generate character ngrams for each string * @@ -220,17 +263,16 @@ std::unique_ptr generate_character_ngrams(cudf::strings_column_vie auto const d_strings = cudf::column_device_view::create(input.parent(), stream); - auto sizes_itr = cudf::detail::make_counting_transform_iterator( - 0, - cuda::proclaim_return_type( - [d_strings = *d_strings, ngrams] __device__(auto idx) { - if (d_strings.is_null(idx)) { return 0; } - auto const length = d_strings.element(idx).length(); - return std::max(0, static_cast(length + 1 - ngrams)); - })); - auto [offsets, total_ngrams] = - cudf::detail::make_offsets_child_column(sizes_itr, sizes_itr + input.size(), stream, mr); + auto [offsets, total_ngrams] = [&] { + auto counts = rmm::device_uvector(input.size(), 
stream); + auto const num_blocks = cudf::util::div_rounding_up_safe( + static_cast(input.size()) * cudf::detail::warp_size, block_size); + count_char_ngrams_kernel<<>>( + *d_strings, ngrams, counts.data()); + return cudf::detail::make_offsets_child_column(counts.begin(), counts.end(), stream, mr); + }(); auto d_offsets = offsets->view().data(); + CUDF_EXPECTS(total_ngrams > 0, "Insufficient number of characters in each string to generate ngrams"); @@ -246,36 +288,64 @@ std::unique_ptr generate_character_ngrams(cudf::strings_column_vie } namespace { + /** * @brief Computes the hash of each character ngram * - * Each thread processes a single string. Substrings are resolved for every character + * Each warp processes a single string. Substrings are resolved for every character * of the string and hashed. */ -struct character_ngram_hash_fn { - cudf::column_device_view const d_strings; - cudf::size_type ngrams; - cudf::size_type const* d_ngram_offsets; - cudf::hash_value_type* d_results; +CUDF_KERNEL void character_ngram_hash_kernel(cudf::column_device_view const d_strings, + cudf::size_type ngrams, + cudf::size_type const* d_ngram_offsets, + cudf::hash_value_type* d_results) +{ + auto const idx = cudf::detail::grid_1d::global_thread_id(); + if (idx >= (static_cast(d_strings.size()) * cudf::detail::warp_size)) { + return; + } - __device__ void operator()(cudf::size_type idx) const - { - if (d_strings.is_null(idx)) return; - auto const d_str = d_strings.element(idx); - if (d_str.empty()) return; - auto itr = d_str.begin(); - auto const ngram_offset = d_ngram_offsets[idx]; - auto const ngram_count = d_ngram_offsets[idx + 1] - ngram_offset; - auto const hasher = cudf::hashing::detail::MurmurHash3_x86_32{0}; - auto d_hashes = d_results + ngram_offset; - for (cudf::size_type n = 0; n < ngram_count; ++n, ++itr) { - auto const begin = itr.byte_offset(); - auto const end = (itr + ngrams).byte_offset(); - auto const ngram = cudf::string_view(d_str.data() + begin, end - begin); 
- *d_hashes++ = hasher(ngram); + auto const str_idx = idx / cudf::detail::warp_size; + + if (d_strings.is_null(str_idx)) { return; } + auto const d_str = d_strings.element(str_idx); + if (d_str.empty()) { return; } + + __shared__ cudf::hash_value_type hvs[block_size]; // temp store for hash values + + auto const ngram_offset = d_ngram_offsets[str_idx]; + auto const hasher = cudf::hashing::detail::MurmurHash3_x86_32{0}; + + auto const end = d_str.data() + d_str.size_bytes(); + auto const warp_count = (d_str.size_bytes() / cudf::detail::warp_size) + 1; + auto const lane_idx = idx % cudf::detail::warp_size; + + auto d_hashes = d_results + ngram_offset; + auto itr = d_str.data() + lane_idx; + for (auto i = 0; i < warp_count; ++i) { + cudf::hash_value_type hash = 0; + if (itr < end && cudf::strings::detail::is_begin_utf8_char(*itr)) { + // resolve ngram substring + auto const sub_str = + cudf::string_view(itr, static_cast(thrust::distance(itr, end))); + auto const [bytes, left] = + cudf::strings::detail::bytes_to_character_position(sub_str, ngrams); + if (left == 0) { hash = hasher(cudf::string_view(itr, bytes)); } + } + hvs[threadIdx.x] = hash; // store hash into shared memory + __syncwarp(); + if (lane_idx == 0) { + // copy valid hash values into d_hashes + auto const hashes = &hvs[threadIdx.x]; + d_hashes = thrust::copy_if( + thrust::seq, hashes, hashes + cudf::detail::warp_size, d_hashes, [](auto h) { + return h != 0; + }); } + __syncwarp(); + itr += cudf::detail::warp_size; } -}; +} } // namespace std::unique_ptr hash_character_ngrams(cudf::strings_column_view const& input, @@ -291,18 +361,16 @@ std::unique_ptr hash_character_ngrams(cudf::strings_column_view co if (input.is_empty()) { return cudf::make_empty_column(output_type); } auto const d_strings = cudf::column_device_view::create(input.parent(), stream); + auto const grid = cudf::detail::grid_1d( + static_cast(input.size()) * cudf::detail::warp_size, block_size); // build offsets column by computing the 
number of ngrams per string - auto sizes_itr = cudf::detail::make_counting_transform_iterator( - 0, - cuda::proclaim_return_type( - [d_strings = *d_strings, ngrams] __device__(auto idx) { - if (d_strings.is_null(idx)) { return 0; } - auto const length = d_strings.element(idx).length(); - return std::max(0, static_cast(length + 1 - ngrams)); - })); - auto [offsets, total_ngrams] = - cudf::detail::make_offsets_child_column(sizes_itr, sizes_itr + input.size(), stream, mr); + auto [offsets, total_ngrams] = [&] { + auto counts = rmm::device_uvector(input.size(), stream); + count_char_ngrams_kernel<<>>( + *d_strings, ngrams, counts.data()); + return cudf::detail::make_offsets_child_column(counts.begin(), counts.end(), stream, mr); + }(); auto d_offsets = offsets->view().data(); CUDF_EXPECTS(total_ngrams > 0, @@ -313,11 +381,8 @@ std::unique_ptr hash_character_ngrams(cudf::strings_column_view co cudf::make_numeric_column(output_type, total_ngrams, cudf::mask_state::UNALLOCATED, stream, mr); auto d_hashes = hashes->mutable_view().data(); - character_ngram_hash_fn generator{*d_strings, ngrams, d_offsets, d_hashes}; - thrust::for_each_n(rmm::exec_policy(stream), - thrust::counting_iterator(0), - input.size(), - generator); + character_ngram_hash_kernel<<>>( + *d_strings, ngrams, d_offsets, d_hashes); return make_lists_column( input.size(), std::move(offsets), std::move(hashes), 0, rmm::device_buffer{}, stream, mr); diff --git a/cpp/src/text/jaccard.cu b/cpp/src/text/jaccard.cu index e465fb79c89..2de94a4eb59 100644 --- a/cpp/src/text/jaccard.cu +++ b/cpp/src/text/jaccard.cu @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -33,7 +34,6 @@ #include #include #include -#include #include #include @@ -376,7 +376,7 @@ std::pair, rmm::device_uvector> hash_subs sub_offsets.begin(), sub_offsets.end(), indices.begin()); - return cudf::detail::make_std_vector_sync(indices, stream); + return cudf::detail::make_host_vector_sync(indices, stream); }(); // Call 
segmented sort with the sort sections diff --git a/cpp/src/text/minhash.cu b/cpp/src/text/minhash.cu index 4318123627d..a03a34f5fa7 100644 --- a/cpp/src/text/minhash.cu +++ b/cpp/src/text/minhash.cu @@ -25,15 +25,17 @@ #include #include #include +#include +#include #include #include #include +#include #include #include #include -#include #include #include @@ -151,15 +153,111 @@ std::unique_ptr minhash_fn(cudf::strings_column_view const& input, mr); auto d_hashes = hashes->mutable_view().data(); - constexpr int block_size = 256; - cudf::detail::grid_1d grid{input.size() * cudf::detail::warp_size, block_size}; + constexpr cudf::thread_index_type block_size = 256; + cudf::detail::grid_1d grid{ + static_cast(input.size()) * cudf::detail::warp_size, block_size}; minhash_kernel<<>>( *d_strings, seeds, width, d_hashes); return hashes; } -std::unique_ptr build_list_result(cudf::strings_column_view const& input, +/** + * @brief Compute the minhash of each list row of strings for each seed + * + * This is a warp-per-row algorithm where parallel threads within a warp + * work on strings in a single list row. 
+ * + * @tparam HashFunction hash function to use on each string + * + * @param d_input List of strings to process + * @param seeds Seeds for hashing each string + * @param d_hashes Minhash output values (one per row) + */ +template < + typename HashFunction, + typename hash_value_type = std:: + conditional_t, uint32_t, uint64_t>> +CUDF_KERNEL void minhash_word_kernel(cudf::detail::lists_column_device_view const d_input, + cudf::device_span seeds, + hash_value_type* d_hashes) +{ + auto const idx = cudf::detail::grid_1d::global_thread_id(); + auto const row_idx = idx / cudf::detail::warp_size; + + if (row_idx >= d_input.size()) { return; } + if (d_input.is_null(row_idx)) { return; } + + auto const d_row = cudf::list_device_view(d_input, row_idx); + auto const d_output = d_hashes + (row_idx * seeds.size()); + + // initialize hashes output for this row + auto const lane_idx = static_cast(idx % cudf::detail::warp_size); + if (lane_idx == 0) { + auto const init = d_row.size() == 0 ? 0 : std::numeric_limits::max(); + thrust::fill(thrust::seq, d_output, d_output + seeds.size(), init); + } + __syncwarp(); + + // each lane hashes a string from the input row + for (auto str_idx = lane_idx; str_idx < d_row.size(); str_idx += cudf::detail::warp_size) { + auto const hash_str = + d_row.is_null(str_idx) ? cudf::string_view{} : d_row.element(str_idx); + for (std::size_t seed_idx = 0; seed_idx < seeds.size(); ++seed_idx) { + auto const hasher = HashFunction(seeds[seed_idx]); + // hash string and store the min value + hash_value_type hv; + if constexpr (std::is_same_v) { + hv = hasher(hash_str); + } else { + // This code path assumes the use of MurmurHash3_x64_128 which produces 2 uint64 values + // but only uses the first uint64 value as requested by the LLM team. 
+ hv = thrust::get<0>(hasher(hash_str)); + } + cuda::atomic_ref ref{*(d_output + seed_idx)}; + ref.fetch_min(hv, cuda::std::memory_order_relaxed); + } + } +} + +template < + typename HashFunction, + typename hash_value_type = std:: + conditional_t, uint32_t, uint64_t>> +std::unique_ptr word_minhash_fn(cudf::lists_column_view const& input, + cudf::device_span seeds, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_EXPECTS(!seeds.empty(), "Parameter seeds cannot be empty", std::invalid_argument); + CUDF_EXPECTS((static_cast(input.size()) * seeds.size()) < + static_cast(std::numeric_limits::max()), + "The number of seeds times the number of input rows exceeds the column size limit", + std::overflow_error); + + auto const output_type = cudf::data_type{cudf::type_to_id()}; + if (input.is_empty()) { return cudf::make_empty_column(output_type); } + + auto const d_input = cudf::column_device_view::create(input.parent(), stream); + + auto hashes = cudf::make_numeric_column(output_type, + input.size() * static_cast(seeds.size()), + cudf::mask_state::UNALLOCATED, + stream, + mr); + auto d_hashes = hashes->mutable_view().data(); + auto lcdv = cudf::detail::lists_column_device_view(*d_input); + + constexpr cudf::thread_index_type block_size = 256; + cudf::detail::grid_1d grid{ + static_cast(input.size()) * cudf::detail::warp_size, block_size}; + minhash_word_kernel + <<>>(lcdv, seeds, d_hashes); + + return hashes; +} + +std::unique_ptr build_list_result(cudf::column_view const& input, std::unique_ptr&& hashes, cudf::size_type seeds_size, rmm::cuda_stream_view stream, @@ -176,7 +274,7 @@ std::unique_ptr build_list_result(cudf::strings_column_view const& std::move(offsets), std::move(hashes), input.null_count(), - cudf::detail::copy_bitmask(input.parent(), stream, mr), + cudf::detail::copy_bitmask(input, stream, mr), stream, mr); // expect this condition to be very rare @@ -208,7 +306,7 @@ std::unique_ptr minhash(cudf::strings_column_view const& 
input, { using HashFunction = cudf::hashing::detail::MurmurHash3_x86_32; auto hashes = detail::minhash_fn(input, seeds, width, stream, mr); - return build_list_result(input, std::move(hashes), seeds.size(), stream, mr); + return build_list_result(input.parent(), std::move(hashes), seeds.size(), stream, mr); } std::unique_ptr minhash64(cudf::strings_column_view const& input, @@ -232,7 +330,27 @@ std::unique_ptr minhash64(cudf::strings_column_view const& input, { using HashFunction = cudf::hashing::detail::MurmurHash3_x64_128; auto hashes = detail::minhash_fn(input, seeds, width, stream, mr); - return build_list_result(input, std::move(hashes), seeds.size(), stream, mr); + return build_list_result(input.parent(), std::move(hashes), seeds.size(), stream, mr); +} + +std::unique_ptr word_minhash(cudf::lists_column_view const& input, + cudf::device_span seeds, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + using HashFunction = cudf::hashing::detail::MurmurHash3_x86_32; + auto hashes = detail::word_minhash_fn(input, seeds, stream, mr); + return build_list_result(input.parent(), std::move(hashes), seeds.size(), stream, mr); +} + +std::unique_ptr word_minhash64(cudf::lists_column_view const& input, + cudf::device_span seeds, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + using HashFunction = cudf::hashing::detail::MurmurHash3_x64_128; + auto hashes = detail::word_minhash_fn(input, seeds, stream, mr); + return build_list_result(input.parent(), std::move(hashes), seeds.size(), stream, mr); } } // namespace detail @@ -276,4 +394,21 @@ std::unique_ptr minhash64(cudf::strings_column_view const& input, return detail::minhash64(input, seeds, width, stream, mr); } +std::unique_ptr word_minhash(cudf::lists_column_view const& input, + cudf::device_span seeds, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return detail::word_minhash(input, seeds, stream, mr); +} + +std::unique_ptr 
word_minhash64(cudf::lists_column_view const& input, + cudf::device_span seeds, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return detail::word_minhash64(input, seeds, stream, mr); +} } // namespace nvtext diff --git a/cpp/src/text/ngrams_tokenize.cu b/cpp/src/text/ngrams_tokenize.cu index 95dd8ff3d6c..eee293268a2 100644 --- a/cpp/src/text/ngrams_tokenize.cu +++ b/cpp/src/text/ngrams_tokenize.cu @@ -27,13 +27,13 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -166,7 +166,7 @@ std::unique_ptr ngrams_tokenize(cudf::strings_column_view const& s auto const count_itr = cudf::detail::make_counting_transform_iterator(0, strings_tokenizer{d_strings, d_delimiter}); auto [token_offsets, total_tokens] = cudf::strings::detail::make_offsets_child_column( - count_itr, count_itr + strings_count, stream, rmm::mr::get_current_device_resource()); + count_itr, count_itr + strings_count, stream, cudf::get_current_device_resource_ref()); auto d_token_offsets = cudf::detail::offsetalator_factory::make_input_iterator(token_offsets->view()); @@ -191,7 +191,7 @@ std::unique_ptr ngrams_tokenize(cudf::strings_column_view const& s return (token_count >= ngrams) ? 
token_count - ngrams + 1 : 0; })); auto [ngram_offsets, total_ngrams] = cudf::detail::make_offsets_child_column( - ngram_counts, ngram_counts + strings_count, stream, rmm::mr::get_current_device_resource()); + ngram_counts, ngram_counts + strings_count, stream, cudf::get_current_device_resource_ref()); auto d_ngram_offsets = ngram_offsets->view().begin(); // Compute the total size of the ngrams for each string (not for each ngram) @@ -207,7 +207,7 @@ std::unique_ptr ngrams_tokenize(cudf::strings_column_view const& s auto const sizes_itr = cudf::detail::make_counting_transform_iterator( 0, ngram_builder_fn{d_strings, d_separator, ngrams, d_token_offsets, d_token_positions}); auto [chars_offsets, output_chars_size] = cudf::strings::detail::make_offsets_child_column( - sizes_itr, sizes_itr + strings_count, stream, rmm::mr::get_current_device_resource()); + sizes_itr, sizes_itr + strings_count, stream, cudf::get_current_device_resource_ref()); auto d_chars_offsets = cudf::detail::offsetalator_factory::make_input_iterator(chars_offsets->view()); diff --git a/cpp/src/text/normalize.cu b/cpp/src/text/normalize.cu index 4db11dc5beb..7e2b766862d 100644 --- a/cpp/src/text/normalize.cu +++ b/cpp/src/text/normalize.cu @@ -32,11 +32,11 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/text/replace.cu b/cpp/src/text/replace.cu index 81c787caf86..943bcbe9b3a 100644 --- a/cpp/src/text/replace.cu +++ b/cpp/src/text/replace.cu @@ -28,11 +28,11 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/cpp/src/text/stemmer.cu b/cpp/src/text/stemmer.cu index 4746b6b74b9..379e68b891b 100644 --- a/cpp/src/text/stemmer.cu +++ b/cpp/src/text/stemmer.cu @@ -25,12 +25,12 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/text/subword/load_hash_file.cu b/cpp/src/text/subword/load_hash_file.cu index a08fdea3e84..eca703e2604 100644 
--- a/cpp/src/text/subword/load_hash_file.cu +++ b/cpp/src/text/subword/load_hash_file.cu @@ -22,13 +22,13 @@ #include #include #include +#include #include #include #include #include -#include #include diff --git a/cpp/src/text/subword/subword_tokenize.cu b/cpp/src/text/subword/subword_tokenize.cu index e05427eb6ac..d7e04a0c208 100644 --- a/cpp/src/text/subword/subword_tokenize.cu +++ b/cpp/src/text/subword/subword_tokenize.cu @@ -25,13 +25,13 @@ #include #include #include +#include #include #include #include #include -#include #include #include diff --git a/cpp/src/text/tokenize.cu b/cpp/src/text/tokenize.cu index 3ce6064d9c2..df25950e6d5 100644 --- a/cpp/src/text/tokenize.cu +++ b/cpp/src/text/tokenize.cu @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include #include @@ -79,14 +79,14 @@ std::unique_ptr tokenize_fn(cudf::size_type strings_count, { // get the number of tokens in each string auto const token_counts = - token_count_fn(strings_count, tokenizer, stream, rmm::mr::get_current_device_resource()); + token_count_fn(strings_count, tokenizer, stream, cudf::get_current_device_resource_ref()); auto d_token_counts = token_counts->view(); // create token-index offsets from the counts auto [token_offsets, total_tokens] = cudf::detail::make_offsets_child_column(d_token_counts.template begin(), d_token_counts.template end(), stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // build a list of pointers to each token rmm::device_uvector tokens(total_tokens, stream); // now go get the tokens diff --git a/cpp/src/text/vocabulary_tokenize.cu b/cpp/src/text/vocabulary_tokenize.cu index 97abb1487d8..a2297987732 100644 --- a/cpp/src/text/vocabulary_tokenize.cu +++ b/cpp/src/text/vocabulary_tokenize.cu @@ -32,11 +32,11 @@ #include #include #include +#include #include #include -#include #include #include @@ -100,7 +100,7 @@ using vocabulary_map_type 
= cuco::static_map, cuco_storage>; } // namespace } // namespace detail @@ -152,7 +152,7 @@ tokenize_vocabulary::tokenize_vocabulary(cudf::strings_column_view const& input, detail::probe_scheme{detail::vocab_hasher{*d_vocabulary}}, cuco::thread_scope_device, detail::cuco_storage{}, - cudf::detail::cuco_allocator{stream}, + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, stream.value()); // the row index is the token id (value for each key in the map) diff --git a/cpp/src/transform/bools_to_mask.cu b/cpp/src/transform/bools_to_mask.cu index c12f65deb46..f365d690fde 100644 --- a/cpp/src/transform/bools_to_mask.cu +++ b/cpp/src/transform/bools_to_mask.cu @@ -23,11 +23,11 @@ #include #include #include +#include #include #include #include -#include namespace cudf { namespace detail { @@ -59,10 +59,10 @@ std::pair, cudf::size_type> bools_to_mask( } // namespace detail std::pair, cudf::size_type> bools_to_mask( - column_view const& input, rmm::device_async_resource_ref mr) + column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::bools_to_mask(input, cudf::get_default_stream(), mr); + return detail::bools_to_mask(input, stream, mr); } } // namespace cudf diff --git a/cpp/src/transform/compute_column.cu b/cpp/src/transform/compute_column.cu index 7960731f3a1..93105b321dd 100644 --- a/cpp/src/transform/compute_column.cu +++ b/cpp/src/transform/compute_column.cu @@ -30,11 +30,11 @@ #include #include #include +#include #include #include #include -#include namespace cudf { namespace detail { @@ -138,10 +138,11 @@ std::unique_ptr compute_column(table_view const& table, std::unique_ptr compute_column(table_view const& table, ast::expression const& expr, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::compute_column(table, expr, cudf::get_default_stream(), mr); + return detail::compute_column(table, expr, stream, mr); } } // 
namespace cudf diff --git a/cpp/src/transform/encode.cu b/cpp/src/transform/encode.cu index 7a044b9f6f7..cffb77ba776 100644 --- a/cpp/src/transform/encode.cu +++ b/cpp/src/transform/encode.cu @@ -27,11 +27,11 @@ #include #include #include +#include #include #include #include -#include #include #include @@ -72,10 +72,10 @@ std::pair, std::unique_ptr> encode(table_view con } // namespace detail std::pair, std::unique_ptr> encode( - cudf::table_view const& input, rmm::device_async_resource_ref mr) + cudf::table_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::encode(input, cudf::get_default_stream(), mr); + return detail::encode(input, stream, mr); } } // namespace cudf diff --git a/cpp/src/transform/mask_to_bools.cu b/cpp/src/transform/mask_to_bools.cu index adf5db02d9c..fe1f6674e8b 100644 --- a/cpp/src/transform/mask_to_bools.cu +++ b/cpp/src/transform/mask_to_bools.cu @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -62,9 +62,10 @@ std::unique_ptr mask_to_bools(bitmask_type const* bitmask, std::unique_ptr mask_to_bools(bitmask_type const* bitmask, size_type begin_bit, size_type end_bit, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::mask_to_bools(bitmask, begin_bit, end_bit, cudf::get_default_stream(), mr); + return detail::mask_to_bools(bitmask, begin_bit, end_bit, stream, mr); } } // namespace cudf diff --git a/cpp/src/transform/nans_to_nulls.cu b/cpp/src/transform/nans_to_nulls.cu index fd4f33c594c..adb8852c6e6 100644 --- a/cpp/src/transform/nans_to_nulls.cu +++ b/cpp/src/transform/nans_to_nulls.cu @@ -22,11 +22,11 @@ #include #include #include +#include #include #include #include -#include #include @@ -93,10 +93,10 @@ std::pair, cudf::size_type> nans_to_nulls( } // namespace detail std::pair, cudf::size_type> nans_to_nulls( - column_view const& input, 
rmm::device_async_resource_ref mr) + column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::nans_to_nulls(input, cudf::get_default_stream(), mr); + return detail::nans_to_nulls(input, stream, mr); } } // namespace cudf diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu index 808f2d1b284..e1a784a985e 100644 --- a/cpp/src/transform/one_hot_encode.cu +++ b/cpp/src/transform/one_hot_encode.cu @@ -24,12 +24,12 @@ #include #include #include +#include #include #include #include #include -#include #include #include @@ -115,9 +115,10 @@ std::pair, table_view> one_hot_encode(column_view const& std::pair, table_view> one_hot_encode(column_view const& input, column_view const& categories, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::one_hot_encode(input, categories, cudf::get_default_stream(), mr); + return detail::one_hot_encode(input, categories, stream, mr); } } // namespace cudf diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index 12a15eb7e34..66bbe532e46 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -28,15 +28,15 @@ #include #include #include +#include #include #include #include -#include #include +#include #include -#include #include namespace cudf { @@ -159,9 +159,9 @@ void flatten_hierarchy(ColIter begin, std::vector& info, hierarchy_info& h_info, rmm::cuda_stream_view stream, - size_type cur_depth = 0, - size_type cur_branch_depth = 0, - thrust::optional parent_index = {}); + size_type cur_depth = 0, + size_type cur_branch_depth = 0, + cuda::std::optional parent_index = {}); /** * @brief Type-dispatched functor called by flatten_hierarchy. 
@@ -177,7 +177,7 @@ struct flatten_functor { rmm::cuda_stream_view, size_type cur_depth, size_type cur_branch_depth, - thrust::optional) + cuda::std::optional) { out.push_back(col); info.push_back({cur_depth, cur_branch_depth, cur_branch_depth}); @@ -194,7 +194,7 @@ struct flatten_functor { rmm::cuda_stream_view, size_type cur_depth, size_type cur_branch_depth, - thrust::optional) + cuda::std::optional) { out.push_back(col); info.push_back({cur_depth, cur_branch_depth, cur_branch_depth}); @@ -210,7 +210,7 @@ struct flatten_functor { rmm::cuda_stream_view stream, size_type cur_depth, size_type cur_branch_depth, - thrust::optional parent_index) + cuda::std::optional parent_index) { // track branch depth as we reach this list and after we pass it auto const branch_depth_start = cur_branch_depth; @@ -243,7 +243,7 @@ struct flatten_functor { rmm::cuda_stream_view stream, size_type cur_depth, size_type cur_branch_depth, - thrust::optional) + cuda::std::optional) { out.push_back(col); info.push_back({cur_depth, cur_branch_depth, cur_branch_depth}); @@ -284,7 +284,7 @@ void flatten_hierarchy(ColIter begin, rmm::cuda_stream_view stream, size_type cur_depth, size_type cur_branch_depth, - thrust::optional parent_index) + cuda::std::optional parent_index) { std::for_each(begin, end, [&](column_view const& col) { cudf::type_dispatcher(col.type(), @@ -526,7 +526,7 @@ std::unique_ptr segmented_row_bit_count(table_view const& t, // move stack info to the gpu rmm::device_uvector d_info = - cudf::detail::make_device_uvector_async(info, stream, rmm::mr::get_current_device_resource()); + cudf::detail::make_device_uvector_async(info, stream, cudf::get_current_device_resource_ref()); // each thread needs to maintain a stack of row spans of size max_branch_depth. 
we will use // shared memory to do this rather than allocating a potentially gigantic temporary buffer @@ -561,23 +561,26 @@ std::unique_ptr row_bit_count(table_view const& t, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return segmented_row_bit_count(t, 1, stream, mr); + return detail::segmented_row_bit_count(t, 1, stream, mr); } } // namespace detail std::unique_ptr segmented_row_bit_count(table_view const& t, size_type segment_length, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::segmented_row_bit_count(t, segment_length, cudf::get_default_stream(), mr); + return detail::segmented_row_bit_count(t, segment_length, stream, mr); } -std::unique_ptr row_bit_count(table_view const& t, rmm::device_async_resource_ref mr) +std::unique_ptr row_bit_count(table_view const& t, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::row_bit_count(t, cudf::get_default_stream(), mr); + return detail::row_bit_count(t, stream, mr); } } // namespace cudf diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp index 98ec44758b9..52b96bc9039 100644 --- a/cpp/src/transform/transform.cpp +++ b/cpp/src/transform/transform.cpp @@ -24,11 +24,11 @@ #include #include #include +#include #include #include #include -#include #include @@ -97,10 +97,11 @@ std::unique_ptr transform(column_view const& input, std::string const& unary_udf, data_type output_type, bool is_ptx, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::transform(input, unary_udf, output_type, is_ptx, cudf::get_default_stream(), mr); + return detail::transform(input, unary_udf, output_type, is_ptx, stream, mr); } } // namespace cudf diff --git a/cpp/src/transpose/transpose.cu b/cpp/src/transpose/transpose.cu index abde43535be..810fd8afd73 100644 --- a/cpp/src/transpose/transpose.cu +++ b/cpp/src/transpose/transpose.cu 
@@ -22,11 +22,11 @@ #include #include #include +#include #include #include #include -#include #include #include diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu index ec21813705a..0913796a527 100644 --- a/cpp/src/unary/cast_ops.cu +++ b/cpp/src/unary/cast_ops.cu @@ -27,12 +27,12 @@ #include #include #include +#include #include #include #include #include -#include #include diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu index ab17da5f8c4..1d506c59cd9 100644 --- a/cpp/src/unary/math_ops.cu +++ b/cpp/src/unary/math_ops.cu @@ -22,10 +22,10 @@ #include #include #include +#include #include #include -#include #include @@ -349,7 +349,7 @@ std::unique_ptr transform_fn(cudf::dictionary_column_view const& i { auto dictionary_view = cudf::column_device_view::create(input.parent(), stream); auto dictionary_itr = dictionary::detail::make_dictionary_iterator(*dictionary_view); - auto default_mr = rmm::mr::get_current_device_resource(); + auto default_mr = cudf::get_current_device_resource_ref(); // call unary-op using temporary output buffer auto output = transform_fn(dictionary_itr, dictionary_itr + input.size(), diff --git a/cpp/src/unary/nan_ops.cu b/cpp/src/unary/nan_ops.cu index 08aa8755624..17a90a14248 100644 --- a/cpp/src/unary/nan_ops.cu +++ b/cpp/src/unary/nan_ops.cu @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include namespace cudf { namespace detail { diff --git a/cpp/src/unary/null_ops.cu b/cpp/src/unary/null_ops.cu index a223a090128..f6514ea265b 100644 --- a/cpp/src/unary/null_ops.cu +++ b/cpp/src/unary/null_ops.cu @@ -18,8 +18,7 @@ #include #include #include - -#include +#include #include diff --git a/cpp/src/unary/unary_ops.cuh b/cpp/src/unary/unary_ops.cuh index 61c41705665..34a20d88f37 100644 --- a/cpp/src/unary/unary_ops.cuh +++ b/cpp/src/unary/unary_ops.cuh @@ -21,10 +21,10 @@ #include #include #include +#include #include #include -#include #include diff --git a/cpp/src/utilities/cuda.cpp 
b/cpp/src/utilities/cuda.cpp new file mode 100644 index 00000000000..53ca0608170 --- /dev/null +++ b/cpp/src/utilities/cuda.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include + +namespace cudf::detail { + +cudf::size_type num_multiprocessors() +{ + int device = 0; + CUDF_CUDA_TRY(cudaGetDevice(&device)); + int num_sms = 0; + CUDF_CUDA_TRY(cudaDeviceGetAttribute(&num_sms, cudaDevAttrMultiProcessorCount, device)); + return num_sms; +} + +} // namespace cudf::detail diff --git a/cpp/src/utilities/host_memory.cpp b/cpp/src/utilities/host_memory.cpp index 7c3cea42023..125b98c4a67 100644 --- a/cpp/src/utilities/host_memory.cpp +++ b/cpp/src/utilities/host_memory.cpp @@ -18,12 +18,12 @@ #include #include #include +#include #include #include #include #include -#include namespace cudf { diff --git a/cpp/src/utilities/stream_pool.cpp b/cpp/src/utilities/stream_pool.cpp index 9d3a7ce5a4e..9824c472b20 100644 --- a/cpp/src/utilities/stream_pool.cpp +++ b/cpp/src/utilities/stream_pool.cpp @@ -132,6 +132,13 @@ struct cuda_event { cuda_event() { CUDF_CUDA_TRY(cudaEventCreateWithFlags(&e_, cudaEventDisableTiming)); } virtual ~cuda_event() { CUDF_ASSERT_CUDA_SUCCESS(cudaEventDestroy(e_)); } + // Moveable but not copyable. 
+ cuda_event(const cuda_event&) = delete; + cuda_event& operator=(const cuda_event&) = delete; + + cuda_event(cuda_event&&) = default; + cuda_event& operator=(cuda_event&&) = default; + operator cudaEvent_t() { return e_; } private: @@ -147,11 +154,12 @@ struct cuda_event { */ cudaEvent_t event_for_thread() { - thread_local std::vector> thread_events(get_num_cuda_devices()); + // The program may crash if this function is called from the main thread and user application + // subsequently calls cudaDeviceReset(). + // As a workaround, here we intentionally disable RAII and leak cudaEvent_t. + thread_local std::vector thread_events(get_num_cuda_devices()); auto const device_id = get_current_cuda_device(); - if (not thread_events[device_id.value()]) { - thread_events[device_id.value()] = std::make_unique(); - } + if (not thread_events[device_id.value()]) { thread_events[device_id.value()] = new cuda_event(); } return *thread_events[device_id.value()]; } diff --git a/cpp/src/utilities/type_checks.cpp b/cpp/src/utilities/type_checks.cpp index dac981fb532..3095b342748 100644 --- a/cpp/src/utilities/type_checks.cpp +++ b/cpp/src/utilities/type_checks.cpp @@ -139,11 +139,6 @@ bool have_same_types(column_view const& lhs, column_view const& rhs) return type_dispatcher(lhs.type(), columns_equal_fn{}, lhs, rhs); } -bool column_types_equal(column_view const& lhs, column_view const& rhs) -{ - return have_same_types(lhs, rhs); -} - bool have_same_types(column_view const& lhs, scalar const& rhs) { return type_dispatcher(lhs.type(), column_scalar_equal_fn{}, lhs, rhs); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 22827484f9a..b67d922d377 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -24,8 +24,8 @@ rapids_test_init() # properties and linking to build the test function(ConfigureTest CMAKE_TEST_NAME) set(options) - set(one_value GPUS PERCENT STREAM_MODE EXTRA_LIB) - set(multi_value) + set(one_value GPUS PERCENT STREAM_MODE) + 
set(multi_value EXTRA_LIBS) cmake_parse_arguments(_CUDF_TEST "${options}" "${one_value}" "${multi_value}" ${ARGN}) if(NOT DEFINED _CUDF_TEST_GPUS AND NOT DEFINED _CUDF_TEST_PERCENT) set(_CUDF_TEST_GPUS 1) @@ -57,7 +57,7 @@ function(ConfigureTest CMAKE_TEST_NAME) target_link_libraries( ${CMAKE_TEST_NAME} PRIVATE cudftestutil GTest::gmock GTest::gmock_main GTest::gtest GTest::gtest_main - nvtx3::nvtx3-cpp $ "${_CUDF_TEST_EXTRA_LIB}" + nvtx3::nvtx3-cpp $ "${_CUDF_TEST_EXTRA_LIBS}" ) rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ${CUDA_STATIC_RUNTIME}) rapids_test_add( @@ -78,6 +78,14 @@ function(ConfigureTest CMAKE_TEST_NAME) endif() endfunction() +# ################################################################################################## +# dependencies ################################################################################### +# ################################################################################################## + +# No need to install Arrow libs when only the final test executables are shipped. +set(CUDF_EXCLUDE_ARROW_FROM_ALL ON) +include(../cmake/thirdparty/get_arrow.cmake) + # ################################################################################################## # test sources ################################################################################## # ################################################################################################## @@ -102,10 +110,6 @@ ConfigureTest(SCALAR_TEST scalar/scalar_test.cpp scalar/scalar_device_view_test. 
# * timestamps tests ------------------------------------------------------------------------------ ConfigureTest(TIMESTAMPS_TEST wrappers/timestamps_test.cu) -# ################################################################################################## -# * cudf tests ------------------------------------------------------------------------------------ -ConfigureTest(ERROR_TEST error/error_handling_test.cu) - # ################################################################################################## # * groupby tests --------------------------------------------------------------------------------- ConfigureTest( @@ -197,7 +201,7 @@ ConfigureTest( QUANTILES_TEST quantiles/percentile_approx_test.cpp quantiles/quantile_test.cpp quantiles/quantiles_test.cpp GPUS 1 - PERCENT 70 + PERCENT 70 EXTRA_LIBS ${ARROW_LIBRARIES} ) # ################################################################################################## @@ -276,8 +280,9 @@ ConfigureTest( interop/from_arrow_host_test.cpp interop/from_arrow_stream_test.cpp interop/dlpack_test.cpp - EXTRA_LIB + EXTRA_LIBS nanoarrow + ${ARROW_LIBRARIES} ) # ################################################################################################## @@ -288,7 +293,7 @@ ConfigureTest(ROW_SELECTION_TEST io/row_selection_test.cpp) ConfigureTest( CSV_TEST io/csv_test.cpp GPUS 1 - PERCENT 30 + PERCENT 30 EXTRA_LIBS ${ARROW_LIBRARIES} ) ConfigureTest( FILE_IO_TEST io/file_io_test.cpp @@ -316,15 +321,15 @@ ConfigureTest( ConfigureTest( JSON_TEST io/json/json_test.cpp io/json/json_chunked_reader.cu GPUS 1 - PERCENT 30 + PERCENT 30 EXTRA_LIBS ${ARROW_LIBRARIES} ) ConfigureTest(JSON_WRITER_TEST io/json/json_writer.cpp) ConfigureTest(JSON_TYPE_CAST_TEST io/json/json_type_cast_test.cu) ConfigureTest(NESTED_JSON_TEST io/json/nested_json_test.cpp io/json/json_tree.cpp) -ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp) ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp) 
ConfigureTest(JSON_QUOTE_NORMALIZATION io/json/json_quote_normalization_test.cpp) ConfigureTest(JSON_WHITESPACE_NORMALIZATION io/json/json_whitespace_normalization_test.cu) +ConfigureTest(JSON_TREE_CSR io/json/json_tree_csr.cu) ConfigureTest( DATA_CHUNK_SOURCE_TEST io/text/data_chunk_source_test.cpp GPUS 1 @@ -334,9 +339,6 @@ target_link_libraries(DATA_CHUNK_SOURCE_TEST PRIVATE ZLIB::ZLIB) ConfigureTest(LOGICAL_STACK_TEST io/fst/logical_stack_test.cu) ConfigureTest(FST_TEST io/fst/fst_test.cu) ConfigureTest(TYPE_INFERENCE_TEST io/type_inference_test.cu) -if(CUDF_ENABLE_ARROW_S3) - target_compile_definitions(ARROW_IO_SOURCE_TEST PRIVATE "S3_ENABLED") -endif() # ################################################################################################## # * sort tests ------------------------------------------------------------------------------------ @@ -393,6 +395,7 @@ ConfigureTest( utilities_tests/pinned_memory_tests.cpp utilities_tests/type_check_tests.cpp utilities_tests/type_list_tests.cpp + utilities_tests/batched_memset_tests.cu ) # ################################################################################################## @@ -685,14 +688,12 @@ ConfigureTest(STREAM_BINARYOP_TEST streams/binaryop_test.cpp STREAM_MODE testing ConfigureTest(STREAM_CONCATENATE_TEST streams/concatenate_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_COPYING_TEST streams/copying_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_CSVIO_TEST streams/io/csv_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_DATETIME_TEST streams/datetime_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_DICTIONARY_TEST streams/dictionary_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_FILLING_TEST streams/filling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_GROUPBY_TEST streams/groupby_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_HASHING_TEST streams/hash_test.cpp STREAM_MODE testing) -# Deprecation from 16297 and fixes in 16379 caused this test to be empty This will 
be reenabled once -# the deprecated APIs have been replaced in 24.10. -# -# ConfigureTest(STREAM_INTEROP_TEST streams/interop_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_JOIN_TEST streams/join_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_JSONIO_TEST streams/io/json_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LABELING_BINS_TEST streams/labeling_bins_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_LISTS_TEST streams/lists_test.cpp STREAM_MODE testing) @@ -704,6 +705,7 @@ ConfigureTest(STREAM_PARQUETIO_TEST streams/io/parquet_test.cpp STREAM_MODE test ConfigureTest(STREAM_POOL_TEST streams/pool_test.cu STREAM_MODE testing) ConfigureTest(STREAM_REDUCTION_TEST streams/reduction_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_REPLACE_TEST streams/replace_test.cpp STREAM_MODE testing) +ConfigureTest(STREAM_RESHAPE_TEST streams/reshape_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_ROLLING_TEST streams/rolling_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SEARCH_TEST streams/search_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_SORTING_TEST streams/sorting_test.cpp STREAM_MODE testing) @@ -734,6 +736,7 @@ ConfigureTest( STREAM_MODE testing ) +ConfigureTest(STREAM_TRANSFORM_TEST streams/transform_test.cpp STREAM_MODE testing) ConfigureTest(STREAM_UNARY_TEST streams/unary_test.cpp STREAM_MODE testing) # ################################################################################################## diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp index 4bf648bed5a..fe221fb1c48 100644 --- a/cpp/tests/bitmask/bitmask_tests.cpp +++ b/cpp/tests/bitmask/bitmask_tests.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -90,7 +91,7 @@ rmm::device_uvector make_mask(cudf::size_type size, bool fil { if (!fill_valid) { return cudf::detail::make_zeroed_device_uvector_sync( - size, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + size, 
cudf::get_default_stream(), cudf::get_current_device_resource_ref()); } else { auto ret = rmm::device_uvector(size, cudf::get_default_stream()); CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(), diff --git a/cpp/tests/bitmask/valid_if_tests.cu b/cpp/tests/bitmask/valid_if_tests.cu index 65143ec17f1..96f122f21a8 100644 --- a/cpp/tests/bitmask/valid_if_tests.cu +++ b/cpp/tests/bitmask/valid_if_tests.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -43,7 +44,7 @@ TEST_F(ValidIfTest, EmptyRange) thrust::make_counting_iterator(0), odds_valid{}, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto const& buffer = actual.first; EXPECT_EQ(0u, buffer.size()); EXPECT_EQ(nullptr, buffer.data()); @@ -56,7 +57,7 @@ TEST_F(ValidIfTest, InvalidRange) thrust::make_counting_iterator(0), odds_valid{}, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()), + cudf::get_current_device_resource_ref()), cudf::logic_error); } @@ -68,7 +69,7 @@ TEST_F(ValidIfTest, OddsValid) thrust::make_counting_iterator(10000), odds_valid{}, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.first.data(), actual.first.data(), expected.first.size()); EXPECT_EQ(5000, actual.second); EXPECT_EQ(expected.second, actual.second); @@ -82,7 +83,7 @@ TEST_F(ValidIfTest, AllValid) thrust::make_counting_iterator(10000), all_valid{}, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.first.data(), actual.first.data(), expected.first.size()); EXPECT_EQ(0, actual.second); EXPECT_EQ(expected.second, actual.second); @@ -96,7 +97,7 @@ TEST_F(ValidIfTest, AllNull) thrust::make_counting_iterator(10000), all_null{}, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + 
cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.first.data(), actual.first.data(), expected.first.size()); EXPECT_EQ(10000, actual.second); EXPECT_EQ(expected.second, actual.second); diff --git a/cpp/tests/column/column_test.cpp b/cpp/tests/column/column_test.cpp index 1ba9b14dc1f..14b4197de71 100644 --- a/cpp/tests/column/column_test.cpp +++ b/cpp/tests/column/column_test.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -373,7 +374,7 @@ TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorNoMask) this->num_elements()); auto original = cudf::detail::make_device_uvector_async( - data, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + data, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto original_data = original.data(); cudf::column moved_to{std::move(original), rmm::device_buffer{}, 0}; verify_column_views(moved_to); @@ -389,7 +390,7 @@ TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorWithMask) this->num_elements()); auto original = cudf::detail::make_device_uvector_async( - data, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + data, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto original_data = original.data(); auto original_mask = this->all_valid_mask.data(); cudf::column moved_to{std::move(original), std::move(this->all_valid_mask), 0}; diff --git a/cpp/tests/copying/copy_range_tests.cpp b/cpp/tests/copying/copy_range_tests.cpp index 223946ddcee..25d93da277b 100644 --- a/cpp/tests/copying/copy_range_tests.cpp +++ b/cpp/tests/copying/copy_range_tests.cpp @@ -232,6 +232,16 @@ TEST_F(CopyRangeTestFixture, CopyWithNullsString) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*p_ret, expected); } +TEST_F(CopyRangeTestFixture, CopyWithTargetNullsString) +{ + auto target = + cudf::test::strings_column_wrapper({"a", "b", "", "d", "", "é"}, {1, 1, 0, 1, 1, 1}); + auto source = cudf::test::strings_column_wrapper({"A", "B", "C", 
"D", "E", "F"}); + auto result = cudf::copy_range(source, target, 1, 5, 1); + auto expected = cudf::test::strings_column_wrapper({"a", "B", "C", "D", "E", "é"}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected); +} + TEST_F(CopyRangeTestFixture, CopyNoNullsString) { cudf::size_type size{100}; diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu index 17ced5ccd34..b9ae91afd1e 100644 --- a/cpp/tests/copying/detail_gather_tests.cu +++ b/cpp/tests/copying/detail_gather_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -62,7 +63,7 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest) gather_map.end(), cudf::out_of_bounds_policy::DONT_CHECK, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); for (auto i = 0; i < source_table.num_columns(); ++i) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i)); @@ -79,7 +80,7 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest) gather_map.data() + gather_map.size(), cudf::out_of_bounds_policy::DONT_CHECK, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); for (auto i = 0; i < source_table.num_columns(); ++i) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i)); @@ -107,7 +108,7 @@ TYPED_TEST(GatherTest, GatherDetailInvalidIndexTest) cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto expect_data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { 
return (i % 2) ? 0 : i; }); diff --git a/cpp/tests/copying/gather_str_tests.cpp b/cpp/tests/copying/gather_str_tests.cpp index b31f34504e7..28098878086 100644 --- a/cpp/tests/copying/gather_str_tests.cpp +++ b/cpp/tests/copying/gather_str_tests.cpp @@ -24,8 +24,7 @@ #include #include #include - -#include +#include class GatherTestStr : public cudf::test::BaseFixture {}; @@ -91,7 +90,7 @@ TEST_F(GatherTestStr, Gather) cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); std::vector h_expected; std::vector expected_validity; @@ -122,7 +121,7 @@ TEST_F(GatherTestStr, GatherDontCheckOutOfBounds) cudf::out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); std::vector h_expected; for (int itr : h_map) { @@ -141,7 +140,7 @@ TEST_F(GatherTestStr, GatherEmptyMapStringsColumn) cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); cudf::test::expect_column_empty(results->get_column(0).view()); } @@ -155,6 +154,6 @@ TEST_F(GatherTestStr, GatherZeroSizeStringsColumn) cudf::out_of_bounds_policy::NULLIFY, cudf::detail::negative_index_policy::NOT_ALLOWED, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, results->get_column(0).view()); } diff --git a/cpp/tests/copying/gather_tests.cpp b/cpp/tests/copying/gather_tests.cpp index 284b6c4c50c..07ce672b14d 100644 --- a/cpp/tests/copying/gather_tests.cpp +++ b/cpp/tests/copying/gather_tests.cpp @@ -43,7 +43,7 @@ TYPED_TEST(GatherTest, IdentityTest) cudf::table_view source_table({source_column}); - 
std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); for (auto i = 0; i < source_table.num_columns(); ++i) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(source_table.column(i), result->view().column(i)); @@ -66,7 +66,7 @@ TYPED_TEST(GatherTest, ReverseIdentityTest) cudf::table_view source_table({source_column}); - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); cudf::test::fixed_width_column_wrapper expect_column(reversed_data, reversed_data + source_size); @@ -94,7 +94,7 @@ TYPED_TEST(GatherTest, EveryOtherNullOdds) cudf::table_view source_table({source_column}); - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); auto expect_data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return 0; }); auto expect_valid = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return 0; }); @@ -126,7 +126,7 @@ TYPED_TEST(GatherTest, EveryOtherNullEvens) cudf::table_view source_table({source_column}); - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); auto expect_data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i * 2 + 1; }); @@ -160,7 +160,7 @@ TYPED_TEST(GatherTest, AllNull) cudf::table_view source_table({source_column}); - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); // Check that the result is also all invalid CUDF_TEST_EXPECT_TABLES_EQUAL(source_table, result->view()); @@ -190,7 +190,7 @@ TYPED_TEST(GatherTest, MultiColReverseIdentityTest) cudf::table_view source_table{source_columns}; - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + 
std::unique_ptr result = cudf::gather(source_table, gather_map); cudf::test::fixed_width_column_wrapper expect_column(reversed_data, reversed_data + source_size); @@ -228,7 +228,7 @@ TYPED_TEST(GatherTest, MultiColNulls) cudf::table_view source_table{source_columns}; - std::unique_ptr result = std::move(cudf::gather(source_table, gather_map)); + std::unique_ptr result = cudf::gather(source_table, gather_map); // Expected data auto expect_data = diff --git a/cpp/tests/copying/pack_tests.cpp b/cpp/tests/copying/pack_tests.cpp index ea4408efa6a..8a50e071cb9 100644 --- a/cpp/tests/copying/pack_tests.cpp +++ b/cpp/tests/copying/pack_tests.cpp @@ -573,6 +573,8 @@ TEST_F(PackUnpackTest, SlicedEmpty) cudf::table_view t({a, b, c, d}); - auto sliced = cudf::split(t, {0}); - this->run_test(sliced[0]); + auto sliced = cudf::split(t, {0}); + auto packed = cudf::pack(t); + auto unpacked = cudf::unpack(packed); + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(t, unpacked); } diff --git a/cpp/tests/copying/shift_tests.cpp b/cpp/tests/copying/shift_tests.cpp index 01ad4f2247c..ff6808d9a79 100644 --- a/cpp/tests/copying/shift_tests.cpp +++ b/cpp/tests/copying/shift_tests.cpp @@ -23,10 +23,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -37,7 +37,7 @@ using TestTypes = cudf::test::Types; template > std::unique_ptr make_scalar( rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto s = new ScalarType(cudf::test::make_type_param_scalar(0), false, stream, mr); return std::unique_ptr(s); @@ -47,7 +47,7 @@ template > std::unique_ptr make_scalar( T value, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto s = new 
ScalarType(value, true, stream, mr); return std::unique_ptr(s); diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp index 7ff159cf896..ee3e7da5e0f 100644 --- a/cpp/tests/copying/split_tests.cpp +++ b/cpp/tests/copying/split_tests.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -1383,7 +1384,7 @@ struct ContiguousSplitTest : public cudf::test::BaseFixture {}; std::vector do_chunked_pack(cudf::table_view const& input) { - auto mr = rmm::mr::get_current_device_resource(); + auto mr = cudf::get_current_device_resource_ref(); rmm::device_buffer bounce_buff(1 * 1024 * 1024, cudf::get_default_stream(), mr); auto bounce_buff_span = @@ -2383,7 +2384,7 @@ TEST_F(ContiguousSplitTableCornerCases, ChunkSpanTooSmall) { auto chunked_pack = cudf::chunked_pack::create({}, 1 * 1024 * 1024); rmm::device_buffer buff( - 1 * 1024, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + 1 * 1024, cudf::test::get_default_stream(), cudf::get_current_device_resource_ref()); cudf::device_span too_small(static_cast(buff.data()), buff.size()); std::size_t copied = 0; // throws because we created chunked_contig_split with 1MB, but we are giving @@ -2396,7 +2397,7 @@ TEST_F(ContiguousSplitTableCornerCases, EmptyTableHasNextFalse) { auto chunked_pack = cudf::chunked_pack::create({}, 1 * 1024 * 1024); rmm::device_buffer buff( - 1 * 1024 * 1024, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + 1 * 1024 * 1024, cudf::test::get_default_stream(), cudf::get_current_device_resource_ref()); cudf::device_span bounce_buff(static_cast(buff.data()), buff.size()); EXPECT_EQ(chunked_pack->has_next(), false); // empty input table std::size_t copied = 0; @@ -2409,7 +2410,7 @@ TEST_F(ContiguousSplitTableCornerCases, ExhaustedHasNextFalse) cudf::test::strings_column_wrapper a{"abc", "def", "ghi", "jkl", "mno", "", "st", "uvwx"}; cudf::table_view t({a}); rmm::device_buffer buff( - 1 * 1024 * 1024, 
cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + 1 * 1024 * 1024, cudf::test::get_default_stream(), cudf::get_current_device_resource_ref()); cudf::device_span bounce_buff(static_cast(buff.data()), buff.size()); auto chunked_pack = cudf::chunked_pack::create(t, buff.size()); EXPECT_EQ(chunked_pack->has_next(), true); diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu index ccf5ccae187..b81f8196d89 100644 --- a/cpp/tests/device_atomics/device_atomics_test.cu +++ b/cpp/tests/device_atomics/device_atomics_test.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -144,9 +145,9 @@ struct AtomicsTest : public cudf::test::BaseFixture { result_init[5] = result_init[2]; auto dev_data = cudf::detail::make_device_uvector_sync( - v, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + v, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto dev_result = cudf::detail::make_device_uvector_sync( - result_init, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + result_init, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); if (block_size == 0) { block_size = vec_size; } diff --git a/cpp/tests/dictionary/search_test.cpp b/cpp/tests/dictionary/search_test.cpp index 1b73576e083..25501b4fde7 100644 --- a/cpp/tests/dictionary/search_test.cpp +++ b/cpp/tests/dictionary/search_test.cpp @@ -20,6 +20,7 @@ #include #include +#include struct DictionarySearchTest : public cudf::test::BaseFixture {}; @@ -39,7 +40,7 @@ TEST_F(DictionarySearchTest, StringsColumn) result = cudf::dictionary::detail::get_insert_index(dictionary, cudf::string_scalar("eee"), cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); n_result = dynamic_cast*>(result.get()); EXPECT_EQ(uint32_t{5}, n_result->value()); } @@ -59,7 +60,7 @@ TEST_F(DictionarySearchTest, 
WithNulls) result = cudf::dictionary::detail::get_insert_index(dictionary, cudf::numeric_scalar(5), cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); n_result = dynamic_cast*>(result.get()); EXPECT_EQ(uint32_t{1}, n_result->value()); } @@ -71,7 +72,7 @@ TEST_F(DictionarySearchTest, EmptyColumn) auto result = cudf::dictionary::get_index(dictionary, key); EXPECT_FALSE(result->is_valid()); result = cudf::dictionary::detail::get_insert_index( - dictionary, key, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + dictionary, key, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); EXPECT_FALSE(result->is_valid()); } @@ -82,6 +83,6 @@ TEST_F(DictionarySearchTest, Errors) EXPECT_THROW(cudf::dictionary::get_index(dictionary, key), cudf::data_type_error); EXPECT_THROW( cudf::dictionary::detail::get_insert_index( - dictionary, key, cudf::get_default_stream(), rmm::mr::get_current_device_resource()), + dictionary, key, cudf::get_default_stream(), cudf::get_current_device_resource_ref()), cudf::data_type_error); } diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu deleted file mode 100644 index 46d01ec14ff..00000000000 --- a/cpp/tests/error/error_handling_test.cu +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include - -#include -#include -#include - -#include - -TEST(ExpectsTest, FalseCondition) -{ - EXPECT_THROW(CUDF_EXPECTS(false, "condition is false"), cudf::logic_error); -} - -TEST(ExpectsTest, TrueCondition) { EXPECT_NO_THROW(CUDF_EXPECTS(true, "condition is true")); } - -TEST(CudaTryTest, Error) { EXPECT_THROW(CUDF_CUDA_TRY(cudaErrorLaunchFailure), cudf::cuda_error); } - -TEST(CudaTryTest, Success) { EXPECT_NO_THROW(CUDF_CUDA_TRY(cudaSuccess)); } - -TEST(StreamCheck, success) { EXPECT_NO_THROW(CUDF_CHECK_CUDA(0)); } - -namespace { -// Some silly kernel that will cause an error -CUDF_KERNEL void test_kernel(int* data) { data[threadIdx.x] = threadIdx.x; } -} // namespace - -// In a release build and without explicit synchronization, CUDF_CHECK_CUDA may -// or may not fail on erroneous asynchronous CUDA calls. Invoke -// cudaStreamSynchronize to guarantee failure on error. In a non-release build, -// CUDF_CHECK_CUDA deterministically fails on erroneous asynchronous CUDA -// calls. 
-TEST(StreamCheck, FailedKernel) -{ - rmm::cuda_stream stream; - int a; - test_kernel<<<0, 0, 0, stream.value()>>>(&a); -#ifdef NDEBUG - stream.synchronize(); -#endif - EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error); -} - -TEST(StreamCheck, CatchFailedKernel) -{ - rmm::cuda_stream stream; - int a; - test_kernel<<<0, 0, 0, stream.value()>>>(&a); -#ifndef NDEBUG - stream.synchronize(); -#endif - EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error); -} - -CUDF_KERNEL void kernel() { asm("trap;"); } - -TEST(DeathTest, CudaFatalError) -{ - testing::FLAGS_gtest_death_test_style = "threadsafe"; - auto call_kernel = []() { - kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(); - try { - CUDF_CUDA_TRY(cudaDeviceSynchronize()); - } catch (const cudf::fatal_cuda_error& fe) { - std::abort(); - } - }; - ASSERT_DEATH(call_kernel(), ""); -} - -#ifndef NDEBUG - -CUDF_KERNEL void assert_false_kernel() { cudf_assert(false && "this kernel should die"); } - -CUDF_KERNEL void assert_true_kernel() { cudf_assert(true && "this kernel should live"); } - -TEST(DebugAssertDeathTest, cudf_assert_false) -{ - testing::FLAGS_gtest_death_test_style = "threadsafe"; - - auto call_kernel = []() { - auto const stream = cudf::get_default_stream().value(); - assert_false_kernel<<<1, 1, 0, stream>>>(); - - // Kernel should fail with `cudaErrorAssert` - // This error invalidates the current device context, so we need to kill - // the current process. Running with EXPECT_DEATH spawns a new process for - // each attempted kernel launch - if (cudaErrorAssert == cudaDeviceSynchronize()) { std::abort(); } - - // If we reach this point, the cudf_assert didn't work so we exit normally, which will cause - // EXPECT_DEATH to fail. 
- }; - - EXPECT_DEATH(call_kernel(), "this kernel should die"); -} - -TEST(DebugAssert, cudf_assert_true) -{ - auto const stream = cudf::get_default_stream().value(); - assert_true_kernel<<<1, 1, 0, stream>>>(); - ASSERT_EQ(cudaSuccess, cudaDeviceSynchronize()); -} - -#endif - -// These tests don't use CUDF_TEST_PROGRAM_MAIN because : -// 1.) They don't need the RMM Pool -// 2.) The RMM Pool interferes with the death test -int main(int argc, char** argv) -{ - ::testing::InitGoogleTest(&argc, argv); - auto const cmd_opts = parse_cudf_test_opts(argc, argv); - auto adaptor = make_stream_mode_adaptor(cmd_opts); - return RUN_ALL_TESTS(); -} diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu index 24b4e335840..f34760341d8 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cu +++ b/cpp/tests/fixed_point/fixed_point_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -82,7 +83,7 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice) std::vector vec1(1000, decimal32{1, scale_type{-2}}); auto d_vec1 = cudf::detail::make_device_uvector_sync( - vec1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + vec1, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const sum = thrust::reduce(rmm::exec_policy(cudf::get_default_stream()), std::cbegin(d_vec1), @@ -96,7 +97,7 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice) thrust::inclusive_scan(std::cbegin(vec1), std::cend(vec1), std::begin(vec1)); d_vec1 = cudf::detail::make_device_uvector_sync( - vec1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + vec1, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); std::vector vec2(1000); std::iota(std::begin(vec2), std::end(vec2), 1); diff --git a/cpp/tests/groupby/histogram_tests.cpp b/cpp/tests/groupby/histogram_tests.cpp index 612486d8e5c..2d447025919 100644 --- a/cpp/tests/groupby/histogram_tests.cpp +++ b/cpp/tests/groupby/histogram_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,6 +24,7 @@ #include #include #include +#include using int32s_col = cudf::test::fixed_width_column_wrapper; using int64s_col = cudf::test::fixed_width_column_wrapper; @@ -68,7 +69,7 @@ auto groupby_histogram(cudf::column_view const& keys, cudf::order::ASCENDING, cudf::null_order::BEFORE, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); return std::pair{std::move(sorted_keys), std::move(sorted_histograms)}; } diff --git a/cpp/tests/groupby/tdigest_tests.cu b/cpp/tests/groupby/tdigest_tests.cu index 97edc1c45a7..baa59026b07 100644 --- a/cpp/tests/groupby/tdigest_tests.cu +++ b/cpp/tests/groupby/tdigest_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -468,16 +469,16 @@ TEST_F(TDigestMergeTest, EmptyGroups) cudf::test::fixed_width_column_wrapper keys{0, 0, 0, 0, 0, 0, 0}; int const delta = 1000; - auto a = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + auto a = cudf::tdigest::detail::make_empty_tdigest_column( + cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto b = cudf::type_dispatcher( static_cast(values_b).type(), tdigest_gen_grouped{}, keys, values_b, delta); - auto c = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + auto c = cudf::tdigest::detail::make_empty_tdigest_column( + cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto d = cudf::type_dispatcher( static_cast(values_d).type(), tdigest_gen_grouped{}, keys, values_d, delta); - auto e = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream(), - 
rmm::mr::get_current_device_resource()); + auto e = cudf::tdigest::detail::make_empty_tdigest_column( + cudf::get_default_stream(), cudf::get_current_device_resource_ref()); std::vector cols; cols.push_back(*a); diff --git a/cpp/tests/identify_stream_usage/test_default_stream_identification.cu b/cpp/tests/identify_stream_usage/test_default_stream_identification.cu index 268c7b37c81..c5fb75a7a8e 100644 --- a/cpp/tests/identify_stream_usage/test_default_stream_identification.cu +++ b/cpp/tests/identify_stream_usage/test_default_stream_identification.cu @@ -33,6 +33,7 @@ void test_cudaLaunchKernel() } catch (std::runtime_error&) { return; } + if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } throw std::runtime_error("No exception raised for kernel on default stream!"); } diff --git a/cpp/tests/interop/arrow_utils.hpp b/cpp/tests/interop/arrow_utils.hpp index 1fdf02e02f1..70a9fe64d70 100644 --- a/cpp/tests/interop/arrow_utils.hpp +++ b/cpp/tests/interop/arrow_utils.hpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#pragma once + #include #include #include @@ -30,9 +32,65 @@ #include #include +#include #include -#pragma once +// Creating arrow as per given type_id and buffer arguments +template +std::shared_ptr to_arrow_array(cudf::type_id id, Ts&&... 
args) +{ + switch (id) { + case cudf::type_id::BOOL8: + return std::make_shared(std::forward(args)...); + case cudf::type_id::INT8: return std::make_shared(std::forward(args)...); + case cudf::type_id::INT16: + return std::make_shared(std::forward(args)...); + case cudf::type_id::INT32: + return std::make_shared(std::forward(args)...); + case cudf::type_id::INT64: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT8: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT16: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT32: + return std::make_shared(std::forward(args)...); + case cudf::type_id::UINT64: + return std::make_shared(std::forward(args)...); + case cudf::type_id::FLOAT32: + return std::make_shared(std::forward(args)...); + case cudf::type_id::FLOAT64: + return std::make_shared(std::forward(args)...); + case cudf::type_id::TIMESTAMP_DAYS: + return std::make_shared(std::make_shared(), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_SECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::SECOND), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_MILLISECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::MILLI), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_MICROSECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::MICRO), + std::forward(args)...); + case cudf::type_id::TIMESTAMP_NANOSECONDS: + return std::make_shared(arrow::timestamp(arrow::TimeUnit::NANO), + std::forward(args)...); + case cudf::type_id::DURATION_SECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::SECOND), + std::forward(args)...); + case cudf::type_id::DURATION_MILLISECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::MILLI), + std::forward(args)...); + case cudf::type_id::DURATION_MICROSECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::MICRO), + std::forward(args)...); + case 
cudf::type_id::DURATION_NANOSECONDS: + return std::make_shared(arrow::duration(arrow::TimeUnit::NANO), + std::forward(args)...); + default: CUDF_FAIL("Unsupported type_id conversion to arrow"); + } +} template std::enable_if_t() and !std::is_same_v, @@ -48,7 +106,7 @@ get_arrow_array(std::vector const& data, std::vector const& mask = { std::shared_ptr mask_buffer = mask.empty() ? nullptr : arrow::internal::BytesToBits(mask).ValueOrDie(); - return cudf::detail::to_arrow_array(cudf::type_to_id(), data.size(), data_buffer, mask_buffer); + return to_arrow_array(cudf::type_to_id(), data.size(), data_buffer, mask_buffer); } template @@ -154,8 +212,9 @@ std::shared_ptr get_arrow_list_array(std::vector data, "Failed to append values to buffer builder"); CUDF_EXPECTS(buff_builder.Finish(&offset_buffer).ok(), "Failed to allocate buffer"); + auto nullable = std::accumulate(list_validity.begin(), list_validity.end(), 0) > 0; return std::make_shared( - arrow::list(data_array->type()), + arrow::list(arrow::field("", data_array->type(), nullable)), offsets.size() - 1, offset_buffer, data_array, diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 733e5814425..81c406c0faf 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -14,13 +14,6 @@ * limitations under the License. */ -// These interop functions are deprecated. We keep the code in this -// test and will migrate the tests to export the arrow C data -// interface which we consume with from_arrow_host. For now, the tests -// are commented out. 
- -#if 0 - #include #include @@ -43,6 +36,10 @@ #include +#include +#include +#include + std::unique_ptr get_cudf_table() { std::vector> columns; @@ -93,6 +90,45 @@ struct FromArrowTest : public cudf::test::BaseFixture {}; template struct FromArrowTestDurationsTest : public cudf::test::BaseFixture {}; +std::optional> export_table(std::shared_ptr arrow_table) +{ + ArrowSchema schema; + if (!arrow::ExportSchema(*arrow_table->schema(), &schema).ok()) { return std::nullopt; } + auto batch = arrow_table->CombineChunksToBatch().ValueOrDie(); + ArrowArray arr; + if (!arrow::ExportRecordBatch(*batch, &arr).ok()) { return std::nullopt; } + auto ret = cudf::from_arrow(&schema, &arr); + arr.release(&arr); + schema.release(&schema); + return {std::move(ret)}; +} + +std::optional> export_scalar(arrow::Scalar const& arrow_scalar) +{ + auto maybe_array = arrow::MakeArrayFromScalar(arrow_scalar, 1); + if (!maybe_array.ok()) { return std::nullopt; } + auto array = *maybe_array; + + ArrowSchema schema; + if (!arrow::ExportType(*array->type(), &schema).ok()) { return std::nullopt; } + + ArrowArray arr; + if (!arrow::ExportArray(*array, &arr).ok()) { return std::nullopt; } + + auto col = cudf::from_arrow_column(&schema, &arr); + auto ret = cudf::get_element(col->view(), 0); + + arr.release(&arr); + schema.release(&schema); + return {std::move(ret)}; +} + +std::optional> export_scalar( + std::shared_ptr const arrow_scalar) +{ + return export_scalar(*arrow_scalar); +} + TYPED_TEST_SUITE(FromArrowTestDurationsTest, cudf::test::DurationTypes); TEST_F(FromArrowTest, EmptyTable) @@ -102,9 +138,10 @@ TEST_F(FromArrowTest, EmptyTable) auto expected_cudf_table = tables.first->view(); auto arrow_table = tables.second; - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, 
got_cudf_table.value()->view()); } TEST_F(FromArrowTest, DateTimeTable) @@ -127,9 +164,10 @@ TEST_F(FromArrowTest, DateTimeTable) auto arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table.value()->view()); } TYPED_TEST(FromArrowTestDurationsTest, DurationTable) @@ -160,9 +198,10 @@ TYPED_TEST(FromArrowTestDurationsTest, DurationTable) auto arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table.value()->view()); } TEST_F(FromArrowTest, NestedList) @@ -188,8 +227,9 @@ TEST_F(FromArrowTest, NestedList) auto arrow_table = arrow::Table::Make(schema, {nested_list_arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table->view()); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table_view, got_cudf_table.value()->view()); } TEST_F(FromArrowTest, StructColumn) @@ -274,9 +314,10 @@ TEST_F(FromArrowTest, StructColumn) auto schema = std::make_shared(schema_vector); auto input = arrow::Table::Make(schema, {struct_array}); - auto got_cudf_table = cudf::from_arrow(*input); + auto got_cudf_table = export_table(input); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table, got_cudf_table.value()->view()); } TEST_F(FromArrowTest, 
DictionaryIndicesType) @@ -304,9 +345,10 @@ TEST_F(FromArrowTest, DictionaryIndicesType) cudf::table expected_table(std::move(columns)); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table.view(), got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table.view(), got_cudf_table.value()->view()); } TEST_F(FromArrowTest, ChunkedArray) @@ -369,9 +411,10 @@ TEST_F(FromArrowTest, ChunkedArray) auto expected_cudf_table = get_cudf_table(); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table->view(), got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table->view(), got_cudf_table.value()->view()); } struct FromArrowTestSlice @@ -388,13 +431,14 @@ TEST_P(FromArrowTestSlice, SliceTest) auto sliced_cudf_table = cudf::slice(cudf_table_view, {start, end})[0]; auto expected_cudf_table = cudf::table{sliced_cudf_table}; auto sliced_arrow_table = arrow_table->Slice(start, end - start); - auto got_cudf_table = cudf::from_arrow(*sliced_arrow_table); + auto got_cudf_table = export_table(sliced_arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); // This has been added to take-care of empty string column issue with no children - if (got_cudf_table->num_rows() == 0 and expected_cudf_table.num_rows() == 0) { - CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_cudf_table.view(), got_cudf_table->view()); + if (got_cudf_table.value()->num_rows() == 0 and expected_cudf_table.num_rows() == 0) { + CUDF_TEST_EXPECT_TABLES_EQUIVALENT(expected_cudf_table.view(), got_cudf_table.value()->view()); } else { - CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table.view(), got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_cudf_table.view(), 
got_cudf_table.value()->view()); } } @@ -417,9 +461,10 @@ TEST_F(FromArrowTest, FixedPoint128Table) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view()); } } @@ -441,9 +486,10 @@ TEST_F(FromArrowTest, FixedPoint128TableLarge) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view()); } } @@ -466,9 +512,10 @@ TEST_F(FromArrowTest, FixedPoint128TableNulls) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view()); } } @@ -493,9 +540,10 @@ TEST_F(FromArrowTest, FixedPoint128TableNullsLarge) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_cudf_table = cudf::from_arrow(*arrow_table); + auto got_cudf_table = export_table(arrow_table); + ASSERT_TRUE(got_cudf_table.has_value()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table->view()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, got_cudf_table.value()->view()); } } @@ -519,9 +567,12 @@ 
TYPED_TEST(FromArrowNumericScalarTest, Basic) { TypeParam const value{42}; auto const arrow_scalar = arrow::MakeScalar(value); - auto const cudf_scalar = cudf::from_arrow(*arrow_scalar); + + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); + auto const cudf_numeric_scalar = - dynamic_cast*>(cudf_scalar.get()); + dynamic_cast*>(cudf_scalar.value().get()); if (cudf_numeric_scalar == nullptr) { CUDF_FAIL("Attempted to test with a non-numeric type."); } EXPECT_EQ(cudf_numeric_scalar->type(), cudf::data_type(cudf::type_to_id())); EXPECT_EQ(cudf_numeric_scalar->value(), value); @@ -535,12 +586,13 @@ TEST_F(FromArrowDecimalScalarTest, Basic) auto const value{42}; auto const precision{8}; auto const scale{4}; - auto arrow_scalar = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale)); - auto cudf_scalar = cudf::from_arrow(arrow_scalar); + auto arrow_scalar = arrow::Decimal128Scalar(value, arrow::decimal128(precision, -scale)); + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); // Arrow offers a minimum of 128 bits for the Decimal type. 
auto const cudf_decimal_scalar = - dynamic_cast*>(cudf_scalar.get()); + dynamic_cast*>(cudf_scalar.value().get()); EXPECT_EQ(cudf_decimal_scalar->type(), cudf::data_type(cudf::type_to_id(), scale)); EXPECT_EQ(cudf_decimal_scalar->value(), value); @@ -552,9 +604,10 @@ TEST_F(FromArrowStringScalarTest, Basic) { auto const value = std::string("hello world"); auto const arrow_scalar = arrow::StringScalar(value); - auto const cudf_scalar = cudf::from_arrow(arrow_scalar); + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); - auto const cudf_string_scalar = dynamic_cast(cudf_scalar.get()); + auto const cudf_string_scalar = dynamic_cast(cudf_scalar.value().get()); EXPECT_EQ(cudf_string_scalar->type(), cudf::data_type(cudf::type_id::STRING)); EXPECT_EQ(cudf_string_scalar->to_string(), value); } @@ -572,9 +625,10 @@ TEST_F(FromArrowListScalarTest, Basic) auto const array = *maybe_array; auto const arrow_scalar = arrow::ListScalar(array); - auto const cudf_scalar = cudf::from_arrow(arrow_scalar); + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); - auto const cudf_list_scalar = dynamic_cast(cudf_scalar.get()); + auto const cudf_list_scalar = dynamic_cast(cudf_scalar.value().get()); EXPECT_EQ(cudf_list_scalar->type(), cudf::data_type(cudf::type_id::LIST)); cudf::test::fixed_width_column_wrapper const lhs( @@ -592,9 +646,10 @@ TEST_F(FromArrowStructScalarTest, Basic) auto const field = arrow::field("", underlying_arrow_scalar->type); auto const arrow_type = arrow::struct_({field}); auto const arrow_scalar = arrow::StructScalar({underlying_arrow_scalar}, arrow_type); - auto const cudf_scalar = cudf::from_arrow(arrow_scalar); + auto const cudf_scalar = export_scalar(arrow_scalar); + ASSERT_TRUE(cudf_scalar.has_value()); - auto const cudf_struct_scalar = dynamic_cast(cudf_scalar.get()); + auto const cudf_struct_scalar = dynamic_cast(cudf_scalar.value().get()); 
EXPECT_EQ(cudf_struct_scalar->type(), cudf::data_type(cudf::type_id::STRUCT)); cudf::test::fixed_width_column_wrapper const col({value}); @@ -602,5 +657,3 @@ TEST_F(FromArrowStructScalarTest, Basic) CUDF_TEST_EXPECT_TABLES_EQUAL(lhs, cudf_struct_scalar->view()); } - -#endif diff --git a/cpp/tests/interop/to_arrow_device_test.cpp b/cpp/tests/interop/to_arrow_device_test.cpp index 77da4039103..51216a8512c 100644 --- a/cpp/tests/interop/to_arrow_device_test.cpp +++ b/cpp/tests/interop/to_arrow_device_test.cpp @@ -710,6 +710,83 @@ TEST_F(ToArrowDeviceTest, StructColumn) template using fp_wrapper = cudf::test::fixed_point_column_wrapper; +TEST_F(ToArrowDeviceTest, FixedPoint32Table) +{ + using namespace numeric; + + for (auto const scale : {6, 4, 2, 0, -1, -3, -5}) { + auto const expect_data = + std::vector{-1000, -1, -1, -1, 2400, 0, 0, 0, -3456, -1, -1, -1, + 4650, 0, 0, 0, 5154, 0, 0, 0, 6800, 0, 0, 0}; + auto col = fp_wrapper({-1000, 2400, -3456, 4650, 5154, 6800}, scale_type{scale}); + std::vector> table_cols; + table_cols.emplace_back(col.release()); + auto input = cudf::table(std::move(table_cols)); + + nanoarrow::UniqueSchema expected_schema; + ArrowSchemaInit(expected_schema.get()); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeStruct(expected_schema.get(), 1)); + ArrowSchemaInit(expected_schema->children[0]); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetTypeDecimal(expected_schema->children[0], + NANOARROW_TYPE_DECIMAL128, + cudf::detail::max_precision(), + -scale)); + NANOARROW_THROW_NOT_OK(ArrowSchemaSetName(expected_schema->children[0], "a")); + expected_schema->children[0]->flags = 0; + + auto got_arrow_schema = + cudf::to_arrow_schema(input.view(), std::vector{{"a"}}); + compare_schemas(expected_schema.get(), got_arrow_schema.get()); + + auto result_dev_data = std::make_unique>( + expect_data.size(), cudf::get_default_stream()); + cudaMemcpy(result_dev_data->data(), + expect_data.data(), + sizeof(int32_t) * expect_data.size(), + cudaMemcpyHostToDevice); + + 
cudf::get_default_stream().synchronize(); + nanoarrow::UniqueArray expected_array; + NANOARROW_THROW_NOT_OK( + ArrowArrayInitFromSchema(expected_array.get(), expected_schema.get(), nullptr)); + expected_array->length = input.num_rows(); + + expected_array->children[0]->length = input.num_rows(); + NANOARROW_THROW_NOT_OK( + ArrowBufferSetAllocator(ArrowArrayBuffer(expected_array->children[0], 0), noop_alloc)); + ArrowArrayValidityBitmap(expected_array->children[0])->buffer.data = + const_cast(reinterpret_cast(input.view().column(0).null_mask())); + + auto data_ptr = reinterpret_cast(result_dev_data->data()); + NANOARROW_THROW_NOT_OK(ArrowBufferSetAllocator( + ArrowArrayBuffer(expected_array->children[0], 1), + ArrowBufferDeallocator( + [](ArrowBufferAllocator* alloc, uint8_t*, int64_t) { + auto buf = + reinterpret_cast>*>(alloc->private_data); + delete buf; + }, + new std::unique_ptr>(std::move(result_dev_data))))); + ArrowArrayBuffer(expected_array->children[0], 1)->data = data_ptr; + NANOARROW_THROW_NOT_OK( + ArrowArrayFinishBuilding(expected_array.get(), NANOARROW_VALIDATION_LEVEL_NONE, nullptr)); + + auto got_arrow_array = cudf::to_arrow_device(input.view()); + ASSERT_EQ(rmm::get_current_cuda_device().value(), got_arrow_array->device_id); + ASSERT_EQ(ARROW_DEVICE_CUDA, got_arrow_array->device_type); + ASSERT_CUDA_SUCCEEDED( + cudaEventSynchronize(*reinterpret_cast(got_arrow_array->sync_event))); + compare_arrays(expected_schema.get(), expected_array.get(), &got_arrow_array->array); + + got_arrow_array = cudf::to_arrow_device(std::move(input)); + ASSERT_EQ(rmm::get_current_cuda_device().value(), got_arrow_array->device_id); + ASSERT_EQ(ARROW_DEVICE_CUDA, got_arrow_array->device_type); + ASSERT_CUDA_SUCCEEDED( + cudaEventSynchronize(*reinterpret_cast(got_arrow_array->sync_event))); + compare_arrays(expected_schema.get(), expected_array.get(), &got_arrow_array->array); + } +} + TEST_F(ToArrowDeviceTest, FixedPoint64Table) { using namespace numeric; diff --git 
a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index 328ba210a3f..90ae12cdd90 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -14,13 +14,6 @@ * limitations under the License. */ -// These interop functions are deprecated. We keep the code in this -// test and will migrate the tests to export via the arrow C data -// interface with to_arrow_host which arrow can consume. For now, the -// test is commented out. - -#if 0 - #include #include @@ -38,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +39,8 @@ #include +#include + using vector_of_columns = std::vector>; std::pair, std::shared_ptr> get_tables( @@ -130,7 +126,7 @@ std::pair, std::shared_ptr> get_table auto keys = cudf::test::to_host(view.keys()).first; auto indices = cudf::test::to_host(view.indices()).first; auto dict_array = get_arrow_dict_array(std::vector(keys.begin(), keys.end()), - std::vector(indices.begin(), indices.end()), + std::vector(indices.begin(), indices.end()), validity); auto boolarray = get_arrow_array(bool_data, bool_validity); auto list_array = get_arrow_list_array( @@ -168,6 +164,21 @@ struct ToArrowTest : public cudf::test::BaseFixture {}; template struct ToArrowTestDurationsTest : public cudf::test::BaseFixture {}; +auto is_equal(cudf::table_view const& table, + cudf::host_span metadata, + std::shared_ptr expected_arrow_table) +{ + auto got_arrow_schema = cudf::to_arrow_schema(table, metadata); + auto got_arrow_table = cudf::to_arrow_host(table); + + for (auto i = 0; i < got_arrow_schema->n_children; ++i) { + auto arr = arrow::ImportArray(got_arrow_table->array.children[i], got_arrow_schema->children[i]) + .ValueOrDie(); + if (!expected_arrow_table->column(i)->Equals(arrow::ChunkedArray(arr))) { return false; } + } + return true; +} + TYPED_TEST_SUITE(ToArrowTestDurationsTest, cudf::test::DurationTypes); TEST_F(ToArrowTest, EmptyTable) @@ -179,10 +190,9 @@ TEST_F(ToArrowTest, 
EmptyTable) auto struct_meta = cudf::column_metadata{"f"}; struct_meta.children_meta = {{"integral"}, {"string"}}; - auto got_arrow_table = - cudf::to_arrow(cudf_table_view, {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}); - - ASSERT_EQ(expected_arrow_table->Equals(*got_arrow_table, true), true); + std::vector const metadata = { + {"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}; + ASSERT_TRUE(is_equal(cudf_table_view, metadata, expected_arrow_table)); } TEST_F(ToArrowTest, DateTimeTable) @@ -203,12 +213,10 @@ TEST_F(ToArrowTest, DateTimeTable) std::vector> schema_vector({arrow::field("a", arr->type())}); auto schema = std::make_shared(schema_vector); - auto expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input_view, {{"a"}}); - - ASSERT_EQ(expected_arrow_table->Equals(*got_arrow_table, true), true); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input_view, metadata, expected_arrow_table)); } TYPED_TEST(ToArrowTestDurationsTest, DurationTable) @@ -239,9 +247,8 @@ TYPED_TEST(ToArrowTestDurationsTest, DurationTable) auto expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input_view, {{"a"}}); - - ASSERT_EQ(expected_arrow_table->Equals(*got_arrow_table, true), true); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input_view, metadata, expected_arrow_table)); } TEST_F(ToArrowTest, NestedList) @@ -255,20 +262,20 @@ TEST_F(ToArrowTest, NestedList) auto list_arr = get_arrow_list_array({6, 7, 8, 9}, {0, 1, 4}, {1, 0, 1, 1}); std::vector offset{0, 0, 2}; auto mask_buffer = arrow::internal::BytesToBits({0, 1}).ValueOrDie(); - auto nested_list_arr = std::make_shared(arrow::list(list(arrow::int64())), - offset.size() - 1, - arrow::Buffer::Wrap(offset), - list_arr, - mask_buffer); + auto nested_list_arr = std::make_shared( + arrow::list(arrow::field("a", arrow::list(arrow::int64()), false)), + offset.size() - 1, + arrow::Buffer::Wrap(offset), + 
list_arr, + mask_buffer); std::vector> schema_vector( {arrow::field("a", nested_list_arr->type())}); auto schema = std::make_shared(schema_vector); - auto expected_arrow_table = arrow::Table::Make(schema, {nested_list_arr}); - auto got_arrow_table = cudf::to_arrow(input_view, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + auto expected_arrow_table = arrow::Table::Make(schema, {nested_list_arr}); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input_view, metadata, expected_arrow_table)); } TEST_F(ToArrowTest, StructColumn) @@ -324,7 +331,10 @@ TEST_F(ToArrowTest, StructColumn) auto list_arr = get_arrow_list_array({1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 2, 4, 5, 6, 7, 9}); std::vector offset{0, 3, 4, 6}; auto nested_list_arr = std::make_shared( - arrow::list(list(arrow::int64())), offset.size() - 1, arrow::Buffer::Wrap(offset), list_arr); + arrow::list(arrow::field("a", arrow::list(arrow::field("a", arrow::int64(), false)), false)), + offset.size() - 1, + arrow::Buffer::Wrap(offset), + list_arr); std::vector> child_arrays2({str2_array, int2_array}); auto fields2 = std::vector>{ @@ -356,9 +366,8 @@ TEST_F(ToArrowTest, StructColumn) auto expected_arrow_table = arrow::Table::Make(schema, {struct_array}); - auto got_arrow_table = cudf::to_arrow(input_view, {metadata}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const meta = {metadata}; + ASSERT_TRUE(is_equal(input_view, meta, expected_arrow_table)); } template @@ -380,9 +389,8 @@ TEST_F(ToArrowTest, FixedPoint64Table) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -402,9 +410,8 @@ TEST_F(ToArrowTest, FixedPoint128Table) auto 
const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -431,9 +438,8 @@ TEST_F(ToArrowTest, FixedPoint64TableLarge) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -455,9 +461,8 @@ TEST_F(ToArrowTest, FixedPoint128TableLarge) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -479,9 +484,8 @@ TEST_F(ToArrowTest, FixedPoint64TableNullsSimple) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, arrow_table)); } } @@ -503,9 +507,8 @@ TEST_F(ToArrowTest, FixedPoint128TableNullsSimple) auto const schema = std::make_shared(schema_vector); auto const arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, arrow_table)); } 
} @@ -529,9 +532,8 @@ TEST_F(ToArrowTest, FixedPoint64TableNulls) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -554,9 +556,8 @@ TEST_F(ToArrowTest, FixedPoint128TableNulls) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - auto const got_arrow_table = cudf::to_arrow(input, {{"a"}}); - - ASSERT_TRUE(expected_arrow_table->Equals(*got_arrow_table, true)); + std::vector const metadata = {{"a"}}; + ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } @@ -575,10 +576,10 @@ TEST_P(ToArrowTestSlice, SliceTest) auto expected_arrow_table = arrow_table->Slice(start, end - start); auto struct_meta = cudf::column_metadata{"f"}; struct_meta.children_meta = {{"integral"}, {"string"}}; - auto got_arrow_table = - cudf::to_arrow(sliced_cudf_table, {{"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}); - ASSERT_EQ(expected_arrow_table->Equals(*got_arrow_table, true), true); + std::vector const metadata = { + {"a"}, {"b"}, {"c"}, {"d"}, {"e"}, struct_meta}; + ASSERT_TRUE(is_equal(sliced_cudf_table, metadata, expected_arrow_table)); } INSTANTIATE_TEST_CASE_P(ToArrowTest, @@ -595,13 +596,58 @@ using NumericTypesNotBool = cudf::test::Concat; TYPED_TEST_SUITE(ToArrowNumericScalarTest, NumericTypesNotBool); +auto col_to_arrow_type(cudf::column_view const& col) +{ + switch (col.type().id()) { + case cudf::type_id::BOOL8: return arrow::boolean(); + case cudf::type_id::INT8: return arrow::int8(); + case cudf::type_id::INT16: return arrow::int16(); + case cudf::type_id::INT32: return arrow::int32(); + case cudf::type_id::INT64: return arrow::int64(); + case cudf::type_id::UINT8: return arrow::uint8(); 
+ case cudf::type_id::UINT16: return arrow::uint16(); + case cudf::type_id::UINT32: return arrow::uint32(); + case cudf::type_id::UINT64: return arrow::uint64(); + case cudf::type_id::FLOAT32: return arrow::float32(); + case cudf::type_id::FLOAT64: return arrow::float64(); + case cudf::type_id::TIMESTAMP_DAYS: return arrow::date32(); + case cudf::type_id::STRING: return arrow::utf8(); + case cudf::type_id::LIST: + return arrow::list(col_to_arrow_type(col.child(cudf::lists_column_view::child_column_index))); + case cudf::type_id::DECIMAL128: return arrow::decimal(38, -col.type().scale()); + default: CUDF_FAIL("Unsupported type_id conversion to arrow type", cudf::data_type_error); + } +} + +std::optional> cudf_scalar_to_arrow( + cudf::scalar const& scalar, std::optional metadata = std::nullopt) +{ + auto const cudf_column = cudf::make_column_from_scalar(scalar, 1); + auto const c_arrow_array = cudf::to_arrow_host(*cudf_column); + auto const arrow_array = [&]() { + if (metadata.has_value()) { + auto const table = cudf::table_view({cudf_column->view()}); + std::vector const table_metadata = {metadata.value()}; + auto const arrow_schema = cudf::to_arrow_schema(table, table_metadata); + return arrow::ImportArray(&c_arrow_array->array, arrow_schema->children[0]).ValueOrDie(); + } else { + auto const arrow_type = col_to_arrow_type(cudf_column->view()); + return arrow::ImportArray(&c_arrow_array->array, arrow_type).ValueOrDie(); + } + }(); + auto const maybe_scalar = arrow_array->GetScalar(0); + if (!maybe_scalar.ok()) { return std::nullopt; } + return maybe_scalar.ValueOrDie(); +} + TYPED_TEST(ToArrowNumericScalarTest, Basic) { TypeParam const value{42}; auto const cudf_scalar = cudf::make_fixed_width_scalar(value); - cudf::column_metadata const metadata{""}; - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; 
auto const ref_arrow_scalar = arrow::MakeScalar(value); EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar)); @@ -621,8 +667,9 @@ TEST_F(ToArrowDecimalScalarTest, Basic) auto const cudf_scalar = cudf::make_fixed_point_scalar(value, numeric::scale_type{scale}); - cudf::column_metadata const metadata{""}; - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; auto const maybe_ref_arrow_scalar = arrow::MakeScalar(arrow::decimal128(precision, -scale), value); @@ -636,9 +683,10 @@ struct ToArrowStringScalarTest : public cudf::test::BaseFixture {}; TEST_F(ToArrowStringScalarTest, Basic) { std::string const value{"hello world"}; - auto const cudf_scalar = cudf::make_string_scalar(value); - cudf::column_metadata const metadata{""}; - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + auto const cudf_scalar = cudf::make_string_scalar(value); + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; auto const ref_arrow_scalar = arrow::MakeScalar(value); EXPECT_TRUE(arrow_scalar->Equals(*ref_arrow_scalar)); @@ -656,8 +704,9 @@ TEST_F(ToArrowListScalarTest, Basic) auto const cudf_scalar = cudf::make_list_scalar(col); - cudf::column_metadata const metadata{""}; - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + auto const maybe_scalar = cudf_scalar_to_arrow(*cudf_scalar); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; arrow::Int64Builder builder; auto const status = builder.AppendValues(host_values, host_validity); @@ -682,7 +731,10 @@ TEST_F(ToArrowStructScalarTest, Basic) cudf::column_metadata metadata{""}; metadata.children_meta.emplace_back(field_name); - auto const arrow_scalar = cudf::to_arrow(*cudf_scalar, metadata); + + auto const maybe_scalar = 
cudf_scalar_to_arrow(*cudf_scalar, metadata); + ASSERT_TRUE(maybe_scalar.has_value()); + auto const arrow_scalar = *maybe_scalar; auto const underlying_arrow_scalar = arrow::MakeScalar(value); auto const field = arrow::field(field_name, underlying_arrow_scalar->type, false); @@ -693,5 +745,3 @@ TEST_F(ToArrowStructScalarTest, Basic) } CUDF_TEST_PROGRAM_MAIN() - -#endif diff --git a/cpp/tests/io/arrow_io_source_test.cpp b/cpp/tests/io/arrow_io_source_test.cpp deleted file mode 100644 index ffdf2c7e00f..00000000000 --- a/cpp/tests/io/arrow_io_source_test.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -// Global environment for temporary files -auto const temp_env = static_cast( - ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment)); - -// Base test fixture for tests -struct ArrowIOTest : public cudf::test::BaseFixture {}; - -TEST_F(ArrowIOTest, URIFileSystem) -{ - const std::string file_name = temp_env->get_temp_dir() + "JsonLinesFileTest.json"; - std::ofstream outfile(file_name, std::ofstream::out); - outfile << "{\"a\":11, \"b\":1.1}\n{\"a\":22, \"b\":2.2}"; - outfile.close(); - - std::string file_uri = "file://" + file_name; - auto datasource = std::make_unique(file_uri); - - // Populate the JSON Reader Options - cudf::io::json_reader_options options = - cudf::io::json_reader_options::builder(cudf::io::source_info(datasource.get())).lines(true); - - // Read the JSON file from the LocalFileSystem - cudf::io::table_with_metadata tbl = cudf::io::read_json(options); - - ASSERT_EQ(2, tbl.tbl->num_columns()); - ASSERT_EQ(2, tbl.tbl->num_rows()); -} - -TEST_F(ArrowIOTest, S3FileSystem) -{ - std::string s3_uri = "s3://rapidsai-data/cudf/test/tips.parquet?region=us-east-2"; - - // Check to see if Arrow was built with support for S3. If not, ensure this - // test throws. If so, validate the S3 file contents. 
- auto const s3_unsupported = arrow::fs::FileSystemFromUri(s3_uri).status().IsNotImplemented(); - if (s3_unsupported) { - EXPECT_THROW(std::make_unique(s3_uri), cudf::logic_error); - } else { - auto datasource = std::make_unique(s3_uri); - - // Populate the Parquet Reader Options - cudf::io::source_info src(datasource.get()); - std::vector single_column; - single_column.insert(single_column.begin(), "total_bill"); - cudf::io::parquet_reader_options_builder builder(src); - cudf::io::parquet_reader_options options = builder.columns(single_column).build(); - - // Read the Parquet file from S3 - cudf::io::table_with_metadata tbl = cudf::io::read_parquet(options); - - ASSERT_EQ(1, tbl.tbl->num_columns()); // Only single column specified in reader_options - ASSERT_EQ(244, tbl.tbl->num_rows()); // known number of rows from the S3 file - } - -#ifdef ARROW_S3 - if (!s3_unsupported) { - // Verify that we are using Arrow with S3, and call finalize - // https://github.com/apache/arrow/issues/36974 - // This needs to be in a separate conditional to ensure we call - // finalize after all arrow_io_source instances have been deleted. 
- [[maybe_unused]] auto _ = arrow::fs::EnsureS3Finalized(); - } -#endif -} - -CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp index 38c1a57eca9..840cf263ed9 100644 --- a/cpp/tests/io/comp/decomp_test.cpp +++ b/cpp/tests/io/comp/decomp_test.cpp @@ -176,23 +176,19 @@ TEST_F(NvcompConfigTest, Compression) using cudf::io::nvcomp::compression_type; auto const& comp_disabled = cudf::io::nvcomp::is_compression_disabled; - EXPECT_FALSE(comp_disabled(compression_type::DEFLATE, {2, 5, 0, true, true, 0})); - // version 2.5 required - EXPECT_TRUE(comp_disabled(compression_type::DEFLATE, {2, 4, 0, true, true, 0})); + EXPECT_FALSE(comp_disabled(compression_type::DEFLATE, {true, true})); // all integrations enabled required - EXPECT_TRUE(comp_disabled(compression_type::DEFLATE, {2, 5, 0, false, true, 0})); + EXPECT_TRUE(comp_disabled(compression_type::DEFLATE, {false, true})); - EXPECT_FALSE(comp_disabled(compression_type::ZSTD, {2, 4, 0, true, true, 0})); - EXPECT_FALSE(comp_disabled(compression_type::ZSTD, {2, 4, 0, false, true, 0})); - // 2.4 version required - EXPECT_TRUE(comp_disabled(compression_type::ZSTD, {2, 3, 1, false, true, 0})); + EXPECT_FALSE(comp_disabled(compression_type::ZSTD, {true, true})); + EXPECT_FALSE(comp_disabled(compression_type::ZSTD, {false, true})); // stable integrations enabled required - EXPECT_TRUE(comp_disabled(compression_type::ZSTD, {2, 4, 0, false, false, 0})); + EXPECT_TRUE(comp_disabled(compression_type::ZSTD, {false, false})); - EXPECT_FALSE(comp_disabled(compression_type::SNAPPY, {2, 5, 0, true, true, 0})); - EXPECT_FALSE(comp_disabled(compression_type::SNAPPY, {2, 4, 0, false, true, 0})); + EXPECT_FALSE(comp_disabled(compression_type::SNAPPY, {true, true})); + EXPECT_FALSE(comp_disabled(compression_type::SNAPPY, {false, true})); // stable integrations enabled required - EXPECT_TRUE(comp_disabled(compression_type::SNAPPY, {2, 3, 0, false, false, 0})); + 
EXPECT_TRUE(comp_disabled(compression_type::SNAPPY, {false, false})); } TEST_F(NvcompConfigTest, Decompression) @@ -200,27 +196,19 @@ TEST_F(NvcompConfigTest, Decompression) using cudf::io::nvcomp::compression_type; auto const& decomp_disabled = cudf::io::nvcomp::is_decompression_disabled; - EXPECT_FALSE(decomp_disabled(compression_type::DEFLATE, {2, 5, 0, true, true, 7})); - // version 2.5 required - EXPECT_TRUE(decomp_disabled(compression_type::DEFLATE, {2, 4, 0, true, true, 7})); + EXPECT_FALSE(decomp_disabled(compression_type::DEFLATE, {true, true})); // all integrations enabled required - EXPECT_TRUE(decomp_disabled(compression_type::DEFLATE, {2, 5, 0, false, true, 7})); - - EXPECT_FALSE(decomp_disabled(compression_type::ZSTD, {2, 4, 0, true, true, 7})); - EXPECT_FALSE(decomp_disabled(compression_type::ZSTD, {2, 3, 2, false, true, 6})); - EXPECT_FALSE(decomp_disabled(compression_type::ZSTD, {2, 3, 0, true, true, 6})); - // 2.3.1 and earlier requires all integrations to be enabled - EXPECT_TRUE(decomp_disabled(compression_type::ZSTD, {2, 3, 1, false, true, 7})); - // 2.3 version required - EXPECT_TRUE(decomp_disabled(compression_type::ZSTD, {2, 2, 0, true, true, 7})); + EXPECT_TRUE(decomp_disabled(compression_type::DEFLATE, {false, true})); + + EXPECT_FALSE(decomp_disabled(compression_type::ZSTD, {true, true})); + EXPECT_FALSE(decomp_disabled(compression_type::ZSTD, {false, true})); // stable integrations enabled required - EXPECT_TRUE(decomp_disabled(compression_type::ZSTD, {2, 4, 0, false, false, 7})); + EXPECT_TRUE(decomp_disabled(compression_type::ZSTD, {false, false})); - EXPECT_FALSE(decomp_disabled(compression_type::SNAPPY, {2, 4, 0, true, true, 7})); - EXPECT_FALSE(decomp_disabled(compression_type::SNAPPY, {2, 3, 0, false, true, 7})); - EXPECT_FALSE(decomp_disabled(compression_type::SNAPPY, {2, 2, 0, false, true, 7})); + EXPECT_FALSE(decomp_disabled(compression_type::SNAPPY, {true, true})); + EXPECT_FALSE(decomp_disabled(compression_type::SNAPPY, 
{false, true})); // stable integrations enabled required - EXPECT_TRUE(decomp_disabled(compression_type::SNAPPY, {2, 2, 0, false, false, 7})); + EXPECT_TRUE(decomp_disabled(compression_type::SNAPPY, {false, false})); } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index ff433264446..dc14824d834 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -25,8 +25,8 @@ #include #include -#include #include +#include #include #include #include @@ -1197,30 +1197,6 @@ TEST_F(CsvReaderTest, HeaderOnlyFile) EXPECT_EQ(3, view.num_columns()); } -TEST_F(CsvReaderTest, ArrowFileSource) -{ - auto filepath = temp_env->get_temp_dir() + "ArrowFileSource.csv"; - { - std::ofstream outfile(filepath, std::ofstream::out); - outfile << "A\n9\n8\n7\n6\n5\n4\n3\n2\n"; - } - - std::shared_ptr infile; - ASSERT_TRUE(arrow::io::ReadableFile::Open(filepath).Value(&infile).ok()); - - auto arrow_source = cudf::io::arrow_io_source{infile}; - cudf::io::csv_reader_options in_opts = - cudf::io::csv_reader_options::builder(cudf::io::source_info{&arrow_source}) - .dtypes({dtype()}); - auto result = cudf::io::read_csv(in_opts); - - auto const view = result.tbl->view(); - EXPECT_EQ(1, view.num_columns()); - ASSERT_EQ(type_id::INT8, view.column(0).type().id()); - - expect_column_data_equal(std::vector{9, 8, 7, 6, 5, 4, 3, 2}, view.column(0)); -} - TEST_F(CsvReaderTest, InvalidFloatingPoint) { auto const filepath = temp_env->get_temp_dir() + "InvalidFloatingPoint.csv"; diff --git a/cpp/tests/io/json/json_chunked_reader.cu b/cpp/tests/io/json/json_chunked_reader.cu index b9dee54752c..c9ee6542a4d 100644 --- a/cpp/tests/io/json/json_chunked_reader.cu +++ b/cpp/tests/io/json/json_chunked_reader.cu @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include @@ -63,7 +63,7 @@ TEST_F(JsonReaderTest, ByteRange_SingleSource) json_lines_options, chunk_size, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + 
cudf::get_current_device_resource_ref()); auto table_views = std::vector(tables.size()); std::transform(tables.begin(), tables.end(), table_views.begin(), [](auto& table) { @@ -158,7 +158,7 @@ TEST_F(JsonReaderTest, ByteRange_MultiSource) json_lines_options, chunk_size, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto table_views = std::vector(tables.size()); std::transform(tables.begin(), tables.end(), table_views.begin(), [](auto& table) { diff --git a/cpp/tests/io/json/json_quote_normalization_test.cpp b/cpp/tests/io/json/json_quote_normalization_test.cpp index 55ad0afe499..d23acf3ae00 100644 --- a/cpp/tests/io/json/json_quote_normalization_test.cpp +++ b/cpp/tests/io/json/json_quote_normalization_test.cpp @@ -25,8 +25,9 @@ #include #include #include +#include -#include +#include #include #include @@ -42,12 +43,11 @@ void run_test(std::string const& host_input, std::string const& expected_host_ou std::make_shared(); auto stream_view = cudf::test::get_default_stream(); - auto device_input = cudf::detail::make_device_uvector_async( - host_input, stream_view, rmm::mr::get_current_device_resource()); + auto device_input = rmm::device_buffer( + host_input.c_str(), host_input.size(), stream_view, cudf::get_current_device_resource_ref()); // Preprocessing FST - cudf::io::datasource::owning_buffer> device_data( - std::move(device_input)); + cudf::io::datasource::owning_buffer device_data(std::move(device_input)); cudf::io::json::detail::normalize_single_quotes(device_data, stream_view, rsc.get()); std::string preprocessed_host_output(device_data.size(), 0); diff --git a/cpp/tests/io/json/json_test.cpp b/cpp/tests/io/json/json_test.cpp index 993ab82f423..a094ac7d772 100644 --- a/cpp/tests/io/json/json_test.cpp +++ b/cpp/tests/io/json/json_test.cpp @@ -26,7 +26,6 @@ #include #include -#include #include #include #include @@ -681,6 +680,53 @@ TEST_F(JsonReaderTest, JsonLinesByteRange) 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), int64_wrapper{{3000, 4000, 5000}}); } +TEST_F(JsonReaderTest, JsonLinesByteRangeWithRealloc) +{ + std::string long_string = "haha"; + std::size_t log_repetitions = 12; + long_string.reserve(long_string.size() * (1UL << log_repetitions)); + for (std::size_t i = 0; i < log_repetitions; i++) { + long_string += long_string; + } + + auto json_string = [&long_string]() { + std::string json_string = R"( + { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 } + { "a": { "y" : 6}, "b" : [4, 5 ], "c": 12 } + { "a": { "y" : 6}, "b" : [6 ], "c": 13 } + { "a": { "y" : 6}, "b" : [7 ], "c": 14 })"; + std::string replace_chars = "c"; + std::size_t pos = json_string.find(replace_chars); + while (pos != std::string::npos) { + // Replace the substring with the specified string + json_string.replace(pos, replace_chars.size(), long_string); + + // Find the next occurrence of the substring + pos = json_string.find(replace_chars, pos + long_string.size()); + } + return json_string; + }(); + + // Initialize parsing options (reading json lines). 
Set byte range offset and size so as to read + // the second row of input + cudf::io::json_reader_options json_lines_options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{cudf::host_span( + reinterpret_cast(json_string.data()), json_string.size())}) + .lines(true) + .compression(cudf::io::compression_type::NONE) + .recovery_mode(cudf::io::json_recovery_mode_t::FAIL) + .byte_range_offset(16430) + .byte_range_size(30); + + // Read full test data via existing, nested JSON lines reader + cudf::io::table_with_metadata result = cudf::io::read_json(json_lines_options); + + EXPECT_EQ(result.tbl->num_columns(), 3); + EXPECT_EQ(result.tbl->num_rows(), 1); + EXPECT_EQ(result.metadata.schema_info[2].name, long_string); +} + TEST_F(JsonReaderTest, JsonLinesMultipleFilesByteRange_AcrossFiles) { const std::string file1 = temp_env->get_temp_dir() + "JsonLinesMultipleFilesByteRangeTest1.json"; @@ -958,31 +1004,6 @@ TEST_F(JsonReaderTest, NoDataFileValues) EXPECT_EQ(0, view.num_columns()); } -TEST_F(JsonReaderTest, ArrowFileSource) -{ - const std::string fname = temp_env->get_temp_dir() + "ArrowFileSource.csv"; - - std::ofstream outfile(fname, std::ofstream::out); - outfile << "[9]\n[8]\n[7]\n[6]\n[5]\n[4]\n[3]\n[2]\n"; - outfile.close(); - - std::shared_ptr infile; - ASSERT_TRUE(arrow::io::ReadableFile::Open(fname).Value(&infile).ok()); - - auto arrow_source = cudf::io::arrow_io_source{infile}; - cudf::io::json_reader_options in_options = - cudf::io::json_reader_options::builder(cudf::io::source_info{&arrow_source}) - .dtypes({dtype()}) - .lines(true); - - cudf::io::table_with_metadata result = cudf::io::read_json(in_options); - - EXPECT_EQ(result.tbl->num_columns(), 1); - EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT8); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), int8_wrapper{{9, 8, 7, 6, 5, 4, 3, 2}}); -} - TEST_P(JsonReaderParamTest, InvalidFloatingPoint) { auto const test_opt = GetParam(); @@ -2159,6 +2180,86 @@ 
TEST_F(JsonReaderTest, JSONLinesRecoveringSync) cudf::set_pinned_memory_resource(last_mr); } +// Validation +TEST_F(JsonReaderTest, ValueValidation) +{ + // parsing error as null rows + std::string data = + // 0 -> a: -2 (valid) + R"({"a":-2 }{})" + "\n" + // 1 -> (invalid) + R"({"b":{}should_be_invalid})" + "\n" + // 2 -> b (valid) + R"({"b":{"a":3} })" + "\n" + // 3 -> c: (valid/null based on option) + R"({"a": 1, "c":nan, "d": "null" } )" + "\n" + "\n" + // 4 -> (valid/null based on option) + R"({"a":04, "c": 1.23, "d": "abc"} 123)" + "\n" + // 5 -> (valid) + R"({"a":5}//Comment after record)" + "\n" + // 6 -> ((valid/null based on option) + R"({"a":06} //Comment after whitespace)" + "\n" + // 7 -> (invalid) + R"({"a":5 //Invalid Comment within record})"; + + // leadingZeros allowed + // na_values, + { + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) + .lines(true) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .strict_validation(true); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); + + EXPECT_EQ(result.tbl->num_columns(), 4); + EXPECT_EQ(result.tbl->num_rows(), 8); + auto b_a_col = int64_wrapper({0, 0, 3, 0, 0, 0, 0, 0}); + auto a_column = int64_wrapper{{-2, 0, 0, 0, 4, 5, 6, 0}, + {true, false, false, false, true, true, true, false}}; + auto b_column = cudf::test::structs_column_wrapper( + {b_a_col}, {false, false, true, false, false, false, false, false}); + auto c_column = float64_wrapper({0.0, 0.0, 0.0, 0.0, 1.23, 0.0, 0.0, 0.0}, + {false, false, false, false, true, false, false, false}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), a_column); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1), b_column); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(2), c_column); + } + // leadingZeros not allowed, NaN allowed + { + cudf::io::json_reader_options in_options = + 
cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) + .lines(true) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .strict_validation(true) + .numeric_leading_zeros(false) + .na_values({"nan"}); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); + + EXPECT_EQ(result.tbl->num_columns(), 4); + EXPECT_EQ(result.tbl->num_rows(), 8); + EXPECT_EQ(result.tbl->get_column(2).type().id(), cudf::type_id::INT8); // empty column + auto b_a_col = int64_wrapper({0, 0, 3, 0, 0, 0, 0, 0}); + auto a_column = int64_wrapper{{-2, 0, 0, 1, 4, 5, 6, 0}, + {true, false, false, true, false, true, false, false}}; + auto b_column = cudf::test::structs_column_wrapper( + {b_a_col}, {false, false, true, false, false, false, false, false}); + auto c_column = int8_wrapper({0, 0, 0, 0, 0, 0, 0, 0}, + {false, false, false, false, false, false, false, false}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), a_column); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(1), b_column); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(2), c_column); + } +} + TEST_F(JsonReaderTest, MixedTypes) { using LCWS = cudf::test::lists_column_wrapper; @@ -2351,7 +2452,7 @@ TEST_F(JsonReaderTest, MapTypes) // Testing function for mixed types in JSON (for spark json reader) auto test_fn = [](std::string_view json_string, bool lines, std::vector types) { std::map dtype_schema{ - {"foo1", {data_type{type_id::STRING}}}, // list won't be a string + {"foo1", {data_type{type_id::STRING}}}, // list forced as a string {"foo2", {data_type{type_id::STRING}}}, // struct forced as a string {"1", {data_type{type_id::STRING}}}, {"2", {data_type{type_id::STRING}}}, @@ -2378,17 +2479,17 @@ TEST_F(JsonReaderTest, MapTypes) test_fn(R"([{ "foo1": [1,2,3], "bar": 123 }, { "foo2": { "a": 1 }, "bar": 456 }])", false, - {type_id::LIST, type_id::INT32, type_id::STRING}); + {type_id::STRING, type_id::INT32, type_id::STRING}); // jsonl 
test_fn(R"( { "foo1": [1,2,3], "bar": 123 } { "foo2": { "a": 1 }, "bar": 456 })", true, - {type_id::LIST, type_id::INT32, type_id::STRING}); + {type_id::STRING, type_id::INT32, type_id::STRING}); // jsonl-array test_fn(R"([123, [1,2,3]] [456, null, { "a": 1 }])", true, - {type_id::INT64, type_id::LIST, type_id::STRING}); + {type_id::INT64, type_id::STRING, type_id::STRING}); // json-array test_fn(R"([[[1,2,3], null, 123], [null, { "a": 1 }, 456 ]])", @@ -2474,6 +2575,30 @@ TEST_F(JsonReaderTest, ViableDelimiter) EXPECT_THROW(json_parser_options.set_delimiter('\t'), std::invalid_argument); } +TEST_F(JsonReaderTest, ViableDelimiterNewlineWS) +{ + // Test input + std::string input = R"({"a": + 100})"; + + cudf::io::json_reader_options json_parser_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{input.c_str(), input.size()}) + .lines(true) + .delimiter('\0'); + + auto result = cudf::io::read_json(json_parser_options); + EXPECT_EQ(result.tbl->num_columns(), 1); + EXPECT_EQ(result.tbl->num_rows(), 1); + + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT64); + + EXPECT_EQ(result.metadata.schema_info[0].name, "a"); + + auto col1_iterator = thrust::constant_iterator(100); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), + int64_wrapper(col1_iterator, col1_iterator + 1)); +} + // Test case for dtype prune: // all paths, only one. // one present, another not present, nothing present @@ -2678,38 +2803,177 @@ TEST_F(JsonReaderTest, JsonNestedDtypeFilter) TEST_F(JsonReaderTest, JSONMixedTypeChildren) { - std::string const json_str = R"( -{ "Root": { "Key": [ { "EE": "A" } ] } } -{ "Root": { "Key": { } } } -{ "Root": { "Key": [{ "YY": 1}] } } -)"; - // Column "EE" is created and destroyed - // Column "YY" should not be created + // struct mixed. 
+ { + std::string const json_str = R"( + { "Root": { "Key": [ { "EE": "A" } ] } } + { "Root": { "Key": { } } } + { "Root": { "Key": [{ "YY": 1}] } } + )"; + // Column "EE" is created and destroyed + // Column "YY" should not be created + + cudf::io::json_reader_options options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{json_str.c_str(), json_str.size()}) + .lines(true) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .normalize_single_quotes(true) + .normalize_whitespace(false) + .mixed_types_as_string(true) + .keep_quotes(true); + + auto result = cudf::io::read_json(options); + + ASSERT_EQ(result.tbl->num_columns(), 1); + ASSERT_EQ(result.metadata.schema_info.size(), 1); + EXPECT_EQ(result.metadata.schema_info[0].name, "Root"); + ASSERT_EQ(result.metadata.schema_info[0].children.size(), 1); + EXPECT_EQ(result.metadata.schema_info[0].children[0].name, "Key"); + ASSERT_EQ(result.metadata.schema_info[0].children[0].children.size(), 2); + EXPECT_EQ(result.metadata.schema_info[0].children[0].children[0].name, "offsets"); + // types + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRUCT); + EXPECT_EQ(result.tbl->get_column(0).child(0).type().id(), cudf::type_id::STRING); + cudf::test::strings_column_wrapper expected( + {R"([ { "EE": "A" } ])", "{ }", R"([{ "YY": 1}])"}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result.tbl->get_column(0).child(0)); + } - cudf::io::json_reader_options options = - cudf::io::json_reader_options::builder(cudf::io::source_info{json_str.c_str(), json_str.size()}) - .lines(true) + // list mixed. 
+ { + std::string const json_str = R"( + { "Root": { "Key": [ { "EE": "A" } ] } } + { "Root": { "Key": "abc" } } + { "Root": { "Key": [{ "YY": 1}] } } + )"; + // Column "EE" is created and destroyed + // Column "YY" should not be created + + cudf::io::json_reader_options options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{json_str.c_str(), json_str.size()}) + .lines(true) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .normalize_single_quotes(true) + .normalize_whitespace(false) + .mixed_types_as_string(true) + .keep_quotes(true); + + auto result = cudf::io::read_json(options); + + ASSERT_EQ(result.tbl->num_columns(), 1); + ASSERT_EQ(result.metadata.schema_info.size(), 1); + EXPECT_EQ(result.metadata.schema_info[0].name, "Root"); + ASSERT_EQ(result.metadata.schema_info[0].children.size(), 1); + EXPECT_EQ(result.metadata.schema_info[0].children[0].name, "Key"); + ASSERT_EQ(result.metadata.schema_info[0].children[0].children.size(), 2); + EXPECT_EQ(result.metadata.schema_info[0].children[0].children[0].name, "offsets"); + // types + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRUCT); + EXPECT_EQ(result.tbl->get_column(0).child(0).type().id(), cudf::type_id::STRING); + cudf::test::strings_column_wrapper expected( + {R"([ { "EE": "A" } ])", "\"abc\"", R"([{ "YY": 1}])"}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result.tbl->get_column(0).child(0)); + } +} + +TEST_F(JsonReaderTest, MixedTypesWithSchema) +{ + std::string data = "{\"data\": {\"A\": 0, \"B\": 1}}\n{\"data\": [1,0]}\n"; + + std::map data_types; + std::map child_types; + child_types.insert( + std::pair{"element", cudf::io::schema_element{cudf::data_type{cudf::type_id::STRING, 0}, {}}}); + data_types.insert(std::pair{ + "data", cudf::io::schema_element{cudf::data_type{cudf::type_id::LIST, 0}, child_types}}); + + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), 
data.size()}) + .dtypes(data_types) .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) .normalize_single_quotes(true) - .normalize_whitespace(false) + .normalize_whitespace(true) .mixed_types_as_string(true) - .keep_quotes(true); + .experimental(true) + .keep_quotes(true) + .lines(true); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); + EXPECT_EQ(result.tbl->num_columns(), 1); + EXPECT_EQ(result.tbl->num_rows(), 2); + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::LIST); + EXPECT_EQ(result.tbl->get_column(0).child(1).type().id(), cudf::type_id::STRING); +} - auto result = cudf::io::read_json(options); +TEST_F(JsonReaderTest, UnicodeFieldname) +{ + // unicode at nested and leaf levels + std::string data = R"({"data": {"a": 0, "b c": 1}} + {"data": {"\u0061": 2, "\u0062\tc": 3}} + {"d\u0061ta": {"a": 4}})"; - ASSERT_EQ(result.tbl->num_columns(), 1); - ASSERT_EQ(result.metadata.schema_info.size(), 1); - EXPECT_EQ(result.metadata.schema_info[0].name, "Root"); - ASSERT_EQ(result.metadata.schema_info[0].children.size(), 1); - EXPECT_EQ(result.metadata.schema_info[0].children[0].name, "Key"); - ASSERT_EQ(result.metadata.schema_info[0].children[0].children.size(), 2); - EXPECT_EQ(result.metadata.schema_info[0].children[0].children[0].name, "offsets"); - // types + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .experimental(true) + .lines(true); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); + EXPECT_EQ(result.tbl->num_columns(), 1); + EXPECT_EQ(result.tbl->num_rows(), 3); EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRUCT); - EXPECT_EQ(result.tbl->get_column(0).child(0).type().id(), cudf::type_id::STRING); - cudf::test::strings_column_wrapper expected({R"([ { "EE": "A" } ])", "{ }", R"([{ "YY": 1}])"}); + 
EXPECT_EQ(result.tbl->get_column(0).num_children(), 2); + EXPECT_EQ(result.tbl->get_column(0).child(0).type().id(), cudf::type_id::INT64); + EXPECT_EQ(result.tbl->get_column(0).child(1).type().id(), cudf::type_id::INT64); + EXPECT_EQ(result.metadata.schema_info.at(0).name, "data"); + EXPECT_EQ(result.metadata.schema_info.at(0).children.at(0).name, "a"); + EXPECT_EQ(result.metadata.schema_info.at(0).children.at(1).name, "b\tc"); + EXPECT_EQ(result.metadata.schema_info.at(0).children.size(), 2); +} + +TEST_F(JsonReaderTest, JsonDtypeSchema) +{ + std::string data = R"( + {"a": 1, "b": {"0": "abc", "1": ["a", "b"]}, "c": true} + {"a": 1, "b": {"0": "abc" }, "c": false} + {"a": 1, "b": {"0": "lolol "}, "c": true} + )"; + + std::map dtype_schema{{"c", {data_type{type_id::STRING}}}, + {"b", {data_type{type_id::STRING}}}, + {"a", {dtype()}}}; + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) + .dtypes(dtype_schema) + .prune_columns(true) + .lines(true); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result.tbl->get_column(0).child(0)); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); + + EXPECT_EQ(result.tbl->num_columns(), 3); + EXPECT_EQ(result.tbl->num_rows(), 3); + + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::FLOAT64); + EXPECT_EQ(result.tbl->get_column(1).type().id(), cudf::type_id::STRING); + EXPECT_EQ(result.tbl->get_column(2).type().id(), cudf::type_id::STRING); + + EXPECT_EQ(result.metadata.schema_info[0].name, "a"); + EXPECT_EQ(result.metadata.schema_info[1].name, "b"); + EXPECT_EQ(result.metadata.schema_info[2].name, "c"); + + // cudf::column::contents contents = result.tbl->get_column(1).release(); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), float64_wrapper{{1, 1, 1}}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + result.tbl->get_column(1), + cudf::test::strings_column_wrapper({"{\"0\": \"abc\", \"1\": [\"a\", \"b\"]}", + 
"{\"0\": \"abc\" }", + "{\"0\": \"lolol \"}"}), + cudf::test::debug_output_level::ALL_ERRORS); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(2), + cudf::test::strings_column_wrapper({"true", "false", "true"}), + cudf::test::debug_output_level::ALL_ERRORS); } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/json/json_tree.cpp b/cpp/tests/io/json/json_tree.cpp index 8bcd5790e99..15682c6ae6b 100644 --- a/cpp/tests/io/json/json_tree.cpp +++ b/cpp/tests/io/json/json_tree.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -590,11 +591,11 @@ TEST_F(JsonTest, TreeRepresentation) // Parse the JSON and get the token stream auto const [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( - d_input, options, stream, rmm::mr::get_current_device_resource()); + d_input, options, stream, cudf::get_current_device_resource_ref()); // Get the JSON's tree representation auto gpu_tree = cuio_json::detail::get_tree_representation( - tokens_gpu, token_indices_gpu, false, stream, rmm::mr::get_current_device_resource()); + tokens_gpu, token_indices_gpu, false, stream, cudf::get_current_device_resource_ref()); // host tree generation auto cpu_tree = get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream); compare_trees(cpu_tree, gpu_tree); @@ -678,11 +679,11 @@ TEST_F(JsonTest, TreeRepresentation2) // Parse the JSON and get the token stream auto const [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( - d_input, options, stream, rmm::mr::get_current_device_resource()); + d_input, options, stream, cudf::get_current_device_resource_ref()); // Get the JSON's tree representation auto gpu_tree = cuio_json::detail::get_tree_representation( - tokens_gpu, token_indices_gpu, false, stream, rmm::mr::get_current_device_resource()); + tokens_gpu, token_indices_gpu, false, stream, cudf::get_current_device_resource_ref()); // host tree generation auto cpu_tree = 
get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream); compare_trees(cpu_tree, gpu_tree); @@ -753,11 +754,11 @@ TEST_F(JsonTest, TreeRepresentation3) // Parse the JSON and get the token stream auto const [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( - d_input, options, stream, rmm::mr::get_current_device_resource()); + d_input, options, stream, cudf::get_current_device_resource_ref()); // Get the JSON's tree representation auto gpu_tree = cuio_json::detail::get_tree_representation( - tokens_gpu, token_indices_gpu, false, stream, rmm::mr::get_current_device_resource()); + tokens_gpu, token_indices_gpu, false, stream, cudf::get_current_device_resource_ref()); // host tree generation auto cpu_tree = get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream); compare_trees(cpu_tree, gpu_tree); @@ -779,13 +780,13 @@ TEST_F(JsonTest, TreeRepresentationError) // Parse the JSON and get the token stream auto const [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( - d_input, options, stream, rmm::mr::get_current_device_resource()); + d_input, options, stream, cudf::get_current_device_resource_ref()); // Get the JSON's tree representation // This JSON is invalid and will raise an exception. 
EXPECT_THROW( cuio_json::detail::get_tree_representation( - tokens_gpu, token_indices_gpu, false, stream, rmm::mr::get_current_device_resource()), + tokens_gpu, token_indices_gpu, false, stream, cudf::get_current_device_resource_ref()), cudf::logic_error); } @@ -862,7 +863,7 @@ TEST_P(JsonTreeTraversalTest, CPUvsGPUTraversal) // Parse the JSON and get the token stream auto const [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( - d_input, options, stream, rmm::mr::get_current_device_resource()); + d_input, options, stream, cudf::get_current_device_resource_ref()); // host tree generation auto cpu_tree = get_tree_representation_cpu(tokens_gpu, token_indices_gpu, options, stream); bool const is_array_of_arrays = @@ -875,7 +876,7 @@ TEST_P(JsonTreeTraversalTest, CPUvsGPUTraversal) records_orient_tree_traversal_cpu(input, cpu_tree, is_array_of_arrays, json_lines, stream); // gpu tree generation auto gpu_tree = cuio_json::detail::get_tree_representation( - tokens_gpu, token_indices_gpu, false, stream, rmm::mr::get_current_device_resource()); + tokens_gpu, token_indices_gpu, false, stream, cudf::get_current_device_resource_ref()); #if LIBCUDF_JSON_DEBUG_DUMP printf("BEFORE traversal (gpu_tree):\n"); @@ -888,8 +889,9 @@ TEST_P(JsonTreeTraversalTest, CPUvsGPUTraversal) gpu_tree, is_array_of_arrays, json_lines, + false, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); #if LIBCUDF_JSON_DEBUG_DUMP printf("AFTER traversal (gpu_tree):\n"); print_tree(gpu_tree); diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu new file mode 100644 index 00000000000..f988ae24b38 --- /dev/null +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -0,0 +1,371 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "io/json/nested_json.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include + +namespace cuio_json = cudf::io::json; + +struct h_tree_meta_t { + std::vector node_categories; + std::vector parent_node_ids; + std::vector node_range_begin; + std::vector node_range_end; +}; + +struct h_column_tree { + // position of nnzs + std::vector row_idx; + std::vector col_idx; + // node properties + std::vector categories; + std::vector column_ids; +}; + +// debug printing +template +void print(cudf::host_span vec, std::string name) +{ + std::cout << name << " = "; + for (auto e : vec) { + std::cout << e << " "; + } + std::cout << std::endl; +} + +bool check_equality(cuio_json::tree_meta_t& d_a, + cudf::device_span d_a_max_row_offsets, + cuio_json::experimental::compressed_sparse_row& d_b_csr, + cuio_json::experimental::column_tree_properties& d_b_ctp, + rmm::cuda_stream_view stream) +{ + // convert from tree_meta_t to column_tree_csr + stream.synchronize(); + + h_tree_meta_t a{cudf::detail::make_std_vector_async(d_a.node_categories, stream), + cudf::detail::make_std_vector_async(d_a.parent_node_ids, stream), + cudf::detail::make_std_vector_async(d_a.node_range_begin, stream), + cudf::detail::make_std_vector_async(d_a.node_range_end, stream)}; + + h_column_tree b{cudf::detail::make_std_vector_async(d_b_csr.row_idx, stream), + cudf::detail::make_std_vector_async(d_b_csr.col_idx, stream), + cudf::detail::make_std_vector_async(d_b_ctp.categories, stream), + 
cudf::detail::make_std_vector_async(d_b_ctp.mapped_ids, stream)}; + + auto a_max_row_offsets = cudf::detail::make_std_vector_async(d_a_max_row_offsets, stream); + auto b_max_row_offsets = cudf::detail::make_std_vector_async(d_b_ctp.max_row_offsets, stream); + + stream.synchronize(); + + auto num_nodes = a.parent_node_ids.size(); + if (num_nodes > 1) { + if (b.row_idx.size() != num_nodes + 1) { return false; } + + for (auto pos = b.row_idx[0]; pos < b.row_idx[1]; pos++) { + auto v = b.col_idx[pos]; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[0]) { return false; } + } + for (size_t u = 1; u < num_nodes; u++) { + auto v = b.col_idx[b.row_idx[u]]; + if (a.parent_node_ids[b.column_ids[u]] != b.column_ids[v]) { return false; } + + for (auto pos = b.row_idx[u] + 1; pos < b.row_idx[u + 1]; pos++) { + v = b.col_idx[pos]; + if (a.parent_node_ids[b.column_ids[v]] != b.column_ids[u]) { return false; } + } + } + for (size_t u = 0; u < num_nodes; u++) { + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; } + } + for (size_t u = 0; u < num_nodes; u++) { + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; } + } + } else if (num_nodes == 1) { + if (b.row_idx.size() != num_nodes + 1) { return false; } + + if (b.row_idx[0] != 0 || b.row_idx[1] != 1) return false; + if (!b.col_idx.empty()) return false; + for (size_t u = 0; u < num_nodes; u++) { + if (a.node_categories[b.column_ids[u]] != b.categories[u]) { return false; } + } + + for (size_t u = 0; u < num_nodes; u++) { + if (a_max_row_offsets[b.column_ids[u]] != b_max_row_offsets[u]) { return false; } + } + } + return true; +} + +void run_test(std::string const& input, bool enable_lines = true) +{ + auto const stream = cudf::get_default_stream(); + cudf::string_scalar d_scalar(input, true, stream); + auto d_input = cudf::device_span{d_scalar.data(), + static_cast(d_scalar.size())}; + + cudf::io::json_reader_options options{}; + options.enable_lines(enable_lines); + 
options.enable_mixed_types_as_string(true); + + // Parse the JSON and get the token stream + auto const [tokens_gpu, token_indices_gpu] = cudf::io::json::detail::get_token_stream( + d_input, options, stream, cudf::get_current_device_resource_ref()); + + // Get the JSON's tree representation + auto gpu_tree = + cuio_json::detail::get_tree_representation(tokens_gpu, + token_indices_gpu, + options.is_enabled_mixed_types_as_string(), + stream, + cudf::get_current_device_resource_ref()); + + bool const is_array_of_arrays = [&]() { + std::array h_node_categories = {cuio_json::NC_ERR, cuio_json::NC_ERR}; + auto const size_to_copy = std::min(size_t{2}, gpu_tree.node_categories.size()); + CUDF_CUDA_TRY(cudaMemcpyAsync(h_node_categories.data(), + gpu_tree.node_categories.data(), + sizeof(cuio_json::node_t) * size_to_copy, + cudaMemcpyDefault, + stream.value())); + stream.synchronize(); + if (options.is_enabled_lines()) return h_node_categories[0] == cuio_json::NC_LIST; + return h_node_categories[0] == cuio_json::NC_LIST and + h_node_categories[1] == cuio_json::NC_LIST; + }(); + + auto tup = + cuio_json::detail::records_orient_tree_traversal(d_input, + gpu_tree, + is_array_of_arrays, + options.is_enabled_lines(), + false, + stream, + rmm::mr::get_current_device_resource()); + auto& gpu_col_id = std::get<0>(tup); + auto& gpu_row_offsets = std::get<1>(tup); + + auto const num_nodes = gpu_col_id.size(); + rmm::device_uvector sorted_col_ids(gpu_col_id.size(), stream); // make a copy + thrust::copy( + rmm::exec_policy(stream), gpu_col_id.begin(), gpu_col_id.end(), sorted_col_ids.begin()); + + // sort by {col_id} on {node_ids} stable + rmm::device_uvector node_ids(gpu_col_id.size(), stream); + thrust::sequence(rmm::exec_policy(stream), node_ids.begin(), node_ids.end()); + thrust::stable_sort_by_key( + rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end(), node_ids.begin()); + + cudf::size_type const row_array_parent_col_id = [&]() { + cudf::size_type value = 
cuio_json::parent_node_sentinel; + auto const list_node_index = options.is_enabled_lines() ? 0 : 1; + CUDF_CUDA_TRY(cudaMemcpyAsync(&value, + gpu_col_id.data() + list_node_index, + sizeof(cudf::size_type), + cudaMemcpyDefault, + stream.value())); + stream.synchronize(); + return value; + }(); + + auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] = + cudf::io::json::detail::reduce_to_column_tree(gpu_tree, + gpu_col_id, + sorted_col_ids, + node_ids, + gpu_row_offsets, + is_array_of_arrays, + row_array_parent_col_id, + stream); + + auto [d_column_tree_csr, d_column_tree_properties] = + cudf::io::json::experimental::detail::reduce_to_column_tree(gpu_tree, + gpu_col_id, + sorted_col_ids, + node_ids, + gpu_row_offsets, + is_array_of_arrays, + row_array_parent_col_id, + stream); + + auto iseq = check_equality( + d_column_tree, d_max_row_offsets, d_column_tree_csr, d_column_tree_properties, stream); + // assert equality between csr and meta formats + ASSERT_TRUE(iseq); +} + +struct JsonColumnTreeTests : public cudf::test::BaseFixture {}; + +TEST_F(JsonColumnTreeTests, JSONL_Small) +{ + std::string const input = + R"( {} + { "a": { "y" : 6, "z": [] }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )"; // Prepare input & output buffers + run_test(input); +} + +TEST_F(JsonColumnTreeTests, JSONL_Large) +{ + std::string const input = + R"( {} + {} + { "a": { "y" : 6, "z": [] }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} + { "a": { "y" : 6, "z": [] }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} + { "a": { "y" : 6, "z": [] }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} + { "a": { "y" : 6, "z": [] }} + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )"; + run_test(input); +} + +TEST_F(JsonColumnTreeTests, JSONL_ListofStruct) +{ + std::string const input = R"( + { "Root": { "Key": [ { "EE": "A" } ] } } + { "Root": { "Key": { } } } + { "Root": { "Key": [{ "YY": 1}] } } + )"; + run_test(input); +} + 
+TEST_F(JsonColumnTreeTests, JSONL_MissingEntries) +{ + std::string json_stringl = R"( + {"a": 1, "b": {"0": "abc", "1": [-1.]}, "c": true} + {"a": 1, "b": {"0": "abc" }, "c": false} + {"a": 1, "b": {}} + {"a": 1, "c": null} + )"; + run_test(json_stringl); +} + +TEST_F(JsonColumnTreeTests, JSONL_MoreMissingEntries) +{ + std::string json_stringl = R"( + { "foo1": [1,2,3], "bar": 123 } + { "foo2": { "a": 1 }, "bar": 456 } + { "foo1": [1,2,3], "bar": 123 } + { "foo2": { "a": 1 }, "bar": 456 } + { "foo1": [1,2,3], "bar": 123 } + { "foo2": { "a": 1 }, "bar": 456 } + )"; + run_test(json_stringl); +} + +TEST_F(JsonColumnTreeTests, JSONL_StillMoreMissingEntries) +{ + std::string json_stringl = R"( + { "foo1": [1,2,3], "bar": 123 } + { "foo2": { "a": 1 }, "bar": 456 } + { "foo1": ["123","456"], "bar": 123 } + { "foo2": { "b": 5 }, "car": 456 } + { "foo1": [1,2,3], "bar": 123 } + { "foo2": { "a": 1 }, "bar": 456 } + )"; + run_test(json_stringl); +} + +TEST_F(JsonColumnTreeTests, JSON_MissingEntries) +{ + std::string json_string = R"([ + {"a": 1, "b": {"0": "abc", "1": [-1.]}, "c": true}, + {"a": 1, "b": {"0": "abc" }, "c": false}, + {"a": 1, "b": {}}, + {"a": 1, "c": null} + ])"; + run_test(json_string, false); +} + +TEST_F(JsonColumnTreeTests, JSON_StructOfStructs) +{ + std::string json_string = + R"([ + {}, + { "a": { "y" : 6, "z": [] }}, + { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} + ])"; // Prepare input & output buffers + run_test(json_string, false); +} + +TEST_F(JsonColumnTreeTests, JSONL_ArrayOfArrays_NestedList) +{ + std::string json_string = + R"([123, [1,2,3]] + [456, null, { "a": 1 }])"; + run_test(json_string); +} + +TEST_F(JsonColumnTreeTests, JSON_ArrayofArrays_NestedList) +{ + std::string json_string = R"([[[1,2,3], null, 123], + [null, { "a": 1 }, 456 ]])"; + run_test(json_string, false); +} + +TEST_F(JsonColumnTreeTests, JSON_CornerCase_Empty) +{ + std::string json_string = R"([])"; + run_test(json_string, false); +} + 
+TEST_F(JsonColumnTreeTests, JSONL_CornerCase_List) +{ + std::string json_string = R"([123])"; + run_test(json_string, true); +} + +TEST_F(JsonColumnTreeTests, JSON_CornerCase_EmptyNestedList) +{ + std::string json_string = R"([[[]]])"; + run_test(json_string, false); +} + +TEST_F(JsonColumnTreeTests, JSON_CornerCase_EmptyNestedLists) +{ + std::string json_string = R"([[], [], []])"; + run_test(json_string, false); +} + +TEST_F(JsonColumnTreeTests, JSONL_CornerCase_ListofLists) +{ + std::string json_string = R"([[1, 2, 3], [4, 5, null], []])"; + run_test(json_string, true); +} + +TEST_F(JsonColumnTreeTests, JSONL_CornerCase_EmptyListOfLists) +{ + std::string json_string = R"([[]])"; + run_test(json_string, true); +} diff --git a/cpp/tests/io/json/json_type_cast_test.cu b/cpp/tests/io/json/json_type_cast_test.cu index fe430010f4b..c18d4189626 100644 --- a/cpp/tests/io/json/json_type_cast_test.cu +++ b/cpp/tests/io/json/json_type_cast_test.cu @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -73,7 +74,7 @@ auto default_json_options() TEST_F(JSONTypeCastTest, String) { auto const stream = cudf::get_default_stream(); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = cudf::get_current_device_resource_ref(); auto const type = cudf::data_type{cudf::type_id::STRING}; auto in_valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); @@ -110,7 +111,7 @@ TEST_F(JSONTypeCastTest, String) TEST_F(JSONTypeCastTest, Int) { auto const stream = cudf::get_default_stream(); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = cudf::get_current_device_resource_ref(); auto const type = cudf::data_type{cudf::type_id::INT64}; cudf::test::strings_column_wrapper data({"1", "null", "3", "true", "5", "false"}); @@ -141,7 +142,7 @@ TEST_F(JSONTypeCastTest, Int) TEST_F(JSONTypeCastTest, StringEscapes) { auto const stream = cudf::get_default_stream(); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = 
cudf::get_current_device_resource_ref(); auto const type = cudf::data_type{cudf::type_id::STRING}; cudf::test::strings_column_wrapper data({ @@ -183,7 +184,7 @@ TEST_F(JSONTypeCastTest, StringEscapes) TEST_F(JSONTypeCastTest, ErrorNulls) { auto const stream = cudf::get_default_stream(); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = cudf::get_current_device_resource_ref(); auto const type = cudf::data_type{cudf::type_id::STRING}; // error in decoding diff --git a/cpp/tests/io/json/json_whitespace_normalization_test.cu b/cpp/tests/io/json/json_whitespace_normalization_test.cu index 8ed5fa81b12..6a3bd69de81 100644 --- a/cpp/tests/io/json/json_whitespace_normalization_test.cu +++ b/cpp/tests/io/json/json_whitespace_normalization_test.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -33,130 +34,127 @@ // Base test fixture for tests struct JsonWSNormalizationTest : public cudf::test::BaseFixture {}; -void run_test(std::string const& host_input, std::string const& expected_host_output) -{ - // Prepare cuda stream for data transfers & kernels - auto stream_view = cudf::test::get_default_stream(); - - auto device_input = cudf::detail::make_device_uvector_async( - host_input, stream_view, rmm::mr::get_current_device_resource()); - - // Preprocessing FST - cudf::io::datasource::owning_buffer> device_data( - std::move(device_input)); - cudf::io::json::detail::normalize_whitespace( - device_data, stream_view, rmm::mr::get_current_device_resource()); - - std::string preprocessed_host_output(device_data.size(), 0); - CUDF_CUDA_TRY(cudaMemcpyAsync(preprocessed_host_output.data(), - device_data.data(), - preprocessed_host_output.size(), - cudaMemcpyDeviceToHost, - stream_view.value())); - - stream_view.synchronize(); - ASSERT_EQ(preprocessed_host_output.size(), expected_host_output.size()); - CUDF_TEST_EXPECT_VECTOR_EQUAL( - preprocessed_host_output, expected_host_output, preprocessed_host_output.size()); -} - 
-TEST_F(JsonWSNormalizationTest, GroundTruth_Spaces) +TEST_F(JsonWSNormalizationTest, ReadJsonOption) { - std::string input = R"({ "A" : "TEST" })"; - std::string output = R"({"A":"TEST"})"; - run_test(input, output); -} + // When mixed type fields are read as strings, the table read will differ depending the + // value of normalize_whitespace -TEST_F(JsonWSNormalizationTest, GroundTruth_MoreSpaces) -{ - std::string input = R"({"a": [1, 2, 3, 4, 5, 6, 7, 8], "b": {"c": "d"}})"; - std::string output = R"({"a":[1,2,3,4,5,6,7,8],"b":{"c":"d"}})"; - run_test(input, output); -} + // Test input + std::string const host_input = "{ \"a\" : {\"b\" :\t\"c\"}}"; + cudf::io::json_reader_options input_options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{host_input.data(), host_input.size()}) + .lines(true) + .mixed_types_as_string(true) + .normalize_whitespace(true); -TEST_F(JsonWSNormalizationTest, GroundTruth_SpacesInString) -{ - std::string input = R"({" a ":50})"; - std::string output = R"({" a ":50})"; - run_test(input, output); -} + cudf::io::table_with_metadata processed_table = cudf::io::read_json(input_options); -TEST_F(JsonWSNormalizationTest, GroundTruth_NewlineInString) -{ - std::string input = "{\"a\" : \"x\ny\"}\n{\"a\" : \"x\\ny\"}"; - std::string output = "{\"a\":\"x\ny\"}\n{\"a\":\"x\\ny\"}"; - run_test(input, output); -} + // Expected table + std::string const expected_input = R"({ "a" : {"b":"c"}})"; + cudf::io::json_reader_options expected_input_options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{expected_input.data(), expected_input.size()}) + .lines(true) + .mixed_types_as_string(true) + .normalize_whitespace(false); -TEST_F(JsonWSNormalizationTest, GroundTruth_Tabs) -{ - std::string input = "{\"a\":\t\"b\"}"; - std::string output = R"({"a":"b"})"; - run_test(input, output); + cudf::io::table_with_metadata expected_table = cudf::io::read_json(expected_input_options); + 
CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table.tbl->view(), processed_table.tbl->view()); } -TEST_F(JsonWSNormalizationTest, GroundTruth_SpacesAndTabs) +TEST_F(JsonWSNormalizationTest, ReadJsonOption_InvalidRows) { - std::string input = "{\"A\" : \t\"TEST\" }"; - std::string output = R"({"A":"TEST"})"; - run_test(input, output); -} + // When mixed type fields are read as strings, the table read will differ depending the + // value of normalize_whitespace -TEST_F(JsonWSNormalizationTest, GroundTruth_MultilineJSONWithSpacesAndTabs) -{ - std::string input = - "{ \"foo rapids\": [1,2,3], \"bar\trapids\": 123 }\n\t{ \"foo rapids\": { \"a\": 1 }, " - "\"bar\trapids\": 456 }"; - std::string output = - "{\"foo rapids\":[1,2,3],\"bar\trapids\":123}\n{\"foo rapids\":{\"a\":1},\"bar\trapids\":456}"; - run_test(input, output); -} + // Test input + std::string const host_input = R"( + { "Root": { "Key": [ { "EE": tr ue } ] } } + { "Root": { "Key": "abc" } } + { "Root": { "Key": [ { "EE": 12 34 } ] } } + { "Root": { "Key": [{ "YY": 1}] } } + { "Root": { "Key": [ { "EE": 12. 
34 } ] } } + { "Root": { "Key": [ { "EE": "efg" } ] } } + )"; + cudf::io::json_reader_options input_options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{host_input.data(), host_input.size()}) + .lines(true) + .mixed_types_as_string(true) + .normalize_whitespace(true) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL); -TEST_F(JsonWSNormalizationTest, GroundTruth_PureJSONExample) -{ - std::string input = R"([{"a":50}, {"a" : 60}])"; - std::string output = R"([{"a":50},{"a":60}])"; - run_test(input, output); -} + cudf::io::table_with_metadata processed_table = cudf::io::read_json(input_options); -TEST_F(JsonWSNormalizationTest, GroundTruth_NoNormalizationRequired) -{ - std::string input = R"({"a\\n\r\a":50})"; - std::string output = R"({"a\\n\r\a":50})"; - run_test(input, output); -} + // Expected table + std::string const expected_input = R"( + { "Root": { "Key": [ { "EE": tr ue } ] } } + { "Root": { "Key": "abc" } } + { "Root": { "Key": [ { "EE": 12 34 } ] } } + { "Root": { "Key": [{"YY":1}] } } + { "Root": { "Key": [ { "EE": 12. 
34 } ] } } + { "Root": { "Key": [{"EE":"efg"}] } } + )"; + cudf::io::json_reader_options expected_input_options = + cudf::io::json_reader_options::builder( + cudf::io::source_info{expected_input.data(), expected_input.size()}) + .lines(true) + .mixed_types_as_string(true) + .normalize_whitespace(false) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL); -TEST_F(JsonWSNormalizationTest, GroundTruth_InvalidInput) -{ - std::string input = "{\"a\" : \"b }\n{ \"c \" :\t\"d\"}"; - std::string output = "{\"a\":\"b }\n{\"c \":\"d\"}"; - run_test(input, output); + cudf::io::table_with_metadata expected_table = cudf::io::read_json(expected_input_options); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table.tbl->view(), processed_table.tbl->view()); } -TEST_F(JsonWSNormalizationTest, ReadJsonOption) +TEST_F(JsonWSNormalizationTest, ReadJsonOption_InvalidRows_NoMixedType) { // When mixed type fields are read as strings, the table read will differ depending the // value of normalize_whitespace // Test input - std::string const host_input = "{ \"a\" : {\"b\" :\t\"c\"}}"; + std::string const host_input = R"( + { "Root": { "Key": [ { "EE": tr ue } ] } } + { "Root": { "Key": [ { "EE": 12 34 } ] } } + { "Root": { "Key": [{ "YY": 1}] } } + { "Root": { "Key": [ { "EE": 12. 
34 } ] } } + { "Root": { "Key": [ { "EE": "efg" }, { "YY" : "abc" } ] } } + { "Root": { "Key": [ { "YY" : "abc" } ] } } + )"; + + std::map dtype_schema{ + {"Key", {cudf::data_type{cudf::type_id::STRING}}}}; + cudf::io::json_reader_options input_options = cudf::io::json_reader_options::builder( cudf::io::source_info{host_input.data(), host_input.size()}) + .dtypes(dtype_schema) .lines(true) - .mixed_types_as_string(true) - .normalize_whitespace(true); + .prune_columns(true) + .normalize_whitespace(true) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL); cudf::io::table_with_metadata processed_table = cudf::io::read_json(input_options); // Expected table - std::string const expected_input = R"({ "a" : {"b":"c"}})"; + std::string const expected_input = R"( + { "Root": { "Key": [ { "EE": tr ue } , { "YY" : 2 } ] } } + { "Root": { "Key": [ { "EE": 12 34 } ] } } + { "Root": { "Key": [{"YY":1}] } } + { "Root": { "Key": [ { "EE": 12. 34 } ] } } + { "Root": { "Key": [{"EE":"efg"},{"YY":"abc"}] } } + { "Root": { "Key": [{"YY":"abc"}] } } + )"; + cudf::io::json_reader_options expected_input_options = cudf::io::json_reader_options::builder( cudf::io::source_info{expected_input.data(), expected_input.size()}) + .dtypes(dtype_schema) .lines(true) - .mixed_types_as_string(true) - .normalize_whitespace(false); + .prune_columns(true) + .normalize_whitespace(false) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL); cudf::io::table_with_metadata expected_table = cudf::io::read_json(expected_input_options); CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table.tbl->view(), processed_table.tbl->view()); diff --git a/cpp/tests/io/json/nested_json_test.cpp b/cpp/tests/io/json/nested_json_test.cpp index 5dc25133719..f32aba0e632 100644 --- a/cpp/tests/io/json/nested_json_test.cpp +++ b/cpp/tests/io/json/nested_json_test.cpp @@ -29,9 +29,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -447,7 +449,7 @@ 
TEST_F(JsonNewlineDelimiterTest, TokenStream) // Parse the JSON and get the token stream auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream( - d_input, default_options, stream, rmm::mr::get_current_device_resource()); + d_input, default_options, stream, cudf::get_current_device_resource_ref()); // Copy back the number of tokens that were written auto const tokens_gpu = cudf::detail::make_std_vector_async(d_tokens_gpu, stream); auto const token_indices_gpu = cudf::detail::make_std_vector_async(d_token_indices_gpu, stream); @@ -581,7 +583,7 @@ TEST_F(JsonNewlineDelimiterTest, TokenStream2) // Parse the JSON and get the token stream auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream( - d_input, default_options, stream, rmm::mr::get_current_device_resource()); + d_input, default_options, stream, cudf::get_current_device_resource_ref()); // Copy back the number of tokens that were written auto const tokens_gpu = cudf::detail::make_std_vector_async(d_tokens_gpu, stream); auto const token_indices_gpu = cudf::detail::make_std_vector_async(d_token_indices_gpu, stream); @@ -639,7 +641,7 @@ TEST_F(JsonParserTest, ExtractColumn) // Prepare cuda stream for data transfers & kernels auto const stream = cudf::get_default_stream(); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = cudf::get_current_device_resource_ref(); // Default parsing options cudf::io::json_reader_options default_options{}; @@ -648,7 +650,7 @@ TEST_F(JsonParserTest, ExtractColumn) auto const d_input = cudf::detail::make_device_uvector_async( cudf::host_span{input.c_str(), input.size()}, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // Get the JSON's tree representation auto const cudf_table = json_parser(d_input, default_options, stream, mr); @@ -739,7 +741,7 @@ TEST_P(JsonDelimiterParamTest, RecoveringTokenStream) // Parse the JSON and get the token stream auto [d_tokens_gpu, 
d_token_indices_gpu] = cuio_json::detail::get_token_stream( - d_input, default_options, stream, rmm::mr::get_current_device_resource()); + d_input, default_options, stream, cudf::get_current_device_resource_ref()); // Copy back the number of tokens that were written auto const tokens_gpu = cudf::detail::make_std_vector_async(d_tokens_gpu, stream); auto const token_indices_gpu = cudf::detail::make_std_vector_async(d_token_indices_gpu, stream); @@ -856,9 +858,9 @@ TEST_F(JsonTest, PostProcessTokenStream) auto const d_offsets = cudf::detail::make_device_uvector_async( cudf::host_span{offsets.data(), offsets.size()}, stream, - rmm::mr::get_current_device_resource()); - auto const d_tokens = - cudf::detail::make_device_uvector_async(tokens, stream, rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); + auto const d_tokens = cudf::detail::make_device_uvector_async( + tokens, stream, cudf::get_current_device_resource_ref()); // Run system-under-test auto [d_filtered_tokens, d_filtered_indices] = @@ -883,7 +885,7 @@ TEST_P(JsonDelimiterParamTest, UTF_JSON) { // Prepare cuda stream for data transfers & kernels auto const stream = cudf::get_default_stream(); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = cudf::get_current_device_resource_ref(); auto json_parser = cuio_json::detail::device_parse_nested_json; char const delimiter = GetParam(); @@ -904,7 +906,7 @@ TEST_P(JsonDelimiterParamTest, UTF_JSON) auto const d_ascii_pass = cudf::detail::make_device_uvector_sync( cudf::host_span{ascii_pass.c_str(), ascii_pass.size()}, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_EXPECT_NO_THROW(json_parser(d_ascii_pass, default_options, stream, mr)); @@ -921,7 +923,7 @@ TEST_P(JsonDelimiterParamTest, UTF_JSON) auto const d_utf_failed = cudf::detail::make_device_uvector_sync( cudf::host_span{utf_failed.c_str(), utf_failed.size()}, stream, - rmm::mr::get_current_device_resource()); + 
cudf::get_current_device_resource_ref()); CUDF_EXPECT_NO_THROW(json_parser(d_utf_failed, default_options, stream, mr)); // utf-8 string that passes parsing. @@ -938,7 +940,7 @@ TEST_P(JsonDelimiterParamTest, UTF_JSON) auto const d_utf_pass = cudf::detail::make_device_uvector_sync( cudf::host_span{utf_pass.c_str(), utf_pass.size()}, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_EXPECT_NO_THROW(json_parser(d_utf_pass, default_options, stream, mr)); } @@ -949,7 +951,7 @@ TEST_F(JsonParserTest, ExtractColumnWithQuotes) // Prepare cuda stream for data transfers & kernels auto const stream = cudf::get_default_stream(); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = cudf::get_current_device_resource_ref(); // Default parsing options cudf::io::json_reader_options options{}; @@ -959,7 +961,7 @@ TEST_F(JsonParserTest, ExtractColumnWithQuotes) auto const d_input = cudf::detail::make_device_uvector_async( cudf::host_span{input.c_str(), input.size()}, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // Get the JSON's tree representation auto const cudf_table = json_parser(d_input, options, stream, mr); @@ -982,7 +984,7 @@ TEST_F(JsonParserTest, ExpectFailMixStructAndList) // Prepare cuda stream for data transfers & kernels auto const stream = cudf::get_default_stream(); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = cudf::get_current_device_resource_ref(); // Default parsing options cudf::io::json_reader_options options{}; @@ -1002,7 +1004,7 @@ TEST_F(JsonParserTest, ExpectFailMixStructAndList) auto const d_input = cudf::detail::make_device_uvector_async( cudf::host_span{input.c_str(), input.size()}, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); EXPECT_THROW(auto const cudf_table = json_parser(d_input, options, stream, mr), cudf::logic_error); } @@ -1011,7 +1013,7 @@ TEST_F(JsonParserTest, 
ExpectFailMixStructAndList) auto const d_input = cudf::detail::make_device_uvector_async( cudf::host_span{input.c_str(), input.size()}, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_EXPECT_NO_THROW(auto const cudf_table = json_parser(d_input, options, stream, mr)); } } @@ -1023,7 +1025,7 @@ TEST_F(JsonParserTest, EmptyString) // Prepare cuda stream for data transfers & kernels auto const stream = cudf::get_default_stream(); - auto mr = rmm::mr::get_current_device_resource(); + auto mr = cudf::get_current_device_resource_ref(); // Default parsing options cudf::io::json_reader_options default_options{}; @@ -1032,7 +1034,7 @@ TEST_F(JsonParserTest, EmptyString) auto const d_input = cudf::detail::make_device_uvector_sync(cudf::host_span{input.c_str(), input.size()}, stream, - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // Get the JSON's tree representation auto const cudf_table = json_parser(d_input, default_options, stream, mr); @@ -1177,7 +1179,184 @@ TEST_P(JsonDelimiterParamTest, RecoveringTokenStreamNewlineAndDelimiter) // Parse the JSON and get the token stream auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream( - d_input, default_options, stream, rmm::mr::get_current_device_resource()); + d_input, default_options, stream, cudf::get_current_device_resource_ref()); + // Copy back the number of tokens that were written + auto const tokens_gpu = cudf::detail::make_std_vector_async(d_tokens_gpu, stream); + auto const token_indices_gpu = cudf::detail::make_std_vector_async(d_token_indices_gpu, stream); + + stream.synchronize(); + // Verify the number of tokens matches + ASSERT_EQ(golden_token_stream.size(), tokens_gpu.size()); + ASSERT_EQ(golden_token_stream.size(), token_indices_gpu.size()); + + for (std::size_t i = 0; i < tokens_gpu.size(); i++) { + // Ensure the index the tokens are pointing to do match + EXPECT_EQ(golden_token_stream[i].first, 
token_indices_gpu[i]) << "Mismatch at #" << i; + // Ensure the token category is correct + EXPECT_EQ(golden_token_stream[i].second, tokens_gpu[i]) << "Mismatch at #" << i; + } +} + +TEST_P(JsonDelimiterParamTest, RecoveringTokenStreamNewlineAsWSAndDelimiter) +{ + // Test input. Inline comments used to indicate character indexes + // 012345678 <= line 0 + char const delimiter = GetParam(); + + /* Input: (Note that \n is considered whitespace according to the JSON spec when it is not used as + * a delimiter for JSONL) + * {"a":2} + * {"a":{"a":{"a":[321{"a":[1]} + * + * {"b":123} + * {"b":123} + * {"b"\n:\n\n\n123\n} + */ + std::string input = R"({"a":2})" + "\n"; + // starting position 8 (zero indexed) + input += R"({"a":)" + std::string(1, delimiter); + // starting position 14 (zero indexed) + input += R"({"a":{"a":[321)" + std::string(1, delimiter); + // starting position 29 (zero indexed) + input += R"({"a":[1]})" + std::string("\n\n") + std::string(1, delimiter); + // starting position 41 (zero indexed) + input += R"({"b":123})" + "\n"; + // starting position 51 (zero indexed) + input += R"({"b":123})" + std::string(1, delimiter); + // starting position 61 (zero indexed) + input += R"({"b")" + std::string("\n:\n\n\n123\n}"); + + // Golden token stream sample + using token_t = cuio_json::token_t; + std::vector> golden_token_stream; + if (delimiter != '\n') { + golden_token_stream = {// Line 0 (valid) + {0, token_t::StructBegin}, + {1, token_t::StructMemberBegin}, + {1, token_t::FieldNameBegin}, + {3, token_t::FieldNameEnd}, + {5, token_t::ValueBegin}, + {6, token_t::ValueEnd}, + {6, token_t::StructMemberEnd}, + {6, token_t::StructEnd}, + // Line 1 (invalid) + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + // Line 2 (valid) + {29, token_t::StructBegin}, + {30, token_t::StructMemberBegin}, + {30, token_t::FieldNameBegin}, + {32, token_t::FieldNameEnd}, + {34, token_t::ListBegin}, + {35, token_t::ValueBegin}, + {36, token_t::ValueEnd}, + {36, 
token_t::ListEnd}, + {37, token_t::StructMemberEnd}, + {37, token_t::StructEnd}, + // Line 3 (valid) + {41, token_t::StructBegin}, + {42, token_t::StructMemberBegin}, + {42, token_t::FieldNameBegin}, + {44, token_t::FieldNameEnd}, + {46, token_t::ValueBegin}, + {49, token_t::ValueEnd}, + {49, token_t::StructMemberEnd}, + {49, token_t::StructEnd}, + // Line 4 (valid) + {61, token_t::StructBegin}, + {62, token_t::StructMemberBegin}, + {62, token_t::FieldNameBegin}, + {64, token_t::FieldNameEnd}, + {70, token_t::ValueBegin}, + {73, token_t::ValueEnd}, + {74, token_t::StructMemberEnd}, + {74, token_t::StructEnd}}; + } else { + /* Input: + * {"a":2} + * {"a": + * {"a":{"a":[321 + * {"a":[1]} + * + * + * {"b":123} + * {"b":123} + * {"b"\n:\n\n\n123\n} + */ + golden_token_stream = {// Line 0 (valid) + {0, token_t::StructBegin}, + {1, token_t::StructMemberBegin}, + {1, token_t::FieldNameBegin}, + {3, token_t::FieldNameEnd}, + {5, token_t::ValueBegin}, + {6, token_t::ValueEnd}, + {6, token_t::StructMemberEnd}, + {6, token_t::StructEnd}, + // Line 1 (invalid) + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + // Line 2 (invalid) + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + // Line 3 (valid) + {29, token_t::StructBegin}, + {30, token_t::StructMemberBegin}, + {30, token_t::FieldNameBegin}, + {32, token_t::FieldNameEnd}, + {34, token_t::ListBegin}, + {35, token_t::ValueBegin}, + {36, token_t::ValueEnd}, + {36, token_t::ListEnd}, + {37, token_t::StructMemberEnd}, + {37, token_t::StructEnd}, + // Line 4 (valid) + {41, token_t::StructBegin}, + {42, token_t::StructMemberBegin}, + {42, token_t::FieldNameBegin}, + {44, token_t::FieldNameEnd}, + {46, token_t::ValueBegin}, + {49, token_t::ValueEnd}, + {49, token_t::StructMemberEnd}, + {49, token_t::StructEnd}, + // Line 5 (valid) + {51, token_t::StructBegin}, + {52, token_t::StructMemberBegin}, + {52, token_t::FieldNameBegin}, + {54, token_t::FieldNameEnd}, + {56, token_t::ValueBegin}, + {59, token_t::ValueEnd}, + 
{59, token_t::StructMemberEnd}, + {59, token_t::StructEnd}, + // Line 6 (invalid) + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + {0, token_t::StructBegin}, + {0, token_t::StructEnd}}; + } + + auto const stream = cudf::get_default_stream(); + + // Prepare input & output buffers + cudf::string_scalar const d_scalar(input, true, stream); + auto const d_input = cudf::device_span{ + d_scalar.data(), static_cast(d_scalar.size())}; + + // Default parsing options + cudf::io::json_reader_options const in_opts = + cudf::io::json_reader_options::builder(cudf::io::source_info{}) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .delimiter(delimiter) + .lines(true); + + // Parse the JSON and get the token stream + auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream( + d_input, in_opts, stream, cudf::get_current_device_resource_ref()); // Copy back the number of tokens that were written auto const tokens_gpu = cudf::detail::make_std_vector_async(d_tokens_gpu, stream); auto const token_indices_gpu = cudf::detail::make_std_vector_async(d_token_indices_gpu, stream); diff --git a/cpp/tests/io/orc_chunked_reader_test.cu b/cpp/tests/io/orc_chunked_reader_test.cu index 2b78a5e7251..8ad1fea649d 100644 --- a/cpp/tests/io/orc_chunked_reader_test.cu +++ b/cpp/tests/io/orc_chunked_reader_test.cu @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -79,7 +80,7 @@ auto write_file(std::vector>& input_columns, null_count, std::move(col), cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // Shift nulls of the next column by one position, to avoid having all nulls // in the same table rows. @@ -121,7 +122,7 @@ auto chunked_read(std::string const& filepath, // TODO: remove this scope, when we get rid of mem stat in the reader. 
// This is to avoid use-after-free of memory resource created by the mem stat object. - auto mr = rmm::mr::get_current_device_resource(); + auto mr = cudf::get_current_device_resource_ref(); do { auto chunk = reader.read_chunk(); diff --git a/cpp/tests/io/parquet_chunked_reader_test.cu b/cpp/tests/io/parquet_chunked_reader_test.cu index 66b36aeed63..153a8a0c5aa 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cu +++ b/cpp/tests/io/parquet_chunked_reader_test.cu @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -80,7 +81,7 @@ auto write_file(std::vector>& input_columns, null_count, std::move(col), cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); // Shift nulls of the next column by one position, to avoid having all nulls // in the same table rows. diff --git a/cpp/tests/io/parquet_common.cpp b/cpp/tests/io/parquet_common.cpp index c1211869bcc..3dd5ad145ea 100644 --- a/cpp/tests/io/parquet_common.cpp +++ b/cpp/tests/io/parquet_common.cpp @@ -744,7 +744,7 @@ int32_t compare(T& v1, T& v2) int32_t compare_binary(std::vector const& v1, std::vector const& v2, cudf::io::parquet::detail::Type ptype, - thrust::optional const& ctype) + cuda::std::optional const& ctype) { auto ctype_val = ctype.value_or(cudf::io::parquet::detail::UNKNOWN); switch (ptype) { diff --git a/cpp/tests/io/parquet_common.hpp b/cpp/tests/io/parquet_common.hpp index 59ee85444f2..bc6145d77da 100644 --- a/cpp/tests/io/parquet_common.hpp +++ b/cpp/tests/io/parquet_common.hpp @@ -172,7 +172,7 @@ std::pair create_parquet_typed_with_stats(std::string int32_t compare_binary(std::vector const& v1, std::vector const& v2, cudf::io::parquet::detail::Type ptype, - thrust::optional const& ctype); + cuda::std::optional const& ctype); void expect_compression_stats_empty(std::shared_ptr stats); diff --git a/cpp/tests/io/parquet_writer_test.cpp b/cpp/tests/io/parquet_writer_test.cpp index e07ebe25322..c8100038942 100644 --- 
a/cpp/tests/io/parquet_writer_test.cpp +++ b/cpp/tests/io/parquet_writer_test.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -192,7 +193,7 @@ TEST_F(ParquetWriterTest, BufferSource) cudf::host_span{reinterpret_cast(out_buffer.data()), out_buffer.size()}, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto const d_buffer = cudf::device_span( reinterpret_cast(d_input.data()), d_input.size()); cudf::io::parquet_reader_options in_opts = diff --git a/cpp/tests/io/type_inference_test.cu b/cpp/tests/io/type_inference_test.cu index 37156292f44..b20f2024cb9 100644 --- a/cpp/tests/io/type_inference_test.cu +++ b/cpp/tests/io/type_inference_test.cu @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -55,9 +56,9 @@ TEST_F(TypeInference, Basic) auto const string_offset = std::vector{1, 4, 7}; auto const string_length = std::vector{2, 2, 1}; auto const d_string_offset = cudf::detail::make_device_uvector_async( - string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_offset, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const d_string_length = cudf::detail::make_device_uvector_async( - string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_length, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -88,9 +89,9 @@ TEST_F(TypeInference, Null) auto const string_offset = std::vector{1, 1, 4}; auto const string_length = std::vector{0, 2, 1}; auto const d_string_offset = cudf::detail::make_device_uvector_async( - string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_offset, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const d_string_length = 
cudf::detail::make_device_uvector_async( - string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_length, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -121,9 +122,9 @@ TEST_F(TypeInference, AllNull) auto const string_offset = std::vector{1, 1, 1}; auto const string_length = std::vector{0, 0, 4}; auto const d_string_offset = cudf::detail::make_device_uvector_async( - string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_offset, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const d_string_length = cudf::detail::make_device_uvector_async( - string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_length, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -154,9 +155,9 @@ TEST_F(TypeInference, String) auto const string_offset = std::vector{1, 8, 12}; auto const string_length = std::vector{6, 3, 4}; auto const d_string_offset = cudf::detail::make_device_uvector_async( - string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_offset, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const d_string_length = cudf::detail::make_device_uvector_async( - string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_length, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -187,9 +188,9 @@ TEST_F(TypeInference, Bool) auto const string_offset = std::vector{1, 6, 12}; auto const string_length = std::vector{4, 5, 5}; auto const 
d_string_offset = cudf::detail::make_device_uvector_async( - string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_offset, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const d_string_length = cudf::detail::make_device_uvector_async( - string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_length, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -220,9 +221,9 @@ TEST_F(TypeInference, Timestamp) auto const string_offset = std::vector{1, 10}; auto const string_length = std::vector{8, 9}; auto const d_string_offset = cudf::detail::make_device_uvector_async( - string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_offset, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const d_string_length = cudf::detail::make_device_uvector_async( - string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_length, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); @@ -254,9 +255,9 @@ TEST_F(TypeInference, InvalidInput) auto const string_offset = std::vector{1, 3, 5, 7, 9}; auto const string_length = std::vector{1, 1, 1, 1, 1}; auto const d_string_offset = cudf::detail::make_device_uvector_async( - string_offset, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_offset, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const d_string_length = cudf::detail::make_device_uvector_async( - string_length, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + string_length, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto 
d_col_strings = thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin())); diff --git a/cpp/tests/iterator/indexalator_test.cu b/cpp/tests/iterator/indexalator_test.cu index 0c10853ec02..dac2356dcb0 100644 --- a/cpp/tests/iterator/indexalator_test.cu +++ b/cpp/tests/iterator/indexalator_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,10 +20,10 @@ #include +#include #include #include #include -#include #include #include #include @@ -84,15 +84,16 @@ TYPED_TEST(IndexalatorTest, optional_iterator) auto d_col = cudf::test::fixed_width_column_wrapper( host_values.begin(), host_values.end(), validity.begin()); - auto expected_values = thrust::host_vector>(host_values.size()); + auto expected_values = + thrust::host_vector>(host_values.size()); std::transform(host_values.begin(), host_values.end(), validity.begin(), expected_values.begin(), [](T v, bool b) { - return (b) ? thrust::make_optional(static_cast(v)) - : thrust::nullopt; + return (b) ? 
cuda::std::make_optional(static_cast(v)) + : cuda::std::nullopt; }); auto it_dev = cudf::detail::indexalator_factory::make_input_optional_iterator(d_col); diff --git a/cpp/tests/iterator/iterator_tests.cuh b/cpp/tests/iterator/iterator_tests.cuh index c6da6b75930..5c9f6114eb5 100644 --- a/cpp/tests/iterator/iterator_tests.cuh +++ b/cpp/tests/iterator/iterator_tests.cuh @@ -22,6 +22,7 @@ #include // for meanvar #include #include +#include #include #include @@ -87,7 +88,7 @@ struct IteratorTest : public cudf::test::BaseFixture { InputIterator d_in_last = d_in + num_items; EXPECT_EQ(thrust::distance(d_in, d_in_last), num_items); auto dev_expected = cudf::detail::make_device_uvector_sync( - expected, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + expected, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); // using a temporary vector and calling transform and all_of separately is // equivalent to thrust::equal but compiles ~3x faster diff --git a/cpp/tests/iterator/offsetalator_test.cu b/cpp/tests/iterator/offsetalator_test.cu index e569e58f42a..b206ff947bb 100644 --- a/cpp/tests/iterator/offsetalator_test.cu +++ b/cpp/tests/iterator/offsetalator_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/cpp/tests/iterator/optional_iterator_test.cuh b/cpp/tests/iterator/optional_iterator_test.cuh index 6a264cee9a8..04f5410a44f 100644 --- a/cpp/tests/iterator/optional_iterator_test.cuh +++ b/cpp/tests/iterator/optional_iterator_test.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,8 @@ #include +#include #include -#include template void nonull_optional_iterator(IteratorTest& testFixture) @@ -32,9 +32,9 @@ void nonull_optional_iterator(IteratorTest& testFixture) auto d_col = cudf::column_device_view::create(w_col); // calculate the expected value by CPU. - thrust::host_vector> replaced_array(host_values.size()); + thrust::host_vector> replaced_array(host_values.size()); std::transform(host_values.begin(), host_values.end(), replaced_array.begin(), [](auto s) { - return thrust::optional{s}; + return cuda::std::optional{s}; }); // GPU test @@ -61,19 +61,20 @@ void null_optional_iterator(IteratorTest& testFixture) auto d_col = cudf::column_device_view::create(w_col); // calculate the expected value by CPU. - thrust::host_vector> optional_values(host_values.size()); - std::transform(host_values.begin(), - host_values.end(), - host_bools.begin(), - optional_values.begin(), - [](auto s, bool b) { return b ? thrust::optional{s} : thrust::optional{}; }); + thrust::host_vector> optional_values(host_values.size()); + std::transform( + host_values.begin(), + host_values.end(), + host_bools.begin(), + optional_values.begin(), + [](auto s, bool b) { return b ? 
cuda::std::optional{s} : cuda::std::optional{}; }); - thrust::host_vector> value_all_valid(host_values.size()); + thrust::host_vector> value_all_valid(host_values.size()); std::transform(host_values.begin(), host_values.end(), host_bools.begin(), value_all_valid.begin(), - [](auto s, bool b) { return thrust::optional{s}; }); + [](auto s, bool b) { return cuda::std::optional{s}; }); // GPU test for correct null mapping testFixture.iterator_test_thrust( diff --git a/cpp/tests/iterator/optional_iterator_test_numeric.cu b/cpp/tests/iterator/optional_iterator_test_numeric.cu index 98befb0a3ee..257c0979017 100644 --- a/cpp/tests/iterator/optional_iterator_test_numeric.cu +++ b/cpp/tests/iterator/optional_iterator_test_numeric.cu @@ -18,9 +18,9 @@ #include +#include #include #include -#include #include #include @@ -49,21 +49,21 @@ TYPED_TEST(NumericOptionalIteratorTest, null_optional_iterator) { null_optional_ // Transformers and Operators for optional_iterator test template struct transformer_optional_meanvar { - using ResultType = thrust::optional>; + using ResultType = cuda::std::optional>; - CUDF_HOST_DEVICE inline ResultType operator()(thrust::optional const& optional) + CUDF_HOST_DEVICE inline ResultType operator()(cuda::std::optional const& optional) { if (optional.has_value()) { auto v = *optional; return cudf::meanvar{v, static_cast(v * v), 1}; } - return thrust::nullopt; + return cuda::std::nullopt; } }; template struct optional_to_meanvar { - CUDF_HOST_DEVICE inline T operator()(thrust::optional const& v) { return v.value_or(T{0}); } + CUDF_HOST_DEVICE inline T operator()(cuda::std::optional const& v) { return v.value_or(T{0}); } }; // TODO: enable this test also at __CUDACC_DEBUG__ diff --git a/cpp/tests/iterator/value_iterator_test.cuh b/cpp/tests/iterator/value_iterator_test.cuh index 8252ce88f39..a479a263b09 100644 --- a/cpp/tests/iterator/value_iterator_test.cuh +++ b/cpp/tests/iterator/value_iterator_test.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include #include +#include #include @@ -26,7 +27,7 @@ void non_null_iterator(IteratorTest& testFixture) { auto host_array = cudf::test::make_type_param_vector({0, 6, 0, -14, 13, 64, -13, -20, 45}); auto dev_array = cudf::detail::make_device_uvector_sync( - host_array, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + host_array, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); // calculate the expected value by CPU. thrust::host_vector replaced_array(host_array); diff --git a/cpp/tests/iterator/value_iterator_test_numeric.cu b/cpp/tests/iterator/value_iterator_test_numeric.cu index d3d1c12bdc7..39e05ff6832 100644 --- a/cpp/tests/iterator/value_iterator_test_numeric.cu +++ b/cpp/tests/iterator/value_iterator_test_numeric.cu @@ -23,17 +23,5 @@ template struct NumericValueIteratorTest : public IteratorTest {}; TYPED_TEST_SUITE(NumericValueIteratorTest, TestingTypes); -TYPED_TEST(NumericValueIteratorTest, non_null_iterator) -{ - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - non_null_iterator(*this); -} -TYPED_TEST(NumericValueIteratorTest, null_iterator) -{ - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - null_iterator(*this); -} +TYPED_TEST(NumericValueIteratorTest, non_null_iterator) { non_null_iterator(*this); } +TYPED_TEST(NumericValueIteratorTest, null_iterator) { null_iterator(*this); } diff --git a/cpp/tests/iterator/value_iterator_test_strings.cu b/cpp/tests/iterator/value_iterator_test_strings.cu index 10bb3f21ee1..a965c65aef0 100644 --- a/cpp/tests/iterator/value_iterator_test_strings.cu +++ b/cpp/tests/iterator/value_iterator_test_strings.cu @@ -1,5 +1,5 @@ /* - 
* Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ #include "iterator_tests.cuh" #include +#include #include #include @@ -31,7 +32,7 @@ auto strings_to_string_views(std::vector& input_strings) std::tie(chars, offsets) = cudf::test::detail::make_chars_and_offsets( input_strings.begin(), input_strings.end(), all_valid); auto dev_chars = cudf::detail::make_device_uvector_sync( - chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + chars, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); // calculate the expected value by CPU. (but contains device pointers) thrust::host_vector replaced_array(input_strings.size()); @@ -52,7 +53,7 @@ TEST_F(StringIteratorTest, string_view_null_iterator) std::string zero("zero"); // the char data has to be in GPU auto initmsg = cudf::detail::make_device_uvector_sync( - zero, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + zero, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); T init = T{initmsg.data(), int(initmsg.size())}; // data and valid arrays @@ -88,7 +89,7 @@ TEST_F(StringIteratorTest, string_view_no_null_iterator) std::string zero("zero"); // the char data has to be in GPU auto initmsg = cudf::detail::make_device_uvector_sync( - zero, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + zero, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); T init = T{initmsg.data(), int(initmsg.size())}; // data array @@ -113,7 +114,7 @@ TEST_F(StringIteratorTest, string_scalar_iterator) std::string zero("zero"); // the char data has to be in GPU auto initmsg = cudf::detail::make_device_uvector_sync( - zero, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + zero, cudf::get_default_stream(), 
cudf::get_current_device_resource_ref()); T init = T{initmsg.data(), int(initmsg.size())}; // data array diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp index 05ae4ea1d04..93754091b3f 100644 --- a/cpp/tests/join/distinct_join_tests.cpp +++ b/cpp/tests/join/distinct_join_tests.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -44,7 +45,7 @@ std::unique_ptr> get_left_indices(cudf::siz auto sequence = std::vector(size); std::iota(sequence.begin(), sequence.end(), 0); auto indices = cudf::detail::make_device_uvector_sync( - sequence, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + sequence, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); return std::make_unique>(std::move(indices)); } diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp index 4e88414d553..3431e941359 100644 --- a/cpp/tests/join/join_tests.cpp +++ b/cpp/tests/join/join_tests.cpp @@ -37,8 +37,9 @@ #include #include #include +#include -#include +#include #include @@ -61,6 +62,7 @@ template >, cudf::table_view const& left_keys, cudf::table_view const& right_keys, cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr), cudf::out_of_bounds_policy oob_policy = cudf::out_of_bounds_policy::DONT_CHECK> std::unique_ptr join_and_gather( @@ -69,12 +71,13 @@ std::unique_ptr join_and_gather( std::vector const& left_on, std::vector const& right_on, cudf::null_equality compare_nulls, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto left_selected = left_input.select(left_on); auto right_selected = right_input.select(right_on); auto const [left_join_indices, right_join_indices] = - join_impl(left_selected, right_selected, compare_nulls, mr); + 
join_impl(left_selected, right_selected, compare_nulls, stream, mr); auto left_indices_span = cudf::device_span{*left_join_indices}; auto right_indices_span = cudf::device_span{*right_join_indices}; @@ -2028,7 +2031,11 @@ struct JoinTestLists : public cudf::test::BaseFixture { auto const probe_tv = cudf::table_view{{probe}}; auto const [left_result_map, right_result_map] = - join_func(build_tv, probe_tv, nulls_equal, rmm::mr::get_current_device_resource()); + join_func(build_tv, + probe_tv, + nulls_equal, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); auto const left_result_table = sort_and_gather(build_tv, column_view_from_device_uvector(*left_result_map), oob_policy); diff --git a/cpp/tests/join/semi_anti_join_tests.cpp b/cpp/tests/join/semi_anti_join_tests.cpp index de3d8bdaa23..554d5754e39 100644 --- a/cpp/tests/join/semi_anti_join_tests.cpp +++ b/cpp/tests/join/semi_anti_join_tests.cpp @@ -28,8 +28,10 @@ #include #include #include +#include +#include -#include +#include #include @@ -52,6 +54,7 @@ template > (*join_impl)( cudf::table_view const& left_keys, cudf::table_view const& right_keys, cudf::null_equality compare_nulls, + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr)> std::unique_ptr join_and_gather( cudf::table_view const& left_input, @@ -59,11 +62,12 @@ std::unique_ptr join_and_gather( std::vector const& left_on, std::vector const& right_on, cudf::null_equality compare_nulls, - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()) + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()) { auto left_selected = left_input.select(left_on); auto right_selected = right_input.select(right_on); - auto const join_indices = join_impl(left_selected, right_selected, compare_nulls, mr); + auto const join_indices = join_impl(left_selected, right_selected, compare_nulls, stream, mr); auto left_indices_span = 
cudf::device_span{*join_indices}; auto left_indices_col = cudf::column_view{left_indices_span}; diff --git a/cpp/tests/large_strings/json_tests.cu b/cpp/tests/large_strings/json_tests.cu index 49abf7b484d..80bde168b75 100644 --- a/cpp/tests/large_strings/json_tests.cu +++ b/cpp/tests/large_strings/json_tests.cu @@ -22,19 +22,24 @@ #include #include #include +#include #include struct JsonLargeReaderTest : public cudf::test::StringsLargeTest {}; TEST_F(JsonLargeReaderTest, MultiBatch) { - std::string json_string = R"( + std::string json_string = R"( { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 } { "a": { "y" : 6}, "b" : [4, 5 ], "c": 12 } { "a": { "y" : 6}, "b" : [6 ], "c": 13 } { "a": { "y" : 6}, "b" : [7 ], "c": 14 })"; - constexpr size_t batch_size_ub = std::numeric_limits::max(); - constexpr size_t expected_file_size = 1.5 * static_cast(batch_size_ub); + + std::size_t const batch_size_upper_bound = std::numeric_limits::max() / 16; + // set smaller batch_size to reduce file size and execution time + setenv("LIBCUDF_JSON_BATCH_SIZE", std::to_string(batch_size_upper_bound).c_str(), 1); + + constexpr std::size_t expected_file_size = 1.5 * static_cast(batch_size_upper_bound); std::size_t const log_repetitions = static_cast(std::ceil(std::log2(expected_file_size / json_string.size()))); @@ -66,15 +71,18 @@ TEST_F(JsonLargeReaderTest, MultiBatch) datasources.emplace_back(cudf::io::datasource::create(hb)); } // Test for different chunk sizes - std::vector chunk_sizes{ - batch_size_ub / 4, batch_size_ub / 2, batch_size_ub, static_cast(batch_size_ub * 2)}; + std::vector chunk_sizes{batch_size_upper_bound / 4, + batch_size_upper_bound / 2, + batch_size_upper_bound, + static_cast(batch_size_upper_bound * 2)}; + for (auto chunk_size : chunk_sizes) { auto const tables = split_byte_range_reading(datasources, json_lines_options, chunk_size, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); auto table_views = 
std::vector(tables.size()); std::transform(tables.begin(), tables.end(), table_views.begin(), [](auto& table) { @@ -86,4 +94,7 @@ TEST_F(JsonLargeReaderTest, MultiBatch) // cannot use EQUAL due to concatenate removing null mask CUDF_TEST_EXPECT_TABLES_EQUIVALENT(current_reader_table.tbl->view(), result->view()); } + + // go back to normal batch_size + unsetenv("LIBCUDF_JSON_BATCH_SIZE"); + } diff --git a/cpp/tests/large_strings/large_strings_fixture.cpp b/cpp/tests/large_strings/large_strings_fixture.cpp index ac8159369a1..249319da7f7 100644 --- a/cpp/tests/large_strings/large_strings_fixture.cpp +++ b/cpp/tests/large_strings/large_strings_fixture.cpp @@ -126,7 +126,7 @@ int main(int argc, char** argv) auto const cmd_opts = parse_cudf_test_opts(argc, argv); // hardcoding the CUDA memory resource to keep from exceeding the pool auto mr = cudf::test::make_cuda(); - rmm::mr::set_current_device_resource(mr.get()); + cudf::set_current_device_resource(mr.get()); auto adaptor = make_stream_mode_adaptor(cmd_opts); // create object to automatically be destroyed at the end of main() diff --git a/cpp/tests/partitioning/hash_partition_test.cpp b/cpp/tests/partitioning/hash_partition_test.cpp index 24dadf9b520..579d918a31d 100644 --- a/cpp/tests/partitioning/hash_partition_test.cpp +++ b/cpp/tests/partitioning/hash_partition_test.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -290,7 +291,7 @@ void run_fixed_width_test(size_t cols, // Make a table view of the partition numbers constexpr cudf::data_type dtype{cudf::type_id::INT32}; auto d_partitions = cudf::detail::make_device_uvector_sync( - partitions, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + partitions, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); cudf::column_view partitions_col(dtype, rows, d_partitions.data(), nullptr, 0); cudf::table_view partitions_table({partitions_col}); diff --git 
a/cpp/tests/quantiles/percentile_approx_test.cpp b/cpp/tests/quantiles/percentile_approx_test.cpp index 06c6b9dfbe4..915717713df 100644 --- a/cpp/tests/quantiles/percentile_approx_test.cpp +++ b/cpp/tests/quantiles/percentile_approx_test.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include @@ -371,7 +372,7 @@ struct PercentileApproxTest : public cudf::test::BaseFixture {}; TEST_F(PercentileApproxTest, EmptyInput) { auto empty_ = cudf::tdigest::detail::make_empty_tdigest_column( - cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + cudf::get_default_stream(), cudf::get_current_device_resource_ref()); cudf::test::fixed_width_column_wrapper percentiles{0.0, 0.25, 0.3}; std::vector input; diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 0ec4cfa34c4..949ffcc26a6 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -300,9 +300,6 @@ TYPED_TEST_SUITE(ReductionTest, cudf::test::NumericTypes); TYPED_TEST(ReductionTest, Product) { using T = TypeParam; - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } std::vector int_values({5, -1, 1, 0, 3, 2, 4}); std::vector host_bools({true, true, false, false, true, true, true}); diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 37efc116d2a..19996f827cf 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,7 @@ TYPED_TEST(SegmentedReductionTest, SumExcludeNulls) {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), 
rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{{6, 4, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -87,10 +88,6 @@ TYPED_TEST(SegmentedReductionTest, SumExcludeNulls) TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls) { - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - // [1, 3, 5], [null, 3, 5], [1], [null], [null, null], [] // values: {1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX} // offsets: {0, 3, 6, 7, 8, 10, 10} @@ -101,7 +98,7 @@ TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls) {1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{{15, 15, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -141,10 +138,6 @@ TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls) TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls) { - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - // [1, 2, 3], [1, null, 3], [1], [null], [null, null], [] // values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX} // offsets: {0, 3, 6, 7, 8, 10, 10} @@ -155,7 +148,7 @@ TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls) {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = 
cudf::test::fixed_width_column_wrapper{{3, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -193,10 +186,6 @@ TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls) TYPED_TEST(SegmentedReductionTest, MinExcludeNulls) { - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - // [1, 2, 3], [1, null, 3], [1], [null], [null, null], [] // values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX} // offsets: {0, 3, 6, 7, 8, 10, 10} @@ -207,7 +196,7 @@ TYPED_TEST(SegmentedReductionTest, MinExcludeNulls) {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{{1, 1, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}}; @@ -256,7 +245,7 @@ TYPED_TEST(SegmentedReductionTest, AnyExcludeNulls) {1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{ {false, false, true, true, bool{XXX}, false, true, bool{XXX}, bool{XXX}}, {true, true, true, true, false, true, true, false, false}}; @@ -296,7 +285,7 @@ TYPED_TEST(SegmentedReductionTest, AllExcludeNulls) {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}}; auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), 
cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{ {true, true, bool{XXX}, true, bool{XXX}, bool{XXX}, false, false, false}, {true, true, false, true, false, false, true, true, true}}; @@ -347,7 +336,7 @@ TYPED_TEST(SegmentedReductionTest, SumIncludeNulls) {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{{6, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -388,10 +377,6 @@ TYPED_TEST(SegmentedReductionTest, SumIncludeNulls) TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls) { - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - // [1, 3, 5], [null, 3, 5], [1], [null], [null, null], [] // values: {1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX} // offsets: {0, 3, 6, 7, 8, 10, 10} @@ -402,7 +387,7 @@ TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls) {1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{{15, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -445,10 +430,6 @@ TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls) TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls) { - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - // [1, 2, 3], [1, null, 3], [1], [null], [null, null], [] 
// values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX} // offsets: {0, 3, 6, 7, 8, 10, 10} @@ -459,7 +440,7 @@ TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls) {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{{3, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -500,10 +481,6 @@ TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls) TYPED_TEST(SegmentedReductionTest, MinIncludeNulls) { - if constexpr (std::is_same_v || std::is_same_v) { - if (getenv("LIBCUDF_MEMCHECK_ENABLED")) { return; } - } - // [1, 2, 3], [1, null, 3], [1], [null], [null, null], [] // values: {1, 2, 3, 1, XXX, 3, 1, XXX, XXX} // offsets: {0, 3, 6, 7, 8, 10, 10} @@ -514,7 +491,7 @@ TYPED_TEST(SegmentedReductionTest, MinIncludeNulls) {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 1, 0, 1, 1, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{{1, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}}; @@ -566,7 +543,7 @@ TYPED_TEST(SegmentedReductionTest, AnyIncludeNulls) {1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0}}; auto const offsets = std::vector{0, 3, 6, 9, 12, 12, 13, 14, 15, 17}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = 
cudf::test::fixed_width_column_wrapper{ {false, bool{XXX}, true, bool{XXX}, bool{XXX}, false, true, bool{XXX}, bool{XXX}}, {true, false, true, false, false, true, true, false, false}}; @@ -629,7 +606,7 @@ TYPED_TEST(SegmentedReductionTest, AllIncludeNulls) {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}}; auto const offsets = std::vector{0, 3, 6, 6, 7, 8, 10, 13, 16, 17}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{ {true, bool{XXX}, bool{XXX}, true, bool{XXX}, bool{XXX}, false, bool{XXX}, false}, {true, false, false, true, false, false, true, false, true}}; @@ -694,7 +671,7 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction) {1, 2, 3, 4, 5, 6, 7}, {true, true, true, true, true, true, true}}; auto const offsets = std::vector{1, 3, 4}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{{5, 4}, {true, true}}; auto res = @@ -745,7 +722,7 @@ TEST_F(SegmentedReductionTestUntyped, NonNullableInput) auto const input = cudf::test::fixed_width_column_wrapper{1, 2, 3, 4, 5, 6, 7}; auto const offsets = std::vector{0, 1, 1, 3, 7}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{{1, XXX, 5, 22}, {true, false, true, true}}; @@ -791,7 +768,7 @@ TEST_F(SegmentedReductionTestUntyped, Mean) cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; auto 
const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_mean_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; @@ -810,7 +787,7 @@ TEST_F(SegmentedReductionTestUntyped, MeanNulls) {10, 20, 30, 40, 50, 60, 0, 80, 90}, {true, true, true, true, true, true, false, true, true}); auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_mean_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; @@ -832,7 +809,7 @@ TEST_F(SegmentedReductionTestUntyped, SumOfSquares) cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_sum_of_squares_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::INT32}; @@ -852,7 +829,7 @@ TEST_F(SegmentedReductionTestUntyped, SumOfSquaresNulls) {10, 20, 30, 40, 50, 60, 0, 80, 90}, {true, true, true, true, true, true, false, true, true}); auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_sum_of_squares_aggregation(); auto const 
output_type = cudf::data_type{cudf::type_id::INT64}; @@ -875,7 +852,7 @@ TEST_F(SegmentedReductionTestUntyped, StandardDeviation) cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_std_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; @@ -895,7 +872,7 @@ TEST_F(SegmentedReductionTestUntyped, StandardDeviationNulls) {10, 0, 20, 30, 54, 63, 0, 72, 81}, {true, false, true, true, true, true, false, true, true}); auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_std_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; @@ -918,7 +895,7 @@ TEST_F(SegmentedReductionTestUntyped, Variance) cudf::test::fixed_width_column_wrapper{10, 20, 30, 40, 50, 60, 70, 80, 90}; auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_variance_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT32}; @@ -938,7 +915,7 @@ TEST_F(SegmentedReductionTestUntyped, VarianceNulls) {10, 0, 20, 30, 54, 63, 0, 72, 81}, {true, false, true, true, true, true, false, true, true}); auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, 
cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_variance_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::FLOAT64}; @@ -960,7 +937,7 @@ TEST_F(SegmentedReductionTestUntyped, NUnique) cudf::test::fixed_width_column_wrapper({10, 15, 20, 30, 60, 60, 70, 70, 80}); auto const offsets = std::vector{0, 1, 1, 2, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_nunique_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::INT32}; @@ -980,7 +957,7 @@ TEST_F(SegmentedReductionTestUntyped, NUniqueNulls) {10, 0, 20, 30, 60, 60, 70, 70, 0}, {true, false, true, true, true, true, true, true, false}); auto const offsets = std::vector{0, 1, 1, 2, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_nunique_aggregation(); auto const output_type = cudf::data_type{cudf::type_id::INT32}; @@ -1002,7 +979,7 @@ TEST_F(SegmentedReductionTestUntyped, Errors) {10, 0, 20, 30, 54, 63, 0, 72, 81}, {true, false, true, true, true, true, false, true, true}); auto const offsets = std::vector{0, 1, 1, 4, 9}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const null_policy = cudf::null_policy::EXCLUDE; auto const output_type = cudf::data_type{cudf::type_id::TIMESTAMP_DAYS}; auto const str_input = @@ -1071,7 +1048,7 @@ 
TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn) auto const input = cudf::test::fixed_width_column_wrapper{}; auto const offsets = std::vector{0}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{}; auto res = @@ -1108,7 +1085,7 @@ TEST_F(SegmentedReductionTestUntyped, EmptyInputWithOffsets) auto const input = cudf::test::fixed_width_column_wrapper{}; auto const offsets = std::vector{0, 0, 0, 0, 0, 0}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::fixed_width_column_wrapper{ {XXX, XXX, XXX, XXX, XXX}, {false, false, false, false, false}}; @@ -1157,7 +1134,7 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxWithNulls) auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_max_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1185,7 +1162,7 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinWithNulls) auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_min_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1213,7 +1190,7 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxNonNullableInput) auto const offsets = 
std::vector{0, 3, 4, 4}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_max_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1238,7 +1215,7 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinNonNullableInput) auto const offsets = std::vector{0, 3, 4, 4}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_min_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1263,7 +1240,7 @@ TYPED_TEST(SegmentedReductionFixedPointTest, Sum) auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_sum_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1301,7 +1278,7 @@ TYPED_TEST(SegmentedReductionFixedPointTest, Product) auto const offsets = std::vector{0, 3, 6, 7, 8, 12, 12}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const agg = cudf::make_product_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1338,7 +1315,7 @@ TYPED_TEST(SegmentedReductionFixedPointTest, SumOfSquares) auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), 
cudf::get_current_device_resource_ref()); auto const agg = cudf::make_sum_of_squares_aggregation(); for (auto scale : {-2, 0, 5}) { @@ -1502,7 +1479,7 @@ TEST_F(SegmentedReductionStringTest, EmptyInputWithOffsets) auto const input = cudf::test::strings_column_wrapper{}; auto const offsets = std::vector{0, 0, 0, 0}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto const expect = cudf::test::strings_column_wrapper({XXX, XXX, XXX}, {false, false, false}); auto result = diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp index 9603ea44a76..fcee27305f2 100644 --- a/cpp/tests/replace/replace_nulls_tests.cpp +++ b/cpp/tests/replace/replace_nulls_tests.cpp @@ -674,7 +674,7 @@ TEST_F(ReplaceDictionaryTest, ReplaceNullsEmpty) cudf::test::fixed_width_column_wrapper input_empty_w({}); auto input_empty = cudf::dictionary::encode(input_empty_w); auto result = cudf::replace_nulls(input_empty->view(), input_empty->view()); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), input_empty->view()); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), input_empty->view()); } TEST_F(ReplaceDictionaryTest, ReplaceNullsNoNulls) diff --git a/cpp/tests/reshape/byte_cast_tests.cpp b/cpp/tests/reshape/byte_cast_tests.cpp index cd280302677..b3d9b2e2f5f 100644 --- a/cpp/tests/reshape/byte_cast_tests.cpp +++ b/cpp/tests/reshape/byte_cast_tests.cpp @@ -61,8 +61,8 @@ TEST_F(ByteCastTest, int16ValuesWithNulls) auto [null_mask, null_count] = cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5); auto int16_expected = cudf::make_lists_column( 5, - std::move(cudf::test::fixed_width_column_wrapper{0, 0, 2, 2, 4, 4}.release()), - std::move(int16_data.release()), + cudf::test::fixed_width_column_wrapper{0, 0, 2, 2, 4, 4}.release(), + int16_data.release(), null_count, 
std::move(null_mask)); @@ -109,8 +109,8 @@ TEST_F(ByteCastTest, int32ValuesWithNulls) auto int32_expected = cudf::make_lists_column( 5, - std::move(cudf::test::fixed_width_column_wrapper{0, 4, 4, 8, 8, 12}.release()), - std::move(int32_data.release()), + cudf::test::fixed_width_column_wrapper{0, 4, 4, 8, 8, 12}.release(), + int32_data.release(), null_count, std::move(null_mask)); @@ -163,9 +163,8 @@ TEST_F(ByteCastTest, int64ValuesWithNulls) auto [null_mask, null_count] = cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5); auto int64_expected = cudf::make_lists_column( 5, - std::move( - cudf::test::fixed_width_column_wrapper{0, 0, 8, 8, 16, 16}.release()), - std::move(int64_data.release()), + cudf::test::fixed_width_column_wrapper{0, 0, 8, 8, 16, 16}.release(), + int64_data.release(), null_count, std::move(null_mask)); @@ -226,8 +225,8 @@ TEST_F(ByteCastTest, fp32ValuesWithNulls) cudf::test::detail::make_null_mask(even_validity, even_validity + 5); auto fp32_expected = cudf::make_lists_column( 5, - std::move(cudf::test::fixed_width_column_wrapper{0, 4, 4, 8, 8, 12}.release()), - std::move(fp32_data.release()), + cudf::test::fixed_width_column_wrapper{0, 4, 4, 8, 8, 12}.release(), + fp32_data.release(), null_count, std::move(null_mask)); @@ -297,9 +296,8 @@ TEST_F(ByteCastTest, fp64ValuesWithNulls) auto [null_mask, null_count] = cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5); auto fp64_expected = cudf::make_lists_column( 5, - std::move( - cudf::test::fixed_width_column_wrapper{0, 0, 8, 8, 16, 16}.release()), - std::move(fp64_data.release()), + cudf::test::fixed_width_column_wrapper{0, 0, 8, 8, 16, 16}.release(), + fp64_data.release(), null_count, std::move(null_mask)); diff --git a/cpp/tests/scalar/scalar_device_view_test.cu b/cpp/tests/scalar/scalar_device_view_test.cu index 5026954403b..2232aefefcd 100644 --- a/cpp/tests/scalar/scalar_device_view_test.cu +++ b/cpp/tests/scalar/scalar_device_view_test.cu @@ -25,6 +25,7 @@ 
#include #include #include +#include #include #include @@ -131,7 +132,7 @@ TEST_F(StringScalarDeviceViewTest, Value) auto scalar_device_view = cudf::get_scalar_device_view(s); rmm::device_scalar result{cudf::get_default_stream()}; auto value_v = cudf::detail::make_device_uvector_sync( - value, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + value, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); test_string_value<<<1, 1, 0, cudf::get_default_stream().value()>>>( scalar_device_view, value_v.data(), value.size(), result.data()); diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index da9666cbc74..79421a1fa30 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,7 +21,10 @@ #include #include +#include #include +#include +#include #include #include @@ -338,3 +341,25 @@ TEST_F(SegmentedSortInt, Bool) result = cudf::stable_segmented_sorted_order(cudf::table_view({test_col}), segments); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected); } + +// Specific test for fix in https://github.com/rapidsai/cudf/pull/16463 +TEST_F(SegmentedSortInt, UnbalancedOffsets) +{ + auto h_input = std::vector(3535); + std::iota(h_input.begin(), h_input.end(), 1); + std::sort(h_input.begin(), h_input.end(), std::greater{}); + std::fill_n(h_input.begin(), 4, 0); + std::fill(h_input.begin() + 3533, h_input.end(), 10000); + auto d_input = cudf::detail::make_device_uvector_sync( + h_input, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); + auto input = cudf::column_view(cudf::device_span(d_input)); + auto segments = cudf::test::fixed_width_column_wrapper({0, 4, 3533, 3535}); + // full sort should match handcrafted input data here + auto expected = cudf::sort(cudf::table_view({input})); + + auto input_view = cudf::table_view({input}); + auto result = cudf::segmented_sort_by_key(input_view, input_view, segments); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view().column(0), expected->view().column(0)); + result = cudf::stable_segmented_sort_by_key(input_view, input_view, segments); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view().column(0), expected->view().column(0)); +} diff --git a/cpp/tests/streams/datetime_test.cpp b/cpp/tests/streams/datetime_test.cpp new file mode 100644 index 00000000000..82629156fa6 --- /dev/null +++ b/cpp/tests/streams/datetime_test.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +#include + +class DatetimeTest : public cudf::test::BaseFixture { + public: + cudf::test::fixed_width_column_wrapper timestamps{ + -23324234, // 1969-12-31 23:59:59.976675766 GMT + 23432424, // 1970-01-01 00:00:00.023432424 GMT + 987234623 // 1970-01-01 00:00:00.987234623 GMT + }; + cudf::test::fixed_width_column_wrapper months{{1, -1, 3}}; +}; + +TEST_F(DatetimeTest, ExtractYear) +{ + cudf::datetime::extract_year(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, ExtractMonth) +{ + cudf::datetime::extract_month(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, ExtractDay) +{ + cudf::datetime::extract_day(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, ExtractWeekday) +{ + cudf::datetime::extract_weekday(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, ExtractHour) +{ + cudf::datetime::extract_hour(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, ExtractMinute) +{ + cudf::datetime::extract_minute(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, ExtractSecond) +{ + cudf::datetime::extract_second(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, ExtractMillisecondFraction) +{ + cudf::datetime::extract_millisecond_fraction(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, ExtractMicrosecondFraction) +{ + cudf::datetime::extract_microsecond_fraction(timestamps, cudf::test::get_default_stream()); +} 
+ +TEST_F(DatetimeTest, ExtractNanosecondFraction) +{ + cudf::datetime::extract_nanosecond_fraction(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, LastDayOfMonth) +{ + cudf::datetime::last_day_of_month(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, DayOfYear) +{ + cudf::datetime::day_of_year(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, AddCalendricalMonths) +{ + cudf::datetime::add_calendrical_months(timestamps, months, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, AddCalendricalMonthsScalar) +{ + auto scalar = cudf::make_fixed_width_scalar(1, cudf::test::get_default_stream()); + + cudf::datetime::add_calendrical_months(timestamps, *scalar, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, IsLeapYear) +{ + cudf::datetime::is_leap_year(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, DaysInMonth) +{ + cudf::datetime::days_in_month(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, ExtractQuarter) +{ + cudf::datetime::extract_quarter(timestamps, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, CeilDatetimes) +{ + cudf::datetime::ceil_datetimes( + timestamps, cudf::datetime::rounding_frequency::HOUR, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, FloorDatetimes) +{ + cudf::datetime::floor_datetimes( + timestamps, cudf::datetime::rounding_frequency::HOUR, cudf::test::get_default_stream()); +} + +TEST_F(DatetimeTest, RoundDatetimes) +{ + cudf::datetime::round_datetimes( + timestamps, cudf::datetime::rounding_frequency::HOUR, cudf::test::get_default_stream()); +} diff --git a/cpp/tests/streams/interop_test.cpp b/cpp/tests/streams/interop_test.cpp deleted file mode 100644 index 9ba862585d0..00000000000 --- a/cpp/tests/streams/interop_test.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// These interop functions are deprecated. We keep the code in this -// test and will migrate the tests to export via the arrow C data -// interface with to_arrow_host which arrow can consume. For now, the -// test is commented out. - -#if 0 - -#include -#include -#include - -#include -#include -#include -#include - -struct ArrowTest : public cudf::test::BaseFixture {}; - -TEST_F(ArrowTest, ToArrow) -{ - int32_t const value{42}; - auto col = cudf::test::fixed_width_column_wrapper{{value}}; - cudf::table_view tbl{{col}}; - - std::vector metadata{{""}}; - cudf::to_arrow(tbl, metadata, cudf::test::get_default_stream()); -} - -TEST_F(ArrowTest, FromArrow) -{ - std::vector host_values = {1, 2, 3, 5, 6, 7, 8}; - std::vector host_validity = {true, true, true, false, true, true, true}; - - arrow::Int64Builder builder; - auto status = builder.AppendValues(host_values, host_validity); - auto maybe_array = builder.Finish(); - auto array = *maybe_array; - - auto field = arrow::field("", arrow::int32()); - auto schema = arrow::schema({field}); - auto table = arrow::Table::Make(schema, {array}); - cudf::from_arrow(*table, cudf::test::get_default_stream()); -} - -TEST_F(ArrowTest, ToArrowScalar) -{ - int32_t const value{42}; - auto cudf_scalar = - cudf::make_fixed_width_scalar(value, cudf::test::get_default_stream()); - - cudf::column_metadata metadata{""}; - cudf::to_arrow(*cudf_scalar, metadata, 
cudf::test::get_default_stream()); -} - -TEST_F(ArrowTest, FromArrowScalar) -{ - int32_t const value{42}; - auto arrow_scalar = arrow::MakeScalar(value); - cudf::from_arrow(*arrow_scalar, cudf::test::get_default_stream()); -} - -#endif diff --git a/cpp/tests/streams/join_test.cpp b/cpp/tests/streams/join_test.cpp new file mode 100644 index 00000000000..2811bb676fa --- /dev/null +++ b/cpp/tests/streams/join_test.cpp @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +class JoinTest : public cudf::test::BaseFixture { + static inline cudf::table make_table() + { + cudf::test::fixed_width_column_wrapper col0{{3, 1, 2, 0, 3}}; + cudf::test::strings_column_wrapper col1{{"s0", "s1", "s2", "s4", "s1"}}; + cudf::test::fixed_width_column_wrapper col2{{0, 1, 2, 4, 1}}; + + std::vector> columns; + columns.push_back(col0.release()); + columns.push_back(col1.release()); + columns.push_back(col2.release()); + + return cudf::table{std::move(columns)}; + } + + public: + cudf::table table0{make_table()}; + cudf::table table1{make_table()}; + cudf::table conditional0{make_table()}; + cudf::table conditional1{make_table()}; + cudf::ast::column_reference col_ref_left_0{0}; + cudf::ast::column_reference col_ref_right_0{0, cudf::ast::table_reference::RIGHT}; + cudf::ast::operation left_zero_eq_right_zero{ + cudf::ast::ast_operator::EQUAL, col_ref_left_0, col_ref_right_0}; +}; + +TEST_F(JoinTest, InnerJoin) +{ + cudf::inner_join(table0, table1, cudf::null_equality::EQUAL, cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, LeftJoin) +{ + cudf::left_join(table0, table1, cudf::null_equality::EQUAL, cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, FullJoin) +{ + cudf::full_join(table0, table1, cudf::null_equality::EQUAL, cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, LeftSemiJoin) +{ + cudf::left_semi_join( + table0, table1, cudf::null_equality::EQUAL, cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, LeftAntiJoin) +{ + cudf::left_anti_join( + table0, table1, cudf::null_equality::EQUAL, cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, CrossJoin) { cudf::cross_join(table0, table1, cudf::test::get_default_stream()); } + +TEST_F(JoinTest, ConditionalInnerJoin) +{ + cudf::conditional_inner_join( + table0, table1, left_zero_eq_right_zero, std::nullopt, 
cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, ConditionalLeftJoin) +{ + cudf::conditional_left_join( + table0, table1, left_zero_eq_right_zero, std::nullopt, cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, ConditionalFullJoin) +{ + cudf::conditional_full_join( + table0, table1, left_zero_eq_right_zero, cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, ConditionalLeftSemiJoin) +{ + cudf::conditional_left_semi_join( + table0, table1, left_zero_eq_right_zero, std::nullopt, cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, ConditionalLeftAntiJoin) +{ + cudf::conditional_left_anti_join( + table0, table1, left_zero_eq_right_zero, std::nullopt, cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, MixedInnerJoin) +{ + cudf::mixed_inner_join(table0, + table1, + conditional0, + conditional1, + left_zero_eq_right_zero, + cudf::null_equality::EQUAL, + std::nullopt, + cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, MixedLeftJoin) +{ + cudf::mixed_left_join(table0, + table1, + conditional0, + conditional1, + left_zero_eq_right_zero, + cudf::null_equality::EQUAL, + std::nullopt, + cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, MixedFullJoin) +{ + cudf::mixed_full_join(table0, + table1, + conditional0, + conditional1, + left_zero_eq_right_zero, + cudf::null_equality::EQUAL, + std::nullopt, + cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, MixedLeftSemiJoin) +{ + cudf::mixed_left_semi_join(table0, + table1, + conditional0, + conditional1, + left_zero_eq_right_zero, + cudf::null_equality::EQUAL, + cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, MixedLeftAntiJoin) +{ + cudf::mixed_left_anti_join(table0, + table1, + conditional0, + conditional1, + left_zero_eq_right_zero, + cudf::null_equality::EQUAL, + cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, MixedInnerJoinSize) +{ + cudf::mixed_inner_join_size(table0, + table1, + conditional0, + conditional1, + left_zero_eq_right_zero, + 
cudf::null_equality::EQUAL, + cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, MixedLeftJoinSize) +{ + cudf::mixed_left_join_size(table0, + table1, + conditional0, + conditional1, + left_zero_eq_right_zero, + cudf::null_equality::EQUAL, + cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, ConditionalInnerJoinSize) +{ + cudf::conditional_inner_join_size( + table0, table1, left_zero_eq_right_zero, cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, ConditionalLeftJoinSize) +{ + cudf::conditional_left_join_size( + table0, table1, left_zero_eq_right_zero, cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, ConditionalLeftSemiJoinSize) +{ + cudf::conditional_left_semi_join_size( + table0, table1, left_zero_eq_right_zero, cudf::test::get_default_stream()); +} + +TEST_F(JoinTest, ConditionalLeftAntiJoinSize) +{ + cudf::conditional_left_anti_join_size( + table0, table1, left_zero_eq_right_zero, cudf::test::get_default_stream()); +} diff --git a/cpp/tests/streams/reduction_test.cpp b/cpp/tests/streams/reduction_test.cpp index e6438ac2834..b4f013fc960 100644 --- a/cpp/tests/streams/reduction_test.cpp +++ b/cpp/tests/streams/reduction_test.cpp @@ -23,6 +23,7 @@ #include #include #include +#include class ReductionTest : public cudf::test::BaseFixture {}; @@ -53,7 +54,7 @@ TEST_F(ReductionTest, SegmentedReductionSum) {true, true, true, true, false, true, true, false, false, false}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = cudf::detail::make_device_uvector_async( - offsets, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::test::get_default_stream(), cudf::get_current_device_resource_ref()); auto res = cudf::segmented_reduce(input, @@ -71,7 +72,7 @@ TEST_F(ReductionTest, SegmentedReductionSumScalarInit) {true, true, true, true, false, true, true, false, false, false}}; auto const offsets = std::vector{0, 3, 6, 7, 8, 10, 10}; auto const d_offsets = 
cudf::detail::make_device_uvector_async( - offsets, cudf::test::get_default_stream(), rmm::mr::get_current_device_resource()); + offsets, cudf::test::get_default_stream(), cudf::get_current_device_resource_ref()); auto const init_scalar = cudf::make_fixed_width_scalar(3, cudf::test::get_default_stream()); auto res = cudf::segmented_reduce(input, diff --git a/cpp/tests/streams/reshape_test.cpp b/cpp/tests/streams/reshape_test.cpp new file mode 100644 index 00000000000..d7c5da91bca --- /dev/null +++ b/cpp/tests/streams/reshape_test.cpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include + +class ReshapeTest : public cudf::test::BaseFixture {}; + +TEST_F(ReshapeTest, InterleaveColumns) +{ + auto a = cudf::test::fixed_width_column_wrapper({0, 3, 6}); + auto b = cudf::test::fixed_width_column_wrapper({1, 4, 7}); + auto c = cudf::test::fixed_width_column_wrapper({2, 5, 8}); + cudf::table_view in(std::vector{a, b, c}); + cudf::interleave_columns(in, cudf::test::get_default_stream()); +} + +TEST_F(ReshapeTest, Tile) +{ + auto a = cudf::test::fixed_width_column_wrapper({-1, 0, 1}); + cudf::table_view in(std::vector{a}); + cudf::tile(in, 2, cudf::test::get_default_stream()); +} + +TEST_F(ReshapeTest, ByteCast) +{ + auto a = cudf::test::fixed_width_column_wrapper({0, 100, -100, 1000, 1000}); + cudf::byte_cast(a, cudf::flip_endianness::YES, cudf::test::get_default_stream()); + cudf::byte_cast(a, cudf::flip_endianness::NO, cudf::test::get_default_stream()); +} diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp index 56443870602..443f4548b2c 100644 --- a/cpp/tests/streams/stream_compaction_test.cpp +++ b/cpp/tests/streams/stream_compaction_test.cpp @@ -41,6 +41,7 @@ auto constexpr NULL_UNEQUAL = cudf::null_equality::UNEQUAL; auto constexpr NAN_EQUAL = cudf::nan_equality::ALL_EQUAL; auto constexpr NAN_UNEQUAL = cudf::nan_equality::UNEQUAL; +using int16s_col = cudf::test::fixed_width_column_wrapper; using int32s_col = cudf::test::fixed_width_column_wrapper; using floats_col = cudf::test::fixed_width_column_wrapper; @@ -51,50 +52,9 @@ using cudf::test::iterators::no_nulls; using cudf::test::iterators::null_at; using cudf::test::iterators::nulls_at; -struct StableDistinctKeepAny : public cudf::test::BaseFixture {}; +struct StreamCompactionTest : public cudf::test::BaseFixture {}; -struct StableDistinctKeepFirstLastNone : public cudf::test::BaseFixture {}; - -TEST_F(StableDistinctKeepAny, NoNullsTableWithNaNs) -{ - // Column(s) used to test KEEP_ANY 
needs to have same rows in contiguous - // groups for equivalent keys because KEEP_ANY is nondeterministic. - auto const col1 = int32s_col{6, 6, 6, 1, 1, 1, 3, 5, 8, 5}; - auto const col2 = floats_col{6, 6, 6, 1, 1, 1, 3, 4, 9, 4}; - auto const keys1 = int32s_col{20, 20, 20, 15, 15, 15, 20, 19, 21, 9}; - auto const keys2 = floats_col{19., 19., 19., NaN, NaN, NaN, 20., 20., 9., 21.}; - - auto const input = cudf::table_view{{col1, col2, keys1, keys2}}; - auto const key_idx = std::vector{2, 3}; - - // NaNs are unequal. - { - auto const exp_col1 = int32s_col{6, 1, 1, 1, 3, 5, 8, 5}; - auto const exp_col2 = floats_col{6, 1, 1, 1, 3, 4, 9, 4}; - auto const exp_keys1 = int32s_col{20, 15, 15, 15, 20, 19, 21, 9}; - auto const exp_keys2 = floats_col{19., NaN, NaN, NaN, 20., 20., 9., 21.}; - auto const expected = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}}; - - auto const result = cudf::stable_distinct( - input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); - } - - // NaNs are equal. 
- { - auto const exp_col1 = int32s_col{6, 1, 3, 5, 8, 5}; - auto const exp_col2 = floats_col{6, 1, 3, 4, 9, 4}; - auto const exp_keys1 = int32s_col{20, 15, 20, 19, 21, 9}; - auto const exp_keys2 = floats_col{19., NaN, 20., 20., 9., 21.}; - auto const expected = cudf::table_view{{exp_col1, exp_col2, exp_keys1, exp_keys2}}; - - auto const result = cudf::stable_distinct( - input, key_idx, KEEP_ANY, NULL_EQUAL, NAN_EQUAL, cudf::test::get_default_stream()); - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); - } -} - -TEST_F(StableDistinctKeepAny, InputWithNullsAndNaNs) +TEST_F(StreamCompactionTest, StableDistinctKeepAny) { auto constexpr null{0.0}; // shadow the global `null` variable of type int @@ -150,7 +110,7 @@ TEST_F(StableDistinctKeepAny, InputWithNullsAndNaNs) } } -TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsEqual) +TEST_F(StreamCompactionTest, StableDistinctKeepFirstLastNone) { // Column(s) used to test needs to have different rows for the same keys. auto const col = int32s_col{0, 1, 2, 3, 4, 5, 6}; @@ -192,44 +152,313 @@ TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsEqual) } } -TEST_F(StableDistinctKeepFirstLastNone, InputWithNaNsUnequal) +TEST_F(StreamCompactionTest, DropNaNs) { - // Column(s) used to test needs to have different rows for the same keys. 
- auto const col = int32s_col{0, 1, 2, 3, 4, 5, 6, 7}; - auto const keys = floats_col{20., NaN, NaN, 19., 21., 19., 22., 20.}; - auto const input = cudf::table_view{{col, keys}}; - auto const key_idx = std::vector{1}; + auto const col1 = floats_col{{1., 2., NaN, NaN, 5., 6.}, nulls_at({2, 5})}; + auto const col2 = int32s_col{{10, 40, 70, 5, 2, 10}, nulls_at({2, 5})}; + auto const col3 = floats_col{{NaN, 40., 70., NaN, 2., 10.}, nulls_at({2, 5})}; + cudf::table_view input{{col1, col2, col3}}; + + std::vector keys{0, 2}; - // KEEP_FIRST { - auto const exp_col = int32s_col{0, 1, 2, 3, 4, 6}; - auto const exp_keys = floats_col{20., NaN, NaN, 19., 21., 22.}; - auto const expected = cudf::table_view{{exp_col, exp_keys}}; + // With keep_threshold + auto const col1_expected = floats_col{{1., 2., 3., 5., 6.}, nulls_at({2, 4})}; + auto const col2_expected = int32s_col{{10, 40, 70, 2, 10}, nulls_at({2, 4})}; + auto const col3_expected = floats_col{{NaN, 40., 70., 2., 10.}, nulls_at({2, 4})}; + cudf::table_view expected{{col1_expected, col2_expected, col3_expected}}; + + auto result = cudf::drop_nans(input, keys, keys.size() - 1, cudf::test::get_default_stream()); - auto const result = cudf::stable_distinct( - input, key_idx, KEEP_FIRST, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); } - // KEEP_LAST { - auto const exp_col = int32s_col{1, 2, 4, 5, 6, 7}; - auto const exp_keys = floats_col{NaN, NaN, 21., 19., 22., 20.}; - auto const expected = cudf::table_view{{exp_col, exp_keys}}; + // Without keep_threshold + auto const col1_expected = floats_col{{2., 3., 5., 6.}, nulls_at({1, 3})}; + auto const col2_expected = int32s_col{{40, 70, 2, 10}, nulls_at({1, 3})}; + auto const col3_expected = floats_col{{40., 70., 2., 10.}, nulls_at({1, 3})}; + cudf::table_view expected{{col1_expected, col2_expected, col3_expected}}; + + auto result = cudf::drop_nans(input, keys, cudf::test::get_default_stream()); - auto const result = 
cudf::stable_distinct( - input, key_idx, KEEP_LAST, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); } +} + +TEST_F(StreamCompactionTest, DropNulls) +{ + auto const col1 = int16s_col{{1, 0, 1, 0, 1, 0}, nulls_at({2, 5})}; + auto const col2 = int32s_col{{10, 40, 70, 5, 2, 10}, nulls_at({2})}; + auto const col3 = floats_col{{10., 40., 70., 5., 2., 10.}, no_nulls()}; + cudf::table_view input{{col1, col2, col3}}; + std::vector keys{0, 1, 2}; - // KEEP_NONE { - auto const exp_col = int32s_col{1, 2, 4, 6}; - auto const exp_keys = floats_col{NaN, NaN, 21., 22.}; - auto const expected = cudf::table_view{{exp_col, exp_keys}}; + // With keep_threshold + auto const col1_expected = int16s_col{{1, 0, 0, 1, 0}, null_at(4)}; + auto const col2_expected = int32s_col{{10, 40, 5, 2, 10}, no_nulls()}; + auto const col3_expected = floats_col{{10., 40., 5., 2., 10.}, no_nulls()}; + cudf::table_view expected{{col1_expected, col2_expected, col3_expected}}; + + auto result = cudf::drop_nulls(input, keys, keys.size() - 1, cudf::test::get_default_stream()); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); + } + + { + // Without keep_threshold + auto const col1_expected = int16s_col{{1, 0, 0, 1}, no_nulls()}; + auto const col2_expected = int32s_col{{10, 40, 5, 2}, no_nulls()}; + auto const col3_expected = floats_col{{10., 40., 5., 2.}, no_nulls()}; + cudf::table_view expected{{col1_expected, col2_expected, col3_expected}}; + + auto result = cudf::drop_nulls(input, keys, cudf::test::get_default_stream()); - auto const result = cudf::stable_distinct( - input, key_idx, KEEP_NONE, NULL_UNEQUAL, NAN_UNEQUAL, cudf::test::get_default_stream()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); } } + +TEST_F(StreamCompactionTest, Unique) +{ + auto const col1 = int32s_col{5, 4, 3, 5, 8, 5}; + auto const col2 = floats_col{4., 5., 3., 4., 9., 4.}; + auto const col1_key = int32s_col{20, 20, 20, 19, 21, 9}; + auto const col2_key = 
int32s_col{19, 19, 20, 20, 9, 21}; + + cudf::table_view input{{col1, col2, col1_key, col2_key}}; + std::vector keys = {2, 3}; + + { + // KEEP_FIRST + auto const exp_col1_first = int32s_col{5, 3, 5, 8, 5}; + auto const exp_col2_first = floats_col{4., 3., 4., 9., 4.}; + auto const exp_col1_key_first = int32s_col{20, 20, 19, 21, 9}; + auto const exp_col2_key_first = int32s_col{19, 20, 20, 9, 21}; + cudf::table_view expected_first{ + {exp_col1_first, exp_col2_first, exp_col1_key_first, exp_col2_key_first}}; + + auto const result = cudf::unique(input, + keys, + cudf::duplicate_keep_option::KEEP_FIRST, + cudf::null_equality::EQUAL, + cudf::test::get_default_stream()); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_first, *result); + } + + { + // KEEP_LAST + auto const exp_col1_last = int32s_col{4, 3, 5, 8, 5}; + auto const exp_col2_last = floats_col{5., 3., 4., 9., 4.}; + auto const exp_col1_key_last = int32s_col{20, 20, 19, 21, 9}; + auto const exp_col2_key_last = int32s_col{19, 20, 20, 9, 21}; + cudf::table_view expected_last{ + {exp_col1_last, exp_col2_last, exp_col1_key_last, exp_col2_key_last}}; + + auto const result = cudf::unique(input, + keys, + cudf::duplicate_keep_option::KEEP_LAST, + cudf::null_equality::EQUAL, + cudf::test::get_default_stream()); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_last, *result); + } + + { + // KEEP_NONE + auto const exp_col1_unique = int32s_col{3, 5, 8, 5}; + auto const exp_col2_unique = floats_col{3., 4., 9., 4.}; + auto const exp_col1_key_unique = int32s_col{20, 19, 21, 9}; + auto const exp_col2_key_unique = int32s_col{20, 20, 9, 21}; + cudf::table_view expected_unique{ + {exp_col1_unique, exp_col2_unique, exp_col1_key_unique, exp_col2_key_unique}}; + + auto const result = cudf::unique(input, + keys, + cudf::duplicate_keep_option::KEEP_NONE, + cudf::null_equality::EQUAL, + cudf::test::get_default_stream()); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_unique, *result); + } +} + +TEST_F(StreamCompactionTest, Distinct) +{ + // Column(s) 
used to test needs to have different rows for the same keys. + auto const col1 = int32s_col{0, 1, 2, 3, 4, 5, 6}; + auto const col2 = floats_col{10, 11, 12, 13, 14, 15, 16}; + auto const keys1 = int32s_col{20, 20, 20, 20, 19, 21, 9}; + auto const keys2 = int32s_col{19, 19, 19, 20, 20, 9, 21}; + + auto const input = cudf::table_view{{col1, col2, keys1, keys2}}; + auto const key_idx = std::vector{2, 3}; + + // KEEP_FIRST + { + auto const exp_col1_sort = int32s_col{6, 4, 0, 3, 5}; + auto const exp_col2_sort = floats_col{16, 14, 10, 13, 15}; + auto const exp_keys1_sort = int32s_col{9, 19, 20, 20, 21}; + auto const exp_keys2_sort = int32s_col{21, 20, 19, 20, 9}; + auto const expected_sort = + cudf::table_view{{exp_col1_sort, exp_col2_sort, exp_keys1_sort, exp_keys2_sort}}; + + auto const result = cudf::distinct(input, + key_idx, + cudf::duplicate_keep_option::KEEP_FIRST, + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL, + cudf::test::get_default_stream()); + auto const result_sort = + cudf::sort_by_key(*result, result->select(key_idx), {}, {}, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort, *result_sort); + } + + // KEEP_LAST + { + auto const exp_col1_sort = int32s_col{6, 4, 2, 3, 5}; + auto const exp_col2_sort = floats_col{16, 14, 12, 13, 15}; + auto const exp_keys1_sort = int32s_col{9, 19, 20, 20, 21}; + auto const exp_keys2_sort = int32s_col{21, 20, 19, 20, 9}; + auto const expected_sort = + cudf::table_view{{exp_col1_sort, exp_col2_sort, exp_keys1_sort, exp_keys2_sort}}; + + auto const result = cudf::distinct(input, + key_idx, + cudf::duplicate_keep_option::KEEP_LAST, + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL, + cudf::test::get_default_stream()); + auto const result_sort = + cudf::sort_by_key(*result, result->select(key_idx), {}, {}, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort, *result_sort); + } + + // KEEP_NONE + { + auto const exp_col1_sort = int32s_col{6, 4, 
3, 5}; + auto const exp_col2_sort = floats_col{16, 14, 13, 15}; + auto const exp_keys1_sort = int32s_col{9, 19, 20, 21}; + auto const exp_keys2_sort = int32s_col{21, 20, 20, 9}; + auto const expected_sort = + cudf::table_view{{exp_col1_sort, exp_col2_sort, exp_keys1_sort, exp_keys2_sort}}; + + auto const result = cudf::distinct(input, + key_idx, + cudf::duplicate_keep_option::KEEP_NONE, + cudf::null_equality::EQUAL, + cudf::nan_equality::ALL_EQUAL, + cudf::test::get_default_stream()); + auto const result_sort = + cudf::sort_by_key(*result, result->select(key_idx), {}, {}, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort, *result_sort); + } +} + +TEST_F(StreamCompactionTest, ApplyBooleanMask) +{ + auto const col = int32s_col{ + 9668, 9590, 9526, 9205, 9434, 9347, 9160, 9569, 9143, 9807, 9606, 9446, 9279, 9822, 9691}; + cudf::test::fixed_width_column_wrapper mask({false, + false, + true, + false, + false, + true, + false, + true, + false, + true, + false, + false, + true, + false, + true}); + cudf::table_view input({col}); + auto const col_expected = int32s_col{9526, 9347, 9569, 9807, 9279, 9691}; + cudf::table_view expected({col_expected}); + auto const result = cudf::apply_boolean_mask(input, mask, cudf::test::get_default_stream()); + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, *result); +} + +TEST_F(StreamCompactionTest, UniqueCountColumn) +{ + std::vector const input = {1, 3, 3, 4, 31, 1, 8, 2, 0, 4, 1, + 4, 10, 40, 31, 42, 0, 42, 8, 5, 4}; + + cudf::test::fixed_width_column_wrapper input_col(input.begin(), input.end()); + std::vector input_data(input.begin(), input.end()); + + auto const new_end = std::unique(input_data.begin(), input_data.end()); + auto const expected = std::distance(input_data.begin(), new_end); + EXPECT_EQ( + expected, + cudf::unique_count( + input_col, null_policy::INCLUDE, nan_policy::NAN_IS_VALID, cudf::test::get_default_stream())); +} + +TEST_F(StreamCompactionTest, UniqueCountTable) +{ + std::vector const 
input1 = {1, 3, 3, 3, 4, 31, 1, 8, 2, 0, 4, + 1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4}; + std::vector const input2 = {3, 3, 3, 4, 31, 1, 8, 5, 0, 4, 1, + 4, 10, 40, 31, 42, 0, 42, 8, 5, 4, 1}; + + std::vector> pair_input; + std::transform(input1.begin(), + input1.end(), + input2.begin(), + std::back_inserter(pair_input), + [](int32_t a, int32_t b) { return std::pair(a, b); }); + + cudf::test::fixed_width_column_wrapper input_col1(input1.begin(), input1.end()); + cudf::test::fixed_width_column_wrapper input_col2(input2.begin(), input2.end()); + cudf::table_view input_table({input_col1, input_col2}); + + auto const new_end = std::unique(pair_input.begin(), pair_input.end()); + auto const result = std::distance(pair_input.begin(), new_end); + EXPECT_EQ( + result, + cudf::unique_count(input_table, null_equality::EQUAL, cudf::test::get_default_stream())); +} + +TEST_F(StreamCompactionTest, DistinctCountColumn) +{ + std::vector const input = {1, 3, 3, 4, 31, 1, 8, 2, 0, 4, 1, + 4, 10, 40, 31, 42, 0, 42, 8, 5, 4}; + + cudf::test::fixed_width_column_wrapper input_col(input.begin(), input.end()); + + auto const expected = + static_cast(std::set(input.begin(), input.end()).size()); + EXPECT_EQ( + expected, + cudf::distinct_count( + input_col, null_policy::INCLUDE, nan_policy::NAN_IS_VALID, cudf::test::get_default_stream())); +} + +TEST_F(StreamCompactionTest, DistinctCountTable) +{ + std::vector const input1 = {1, 3, 3, 3, 4, 31, 1, 8, 2, 0, 4, + 1, 4, 10, 40, 31, 42, 0, 42, 8, 5, 4}; + std::vector const input2 = {3, 3, 3, 4, 31, 1, 8, 5, 0, 4, 1, + 4, 10, 40, 31, 42, 0, 42, 8, 5, 4, 1}; + + std::vector> pair_input; + std::transform(input1.begin(), + input1.end(), + input2.begin(), + std::back_inserter(pair_input), + [](int32_t a, int32_t b) { return std::pair(a, b); }); + + cudf::test::fixed_width_column_wrapper input_col1(input1.begin(), input1.end()); + cudf::test::fixed_width_column_wrapper input_col2(input2.begin(), input2.end()); + cudf::table_view 
input_table({input_col1, input_col2}); + + auto const expected = static_cast( + std::set>(pair_input.begin(), pair_input.end()).size()); + EXPECT_EQ( + expected, + cudf::distinct_count(input_table, null_equality::EQUAL, cudf::test::get_default_stream())); +} diff --git a/cpp/tests/streams/transform_test.cpp b/cpp/tests/streams/transform_test.cpp new file mode 100644 index 00000000000..9187672221c --- /dev/null +++ b/cpp/tests/streams/transform_test.cpp @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +class TransformTest : public cudf::test::BaseFixture {}; + +template +void test_udf(char const udf[], Data data_init, cudf::size_type size, bool is_ptx) +{ + auto all_valid = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + auto data_iter = cudf::detail::make_counting_transform_iterator(0, data_init); + cudf::test::fixed_width_column_wrapper in( + data_iter, data_iter + size, all_valid); + cudf::transform( + in, udf, cudf::data_type(cudf::type_to_id()), is_ptx, cudf::test::get_default_stream()); +} + +TEST_F(TransformTest, Transform) +{ + char const* cuda = + R"***( +__device__ inline void fdsf ( + float* C, + float a +) +{ + *C = a*a*a*a; +} +)***"; + + char const* ptx = + R"***( +// +// Generated by NVIDIA NVVM Compiler +// +// Compiler Build ID: CL-24817639 +// Cuda compilation tools, release 10.0, V10.0.130 +// Based on LLVM 3.4svn +// + +.version 6.3 +.target sm_70 +.address_size 64 + + // .globl _ZN8__main__7add$241Ef +.common .global .align 8 .u64 _ZN08NumbaEnv8__main__7add$241Ef; +.common .global .align 8 .u64 _ZN08NumbaEnv5numba7targets7numbers14int_power_impl12$3clocals$3e13int_power$242Efx; + +.visible .func (.param .b32 func_retval0) _ZN8__main__7add$241Ef( + .param .b64 _ZN8__main__7add$241Ef_param_0, + .param .b32 _ZN8__main__7add$241Ef_param_1 +) +{ + .reg .f32 %f<4>; + .reg .b32 %r<2>; + .reg .b64 %rd<2>; + + + ld.param.u64 %rd1, [_ZN8__main__7add$241Ef_param_0]; + ld.param.f32 %f1, [_ZN8__main__7add$241Ef_param_1]; + mul.f32 %f2, %f1, %f1; + mul.f32 %f3, %f2, %f2; + st.f32 [%rd1], %f3; + mov.u32 %r1, 0; + st.param.b32 [func_retval0+0], %r1; + ret; +} +)***"; + + auto data_init = [](cudf::size_type row) { return row % 3; }; + test_udf(cuda, data_init, 500, false); + test_udf(ptx, data_init, 500, true); +} + +TEST_F(TransformTest, ComputeColumn) +{ + auto c_0 = 
cudf::test::fixed_width_column_wrapper{3, 20, 1, 50}; + auto c_1 = cudf::test::fixed_width_column_wrapper{10, 7, 20, 0}; + auto table = cudf::table_view{{c_0, c_1}}; + auto col_ref_0 = cudf::ast::column_reference(0); + auto col_ref_1 = cudf::ast::column_reference(1); + auto expression = cudf::ast::operation(cudf::ast::ast_operator::ADD, col_ref_0, col_ref_1); + cudf::compute_column(table, expression, cudf::test::get_default_stream()); +} + +TEST_F(TransformTest, BoolsToMask) +{ + std::vector input({1, 0, 1, 0, 1, 0, 1, 0}); + cudf::test::fixed_width_column_wrapper input_column(input.begin(), input.end()); + cudf::bools_to_mask(input_column, cudf::test::get_default_stream()); +} + +TEST_F(TransformTest, MaskToBools) +{ + cudf::mask_to_bools(nullptr, 0, 0, cudf::test::get_default_stream()); +} + +TEST_F(TransformTest, Encode) +{ + cudf::test::fixed_width_column_wrapper input{{1, 2, 3, 2, 3, 2, 1}}; + cudf::encode(cudf::table_view({input}), cudf::test::get_default_stream()); +} + +TEST_F(TransformTest, OneHotEncode) +{ + auto input = cudf::test::fixed_width_column_wrapper{8, 8, 8, 9, 9}; + auto category = cudf::test::fixed_width_column_wrapper{8, 9}; + cudf::one_hot_encode(input, category, cudf::test::get_default_stream()); +} + +TEST_F(TransformTest, NaNsToNulls) +{ + std::vector input = {1, 2, 3, 4, 5}; + std::vector mask = {true, true, true, true, false, false}; + auto input_column = + cudf::test::fixed_width_column_wrapper(input.begin(), input.end(), mask.begin()); + cudf::nans_to_nulls(input_column, cudf::test::get_default_stream()); +} + +TEST_F(TransformTest, RowBitCount) +{ + std::vector strings{"abc", "ï", "", "z", "bananas", "warp", "", "zing"}; + cudf::test::strings_column_wrapper col(strings.begin(), strings.end()); + cudf::row_bit_count(cudf::table_view({col}), cudf::test::get_default_stream()); +} + +TEST_F(TransformTest, SegmentedRowBitCount) +{ + // clang-format off + std::vector const strings { "daïs", "def", "", "z", "bananas", "warp", "", "zing" }; 
+ std::vector const valids { 1, 0, 0, 1, 0, 1, 1, 1 }; + // clang-format on + cudf::test::strings_column_wrapper const col(strings.begin(), strings.end(), valids.begin()); + auto const input = cudf::table_view({col}); + auto constexpr segment_length = 2; + cudf::segmented_row_bit_count(input, segment_length, cudf::test::get_default_stream()); +} diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp index 59423d5b927..acf850c7a66 100644 --- a/cpp/tests/strings/contains_tests.cpp +++ b/cpp/tests/strings/contains_tests.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "special_chars.h" + #include #include #include @@ -23,6 +25,7 @@ #include #include #include +#include #include #include @@ -298,10 +301,10 @@ TEST_F(StringsContainsTests, HexTest) {thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + count + 1}); auto d_chars = cudf::detail::make_device_uvector_sync( - ascii_chars, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + ascii_chars, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto d_offsets = std::make_unique( cudf::detail::make_device_uvector_sync( - offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()), + offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()), rmm::device_buffer{}, 0); auto input = cudf::make_strings_column(count, std::move(d_offsets), d_chars.release(), 0, {}); @@ -612,6 +615,63 @@ TEST_F(StringsContainsTests, MultiLine) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count); } +TEST_F(StringsContainsTests, SpecialNewLines) +{ + auto input = cudf::test::strings_column_wrapper({"zzé" LINE_SEPARATOR "qqq" NEXT_LINE "zzé", + "qqq\rzzé" LINE_SEPARATOR "lll", + "zzé", + "", + "zzé" PARAGRAPH_SEPARATOR, + "abc\nzzé" NEXT_LINE}); + auto view = cudf::strings_column_view(input); + + auto pattern = std::string("^zzé$"); + auto prog = + cudf::strings::regex_program::create(pattern, 
cudf::strings::regex_flags::EXT_NEWLINE); + auto ml_flags = static_cast(cudf::strings::regex_flags::EXT_NEWLINE | + cudf::strings::regex_flags::MULTILINE); + auto prog_ml = cudf::strings::regex_program::create(pattern, ml_flags); + + auto expected = cudf::test::fixed_width_column_wrapper({0, 0, 1, 0, 1, 0}); + auto results = cudf::strings::contains_re(view, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + expected = cudf::test::fixed_width_column_wrapper({1, 1, 1, 0, 1, 1}); + results = cudf::strings::contains_re(view, *prog_ml); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + + expected = cudf::test::fixed_width_column_wrapper({0, 0, 1, 0, 1, 0}); + results = cudf::strings::matches_re(view, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + expected = cudf::test::fixed_width_column_wrapper({1, 0, 1, 0, 1, 0}); + results = cudf::strings::matches_re(view, *prog_ml); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + + auto counts = cudf::test::fixed_width_column_wrapper({0, 0, 1, 0, 1, 0}); + results = cudf::strings::count_re(view, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, counts); + counts = cudf::test::fixed_width_column_wrapper({2, 1, 1, 0, 1, 1}); + results = cudf::strings::count_re(view, *prog_ml); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, counts); + + pattern = std::string("q.*l"); + prog = cudf::strings::regex_program::create(pattern); + expected = cudf::test::fixed_width_column_wrapper({0, 1, 0, 0, 0, 0}); + results = cudf::strings::contains_re(view, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + // inst ANY will stop matching on first 'newline' and so should not match anything here + prog = cudf::strings::regex_program::create(pattern, cudf::strings::regex_flags::EXT_NEWLINE); + expected = cudf::test::fixed_width_column_wrapper({0, 0, 0, 0, 0, 0}); + results = cudf::strings::contains_re(view, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + // including the DOTALL flag 
accepts the newline characters + auto dot_flags = static_cast(cudf::strings::regex_flags::EXT_NEWLINE | + cudf::strings::regex_flags::DOTALL); + prog = cudf::strings::regex_program::create(pattern, dot_flags); + expected = cudf::test::fixed_width_column_wrapper({0, 1, 0, 0, 0, 0}); + results = cudf::strings::contains_re(view, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); +} + TEST_F(StringsContainsTests, EndOfString) { auto input = cudf::test::strings_column_wrapper( diff --git a/cpp/tests/strings/extract_tests.cpp b/cpp/tests/strings/extract_tests.cpp index b26cbd5a549..1491da758d5 100644 --- a/cpp/tests/strings/extract_tests.cpp +++ b/cpp/tests/strings/extract_tests.cpp @@ -14,9 +14,12 @@ * limitations under the License. */ +#include "special_chars.h" + #include #include #include +#include #include #include @@ -200,6 +203,43 @@ TEST_F(StringsExtractTests, DotAll) CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected); } +TEST_F(StringsExtractTests, SpecialNewLines) +{ + auto input = cudf::test::strings_column_wrapper({"zzé" NEXT_LINE "qqq" LINE_SEPARATOR "zzé", + "qqq" LINE_SEPARATOR "zzé\rlll", + "zzé", + "", + "zzé" NEXT_LINE, + "abc" PARAGRAPH_SEPARATOR "zzé\n"}); + auto view = cudf::strings_column_view(input); + + auto prog = + cudf::strings::regex_program::create("(^zzé$)", cudf::strings::regex_flags::EXT_NEWLINE); + auto results = cudf::strings::extract(view, *prog); + auto expected = + cudf::test::strings_column_wrapper({"", "", "zzé", "", "zzé", ""}, {0, 0, 1, 0, 1, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view().column(0), expected); + + auto both_flags = static_cast( + cudf::strings::regex_flags::EXT_NEWLINE | cudf::strings::regex_flags::MULTILINE); + auto prog_ml = cudf::strings::regex_program::create("^(zzé)$", both_flags); + results = cudf::strings::extract(view, *prog_ml); + expected = + cudf::test::strings_column_wrapper({"zzé", "zzé", "zzé", "", "zzé", "zzé"}, {1, 1, 1, 0, 1, 1}); + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view().column(0), expected); + + prog = cudf::strings::regex_program::create("q(q.*l)l"); + expected = cudf::test::strings_column_wrapper({"", "qq" LINE_SEPARATOR "zzé\rll", "", "", "", ""}, + {0, 1, 0, 0, 0, 0}); + results = cudf::strings::extract(view, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view().column(0), expected); + // expect no matches here since the newline(s) interrupts the pattern + prog = cudf::strings::regex_program::create("q(q.*l)l", cudf::strings::regex_flags::EXT_NEWLINE); + expected = cudf::test::strings_column_wrapper({"", "", "", "", "", ""}, {0, 0, 0, 0, 0, 0}); + results = cudf::strings::extract(view, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view().column(0), expected); +} + TEST_F(StringsExtractTests, EmptyExtractTest) { std::vector h_strings{nullptr, "AAA", "AAA_A", "AAA_AAA_", "A__", ""}; diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu index 35d648f16e0..90054e41d36 100644 --- a/cpp/tests/strings/factories_test.cu +++ b/cpp/tests/strings/factories_test.cu @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -79,7 +80,7 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair) h_offsets[idx + 1] = offset; } auto d_strings = cudf::detail::make_device_uvector_sync( - strings, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + strings, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); CUDF_CUDA_TRY(cudaMemcpy(d_buffer.data(), h_buffer.data(), memsize, cudaMemcpyDefault)); auto column = cudf::make_strings_column(d_strings); EXPECT_EQ(column->type(), cudf::data_type{cudf::type_id::STRING}); @@ -140,14 +141,14 @@ TEST_F(StringsFactoriesTest, CreateColumnFromOffsets) std::vector h_nulls{h_null_mask}; auto d_buffer = cudf::detail::make_device_uvector_sync( - h_buffer, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + h_buffer, cudf::get_default_stream(), 
cudf::get_current_device_resource_ref()); auto d_offsets = std::make_unique( cudf::detail::make_device_uvector_sync( - h_offsets, cudf::get_default_stream(), rmm::mr::get_current_device_resource()), + h_offsets, cudf::get_default_stream(), cudf::get_current_device_resource_ref()), rmm::device_buffer{}, 0); auto d_nulls = cudf::detail::make_device_uvector_sync( - h_nulls, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + h_nulls, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto column = cudf::make_strings_column( count, std::move(d_offsets), d_buffer.release(), null_count, d_nulls.release()); EXPECT_EQ(column->type(), cudf::data_type{cudf::type_id::STRING}); @@ -191,7 +192,7 @@ TEST_F(StringsFactoriesTest, EmptyStringsColumn) auto d_chars = rmm::device_uvector(0, cudf::get_default_stream()); auto d_offsets = std::make_unique( cudf::detail::make_zeroed_device_uvector_sync( - 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()), + 1, cudf::get_default_stream(), cudf::get_current_device_resource_ref()), rmm::device_buffer{}, 0); rmm::device_uvector d_nulls{0, cudf::get_default_stream()}; diff --git a/cpp/tests/strings/findall_tests.cpp b/cpp/tests/strings/findall_tests.cpp index 4582dcb1e38..47606b9b3ed 100644 --- a/cpp/tests/strings/findall_tests.cpp +++ b/cpp/tests/strings/findall_tests.cpp @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#include "special_chars.h" + #include #include #include @@ -80,6 +82,32 @@ TEST_F(StringsFindallTests, DotAll) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } +TEST_F(StringsFindallTests, SpecialNewLines) +{ + auto input = cudf::test::strings_column_wrapper({"zzé" PARAGRAPH_SEPARATOR "qqq\nzzé", + "qqq\nzzé" PARAGRAPH_SEPARATOR "lll", + "zzé", + "", + "zzé\r", + "zzé" LINE_SEPARATOR "zzé" NEXT_LINE}); + auto view = cudf::strings_column_view(input); + + auto prog = + cudf::strings::regex_program::create("(^zzé$)", cudf::strings::regex_flags::EXT_NEWLINE); + auto results = cudf::strings::findall(view, *prog); + using LCW = cudf::test::lists_column_wrapper; + LCW expected({LCW{}, LCW{}, LCW{"zzé"}, LCW{}, LCW{"zzé"}, LCW{}}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + + auto both_flags = static_cast( + cudf::strings::regex_flags::EXT_NEWLINE | cudf::strings::regex_flags::MULTILINE); + auto prog_ml = cudf::strings::regex_program::create("^(zzé)$", both_flags); + results = cudf::strings::findall(view, *prog_ml); + LCW expected_ml( + {LCW{"zzé", "zzé"}, LCW{"zzé"}, LCW{"zzé"}, LCW{}, LCW{"zzé"}, LCW{"zzé", "zzé"}}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected_ml); +} + TEST_F(StringsFindallTests, MediumRegex) { // This results in 15 regex instructions and falls in the 'medium' range. 
diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp index 7a038fa6d75..ce5f68de3c9 100644 --- a/cpp/tests/strings/integers_tests.cpp +++ b/cpp/tests/strings/integers_tests.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -295,7 +296,7 @@ TYPED_TEST(StringsIntegerConvertTest, FromToInteger) h_integers.push_back(std::numeric_limits::min()); h_integers.push_back(std::numeric_limits::max()); auto const d_integers = cudf::detail::make_device_uvector_sync( - h_integers, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + h_integers, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto integers = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, (cudf::size_type)d_integers.size()); auto integers_view = integers->mutable_view(); diff --git a/cpp/tests/strings/ipv4_tests.cpp b/cpp/tests/strings/ipv4_tests.cpp index 3bfe0f9727e..ea3ac439e62 100644 --- a/cpp/tests/strings/ipv4_tests.cpp +++ b/cpp/tests/strings/ipv4_tests.cpp @@ -40,8 +40,8 @@ TEST_F(StringsConvertTest, IPv4ToIntegers) auto strings_view = cudf::strings_column_view(strings); auto results = cudf::strings::ipv4_to_integers(strings_view); - std::vector h_expected{0, 0, 0, 698875905, 2130706433, 700776449, 3232235521}; - cudf::test::fixed_width_column_wrapper expected( + std::vector h_expected{0, 0, 0, 698875905, 2130706433, 700776449, 3232235521}; + cudf::test::fixed_width_column_wrapper expected( h_expected.cbegin(), h_expected.cend(), thrust::make_transform_iterator(h_strings.begin(), @@ -59,8 +59,8 @@ TEST_F(StringsConvertTest, IntegersToIPv4) thrust::make_transform_iterator(h_strings.begin(), [](auto const str) { return str != nullptr; })); - std::vector h_column{3232235521, 167772161, 0, 0, 700055553, 700776449}; - cudf::test::fixed_width_column_wrapper column( + std::vector h_column{3232235521, 167772161, 0, 0, 700055553, 700776449}; + cudf::test::fixed_width_column_wrapper column( 
h_column.cbegin(), h_column.cend(), thrust::make_transform_iterator(h_strings.begin(), diff --git a/cpp/tests/strings/replace_regex_tests.cpp b/cpp/tests/strings/replace_regex_tests.cpp index 8c0482653fb..9847d8d6bb5 100644 --- a/cpp/tests/strings/replace_regex_tests.cpp +++ b/cpp/tests/strings/replace_regex_tests.cpp @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "special_chars.h" + #include #include #include @@ -245,6 +247,53 @@ TEST_F(StringsReplaceRegexTest, Multiline) CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, br_expected); } +TEST_F(StringsReplaceRegexTest, SpecialNewLines) +{ + auto input = cudf::test::strings_column_wrapper({"zzé" NEXT_LINE "qqq" NEXT_LINE "zzé", + "qqq" NEXT_LINE "zzé" NEXT_LINE "lll", + "zzé", + "", + "zzé" PARAGRAPH_SEPARATOR, + "abc\rzzé\r"}); + auto view = cudf::strings_column_view(input); + auto repl = cudf::string_scalar("_"); + auto pattern = std::string("^zzé$"); + auto prog = + cudf::strings::regex_program::create(pattern, cudf::strings::regex_flags::EXT_NEWLINE); + auto results = cudf::strings::replace_re(view, *prog, repl); + auto expected = cudf::test::strings_column_wrapper({"zzé" NEXT_LINE "qqq" NEXT_LINE "zzé", + "qqq" NEXT_LINE "zzé" NEXT_LINE "lll", + "_", + "", + "_" PARAGRAPH_SEPARATOR, + "abc\rzzé\r"}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + + auto both_flags = static_cast( + cudf::strings::regex_flags::EXT_NEWLINE | cudf::strings::regex_flags::MULTILINE); + auto prog_ml = cudf::strings::regex_program::create(pattern, both_flags); + results = cudf::strings::replace_re(view, *prog_ml, repl); + expected = cudf::test::strings_column_wrapper({"_" NEXT_LINE "qqq" NEXT_LINE "_", + "qqq" NEXT_LINE "_" NEXT_LINE "lll", + "_", + "", + "_" PARAGRAPH_SEPARATOR, + "abc\r_\r"}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); + + auto repl_template = std::string("[\\1]"); + pattern = std::string("(^zzé$)"); + prog = cudf::strings::regex_program::create(pattern, both_flags); + 
results = cudf::strings::replace_with_backrefs(view, *prog, repl_template); + expected = cudf::test::strings_column_wrapper({"[zzé]" NEXT_LINE "qqq" NEXT_LINE "[zzé]", + "qqq" NEXT_LINE "[zzé]" NEXT_LINE "lll", + "[zzé]", + "", + "[zzé]" PARAGRAPH_SEPARATOR, + "abc\r[zzé]\r"}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); +} + TEST_F(StringsReplaceRegexTest, ReplaceBackrefsRegexTest) { std::vector h_strings{"the quick brown fox jumps over the lazy dog", diff --git a/cpp/tests/strings/slice_tests.cpp b/cpp/tests/strings/slice_tests.cpp index 52e439bd93f..7f7fd9d521b 100644 --- a/cpp/tests/strings/slice_tests.cpp +++ b/cpp/tests/strings/slice_tests.cpp @@ -268,6 +268,25 @@ TEST_F(StringsSliceTest, MaxPositions) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); } +TEST_F(StringsSliceTest, MultiByteChars) +{ + auto input = cudf::test::strings_column_wrapper({ + // clang-format off + "quick brown fox jumped over the lazy brown dog; the fat cats jump in place without moving " + "the following code snippet demonstrates how to use search for values in an ordered range " + // this placement tests proper multi-byte chars handling ------vvvvv + "it returns the last position where value could be inserted without the ééééé ordering ", + "algorithms execution is parallelized as determined by an execution policy; this is a 12345" + "continuation of previous row to make sure string boundaries are honored 012345678901234567" + // v--- this one also + "01234567890é34567890012345678901234567890" + // clang-format on + }); + + auto results = cudf::strings::slice_strings(cudf::strings_column_view(input), 0); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, input); +} + TEST_F(StringsSliceTest, Error) { cudf::test::strings_column_wrapper strings{"this string intentionally left blank"}; diff --git a/cpp/src/interop/detail/arrow_allocator.hpp b/cpp/tests/strings/special_chars.h similarity index 54% rename from cpp/src/interop/detail/arrow_allocator.hpp rename to 
cpp/tests/strings/special_chars.h index 75c1baa0dca..0d630f6bb52 100644 --- a/cpp/src/interop/detail/arrow_allocator.hpp +++ b/cpp/tests/strings/special_chars.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,19 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - #pragma once -#include - -namespace cudf { -namespace detail { - -// unique_ptr because that is what AllocateBuffer returns -std::unique_ptr allocate_arrow_buffer(int64_t const size, arrow::MemoryPool* ar_mr); +namespace cudf::test { -// shared_ptr because that is what AllocateBitmap returns -std::shared_ptr allocate_arrow_bitmap(int64_t const size, arrow::MemoryPool* ar_mr); +// special new-line characters for use with regex_flags::EXT_NEWLINE +#define NEXT_LINE "\xC2\x85" +#define LINE_SEPARATOR "\xE2\x80\xA8" +#define PARAGRAPH_SEPARATOR "\xE2\x80\xA9" -} // namespace detail -} // namespace cudf +} // namespace cudf::test diff --git a/cpp/tests/strings/split_tests.cpp b/cpp/tests/strings/split_tests.cpp index 4c020cb4c29..7ece08b19f2 100644 --- a/cpp/tests/strings/split_tests.cpp +++ b/cpp/tests/strings/split_tests.cpp @@ -307,24 +307,46 @@ TEST_F(StringsSplitTest, SplitRecordWhitespaceWithMaxSplit) CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); } -TEST_F(StringsSplitTest, SplitRecordAllEmpty) +TEST_F(StringsSplitTest, SplitAllEmpty) { auto input = cudf::test::strings_column_wrapper({"", "", "", ""}); auto sv = cudf::strings_column_view(input); + auto empty = cudf::string_scalar(""); auto delimiter = cudf::string_scalar("s"); + + auto result = cudf::strings::split(sv, delimiter); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view().column(0), input); + result = cudf::strings::rsplit(sv, delimiter); + 
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view().column(0), input); + + // whitespace hits a special case where nothing matches returns an all-null column + auto expected = cudf::test::strings_column_wrapper({"", "", "", ""}, {0, 0, 0, 0}); + result = cudf::strings::split(sv, empty); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view().column(0), expected); + result = cudf::strings::rsplit(sv, empty); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view().column(0), expected); +} + +TEST_F(StringsSplitTest, SplitRecordAllEmpty) +{ + auto input = cudf::test::strings_column_wrapper({"", "", "", ""}); + auto sv = cudf::strings_column_view(input); auto empty = cudf::string_scalar(""); + auto delimiter = cudf::string_scalar("s"); using LCW = cudf::test::lists_column_wrapper; - LCW expected({LCW{}, LCW{}, LCW{}, LCW{}}); + LCW expected({LCW{""}, LCW{""}, LCW{""}, LCW{""}}); + LCW expected_empty({LCW{}, LCW{}, LCW{}, LCW{}}); + auto result = cudf::strings::split_record(sv, delimiter); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); result = cudf::strings::split_record(sv, empty); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected_empty); result = cudf::strings::rsplit_record(sv, delimiter); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); result = cudf::strings::rsplit_record(sv, empty); - CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected_empty); } TEST_F(StringsSplitTest, MultiByteDelimiters) @@ -575,6 +597,23 @@ TEST_F(StringsSplitTest, SplitRegexWordBoundary) } } +TEST_F(StringsSplitTest, SplitRegexAllEmpty) +{ + auto input = cudf::test::strings_column_wrapper({"", "", "", ""}); + auto sv = cudf::strings_column_view(input); + auto prog = cudf::strings::regex_program::create("[ _]"); + + auto result = cudf::strings::split_re(sv, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view().column(0), input); + result = 
cudf::strings::rsplit_re(sv, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view().column(0), input); + + auto rec_result = cudf::strings::split_record_re(sv, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view().column(0), input); + rec_result = cudf::strings::rsplit_record_re(sv, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view().column(0), input); +} + TEST_F(StringsSplitTest, RSplitRecord) { std::vector h_strings{ diff --git a/cpp/tests/structs/structs_column_tests.cpp b/cpp/tests/structs/structs_column_tests.cpp index df005dfa1dc..f0010fc1ed9 100644 --- a/cpp/tests/structs/structs_column_tests.cpp +++ b/cpp/tests/structs/structs_column_tests.cpp @@ -448,12 +448,12 @@ TYPED_TEST(TypedStructColumnWrapperTest, ListOfStructOfList) cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; }); auto [null_mask, null_count] = detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity + 5); - auto list_of_struct_of_list = cudf::make_lists_column( - 5, - std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), - std::move(struct_of_lists_col), - null_count, - std::move(null_mask)); + auto list_of_struct_of_list = + cudf::make_lists_column(5, + fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release(), + std::move(struct_of_lists_col), + null_count, + std::move(null_mask)); // Compare with expected values. 
@@ -468,12 +468,12 @@ TYPED_TEST(TypedStructColumnWrapperTest, ListOfStructOfList) std::tie(null_mask, null_count) = detail::make_null_mask(list_of_struct_of_list_validity, list_of_struct_of_list_validity + 5); - auto expected_level3_list = cudf::make_lists_column( - 5, - std::move(fixed_width_column_wrapper{0, 0, 2, 4, 4, 6}.release()), - std::move(expected_level2_struct), - null_count, - std::move(null_mask)); + auto expected_level3_list = + cudf::make_lists_column(5, + fixed_width_column_wrapper{0, 0, 2, 4, 4, 6}.release(), + std::move(expected_level2_struct), + null_count, + std::move(null_mask)); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*list_of_struct_of_list, *expected_level3_list); } @@ -498,12 +498,12 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfListOfStruct) cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 3; }); auto [null_mask, null_count] = detail::make_null_mask(list_validity, list_validity + 5); - auto lists_col = cudf::make_lists_column( - 5, - std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), - std::move(structs_col), - null_count, - std::move(null_mask)); + auto lists_col = + cudf::make_lists_column(5, + fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release(), + std::move(structs_col), + null_count, + std::move(null_mask)); std::vector> cols; cols.push_back(std::move(lists_col)); @@ -519,12 +519,12 @@ TYPED_TEST(TypedStructColumnWrapperTest, StructOfListOfStruct) std::tie(null_mask, null_count) = detail::make_null_mask(list_validity, list_validity + 5); - auto expected_lists_col = cudf::make_lists_column( - 5, - std::move(fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release()), - std::move(expected_structs_col), - null_count, - std::move(null_mask)); + auto expected_lists_col = + cudf::make_lists_column(5, + fixed_width_column_wrapper{0, 2, 4, 6, 8, 10}.release(), + std::move(expected_structs_col), + null_count, + std::move(null_mask)); // Test that the lists child column is as expected. 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected_lists_col, struct_of_list_of_struct->child(0)); diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp index e5ff700a242..c33eedf9bd9 100644 --- a/cpp/tests/structs/utilities_tests.cpp +++ b/cpp/tests/structs/utilities_tests.cpp @@ -30,6 +30,7 @@ #include #include #include +#include template using nums = cudf::test::fixed_width_column_wrapper; @@ -60,7 +61,7 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevel) {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_TABLES_EQUAL(table, flattened_table->flattened_columns()); } @@ -82,7 +83,7 @@ TYPED_TEST(TypedStructUtilitiesTest, NoStructs) {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_TABLES_EQUAL(table, flattened_table->flattened_columns()); } @@ -114,7 +115,7 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct) {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns()); } @@ -147,7 +148,7 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls) {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns()); } @@ -196,7 +197,7 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct) {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); 
CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns()); } @@ -246,7 +247,7 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel) {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns()); } @@ -297,7 +298,7 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel) {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns()); } @@ -348,7 +349,7 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels) {}, cudf::structs::detail::column_nullability::FORCE, cudf::get_default_stream(), - rmm::mr::get_current_device_resource()); + cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_TABLES_EQUAL(expected, flattened_table->flattened_columns()); } @@ -363,7 +364,7 @@ void test_non_struct_columns(cudf::column_view const& input) { // push_down_nulls() on non-struct columns should return the input column, unchanged. 
auto [superimposed, backing_data] = cudf::structs::detail::push_down_nulls( - input, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + input, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); CUDF_TEST_EXPECT_COLUMNS_EQUAL(input, superimposed); EXPECT_TRUE(backing_data.new_null_masks.empty()); @@ -427,7 +428,7 @@ TYPED_TEST(TypedSuperimposeTest, BasicStruct) make_lists_member(cudf::test::iterators::nulls_at({4, 5}))); auto [output, backing_data] = cudf::structs::detail::push_down_nulls( - structs_view, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + structs_view, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); // After push_down_nulls(), the struct nulls (i.e. at index-0) should have been pushed // down to the children. All members should have nulls at row-index 0. @@ -453,7 +454,7 @@ TYPED_TEST(TypedSuperimposeTest, NonNullableParentStruct) .release(); auto [output, backing_data] = cudf::structs::detail::push_down_nulls( - structs_input->view(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + structs_input->view(), cudf::get_default_stream(), cudf::get_current_device_resource_ref()); // After push_down_nulls(), none of the child structs should have changed, // because the parent had no nulls to begin with. @@ -487,8 +488,10 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_ChildNullable_ParentNonNullable) auto structs_of_structs = cudf::test::structs_column_wrapper{std::move(outer_struct_members)}.release(); - auto [output, backing_data] = cudf::structs::detail::push_down_nulls( - structs_of_structs->view(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto [output, backing_data] = + cudf::structs::detail::push_down_nulls(structs_of_structs->view(), + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); // After push_down_nulls(), outer-struct column should not have pushed nulls to child // structs. 
But the child struct column must push its nulls to its own children. @@ -530,8 +533,10 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_ChildNullable_ParentNullable) cudf::detail::set_null_mask( structs_of_structs_view.null_mask(), 1, 2, false, cudf::get_default_stream()); - auto [output, backing_data] = cudf::structs::detail::push_down_nulls( - structs_of_structs->view(), cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + auto [output, backing_data] = + cudf::structs::detail::push_down_nulls(structs_of_structs->view(), + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); // After push_down_nulls(), outer-struct column should not have pushed nulls to child // structs. But the child struct column must push its nulls to its own children. @@ -587,7 +592,7 @@ TYPED_TEST(TypedSuperimposeTest, Struct_Sliced) // lists_member: 00111 auto [output, backing_data] = cudf::structs::detail::push_down_nulls( - sliced_structs, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + sliced_structs, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); // After push_down_nulls(), the null masks should be: // STRUCT: 11110 @@ -640,7 +645,7 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_Sliced) // lists_member: 00110 auto [output, backing_data] = cudf::structs::detail::push_down_nulls( - sliced_structs, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + sliced_structs, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); // After push_down_nulls(), the null masks will be: // STRUCT: 11101 diff --git a/cpp/tests/table/table_view_tests.cu b/cpp/tests/table/table_view_tests.cu index 77b3c6c475c..a393c655fbb 100644 --- a/cpp/tests/table/table_view_tests.cu +++ b/cpp/tests/table/table_view_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -47,7 +48,7 @@ void row_comparison(cudf::table_view input1, auto device_table_1 = cudf::table_device_view::create(input1, stream); auto device_table_2 = cudf::table_device_view::create(input2, stream); auto d_column_order = cudf::detail::make_device_uvector_sync( - column_order, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + column_order, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto comparator = cudf::row_lexicographic_comparator( cudf::nullate::NO{}, *device_table_1, *device_table_2, d_column_order.data()); diff --git a/cpp/tests/text/minhash_tests.cpp b/cpp/tests/text/minhash_tests.cpp index 7575a3ba846..e23f3f6e7d8 100644 --- a/cpp/tests/text/minhash_tests.cpp +++ b/cpp/tests/text/minhash_tests.cpp @@ -139,6 +139,41 @@ TEST_F(MinHashTest, MultiSeedWithNullInputRow) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results64, expected64); } +TEST_F(MinHashTest, WordsMinHash) +{ + using LCWS = cudf::test::lists_column_wrapper; + auto validity = cudf::test::iterators::null_at(1); + + LCWS input( + {LCWS({"hello", "abcdéfgh"}), + LCWS{}, + LCWS({"rapids", "moré", "test", "text"}), + LCWS({"The", "quick", "brown", "fox", "jumpéd", "over", "the", "lazy", "brown", "dog"})}, + validity); + + auto view = cudf::lists_column_view(input); + + auto seeds = cudf::test::fixed_width_column_wrapper({1, 2}); + auto results = nvtext::word_minhash(view, cudf::column_view(seeds)); + using LCW32 = cudf::test::lists_column_wrapper; + LCW32 expected({LCW32{2069617641u, 1975382903u}, + LCW32{}, + LCW32{657297235u, 1010955999u}, + LCW32{644643885u, 310002789u}}, + validity); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected); + + auto seeds64 = cudf::test::fixed_width_column_wrapper({11, 22}); + auto results64 = nvtext::word_minhash64(view, 
cudf::column_view(seeds64)); + using LCW64 = cudf::test::lists_column_wrapper; + LCW64 expected64({LCW64{1940333969930105370ul, 272615362982418219ul}, + LCW64{}, + LCW64{5331949571924938590ul, 2088583894581919741ul}, + LCW64{3400468157617183341ul, 2398577492366130055ul}}, + validity); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results64, expected64); +} + TEST_F(MinHashTest, EmptyTest) { auto input = cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING}); diff --git a/cpp/tests/types/type_dispatcher_test.cu b/cpp/tests/types/type_dispatcher_test.cu index 21e56de4621..f18e9afc09c 100644 --- a/cpp/tests/types/type_dispatcher_test.cu +++ b/cpp/tests/types/type_dispatcher_test.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -70,7 +71,7 @@ CUDF_KERNEL void dispatch_test_kernel(cudf::type_id id, bool* d_result) TYPED_TEST(TypedDispatcherTest, DeviceDispatch) { auto result = cudf::detail::make_zeroed_device_uvector_sync( - 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + 1, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); dispatch_test_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>( cudf::type_to_id(), result.data()); CUDF_CUDA_TRY(cudaDeviceSynchronize()); @@ -131,7 +132,7 @@ CUDF_KERNEL void double_dispatch_test_kernel(cudf::type_id id1, cudf::type_id id TYPED_TEST(TypedDoubleDispatcherTest, DeviceDoubleDispatch) { auto result = cudf::detail::make_zeroed_device_uvector_sync( - 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + 1, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); double_dispatch_test_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>( cudf::type_to_id(), cudf::type_to_id(), result.data()); CUDF_CUDA_TRY(cudaDeviceSynchronize()); diff --git a/cpp/tests/utilities/tdigest_utilities.cu b/cpp/tests/utilities/tdigest_utilities.cu index ec3ea0d9a83..233a307cde4 100644 --- a/cpp/tests/utilities/tdigest_utilities.cu +++ 
b/cpp/tests/utilities/tdigest_utilities.cu @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -65,11 +66,11 @@ void tdigest_sample_compare(cudf::tdigest::tdigest_column_view const& tdv, } auto d_expected_src = cudf::detail::make_device_uvector_async( - h_expected_src, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + h_expected_src, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto d_expected_mean = cudf::detail::make_device_uvector_async( - h_expected_mean, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + h_expected_mean, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto d_expected_weight = cudf::detail::make_device_uvector_async( - h_expected_weight, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + h_expected_weight, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto iter = thrust::make_counting_iterator(0); thrust::for_each( diff --git a/cpp/tests/utilities_tests/batched_memset_tests.cu b/cpp/tests/utilities_tests/batched_memset_tests.cu new file mode 100644 index 00000000000..bed0f40d70e --- /dev/null +++ b/cpp/tests/utilities_tests/batched_memset_tests.cu @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include + +template +struct MultiBufferTestIntegral : public cudf::test::BaseFixture {}; + +TEST(MultiBufferTestIntegral, BasicTest1) +{ + std::vector const BUF_SIZES{ + 50000, 4, 1000, 0, 250000, 1, 100, 8000, 0, 1, 100, 1000, 10000, 100000, 0, 1, 100000}; + + // Device init + auto stream = cudf::get_default_stream(); + auto mr = cudf::get_current_device_resource_ref(); + + // Creating base vector for data and setting it to all 0xFF + std::vector> expected; + std::transform(BUF_SIZES.begin(), BUF_SIZES.end(), std::back_inserter(expected), [](auto size) { + return std::vector(size + 2000, std::numeric_limits::max()); + }); + + // set buffer region to other value + std::for_each(thrust::make_zip_iterator(thrust::make_tuple(expected.begin(), BUF_SIZES.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected.end(), BUF_SIZES.end())), + [](auto elem) { + std::fill_n( + thrust::get<0>(elem).begin() + 1000, thrust::get<1>(elem), 0xEEEEEEEEEEEEEEEE); + }); + + // Copy host vector data to device + std::vector> device_bufs; + std::transform(expected.begin(), + expected.end(), + std::back_inserter(device_bufs), + [stream, mr](auto const& vec) { + return cudf::detail::make_device_uvector_async(vec, stream, mr); + }); + + // Initialize device buffers for memset + std::vector> memset_bufs; + std::transform( + thrust::make_zip_iterator(thrust::make_tuple(device_bufs.begin(), BUF_SIZES.begin())), + thrust::make_zip_iterator(thrust::make_tuple(device_bufs.end(), BUF_SIZES.end())), + std::back_inserter(memset_bufs), + [](auto const& elem) { + return cudf::device_span(thrust::get<0>(elem).data() + 1000, thrust::get<1>(elem)); + }); + + // Function Call + cudf::io::detail::batched_memset(memset_bufs, uint64_t{0}, stream); + + // Set all buffer regions to 0 for expected comparison + std::for_each( + 
thrust::make_zip_iterator(thrust::make_tuple(expected.begin(), BUF_SIZES.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected.end(), BUF_SIZES.end())), + [](auto elem) { std::fill_n(thrust::get<0>(elem).begin() + 1000, thrust::get<1>(elem), 0UL); }); + + // Compare to see that only given buffers are zeroed out + std::for_each( + thrust::make_zip_iterator(thrust::make_tuple(device_bufs.begin(), expected.begin())), + thrust::make_zip_iterator(thrust::make_tuple(device_bufs.end(), expected.end())), + [stream](auto const& elem) { + auto after_memset = cudf::detail::make_std_vector_async(thrust::get<0>(elem), stream); + EXPECT_TRUE( + std::equal(thrust::get<1>(elem).begin(), thrust::get<1>(elem).end(), after_memset.begin())); + }); +} diff --git a/cpp/tests/utilities_tests/pinned_memory_tests.cpp b/cpp/tests/utilities_tests/pinned_memory_tests.cpp index 93259fd63ee..ae7c6fa8b8c 100644 --- a/cpp/tests/utilities_tests/pinned_memory_tests.cpp +++ b/cpp/tests/utilities_tests/pinned_memory_tests.cpp @@ -25,7 +25,6 @@ #include #include -#include class PinnedMemoryTest : public cudf::test::BaseFixture { size_t prev_copy_threshold; diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu index 30496728083..019d6adc007 100644 --- a/cpp/tests/utilities_tests/span_tests.cu +++ b/cpp/tests/utilities_tests/span_tests.cu @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -253,7 +254,7 @@ CUDF_KERNEL void simple_device_kernel(device_span result) { result[0] = tr TEST(SpanTest, CanUseDeviceSpan) { auto d_message = cudf::detail::make_zeroed_device_uvector_async( - 1, cudf::get_default_stream(), rmm::mr::get_current_device_resource()); + 1, cudf::get_default_stream(), cudf::get_current_device_resource_ref()); auto d_span = device_span(d_message.data(), d_message.size()); diff --git a/dependencies.yaml b/dependencies.yaml index a3f3e8bb959..6909eb7168d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -10,25 
+10,31 @@ files: - build_all - build_cpp - build_python_common - - build_python_cudf - cuda - cuda_version + - depends_on_cupy + - depends_on_libkvikio + - depends_on_librmm + - depends_on_rmm - develop - docs - - libarrow_build - notebooks - py_version + - pyarrow_run - rapids_build_skbuild - rapids_build_setuptools - run_common - run_cudf + - run_cudf_polars + - run_pylibcudf - run_dask_cudf - run_custreamz - test_cpp - test_python_common - test_python_cudf - test_python_dask_cudf - - depends_on_cupy + - test_python_pylibcudf + - test_python_cudf_pandas test_static_build: output: none includes: @@ -37,16 +43,28 @@ files: output: none includes: - cuda_version - - libarrow_run - test_cpp - test_python: + test_python_cudf_pandas: output: none includes: - cuda_version - py_version - - pyarrow_run - test_python_common - test_python_cudf + - test_python_cudf_pandas + test_python_cudf: + output: none + includes: + - cuda_version + - py_version + - test_python_common + - test_python_cudf + test_python_other: + output: none + includes: + - cuda_version + - py_version + - test_python_common - test_python_dask_cudf test_java: output: none @@ -55,7 +73,6 @@ files: - build_all - cuda - cuda_version - - libarrow_run - test_java test_notebooks: output: none @@ -74,16 +91,15 @@ files: - cuda - cuda_version - docs - - libarrow_run - py_version - py_rapids_build_cudf: + py_build_cudf: output: pyproject pyproject_dir: python/cudf extras: table: build-system includes: - rapids_build_skbuild - py_build_cudf: + py_rapids_build_cudf: output: pyproject pyproject_dir: python/cudf extras: @@ -92,7 +108,10 @@ files: includes: - build_base - build_python_common - - build_python_cudf + - depends_on_pylibcudf + - depends_on_libcudf + - depends_on_librmm + - depends_on_rmm py_run_cudf: output: pyproject pyproject_dir: python/cudf @@ -103,6 +122,9 @@ files: - run_cudf - pyarrow_run - depends_on_cupy + - depends_on_libcudf + - depends_on_pylibcudf + - depends_on_rmm py_test_cudf: output: 
pyproject pyproject_dir: python/cudf @@ -112,6 +134,62 @@ files: includes: - test_python_common - test_python_cudf + py_build_libcudf: + output: pyproject + pyproject_dir: python/libcudf + extras: + table: build-system + includes: + - rapids_build_skbuild + py_rapids_build_libcudf: + output: pyproject + pyproject_dir: python/libcudf + extras: + table: tool.rapids-build-backend + key: requires + includes: + - build_base + - build_cpp + - depends_on_libkvikio + - depends_on_librmm + py_build_pylibcudf: + output: pyproject + pyproject_dir: python/pylibcudf + extras: + table: build-system + includes: + - rapids_build_skbuild + py_rapids_build_pylibcudf: + output: pyproject + pyproject_dir: python/pylibcudf + extras: + table: tool.rapids-build-backend + key: requires + includes: + - build_base + - build_python_common + - depends_on_libcudf + - depends_on_librmm + - depends_on_rmm + py_run_pylibcudf: + output: pyproject + pyproject_dir: python/pylibcudf + extras: + table: project + includes: + - depends_on_libcudf + - depends_on_rmm + - pyarrow_run + - run_pylibcudf + py_test_pylibcudf: + output: pyproject + pyproject_dir: python/pylibcudf + extras: + table: project.optional-dependencies + key: test + includes: + - test_python_common + - test_python_pylibcudf py_test_pandas_cudf: output: pyproject pyproject_dir: python/cudf @@ -142,7 +220,7 @@ files: table: project includes: - run_cudf_polars - - depends_on_cudf + - depends_on_pylibcudf py_test_cudf_polars: output: pyproject pyproject_dir: python/cudf_polars @@ -177,14 +255,14 @@ files: includes: - test_python_common - test_python_dask_cudf - py_rapids_build_cudf_kafka: + py_build_cudf_kafka: output: pyproject pyproject_dir: python/cudf_kafka extras: table: build-system includes: - rapids_build_skbuild - py_build_cudf_kafka: + py_rapids_build_cudf_kafka: output: pyproject pyproject_dir: python/cudf_kafka extras: @@ -286,14 +364,12 @@ dependencies: common: - output_types: conda packages: - - fmt>=10.1.1,<11 + - 
fmt>=11.0.2,<12 - flatbuffers==24.3.25 - - librmm==24.8.* - - libkvikio==24.8.* - - librdkafka>=1.9.0,<1.10.0a0 + - librdkafka>=2.5.0,<2.6.0a0 # Align nvcomp version with rapids-cmake - - nvcomp==3.0.6 - - spdlog>=1.12.0,<1.13 + - nvcomp==4.0.1 + - spdlog>=1.14.1,<1.15 rapids_build_skbuild: common: - output_types: [conda, requirements, pyproject] @@ -301,10 +377,10 @@ dependencies: - &rapids_build_backend rapids-build-backend>=0.3.0,<0.4.0.dev0 - output_types: conda packages: - - scikit-build-core>=0.7.0 + - scikit-build-core>=0.10.0 - output_types: [requirements, pyproject] packages: - - scikit-build-core[pyproject]>=0.7.0 + - scikit-build-core[pyproject]>=0.10.0 rapids_build_setuptools: common: - output_types: [requirements, pyproject] @@ -317,69 +393,11 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - cython>=3.0.3 - # Hard pin the patch version used during the build. This must be kept - # in sync with the version pinned in get_arrow.cmake. - - pyarrow==16.1.0.* - - output_types: pyproject - packages: - # Hard pin the patch version used during the build. - # Sync with conda build constraint & wheel run constraint. - # TODO: Change to `2.0.*` for NumPy 2 - - numpy==1.23.* - build_python_cudf: - common: - - output_types: conda - packages: - - &rmm_unsuffixed rmm==24.8.* - - pip - - pip: - - git+https://github.com/python-streamz/streamz.git@master - - output_types: requirements - packages: - # pip recognizes the index as a global option for the requirements.txt file - # This index is needed for rmm-cu{11,12}. 
- - --extra-index-url=https://pypi.nvidia.com - - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple - - git+https://github.com/python-streamz/streamz.git@master - specific: - - output_types: [requirements, pyproject] - matrices: - - matrix: - cuda: "12.*" - cuda_suffixed: "true" - packages: - - rmm-cu12==24.8.* - - matrix: - cuda: "11.*" - cuda_suffixed: "true" - packages: - - rmm-cu11==24.8.* - - {matrix: null, packages: [*rmm_unsuffixed]} - libarrow_build: - common: - - output_types: conda - packages: - # Hard pin the Arrow patch version used during the build. This must - # be kept in sync with the version pinned in get_arrow.cmake. - - libarrow-acero==16.1.0.* - - libarrow-dataset==16.1.0.* - - libarrow==16.1.0.* - - libparquet==16.1.0.* - libarrow_run: - common: - - output_types: conda - packages: - # Allow runtime version to float up to patch version - - libarrow-acero>=16.1.0,<16.2.0a0 - - libarrow-dataset>=16.1.0,<16.2.0a0 - - libarrow>=16.1.0,<16.2.0a0 - - libparquet>=16.1.0,<16.2.0a0 pyarrow_run: common: - output_types: [conda, requirements, pyproject] packages: - # Allow runtime version to float up to patch version - - pyarrow>=16.1.0,<16.2.0a0 + - pyarrow>=14.0.0,<18.0.0a0 cuda_version: specific: - output_types: conda @@ -510,7 +528,7 @@ dependencies: - output_types: [conda] packages: - breathe>=4.35.0 - - dask-cuda==24.8.* + - dask-cuda==24.10.*,>=0.0.0a0 - *doxygen - make - myst-nb @@ -537,10 +555,6 @@ dependencies: specific: - output_types: conda matrices: - - matrix: - py: "3.9" - packages: - - python=3.9 - matrix: py: "3.10" packages: @@ -549,17 +563,43 @@ dependencies: py: "3.11" packages: - python=3.11 + - matrix: + py: "3.12" + packages: + - python=3.12 - matrix: packages: - - python>=3.9,<3.12 + - python>=3.10,<3.13 run_common: common: - output_types: [conda, requirements, pyproject] packages: - fsspec>=0.6.0 - # TODO: Update `numpy` in `build_python_common` when dropping `<2.0a0` - - numpy>=1.23,<2.0a0 + - &numpy 
numpy>=1.23,<3.0a0 - pandas>=2.0,<2.2.3dev0 + run_pylibcudf: + common: + - output_types: [conda, requirements, pyproject] + packages: + - nvtx>=0.2.1 + - packaging + - typing_extensions>=4.0.0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for rmm. + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [conda, requirements, pyproject] + matrices: + - matrix: {cuda: "12.*"} + packages: + - cuda-python>=12.0,<13.0a0 + - matrix: {cuda: "11.*"} + packages: &run_pylibcudf_packages_all_cu11 + - cuda-python>=11.7.1,<12.0a0 + - {matrix: null, packages: *run_pylibcudf_packages_all_cu11} run_cudf: common: - output_types: [conda, requirements, pyproject] @@ -570,9 +610,6 @@ dependencies: - packaging - rich - typing_extensions>=4.0.0 - - output_types: conda - packages: - - *rmm_unsuffixed - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -593,7 +630,7 @@ dependencies: matrices: - matrix: {cuda: "12.*"} packages: - - &pynvjitlink_unsuffixed pynvjitlink + - &pynvjitlink_unsuffixed pynvjitlink>=0.0.0a0 - matrix: {cuda: "11.*"} packages: - &cubinlinker_unsuffixed cubinlinker @@ -604,19 +641,16 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - rmm-cu12==24.8.* - - pynvjitlink-cu12 + - pynvjitlink-cu12>=0.0.0a0 - matrix: cuda: "12.*" cuda_suffixed: "false" packages: - - *rmm_unsuffixed - *pynvjitlink_unsuffixed - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - rmm-cu11==24.8.* - cubinlinker-cu11 - ptxcompiler-cu11 - matrix: @@ -625,29 +659,28 @@ dependencies: packages: &run_cudf_cu11_unsuffixed - *cubinlinker_unsuffixed - *ptxcompiler_unsuffixed - - *rmm_unsuffixed - {matrix: null, packages: *run_cudf_cu11_unsuffixed} run_cudf_polars: common: - output_types: [conda, requirements, pyproject] 
packages: - - polars>=1.6 + - polars>=1.8,<1.9 run_dask_cudf: common: - output_types: [conda, requirements, pyproject] packages: - - rapids-dask-dependency==24.8.* + - rapids-dask-dependency==24.10.*,>=0.0.0a0 run_custreamz: common: - output_types: conda packages: - - python-confluent-kafka>=1.9.0,<1.10.0a0 + - python-confluent-kafka>=2.5.0,<2.6.0a0 - output_types: [conda, requirements, pyproject] packages: - streamz - output_types: [requirements, pyproject] packages: - - confluent-kafka>=1.9.0,<1.10.0a0 + - confluent-kafka>=2.5.0,<2.6.0a0 test_cpp: common: - output_types: conda @@ -673,6 +706,7 @@ dependencies: - *cmake_ver - maven - openjdk=8.* + - boost test_python_common: common: - output_types: [conda, requirements, pyproject] @@ -680,6 +714,43 @@ dependencies: - pytest<8 - pytest-cov - pytest-xdist + specific: + # Define additional constraints for testing with oldest dependencies. + - output_types: [conda, requirements] + matrices: + - matrix: {dependencies: "oldest"} + packages: + - numba==0.57.* + - pandas==2.0.* + - matrix: + packages: + - output_types: conda + matrices: + - matrix: {dependencies: "oldest", arch: "aarch64", cuda: "12.*"} + packages: + - cupy==12.2.0 # cupy 12.2.0 is the earliest with CUDA 12 ARM packages. 
+ - matrix: {dependencies: "oldest"} + packages: + - cupy==12.0.0 + - matrix: + packages: + - output_types: requirements + # Using --constraints for pip install, so we list cupy multiple times + matrices: + - matrix: {dependencies: "oldest"} + packages: + - cupy-cuda11x==12.0.0 + - cupy-cuda12x==12.0.0 + - matrix: + packages: + test_python_pylibcudf: + common: + - output_types: [conda, requirements, pyproject] + packages: + - fastavro>=0.22.9 + - hypothesis + - *numpy + - pandas test_python_cudf: common: - output_types: [conda, requirements, pyproject] @@ -705,6 +776,14 @@ dependencies: - &transformers transformers==4.39.3 - tzdata specific: + - output_types: [conda, requirements] + matrices: + - matrix: {dependencies: "oldest"} + packages: + - numpy==1.23.* + - pyarrow==14.0.0 + - matrix: + packages: - output_types: conda matrices: - matrix: @@ -722,13 +801,72 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - dask-cuda==24.8.* + - dask-cuda==24.10.*,>=0.0.0a0 - *numba + specific: + - output_types: [conda, requirements] + matrices: + - matrix: {dependencies: "oldest"} + packages: + - numpy==1.24.* + - pyarrow==14.0.1 + - matrix: + packages: + depends_on_libcudf: + common: + - output_types: conda + packages: + - &libcudf_unsuffixed libcudf==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for libcudf-cu{11,12}. 
+ - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - libcudf-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - libcudf-cu11==24.10.*,>=0.0.0a0 + - {matrix: null, packages: [*libcudf_unsuffixed]} + depends_on_pylibcudf: + common: + - output_types: conda + packages: + - &pylibcudf_unsuffixed pylibcudf==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for rmm, cubinlinker, ptxcompiler. + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - pylibcudf-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - pylibcudf-cu11==24.10.*,>=0.0.0a0 + - {matrix: null, packages: [*pylibcudf_unsuffixed]} depends_on_cudf: common: - output_types: conda packages: - - &cudf_unsuffixed cudf==24.8.* + - &cudf_unsuffixed cudf==24.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -742,18 +880,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf-cu12==24.8.* + - cudf-cu12==24.10.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - cudf-cu11==24.8.* + - cudf-cu11==24.10.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_unsuffixed]} depends_on_cudf_kafka: common: - output_types: conda packages: - - &cudf_kafka_unsuffixed cudf_kafka==24.8.* + - &cudf_kafka_unsuffixed cudf_kafka==24.10.*,>=0.0.0a0 - output_types: requirements packages: # pip recognizes the index as a global option for 
the requirements.txt file @@ -767,12 +905,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - cudf_kafka-cu12==24.8.* + - cudf_kafka-cu12==24.10.*,>=0.0.0a0 - matrix: cuda: "11.*" cuda_suffixed: "true" packages: - - cudf_kafka-cu11==24.8.* + - cudf_kafka-cu11==24.10.*,>=0.0.0a0 - {matrix: null, packages: [*cudf_kafka_unsuffixed]} depends_on_cupy: common: @@ -789,6 +927,85 @@ dependencies: packages: &cupy_packages_cu11 - cupy-cuda11x>=12.0.0 - {matrix: null, packages: *cupy_packages_cu11} + depends_on_libkvikio: + common: + - output_types: conda + packages: + - &libkvikio_unsuffixed libkvikio==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - libkvikio-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - libkvikio-cu11==24.10.*,>=0.0.0a0 + - matrix: + packages: + - *libkvikio_unsuffixed + depends_on_librmm: + common: + - output_types: conda + packages: + - &librmm_unsuffixed librmm==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for librmm-cu{11,12}. 
+ - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - librmm-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - librmm-cu11==24.10.*,>=0.0.0a0 + - matrix: + packages: + - *librmm_unsuffixed + depends_on_rmm: + common: + - output_types: conda + packages: + - &rmm_unsuffixed rmm==24.10.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for rmm-cu{11,12}. + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - rmm-cu12==24.10.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - rmm-cu11==24.10.*,>=0.0.0a0 + - matrix: + packages: + - *rmm_unsuffixed test_python_pandas_cudf: common: - output_types: [requirements, pyproject] @@ -800,9 +1017,13 @@ dependencies: # installation issues with `psycopg2`. 
- pandas[test, pyarrow, performance, computation, fss, excel, parquet, feather, hdf5, spss, html, xml, plot, output-formatting, clipboard, compression] - pytest-reportlog + - ipython test_python_cudf_pandas: common: - - output_types: [requirements, pyproject] + - output_types: [conda, requirements, pyproject] packages: - ipython + - jupyter_client + - nbconvert + - nbformat - openpyxl diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index f544536fb31..95813907bf4 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -342,6 +342,7 @@ def clean_all_xml_files(path): "cudf.Series": ("cudf.core.series.Series", "cudf.Series"), "cudf.Index": ("cudf.core.index.Index", "cudf.Index"), "cupy.core.core.ndarray": ("cupy.ndarray", "cupy.ndarray"), + "DeviceBuffer": ("rmm._lib.device_buffer.DeviceBuffer", "rmm.DeviceBuffer"), } @@ -383,6 +384,7 @@ def _generate_namespaces(namespaces): # Cython types that don't alias cleanly because of # https://github.com/cython/cython/issues/5609 "size_type", + "size_t", "type_id", # Unknown base types "int32_t", @@ -556,10 +558,24 @@ def on_missing_reference(app, env, node, contnode): ("py:class", "Dtype"), # The following are erroneously warned due to # https://github.com/sphinx-doc/sphinx/issues/11225 + ("py:obj", "cudf.DatetimeIndex.time"), + ("py:obj", "cudf.DatetimeIndex.date"), ("py:obj", "cudf.Index.values_host"), + ("py:obj", "cudf.Index.transpose"), + ("py:obj", "cudf.Index.T"), + ("py:obj", "cudf.Index.to_flat_index"), + ("py:obj", "cudf.MultiIndex.to_flat_index"), + ("py:meth", "pyarrow.Table.to_pandas"), + ("py:class", "pd.DataFrame"), + ("py:class", "pandas.core.indexes.frozen.FrozenList"), ("py:class", "pa.Array"), ("py:class", "ScalarLike"), ("py:class", "ParentType"), + ("py:class", "pyarrow.lib.DataType"), + ("py:class", "pyarrow.lib.Table"), + ("py:class", "pyarrow.lib.Scalar"), + ("py:class", "pyarrow.lib.ChunkedArray"), + ("py:class", "pyarrow.lib.Array"), ("py:class", "ColumnLike"), # TODO: 
Remove this when we figure out why typing_extensions doesn't seem # to map types correctly for intersphinx diff --git a/docs/cudf/source/cudf_pandas/faq.md b/docs/cudf/source/cudf_pandas/faq.md index cdf32216619..34b657488c1 100644 --- a/docs/cudf/source/cudf_pandas/faq.md +++ b/docs/cudf/source/cudf_pandas/faq.md @@ -32,7 +32,7 @@ pandas. You can learn more about these edge cases in We also run nightly tests that track interactions between `cudf.pandas` and other third party libraries. See -[Third-Party Library Compatibility](#does-it-work-with-third-party-libraries). +[Third-Party Library Compatibility](#does-cudf-pandas-work-with-third-party-libraries). ## How can I tell if `cudf.pandas` is active? @@ -69,7 +69,38 @@ performance, try to use only functionality that can run entirely on GPU. This helps reduce the number of memory transfers needed to fallback to CPU. -## Does it work with third-party libraries? +## How can I improve performance of my workflow with `cudf.pandas`? + +Most workflows will see significant performance improvements with +`cudf.pandas`. However, sometimes things can be slower than expected. +First, it's important to note that GPUs are good at parallel processing +of large amounts of data. Small data sizes may be slower on GPU than +CPU, because of the cost of data transfers. cuDF achieves the highest +performance with many rows of data. As a _very rough_ rule of thumb, +`cudf.pandas` shines on workflows with more than 10,000 - 100,000 rows +of data, depending on the algorithms, data types, and other factors. +Datasets that are several gigabytes in size and/or have millions of +rows are a great fit for `cudf.pandas`. + +Here are some more tips to improve workflow performance: + +- Reshape data so it is long rather than wide (more rows, fewer + columns). This improves cuDF's ability to execute in parallel on the + entire GPU! +- Avoid element-wise iteration and mutation. 
If you can, use pandas + functions to manipulate an entire column at once rather than writing + raw `for` loops that compute and assign. +- If your data is really an n-dimensional array with lots of columns + where you aim to do lots of math (like adding matrices), + [CuPy](https://cupy.dev/) or [NumPy](https://numpy.org/) may be a + better choice than pandas or `cudf.pandas`. Array libraries are built + for different use cases than DataFrame libraries, and will get optimal + performance from using contiguous memory for multidimensional array + storage. Use the `.values` method to convert a DataFrame or Series to + an array. + +(does-cudf-pandas-work-with-third-party-libraries)= +## Does `cudf.pandas` work with third-party libraries? `cudf.pandas` is tested with numerous popular third-party libraries. `cudf.pandas` will not only work but will accelerate pandas operations @@ -97,7 +128,7 @@ common interactions with the following Python libraries: Please review the section on [Known Limitations](#are-there-any-known-limitations) for details about what is expected not to work (and why). -## Can I use this with Dask or PySpark? +## Can I use `cudf.pandas` with Dask or PySpark? `cudf.pandas` is not designed for distributed or out-of-core computing (OOC) workflows today. If you are looking for accelerated OOC and @@ -111,6 +142,7 @@ cuDF (learn more in [this blog](https://medium.com/rapids-ai/easy-cpu-gpu-arrays-and-dataframes-run-your-dask-code-where-youd-like-e349d92351d)) and the [RAPIDS Accelerator for Apache Spark](https://nvidia.github.io/spark-rapids/) provides a similar configuration-based plugin for Spark. +(are-there-any-known-limitations)= ## Are there any known limitations? 
There are a few known limitations that you should be aware of: @@ -119,11 +151,6 @@ There are a few known limitations that you should be aware of: [value mutability](https://pandas.pydata.org/pandas-docs/stable/getting_started/overview.html#mutability-and-copying-of-data) of Pandas objects is not always guaranteed. You should follow the pandas recommendation to favor immutable operations. -- `cudf.pandas` can't currently interface smoothly with functions that - interact with objects using a C API (such as the Python or NumPy C - API) - - For example, you can write `torch.tensor(df.values)` but not - `torch.from_numpy(df.values)`, as the latter uses the NumPy C API - For performance reasons, joins and join-based operations are not currently implemented to maintain the same row ordering as standard pandas diff --git a/docs/cudf/source/cudf_pandas/usage.md b/docs/cudf/source/cudf_pandas/usage.md index 0398a8d7086..41838e01dd9 100644 --- a/docs/cudf/source/cudf_pandas/usage.md +++ b/docs/cudf/source/cudf_pandas/usage.md @@ -120,3 +120,23 @@ To profile a script being run from the command line, pass the ```bash python -m cudf.pandas --profile script.py ``` + +### cudf.pandas CLI Features + +Several of the ways to provide input to the `python` interpreter also work with `python -m cudf.pandas`, such as the REPL, the `-c` flag, and reading from stdin. + +Executing `python -m cudf.pandas` with no script name will enter a REPL (read-eval-print loop) similar to the behavior of the normal `python` interpreter. 
+ +The `-c` flag accepts a code string to run, like this: + +```bash +$ python -m cudf.pandas -c "import pandas; print(pandas)" + +``` + +Users can also provide code to execute from stdin, like this: + +```bash +$ echo "import pandas; print(pandas)" | python -m cudf.pandas + +``` diff --git a/docs/cudf/source/cudf_polars/index.rst b/docs/cudf/source/cudf_polars/index.rst index cc7aabd124f..0a3a0d86b2c 100644 --- a/docs/cudf/source/cudf_polars/index.rst +++ b/docs/cudf/source/cudf_polars/index.rst @@ -1,7 +1,7 @@ -cuDF-based GPU backend for Polars [Open Beta] -============================================= +Polars GPU engine +================= -cuDF supports an in-memory, GPU-accelerated execution engine for Python users of the Polars Lazy API. +cuDF provides an in-memory, GPU-accelerated execution engine for Python users of the Polars Lazy API. The engine supports most of the core expressions and data types as well as a growing set of more advanced dataframe manipulations and data file formats. When using the GPU engine, Polars will convert expressions into an optimized query plan and determine whether the plan is supported on the GPU. If it is not, the execution will transparently fall back to the standard Polars engine @@ -16,7 +16,7 @@ We reproduced the `Polars Decision Support (PDS) `__ on the Polars website. +The GPU engine for Polars is now available in Open Beta and the engine is undergoing rapid development. To learn more, visit the `GPU Support page `__ on the Polars website. Launch on Google Colab ---------------------- @@ -38,4 +38,4 @@ Launch on Google Colab :width: 200px :target: https://colab.research.google.com/github/rapidsai-community/showcase/blob/main/accelerated_data_processing_examples/polars_gpu_engine_demo.ipynb - Take the cuDF backend for Polars for a test-drive in a free GPU-enabled notebook environment using your Google account by `launching on Colab `__. + Try out the GPU engine for Polars in a free GPU notebook environment. 
Sign in with your Google account and `launch the demo on Colab `__. diff --git a/docs/cudf/source/developer_guide/pylibcudf.md b/docs/cudf/source/developer_guide/pylibcudf.md index 0b881b2b057..39840e72e21 100644 --- a/docs/cudf/source/developer_guide/pylibcudf.md +++ b/docs/cudf/source/developer_guide/pylibcudf.md @@ -13,10 +13,8 @@ To satisfy the goals of pylibcudf, we impose the following set of design princip - Every public function or method should be `cpdef`ed. This allows it to be used in both Cython and Python code. This incurs some slight overhead over `cdef` functions, but we assume that this is acceptable because 1) the vast majority of users will be using pure Python rather than Cython, and 2) the overhead of a `cpdef` function over a `cdef` function is on the order of a nanosecond, while CUDA kernel launch overhead is on the order of a microsecond, so these function overheads should be washed out by typical usage of pylibcudf. - Every variable used should be strongly typed and either be a primitive type (int, float, etc) or a cdef class. Any enums in C++ should be mirrored using `cpdef enum`, which will create both a C-style enum in Cython and a PEP 435-style Python enum that will automatically be used in Python. - All typing in code should be written using Cython syntax, not PEP 484 Python typing syntax. Not only does this ensure compatibility with Cython < 3, but even with Cython 3 PEP 484 support remains incomplete as of this writing. -- All cudf code should interact only with pylibcudf, never with libcudf directly. -- All imports should be relative so that pylibcudf can be easily extracted from cudf later - - Exception: All imports of libcudf API bindings in `cudf._lib.cpp` should use absolute imports of `cudf._lib.cpp as libcudf`. We should convert the `cpp` directory into a proper package so that it can be imported as `libcudf` in that fashion. 
When moving pylibcudf into a separate package, it will be renamed to `libcudf` and only the imports will need to change. -- Ideally, pylibcudf should depend on nothing other than rmm and pyarrow. This will allow it to be extracted into a a largely standalone library and used in environments where the larger dependency tree of cudf may be cumbersome. +- All cudf code should interact only with pylibcudf, never with libcudf directly. This is not currently the case, but is the direction that the library is moving towards. +- Ideally, pylibcudf should depend on no RAPIDS component other than rmm, and should in general have minimal runtime dependencies. ## Relationship to libcudf @@ -112,6 +110,9 @@ Then, a corresponding pylibcudf fixture may be created using a simple `from_arro This approach ensures consistent global coverage across types for various tests. In general, pylibcudf tests should prefer validating against a corresponding pyarrow implementation rather than hardcoding data. +If there is no pyarrow implementation, another alternative is to write a pure Python implementation that loops over the values +of the Table/Column, if a scalar Python equivalent of the pylibcudf implementation exists (this is especially relevant for string methods). + This approach is more resilient to changes to input data, particularly given the fixture strategy outlined above. Standard tools for comparing between pylibcudf and pyarrow types are provided in the utils module. @@ -149,7 +150,7 @@ Some guidelines on what should be tested: - Exception: In special cases where constructing suitable large tests is difficult in C++ (such as creating suitable input data for I/O testing), tests may be added to pylibcudf instead. - Nullable data should always be tested. - Expected exceptions should be tested. Tests should be written from the user's perspective in mind, and if the API is not currently throwing the appropriate exception it should be updated. 
- - Important note: If the exception should be produced by libcudf, the underlying libcudf API should be updated to throw the desired exception in C++. Such changes may require consultation with libcudf devs in nontrivial cases. [This issue](https://github.com/rapidsai/cudf/issues/12885) provides an overview and an indication of acceptable exception types that should cover most use cases. In rare cases a new C++ exception may need to be introduced in [`error.hpp`](https://github.com/rapidsai/cudf/blob/branch-24.04/cpp/include/cudf/utilities/error.hpp). If so, this exception will also need to be mapped to a suitable Python exception in [`exception_handler.pxd`](https://github.com/rapidsai/cudf/blob/branch-24.04/python/cudf/cudf/_lib/exception_handler.pxd). + - Important note: If the exception should be produced by libcudf, the underlying libcudf API should be updated to throw the desired exception in C++. Such changes may require consultation with libcudf devs in nontrivial cases. [This issue](https://github.com/rapidsai/cudf/issues/12885) provides an overview and an indication of acceptable exception types that should cover most use cases. In rare cases a new C++ exception may need to be introduced in [`error.hpp`](https://github.com/rapidsai/cudf/blob/branch-24.04/cpp/include/cudf/utilities/error.hpp). If so, this exception will also need to be mapped to a suitable Python exception in `exception_handler.pxd`. Some guidelines on how best to use pytests. - By default, fixtures producing device data containers should be of module scope and treated as immutable by tests. Allocating data on the GPU is expensive and slows tests. Almost all pylibcudf operations are out of place operations, so module-scoped fixtures should not typically be problematic to work with. 
Session-scoped fixtures would also work, but they are harder to reason about since they live in a different module, and if they need to change for any reason they could affect an arbitrarily large number of tests. Module scope is a good balance. @@ -185,7 +186,7 @@ Here is an example of appropriate enum usage. ```cython -# cpp/copying.pxd +# pylibcudf/libcudf/copying.pxd cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: # cpdef here so that we export both a cdef enum class and a Python enum.Enum. cpdef enum class out_of_bounds_policy(bool): @@ -193,8 +194,9 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: DONT_CHECK -# cpp/copying.pyx -# This file is empty, but is required to compile the Python enum in cpp/copying.pxd +# pylibcudf/libcudf/copying.pyx +# This file is empty, but is required to compile the Python enum in pylibcudf/libcudf/copying.pxd +# Ensure this file is included in pylibcudf/libcudf/CMakeLists.txt # pylibcudf/copying.pxd @@ -202,21 +204,21 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil: # cimport the enum using the exact name # Once https://github.com/cython/cython/issues/5609 is resolved, # this import should instead be -# from cudf._lib.cpp.copying cimport out_of_bounds_policy as OutOfBoundsPolicy -from cudf._lib.cpp.copying cimport out_of_bounds_policy +# from pylibcudf.libcudf.copying cimport out_of_bounds_policy as OutOfBoundsPolicy +from pylibcudf.libcudf.copying cimport out_of_bounds_policy # pylibcudf/copying.pyx # Access cpp.copying members that aren't part of this module's public API via # this module alias -from cudf._lib.cpp cimport copying as cpp_copying -from cudf._lib.cpp.copying cimport out_of_bounds_policy +from pylibcudf.libcudf cimport copying as cpp_copying +from pylibcudf.libcudf.copying cimport out_of_bounds_policy # This import exposes the enum in the public API of this module. 
# It requires a no-cython-lint tag because it will be unused: all typing of # parameters etc will need to use the Cython name `out_of_bounds_policy` until # the Cython bug is resolved. -from cudf._lib.cpp.copying import \ +from pylibcudf.libcudf.copying import \ out_of_bounds_policy as OutOfBoundsPolicy # no-cython-lint ``` @@ -242,3 +244,8 @@ cpdef ColumnOrTable empty_like(ColumnOrTable input) [Cython supports specializing the contents of fused-type functions based on the argument types](https://cython.readthedocs.io/en/latest/src/userguide/fusedtypes.html#type-checking-specializations), so any type-specific logic may be encoded using the appropriate conditionals. See the pylibcudf source for examples of how to implement such functions. + +In the event that libcudf provides multiple overloads for the same function with differing numbers of arguments, specify the maximum number of arguments in the Cython definition, +and set arguments not shared between overloads to `None`. If a user tries to pass in an unsupported argument for a specific overload type, you should raise `ValueError`. + +Finally, consider making a libcudf issue if you think this inconsistency can be addressed on the libcudf side. diff --git a/docs/cudf/source/libcudf_docs/api_docs/index.rst b/docs/cudf/source/libcudf_docs/api_docs/index.rst index c077a7cd452..96ff0eb7850 100644 --- a/docs/cudf/source/libcudf_docs/api_docs/index.rst +++ b/docs/cudf/source/libcudf_docs/api_docs/index.rst @@ -7,6 +7,7 @@ libcudf documentation cudf_namespace default_stream + memory_resource cudf_classes column_apis datetime_apis diff --git a/docs/cudf/source/libcudf_docs/api_docs/memory_resource.rst b/docs/cudf/source/libcudf_docs/api_docs/memory_resource.rst new file mode 100644 index 00000000000..e32f8a9beb0 --- /dev/null +++ b/docs/cudf/source/libcudf_docs/api_docs/memory_resource.rst @@ -0,0 +1,5 @@ +Memory Resource Management +========================== + +.. 
doxygengroup:: memory_resource + :members: diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb index c3da2558db8..95f5f9734dd 100644 --- a/docs/cudf/source/user_guide/10min.ipynb +++ b/docs/cudf/source/user_guide/10min.ipynb @@ -5,9 +5,9 @@ "id": "4c6c548b", "metadata": {}, "source": [ - "# 10 Minutes to cuDF and Dask-cuDF\n", + "# 10 Minutes to cuDF and Dask cuDF\n", "\n", - "Modelled after 10 Minutes to Pandas, this is a short introduction to cuDF and Dask-cuDF, geared mainly towards new users.\n", + "Modelled after 10 Minutes to Pandas, this is a short introduction to cuDF and Dask cuDF, geared mainly towards new users.\n", "\n", "## What are these Libraries?\n", "\n", @@ -15,12 +15,17 @@ "\n", "[Dask](https://dask.org/) is a flexible library for parallel computing in Python that makes scaling out your workflow smooth and simple. On the CPU, Dask uses Pandas to execute operations in parallel on DataFrame partitions.\n", "\n", - "[Dask-cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) extends Dask where necessary to allow its DataFrame partitions to be processed using cuDF GPU DataFrames instead of Pandas DataFrames. For instance, when you call `dask_cudf.read_csv(...)`, your cluster's GPUs do the work of parsing the CSV file(s) by calling [`cudf.read_csv()`](https://docs.rapids.ai/api/cudf/stable/api_docs/api/cudf.read_csv.html).\n", + "[Dask cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) extends Dask where necessary to allow its DataFrame partitions to be processed using cuDF GPU DataFrames instead of Pandas DataFrames. For instance, when you call `dask_cudf.read_csv(...)`, your cluster's GPUs do the work of parsing the CSV file(s) by calling [`cudf.read_csv()`](https://docs.rapids.ai/api/cudf/stable/api_docs/api/cudf.read_csv.html).\n", "\n", "\n", - "## When to use cuDF and Dask-cuDF\n", + "
\n", + "Note: This notebook uses the explicit Dask cuDF API (dask_cudf) for clarity. However, we strongly recommend that you use Dask's configuration infrastructure to set the \"dataframe.backend\" option to \"cudf\", and work with the Dask DataFrame API directly. Please see the Dask cuDF documentation for more information.\n", + "
\n", "\n", - "If your workflow is fast enough on a single GPU or your data comfortably fits in memory on a single GPU, you would want to use cuDF. If you want to distribute your workflow across multiple GPUs, have more data than you can fit in memory on a single GPU, or want to analyze data spread across many files at once, you would want to use Dask-cuDF." + "\n", + "## When to use cuDF and Dask cuDF\n", + "\n", + "If your workflow is fast enough on a single GPU or your data comfortably fits in memory on a single GPU, you would want to use cuDF. If you want to distribute your workflow across multiple GPUs, have more data than you can fit in memory on a single GPU, or want to analyze data spread across many files at once, you would want to use Dask cuDF." ] }, { @@ -111,7 +116,7 @@ "source": [ "ds = dask_cudf.from_cudf(s, npartitions=2)\n", "# Note the call to head here to show the first few entries, unlike\n", - "# cuDF objects, dask-cuDF objects do not have a printing\n", + "# cuDF objects, Dask-cuDF objects do not have a printing\n", "# representation that shows values since they may not be in local\n", "# memory.\n", "ds.head(n=3)" @@ -327,11 +332,11 @@ "id": "b17db919", "metadata": {}, "source": [ - "Now we will convert our cuDF dataframe into a dask-cuDF equivalent. Here we call out a key difference: to inspect the data we must call a method (here `.head()` to look at the first few values). In the general case (see the end of this notebook), the data in `ddf` will be distributed across multiple GPUs.\n", + "Now we will convert our cuDF dataframe into a Dask-cuDF equivalent. Here we call out a key difference: to inspect the data we must call a method (here `.head()` to look at the first few values). In the general case (see the end of this notebook), the data in `ddf` will be distributed across multiple GPUs.\n", "\n", - "In this small case, we could call `ddf.compute()` to obtain a cuDF object from the dask-cuDF object. 
In general, we should avoid calling `.compute()` on large dataframes, and restrict ourselves to using it when we have some (relatively) small postprocessed result that we wish to inspect. Hence, throughout this notebook we will generally call `.head()` to inspect the first few values of a dask-cuDF dataframe, occasionally calling out places where we use `.compute()` and why.\n", + "In this small case, we could call `ddf.compute()` to obtain a cuDF object from the Dask-cuDF object. In general, we should avoid calling `.compute()` on large dataframes, and restrict ourselves to using it when we have some (relatively) small postprocessed result that we wish to inspect. Hence, throughout this notebook we will generally call `.head()` to inspect the first few values of a Dask-cuDF dataframe, occasionally calling out places where we use `.compute()` and why.\n", "\n", - "*To understand more of the differences between how cuDF and dask-cuDF behave here, visit the [10 Minutes to Dask](https://docs.dask.org/en/stable/10-minutes-to-dask.html) tutorial after this one.*" + "*To understand more of the differences between how cuDF and Dask cuDF behave here, visit the [10 Minutes to Dask](https://docs.dask.org/en/stable/10-minutes-to-dask.html) tutorial after this one.*" ] }, { @@ -1676,7 +1681,7 @@ "id": "7aa0089f", "metadata": {}, "source": [ - "Note here we call `compute()` rather than `head()` on the dask-cuDF dataframe since we are happy that the number of matching rows will be small (and hence it is reasonable to bring the entire result back)." + "Note here we call `compute()` rather than `head()` on the Dask-cuDF dataframe since we are happy that the number of matching rows will be small (and hence it is reasonable to bring the entire result back)." ] }, { @@ -2389,7 +2394,7 @@ "id": "f6094cbe", "metadata": {}, "source": [ - "Applying functions to a `Series`. Note that applying user defined functions directly with Dask-cuDF is not yet implemented. 
For now, you can use [map_partitions](http://docs.dask.org/en/stable/generated/dask.dataframe.DataFrame.map_partitions.html) to apply a function to each partition of the distributed dataframe." + "Applying functions to a `Series`. Note that applying user defined functions directly with Dask cuDF is not yet implemented. For now, you can use [map_partitions](http://docs.dask.org/en/stable/generated/dask.dataframe.DataFrame.map_partitions.html) to apply a function to each partition of the distributed dataframe." ] }, { @@ -3488,7 +3493,7 @@ "id": "5ac3b004", "metadata": {}, "source": [ - "Transposing a dataframe, using either the `transpose` method or `T` property. Currently, all columns must have the same type. Transposing is not currently implemented in Dask-cuDF." + "Transposing a dataframe, using either the `transpose` method or `T` property. Currently, all columns must have the same type. Transposing is not currently implemented in Dask cuDF." ] }, { @@ -4177,7 +4182,7 @@ "id": "aa8a445b", "metadata": {}, "source": [ - "To convert the first few entries to pandas, we similarly call `.head()` on the dask-cuDF dataframe to obtain a local cuDF dataframe, which we can then convert." + "To convert the first few entries to pandas, we similarly call `.head()` on the Dask-cuDF dataframe to obtain a local cuDF dataframe, which we can then convert." ] }, { @@ -4895,7 +4900,7 @@ "id": "787eae14", "metadata": {}, "source": [ - "Note that for the dask-cuDF case, we use `dask_cudf.read_csv` in preference to `dask_cudf.from_cudf(cudf.read_csv)` since the former can parallelize across multiple GPUs and handle larger CSV files that would fit in memory on a single GPU." + "Note that for the Dask-cuDF case, we use `dask_cudf.read_csv` in preference to `dask_cudf.from_cudf(cudf.read_csv)` since the former can parallelize across multiple GPUs and handle larger CSV files that would fit in memory on a single GPU." 
] }, { diff --git a/docs/cudf/source/user_guide/api_docs/groupby.rst b/docs/cudf/source/user_guide/api_docs/groupby.rst index 80811efa33f..ca29087cbf9 100644 --- a/docs/cudf/source/user_guide/api_docs/groupby.rst +++ b/docs/cudf/source/user_guide/api_docs/groupby.rst @@ -68,7 +68,6 @@ Computations / descriptive stats GroupBy.std GroupBy.sum GroupBy.var - GroupBy.corr GroupBy.cov The following methods are available in both ``SeriesGroupBy`` and @@ -81,6 +80,7 @@ application to columns of a specific data type. :toctree: api/ DataFrameGroupBy.bfill + DataFrameGroupBy.corr DataFrameGroupBy.count DataFrameGroupBy.cumcount DataFrameGroupBy.cummax @@ -102,5 +102,6 @@ The following methods are available only for ``SeriesGroupBy`` objects. .. autosummary:: :toctree: api/ + SeriesGroupBy.corr SeriesGroupBy.nunique SeriesGroupBy.unique diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/aggregation.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/aggregation.rst index 739305af5d4..4b2b213b6c3 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/aggregation.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/aggregation.rst @@ -2,5 +2,5 @@ aggregation =========== -.. automodule:: cudf._lib.pylibcudf.aggregation +.. automodule:: pylibcudf.aggregation :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/binaryop.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/binaryop.rst index e5bc6aa7cda..8bbbfbf88c1 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/binaryop.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/binaryop.rst @@ -2,5 +2,5 @@ binaryop ======== -.. automodule:: cudf._lib.pylibcudf.binaryop +.. 
automodule:: pylibcudf.binaryop :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/column.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/column.rst index d1105d356b4..d26c8737cf4 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/column.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/column.rst @@ -2,5 +2,5 @@ Column ====== -.. automodule:: cudf._lib.pylibcudf.column +.. automodule:: pylibcudf.column :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/column_factories.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/column_factories.rst index c858135b6ce..8dfaa4bae03 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/column_factories.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/column_factories.rst @@ -2,5 +2,5 @@ column_factories ================ -.. automodule:: cudf._lib.pylibcudf.column_factories +.. automodule:: pylibcudf.column_factories :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/concatenate.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/concatenate.rst index e83739056f4..7912cb83767 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/concatenate.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/concatenate.rst @@ -2,5 +2,5 @@ concatenate =========== -.. automodule:: cudf._lib.pylibcudf.concatenate +.. automodule:: pylibcudf.concatenate :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/copying.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/copying.rst index fddd3ea440f..25e3ef50e6a 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/copying.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/copying.rst @@ -2,5 +2,5 @@ copying ======= -.. automodule:: cudf._lib.pylibcudf.copying +.. 
automodule:: pylibcudf.copying :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst index 558268ea495..71f7874cfbe 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst @@ -2,5 +2,5 @@ datetime ======== -.. automodule:: cudf._lib.pylibcudf.datetime +.. automodule:: pylibcudf.datetime :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/expressions.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/expressions.rst index 03f769ee861..5493d4662a9 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/expressions.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/expressions.rst @@ -2,5 +2,5 @@ expressions =========== -.. automodule:: cudf._lib.pylibcudf.expressions +.. automodule:: pylibcudf.expressions :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst index 542a5e12bc4..0d328a0b0e9 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst @@ -2,5 +2,5 @@ filling ======== -.. automodule:: cudf._lib.pylibcudf.filling +.. automodule:: pylibcudf.filling :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/gpumemoryview.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/gpumemoryview.rst index dffc7c24e02..5515a74adcc 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/gpumemoryview.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/gpumemoryview.rst @@ -2,5 +2,5 @@ gpumemoryview ============= -.. automodule:: cudf._lib.pylibcudf.gpumemoryview +.. 
automodule:: pylibcudf.gpumemoryview :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/groupby.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/groupby.rst index d6e994f7dbc..27cda383818 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/groupby.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/groupby.rst @@ -2,5 +2,5 @@ groupby ======= -.. automodule:: cudf._lib.pylibcudf.groupby +.. automodule:: pylibcudf.groupby :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst index 505765bba0f..e21536e2e97 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst @@ -21,8 +21,11 @@ This page provides API documentation for pylibcudf. groupby interop join + labeling lists merge + null_mask + partitioning quantiles reduce replace @@ -36,6 +39,7 @@ This page provides API documentation for pylibcudf. table traits transform + transpose types unary diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/interop.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/interop.rst index 881ab8d7be4..0d2cb55212e 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/interop.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/interop.rst @@ -2,5 +2,5 @@ interop ======= -.. automodule:: cudf._lib.pylibcudf.interop +.. automodule:: pylibcudf.interop :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/avro.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/avro.rst index 495bd505fdc..1c57a6157f5 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/avro.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/avro.rst @@ -2,5 +2,5 @@ Avro ==== -.. automodule:: cudf._lib.pylibcudf.io.avro +.. 
automodule:: pylibcudf.io.avro :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/csv.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/csv.rst index 5a2276f8b2d..59f7d8fe54c 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/csv.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/csv.rst @@ -2,5 +2,5 @@ CSV === -.. automodule:: cudf._lib.pylibcudf.io.csv +.. automodule:: pylibcudf.io.csv :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst index e2d342ffe47..53638f071cc 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst @@ -5,7 +5,7 @@ I/O I/O Utility Classes =================== -.. automodule:: cudf._lib.pylibcudf.io.types +.. automodule:: pylibcudf.io.types :members: @@ -19,3 +19,4 @@ I/O Functions csv json parquet + timezone diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/json.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/json.rst index 6aeae1f322a..a4626f43cc3 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/json.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/json.rst @@ -2,5 +2,5 @@ JSON ==== -.. automodule:: cudf._lib.pylibcudf.io.json +.. automodule:: pylibcudf.io.json :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/parquet.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/parquet.rst index 9dfbadfa216..07c2503ab28 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/parquet.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/parquet.rst @@ -2,5 +2,5 @@ Parquet ======= -.. automodule:: cudf._lib.pylibcudf.io.parquet +.. 
automodule:: pylibcudf.io.parquet :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/timezone.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/timezone.rst new file mode 100644 index 00000000000..20c1ffc2e93 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/timezone.rst @@ -0,0 +1,6 @@ +======== +Timezone +======== + +.. automodule:: pylibcudf.io.timezone + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/join.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/join.rst index 05b9709d116..de065e4fc40 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/join.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/join.rst @@ -2,5 +2,5 @@ join ==== -.. automodule:: cudf._lib.pylibcudf.join +.. automodule:: pylibcudf.join :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/labeling.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/labeling.rst new file mode 100644 index 00000000000..3f3ae4c5a77 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/labeling.rst @@ -0,0 +1,6 @@ +======== +labeling +======== + +.. automodule:: pylibcudf.labeling + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/lists.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/lists.rst index a127dd6006a..0fe1a876073 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/lists.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/lists.rst @@ -2,5 +2,5 @@ lists ===== -.. automodule:: cudf._lib.pylibcudf.lists +.. automodule:: pylibcudf.lists :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/merge.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/merge.rst index ef1189a064a..3f634ffcfd7 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/merge.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/merge.rst @@ -2,5 +2,5 @@ merge ===== -.. automodule:: cudf._lib.pylibcudf.merge +.. 
automodule:: pylibcudf.merge :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/null_mask.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/null_mask.rst new file mode 100644 index 00000000000..4799c62eace --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/null_mask.rst @@ -0,0 +1,6 @@ +========= +null_mask +========= + +.. automodule:: pylibcudf.null_mask + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/partitioning.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/partitioning.rst new file mode 100644 index 00000000000..6951dbecca0 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/partitioning.rst @@ -0,0 +1,6 @@ +============ +partitioning +============ + +.. automodule:: pylibcudf.partitioning + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/quantiles.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/quantiles.rst index 3417c1ff59d..0f0f701b5dc 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/quantiles.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/quantiles.rst @@ -2,5 +2,5 @@ quantiles ========= -.. automodule:: cudf._lib.pylibcudf.quantiles +.. automodule:: pylibcudf.quantiles :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/reduce.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/reduce.rst index e6f1b02331d..047f217c276 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/reduce.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/reduce.rst @@ -2,5 +2,5 @@ reduce ====== -.. automodule:: cudf._lib.pylibcudf.reduce +.. automodule:: pylibcudf.reduce :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/replace.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/replace.rst index 7f846872fca..7410b20e1b0 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/replace.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/replace.rst @@ -2,5 +2,5 @@ replace ======= -.. 
automodule:: cudf._lib.pylibcudf.replace +.. automodule:: pylibcudf.replace :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst index 964cef04923..09ec0501bb9 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst @@ -2,5 +2,5 @@ reshape ======= -.. automodule:: cudf._lib.pylibcudf.reshape +.. automodule:: pylibcudf.reshape :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/rolling.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/rolling.rst index 0817d117a94..1f8da467e84 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/rolling.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/rolling.rst @@ -2,5 +2,5 @@ rolling ======= -.. automodule:: cudf._lib.pylibcudf.rolling +.. automodule:: pylibcudf.rolling :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/round.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/round.rst index c97fda12301..e064357cbd1 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/round.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/round.rst @@ -2,5 +2,5 @@ round ===== -.. automodule:: cudf._lib.pylibcudf.round +.. automodule:: pylibcudf.round :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/scalar.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/scalar.rst index b12f47618fb..a9100c6bb2d 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/scalar.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/scalar.rst @@ -2,5 +2,5 @@ Scalar ====== -.. automodule:: cudf._lib.pylibcudf.scalar +.. 
automodule:: pylibcudf.scalar :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/search.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/search.rst index aa57bcd9d92..02307037994 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/search.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/search.rst @@ -2,5 +2,5 @@ search ====== -.. automodule:: cudf._lib.pylibcudf.search +.. automodule:: pylibcudf.search :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/sorting.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/sorting.rst index e9441366eeb..b8fd8fda9bd 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/sorting.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/sorting.rst @@ -2,5 +2,5 @@ sorting ======= -.. automodule:: cudf._lib.pylibcudf.sorting +.. automodule:: pylibcudf.sorting :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/stream_compaction.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/stream_compaction.rst index 00b479446d8..0252d0684d9 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/stream_compaction.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/stream_compaction.rst @@ -2,5 +2,5 @@ stream_compaction ================= -.. automodule:: cudf._lib.pylibcudf.stream_compaction +.. automodule:: pylibcudf.stream_compaction :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/capitalize.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/capitalize.rst new file mode 100644 index 00000000000..6b9ed8d47e7 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/capitalize.rst @@ -0,0 +1,6 @@ +========== +capitalize +========== + +.. 
automodule:: pylibcudf.strings.capitalize + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/char_types.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/char_types.rst new file mode 100644 index 00000000000..896fa6086db --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/char_types.rst @@ -0,0 +1,6 @@ +========== +char_types +========== + +.. automodule:: pylibcudf.strings.char_types + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/contains.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/contains.rst index e5745331bc7..d2d164be638 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/contains.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/contains.rst @@ -2,5 +2,5 @@ contains ======== -.. automodule:: cudf._lib.pylibcudf.strings.contains +.. automodule:: pylibcudf.strings.contains :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/extract.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/extract.rst new file mode 100644 index 00000000000..06f74a38709 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/extract.rst @@ -0,0 +1,6 @@ +======= +extract +======= + +.. automodule:: pylibcudf.strings.extract + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/find.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/find.rst new file mode 100644 index 00000000000..7c540e99929 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/find.rst @@ -0,0 +1,6 @@ +==== +find +==== + +.. 
automodule:: pylibcudf.strings.find + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/findall.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/findall.rst new file mode 100644 index 00000000000..9850ee10098 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/findall.rst @@ -0,0 +1,6 @@ +======= +findall +======= + +.. automodule:: pylibcudf.strings.findall + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst index 7affae6673f..9b1a6b72a88 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst @@ -4,7 +4,15 @@ strings .. toctree:: :maxdepth: 1 + capitalize + char_types contains + extract + find + findall + regex_flags + regex_program + repeat replace slice strip diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/regex_flags.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/regex_flags.rst new file mode 100644 index 00000000000..53fd712d864 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/regex_flags.rst @@ -0,0 +1,6 @@ +=========== +regex_flags +=========== + +.. automodule:: pylibcudf.strings.regex_flags + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/regex_program.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/regex_program.rst new file mode 100644 index 00000000000..6f3d2f6681c --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/regex_program.rst @@ -0,0 +1,6 @@ +============= +regex_program +============= + +..
automodule:: pylibcudf.strings.regex_program + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/repeat.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/repeat.rst new file mode 100644 index 00000000000..0041fe4c3da --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/repeat.rst @@ -0,0 +1,6 @@ +====== +repeat +====== + +.. automodule:: pylibcudf.strings.repeat + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace.rst index 9575ec226a7..d5417adac43 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace.rst @@ -2,5 +2,5 @@ replace ======= -.. automodule:: cudf._lib.pylibcudf.strings.replace +.. automodule:: pylibcudf.strings.replace :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/slice.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/slice.rst index 0ee5af71c03..e9908904512 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/slice.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/slice.rst @@ -2,5 +2,5 @@ slice ===== -.. automodule:: cudf._lib.pylibcudf.strings.slice +.. automodule:: pylibcudf.strings.slice :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/strip.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/strip.rst index 32f87e013ad..a79774b8e67 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/strip.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/strip.rst @@ -2,5 +2,5 @@ strip ===== -.. automodule:: cudf._lib.pylibcudf.strings.strip +.. 
automodule:: pylibcudf.strings.strip :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst index d8337b6596d..e39ca18a12b 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/table.rst @@ -2,5 +2,5 @@ Table ===== -.. automodule:: cudf._lib.pylibcudf.table +.. automodule:: pylibcudf.table :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst index 294ca8dc78c..2cce7b9d7d7 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst @@ -2,5 +2,5 @@ traits ====== -.. automodule:: cudf._lib.pylibcudf.traits +.. automodule:: pylibcudf.traits :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/transform.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/transform.rst index ef04bbad7e6..839163f83fc 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/transform.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/transform.rst @@ -2,5 +2,5 @@ transform ========= -.. automodule:: cudf._lib.pylibcudf.transform +.. automodule:: pylibcudf.transform :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/transpose.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/transpose.rst new file mode 100644 index 00000000000..6241295e770 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/transpose.rst @@ -0,0 +1,6 @@ +========= +transpose +========= + +.. 
automodule:: pylibcudf.transpose + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/types.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/types.rst index 8d5409bbd97..75521ac2f4d 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/types.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/types.rst @@ -2,5 +2,5 @@ types ===== -.. automodule:: cudf._lib.pylibcudf.types +.. automodule:: pylibcudf.types :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/unary.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/unary.rst index add4baa0a54..34077242b90 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/unary.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/unary.rst @@ -2,5 +2,5 @@ unary ===== -.. automodule:: cudf._lib.pylibcudf.unary +.. automodule:: pylibcudf.unary :members: diff --git a/docs/cudf/source/user_guide/io/io.md b/docs/cudf/source/user_guide/io/io.md index adcdaa51e7e..97b961b455b 100644 --- a/docs/cudf/source/user_guide/io/io.md +++ b/docs/cudf/source/user_guide/io/io.md @@ -75,7 +75,6 @@ IO format. - **Notes:** - \[¹\] - Not all orientations are GPU-accelerated. @@ -177,4 +176,9 @@ If no value is set, behavior will be the same as the "STABLE" option. 
+-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ | DEFLATE | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | Experimental | Experimental | ❌ | +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ + | LZ4 | ❌ | ❌ | Stable | Stable | ❌ | ❌ | Stable | Stable | ❌ | + +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ + | GZIP | ❌ | ❌ | Experimental | Experimental | ❌ | ❌ | ❌ | ❌ | ❌ | + +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+ + ``` diff --git a/docs/dask_cudf/source/best_practices.rst b/docs/dask_cudf/source/best_practices.rst new file mode 100644 index 00000000000..41263ebf589 --- /dev/null +++ b/docs/dask_cudf/source/best_practices.rst @@ -0,0 +1,332 @@ +.. _best-practices: + +Dask cuDF Best Practices +======================== + +This page outlines several important guidelines for using `Dask cuDF +`__ effectively. + +.. note:: + Since Dask cuDF is a backend extension for + `Dask DataFrame `__, + the guidelines discussed in the `Dask DataFrames Best Practices + `__ + documentation also apply to Dask cuDF (excluding any pandas-specific + details). + + +Deployment and Configuration +---------------------------- + +Use Dask-CUDA +~~~~~~~~~~~~~ + +To execute a Dask workflow on multiple GPUs, a Dask cluster must +be deployed with `Dask-CUDA `__ +and `Dask.distributed `__. + +When running on a single machine, the `LocalCUDACluster `__ +convenience function is strongly recommended. No matter how many GPUs are +available on the machine (even one!), using `Dask-CUDA has many advantages +`__ +over default (threaded) execution. Just to list a few: + +* Dask-CUDA makes it easy to pin workers to specific devices. 
+* Dask-CUDA makes it easy to configure memory-spilling options. +* The distributed scheduler collects useful diagnostic information that can be viewed on a dashboard in real time. + +Please see `Dask-CUDA's API `__ +and `Best Practices `__ +documentation for detailed information. Typical ``LocalCUDACluster`` usage +is also illustrated within the multi-GPU section of `Dask cuDF's +`__ documentation. + +.. note:: + When running on cloud infrastructure or HPC systems, it is usually best to + leverage system-specific deployment libraries like `Dask Operator + `__ and `Dask-Jobqueue + `__. + + Please see `the RAPIDS deployment documentation `__ + for further details and examples. + + +Use diagnostic tools +~~~~~~~~~~~~~~~~~~~~ + +The Dask ecosystem includes several diagnostic tools that you should absolutely use. +These tools include an intuitive `browser dashboard +`__ as well as a dedicated +`API for collecting performance profiles +`__. + +No matter the workflow, using the dashboard is strongly recommended. +It provides a visual representation of the worker resources and compute +progress. It also shows basic GPU memory and utilization metrics (under +the ``GPU`` tab). To visualize more detailed GPU metrics in JupyterLab, +use `NVDashboard `__. + + +Enable cuDF spilling +~~~~~~~~~~~~~~~~~~~~ + +When using Dask cuDF for classic ETL workloads, it is usually best +to enable `native spilling support in cuDF +`__. +When using :func:`LocalCUDACluster`, this is easily accomplished by +setting ``enable_cudf_spill=True``. + +When a Dask cuDF workflow includes conversion between DataFrame and Array +representations, native cuDF spilling may be insufficient. For these cases, +`JIT-unspill `__ +is likely to produce better protection from out-of-memory (OOM) errors. +Please see `Dask-CUDA's spilling documentation +`__ for further details +and guidance. 
+ +Use RMM +~~~~~~~ + +Memory allocations in cuDF are significantly faster and more efficient when +the `RAPIDS Memory Manager (RMM) `__ +library is configured appropriately on worker processes. In most cases, the best way to manage +memory is by initializing an RMM pool on each worker before executing a +workflow. When using :func:`LocalCUDACluster`, this is easily accomplished +by setting ``rmm_pool_size`` to a large fraction (e.g. ``0.9``). + +See the `Dask-CUDA memory-management documentation +`__ +for more details. + +Use the Dask DataFrame API +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Although Dask cuDF provides a public ``dask_cudf`` Python module, we +strongly recommended that you use the CPU/GPU portable ``dask.dataframe`` +API instead. Simply `use the Dask configuration system +`__ +to set the ``"dataframe.backend"`` option to ``"cudf"``, and the +``dask_cudf`` module will be imported and used implicitly. + +Be sure to use the :func:`to_backend` method if you need to convert +between the different DataFrame backends. For example:: + + df = df.to_backend("pandas") # This gives us a pandas-backed collection + +.. note:: + Although :func:`to_backend` makes it easy to move data between pandas + and cuDF, repetitive CPU-GPU data movement can degrade performance + significantly. For optimal results, keep your data on the GPU as much + as possible. + +Avoid eager execution +~~~~~~~~~~~~~~~~~~~~~ + +Although Dask DataFrame collections are lazy by default, there are several +notable methods that will result in the immediate execution of the +underlying task graph: + +:func:`compute`: Calling ``ddf.compute()`` will materialize the result of +``ddf`` and return a single cuDF object. This is done by executing the entire +task graph associated with ``ddf`` and concatenating its partitions in +local memory on the client process. + +.. note:: + Never call :func:`compute` on a large collection that cannot fit comfortably + in the memory of a single GPU! 
+ +:func:`persist`: Like :func:`compute`, calling ``ddf.persist()`` will +execute the entire task graph associated with ``ddf``. The most important +difference is that the computed partitions will remain in distributed +worker memory instead of being concatenated together on the client process. +Another difference is that :func:`persist` will return immediately when +executing on a distributed cluster. If you need a blocking synchronization +point in your workflow, simply use the :func:`wait` function:: + + ddf = ddf.persist() + wait(ddf) + +.. note:: + Avoid calling :func:`persist` on a large collection that cannot fit comfortably + in global worker memory. If the total sum of the partition sizes is larger + than the sum of all GPU memory, calling persist will result in significant + spilling from device memory. If the individual partition sizes are large, this + is likely to produce an OOM error. + +:func:`len` / :func:`head` / :func:`tail`: Although these operations are used +often within pandas/cuDF code to quickly inspect data, it is best to avoid +them in Dask DataFrame. In most cases, these operations will execute some or all +of the underlying task graph to materialize the collection. + +:func:`sort_values` / :func:`set_index` : These operations both require Dask to +eagerly collect quantile information about the column(s) being targeted by the +global sort operation. See the next section for notes on sorting considerations. + +.. note:: + When using :func:`set_index`, be sure to pass in ``sort=False`` whenever the + global collection does not **need** to be sorted by the new index. + +Avoid Sorting +~~~~~~~~~~~~~ + +`The design of Dask DataFrame `__ +makes it advantageous to work with data that is already sorted along its index at +creation time. For most other cases, it is best to avoid sorting unless the logic +of the workflow makes global ordering absolutely necessary. 
+ +If the purpose of a :func:`sort_values` operation is to ensure that all unique +values in ``by`` will be moved to the same output partition, then `shuffle +`__ +is often the better option. + + +Reading Data +------------ + +Tune the partition size +~~~~~~~~~~~~~~~~~~~~~~~ + +The ideal partition size is usually between 1/32 and 1/8 the memory +capacity of a single GPU. Increasing the partition size will typically +reduce the number of tasks in your workflow and improve the GPU utilization +for each task. However, if the partitions are too large, the risk of OOM +errors can become significant. + +.. note:: + As a general rule of thumb, start with 1/32-1/16 for shuffle-intensive workflows + (e.g. large-scale sorting and joining), and 1/16-1/8 otherwise. For pathologically + skewed data distributions, it may be necessary to target 1/64 or smaller. + This rule of thumb comes from anecdotal optimization and OOM-debugging + experience. Since every workflow is different, choosing the best partition + size is both an art and a science. + +The easiest way to tune the partition size is when the DataFrame collection +is first created by a function like :func:`read_parquet`, :func:`read_csv`, +or :func:`from_map`. For example, both :func:`read_parquet` and :func:`read_csv` +expose a ``blocksize`` argument for adjusting the maximum partition size. + +If the partition size cannot be tuned effectively at creation time, the +`repartition `__ +method can be used as a last resort. + + +Use Parquet +~~~~~~~~~~~ + +`Parquet `__ is the recommended +file format for Dask cuDF. It provides efficient columnar storage and enables +Dask to perform valuable query optimizations like column projection and +predicate pushdown. + +The most important arguments to :func:`read_parquet` are ``blocksize`` and +``aggregate_files``: + +``blocksize``: Use this argument to specify the maximum partition size. 
+The default is `"256 MiB"`, but larger values are usually more performant +on GPUs with more than 8 GiB of memory. Dask will use the ``blocksize`` +value to map a discrete number of Parquet row-groups (or files) to each +output partition. This mapping will only account for the uncompressed +storage size of each row group, which is usually smaller than the +corresponding ``cudf.DataFrame``. + +``aggregate_files``: Use this argument to specify whether Dask should +map multiple files to the same DataFrame partition. The default is +``False``, but ``aggregate_files=True`` is usually more performant when +the dataset contains many files that are smaller than half of ``blocksize``. + +If you know that your files correspond to a reasonable partition size +before splitting or aggregation, set ``blocksize=None`` to disallow +file splitting. In the absence of column-projection pushdown, this will +result in a simple 1-to-1 mapping between files and output partitions. + +.. note:: + If your workflow requires a strict 1-to-1 mapping between files and + partitions, use :func:`from_map` to manually construct your partitions + with ``cudf.read_parquet``. When :func:`dd.read_parquet` is used, + query-planning optimizations may automatically aggregate distinct files + into the same partition (even when ``aggregate_files=False``). + +.. note:: + Metadata collection can be extremely slow when reading from remote + storage (e.g. S3 and GCS). When reading many remote files that all + correspond to a reasonable partition size, use ``blocksize=None`` + to avoid unnecessary metadata collection. + +.. note:: + When reading from remote storage (e.g. S3 and GCS), performance will + likely improve with ``filesystem="arrow"``. When this option is set, + PyArrow will be used to perform IO on multiple CPU threads. Please be + aware that this feature is experimental, and behavior may change in + the future (without deprecation). 
Do not pass in ``blocksize`` or + ``aggregate_files`` when this feature is used. Instead, set the + ``"dataframe.parquet.minimum-partition-size"`` config to control + file aggregation. + +Use :func:`from_map` +~~~~~~~~~~~~~~~~~~~~ + +To implement custom DataFrame-creation logic that is not covered by +existing APIs (like :func:`read_parquet`), use :func:`dask.dataframe.from_map` +whenever possible. The :func:`from_map` API has several advantages +over :func:`from_delayed`: + +* It allows proper lazy execution of your custom logic +* It enables column projection (as long as the mapped function supports a ``columns`` key-word argument) + +See the `from_map API documentation `__ +for more details. + +.. note:: + Whenever possible, be sure to specify the ``meta`` argument to + :func:`from_map`. If this argument is excluded, Dask will need to + materialize the first partition eagerly. If a large RMM pool is in + use on the first visible device, this eager execution on the client + may lead to an OOM error. + + +Sorting, Joining, and Grouping +------------------------------ + +Sorting, joining, and grouping operations all have the potential to +require the global shuffling of data between distinct partitions. +When the initial data fits comfortably in global GPU memory, these +"all-to-all" operations are typically bound by worker-to-worker +communication. When the data is larger than global GPU memory, the +bottleneck is typically device-to-host memory spilling. + +Although every workflow is different, the following guidelines +are often recommended: + +* Use a distributed cluster with `Dask-CUDA `__ workers + +* Use native cuDF spilling whenever possible (`Dask-CUDA spilling documentation `__) + +* Avoid shuffling whenever possible + * Use ``split_out=1`` for low-cardinality groupby aggregations + * Use ``broadcast=True`` for joins when at least one collection comprises a small number of partitions (e.g. ``<=5``) + +* `Use UCX `__ if communication is a bottleneck. + +.. 
note:: + UCX enables Dask-CUDA workers to communicate using high-performance + transport technologies like `NVLink `__ + and Infiniband. Without UCX, inter-process communication will rely + on TCP sockets. + + +User-defined functions +---------------------- + +Most real-world Dask DataFrame workflows use `map_partitions +`__ +to map user-defined functions across every partition of the underlying data. +This API is a fantastic way to apply custom operations in an intuitive and +scalable way. With that said, the :func:`map_partitions` method will produce +an opaque DataFrame expression that blocks the query-planning `optimizer +`__ from performing +useful optimizations (like projection and filter pushdown). + +Since column-projection pushdown is often the most effective optimization, +it is important to select the necessary columns both before and after calling +:func:`map_partitions`. You can also add explicit filter operations to further +mitigate the loss of filter pushdown. diff --git a/docs/dask_cudf/source/conf.py b/docs/dask_cudf/source/conf.py index dc40254312e..5daa8245695 100644 --- a/docs/dask_cudf/source/conf.py +++ b/docs/dask_cudf/source/conf.py @@ -78,6 +78,7 @@ "cudf": ("https://docs.rapids.ai/api/cudf/stable/", None), "dask": ("https://docs.dask.org/en/stable/", None), "pandas": ("https://pandas.pydata.org/docs/", None), + "dask-cuda": ("https://docs.rapids.ai/api/dask-cuda/stable/", None), } numpydoc_show_inherited_class_members = True diff --git a/docs/dask_cudf/source/index.rst b/docs/dask_cudf/source/index.rst index 9a216690384..c2891ebc15e 100644 --- a/docs/dask_cudf/source/index.rst +++ b/docs/dask_cudf/source/index.rst @@ -3,39 +3,46 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Welcome to dask-cudf's documentation! +Welcome to Dask cuDF's documentation! 
===================================== -**Dask-cuDF** (pronounced "DASK KOO-dee-eff") is an extension +**Dask cuDF** (pronounced "DASK KOO-dee-eff") is an extension library for the `Dask `__ parallel computing -framework that provides a `cuDF -`__-backed distributed -dataframe with the same API as `Dask dataframes -`__. +framework. When installed, Dask cuDF is automatically registered +as the ``"cudf"`` dataframe backend for +`Dask DataFrame `__. + +.. note:: + Neither Dask cuDF nor Dask DataFrame provide support for multi-GPU + or multi-node execution on their own. You must also deploy a + `dask.distributed `__ cluster + to leverage multiple GPUs. We strongly recommend using :doc:`dask-cuda:index` + to simplify the setup of the cluster, taking advantage of all features + of the GPU and networking hardware. If you are familiar with Dask and `pandas `__ or -`cuDF `__, then Dask-cuDF +`cuDF `__, then Dask cuDF should feel familiar to you. If not, we recommend starting with `10 minutes to Dask `__ followed -by `10 minutes to cuDF and Dask-cuDF +by `10 minutes to cuDF and Dask cuDF `__. -When running on multi-GPU systems, `Dask-CUDA -`__ is recommended to -simplify the setup of the cluster, taking advantage of all features of -the GPU and networking hardware. +After reviewing the sections below, please see the +:ref:`Best Practices ` page for further guidance on +using Dask cuDF effectively. -Using Dask-cuDF + +Using Dask cuDF --------------- -When installed, Dask-cuDF registers itself as a dataframe backend for -Dask. This means that in many cases, using cuDF-backed dataframes requires -only small changes to an existing workflow. The minimal change is to -select cuDF as the dataframe backend in :doc:`Dask's -configuration `. To do so, we must set the option -``dataframe.backend`` to ``cudf``. 
From Python, this can be achieved -like so:: +The Dask DataFrame API (Recommended) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Simply use the `Dask configuration +`__ +system to set the ``"dataframe.backend"`` option to ``"cudf"``. +From Python, this can be achieved like so:: import dask @@ -44,52 +51,157 @@ like so:: Alternatively, you can set ``DASK_DATAFRAME__BACKEND=cudf`` in the environment before running your code. -Dataframe creation from on-disk formats -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If your workflow creates Dask dataframes from on-disk formats -(for example using :func:`dask.dataframe.read_parquet`), then setting -the backend may well be enough to migrate your workflow. +Once this is done, the public Dask DataFrame API will leverage +``cudf`` automatically when a new DataFrame collection is created +from an on-disk format using any of the following ``dask.dataframe`` +functions: -For example, consider reading a dataframe from parquet:: +* :func:`read_parquet` +* :func:`read_json` +* :func:`read_csv` +* :func:`read_orc` +* :func:`read_hdf` +* :func:`from_dict` - import dask.dataframe as dd - - # By default, we obtain a pandas-backed dataframe - df = dd.read_parquet("data.parquet", ...) +For example:: + import dask.dataframe as dd -To obtain a cuDF-backed dataframe, we must set the -``dataframe.backend`` configuration option:: + # By default, we obtain a pandas-backed dataframe + df = dd.read_parquet("data.parquet", ...) import dask - import dask.dataframe as dd dask.config.set({"dataframe.backend": "cudf"}) - # This gives us a cuDF-backed dataframe + # This now gives us a cuDF-backed dataframe df = dd.read_parquet("data.parquet", ...) -This code will use cuDF's GPU-accelerated :func:`parquet reader -` to read partitions of the data. +When other functions are used to create a new collection +(e.g. 
:func:`from_map`, :func:`from_pandas`, :func:`from_delayed`, +and :func:`from_array`), the backend of the new collection will +depend on the inputs to those functions. For example:: + + import pandas as pd + import cudf + + # This gives us a pandas-backed dataframe + dd.from_pandas(pd.DataFrame({"a": range(10)})) + + # This gives us a cuDF-backed dataframe + dd.from_pandas(cudf.DataFrame({"a": range(10)})) + +An existing collection can always be moved to a specific backend +using the :func:`dask.dataframe.DataFrame.to_backend` API:: + + # This ensures that we have a cuDF-backed dataframe + df = df.to_backend("cudf") + + # This ensures that we have a pandas-backed dataframe + df = df.to_backend("pandas") + +The explicit Dask cuDF API +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In addition to providing the ``"cudf"`` backend for Dask DataFrame, +Dask cuDF also provides an explicit ``dask_cudf`` API:: + + import dask_cudf + + # This always gives us a cuDF-backed dataframe + df = dask_cudf.read_parquet("data.parquet", ...) + +This API is used implicitly by the Dask DataFrame API when the ``"cudf"`` +backend is enabled. Therefore, using it directly will not provide any +performance benefit over the CPU/GPU-portable ``dask.dataframe`` API. +Also, using some parts of the explicit API is incompatible with +automatic query planning (see the next section). + +Query Planning +~~~~~~~~~~~~~~ + +Dask cuDF now provides automatic query planning by default (RAPIDS 24.06+). +As long as the ``"dataframe.query-planning"`` configuration is set to +``True`` (the default) when ``dask.dataframe`` is first imported, `Dask +Expressions `__ will be used under the hood.
+ +For example, the following code will automatically benefit from predicate +pushdown when the result is computed:: + + df = dd.read_parquet("/my/parquet/dataset/") + result = df.sort_values('B')['A'] + +Unoptimized expression graph (``df.pprint()``):: + + Projection: columns='A' + SortValues: by=['B'] shuffle_method='tasks' options={} + ReadParquetFSSpec: path='/my/parquet/dataset/' ... + +Simplified expression graph (``df.simplify().pprint()``):: + + Projection: columns='A' + SortValues: by=['B'] shuffle_method='tasks' options={} + ReadParquetFSSpec: path='/my/parquet/dataset/' columns=['A', 'B'] ... + +.. note:: + Dask will automatically simplify the expression graph (within + :func:`optimize`) when the result is converted to a task graph + (via :func:`compute` or :func:`persist`). You do not need to call + :func:`simplify` yourself. + + +Using Multiple GPUs and Multiple Nodes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Whenever possible, Dask cuDF (i.e. Dask DataFrame) will automatically try +to partition your data into small-enough tasks to fit comfortably in the +memory of a single GPU. This means the necessary compute tasks needed to +compute a query can often be streamed to a single GPU process for +out-of-core computing. This also means that the compute tasks can be +executed in parallel over a multi-GPU cluster. + +In order to execute your Dask workflow on multiple GPUs, you will +typically need to use :doc:`dask-cuda:index` +to deploy a distributed Dask cluster, and +`Distributed `__ +to define a client object.
For example:: + + from dask_cuda import LocalCUDACluster + from distributed import Client + + if __name__ == "__main__": + + client = Client( + LocalCUDACluster( + CUDA_VISIBLE_DEVICES="0,1", # Use two workers (on devices 0 and 1) + rmm_pool_size=0.9, # Use 90% of GPU memory as a pool for faster allocations + enable_cudf_spill=True, # Improve device memory stability + local_directory="/fast/scratch/", # Use fast local storage for spilling + ) + ) + + df = dd.read_parquet("/my/parquet/dataset/") + agg = df.groupby('B').sum() + agg.compute() # This will use the cluster defined above + +.. note:: + This example uses :func:`compute` to materialize a concrete + ``cudf.DataFrame`` object in local memory. Never call :func:`compute` + on a large collection that cannot fit comfortably in the memory of a + single GPU! See Dask's `documentation on managing computation + `__ + for more details. -Dataframe creation from in-memory formats -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Please see the :doc:`dask-cuda:index` +documentation for more information about deploying GPU-aware clusters +(including `best practices +`__). -If you already have a dataframe in memory and want to convert it to a -cuDF-backend one, there are two options depending on whether the -dataframe is already a Dask one or not. If you have a Dask dataframe, -then you can call :func:`dask.dataframe.to_backend` passing ``"cudf"`` -as the backend; if you have a pandas dataframe then you can either -call :func:`dask.dataframe.from_pandas` followed by -:func:`~dask.dataframe.to_backend` or first convert the dataframe with -:func:`cudf.from_pandas` and then parallelise this with -:func:`dask_cudf.from_cudf`. API Reference ------------- -Generally speaking, Dask-cuDF tries to offer exactly the same API as -Dask itself. There are, however, some minor differences mostly because +Generally speaking, Dask cuDF tries to offer exactly the same API as +Dask DataFrame. 
There are, however, some minor differences mostly because cuDF does not :doc:`perfectly mirror ` the pandas API, or because cuDF provides additional configuration flags (these mostly occur in data reading and writing interfaces). @@ -97,7 +209,7 @@ flags (these mostly occur in data reading and writing interfaces). As a result, straightforward workflows can be migrated without too much trouble, but more complex ones that utilise more features may need a bit of tweaking. The API documentation describes details of the -differences and all functionality that Dask-cuDF supports. +differences and all functionality that Dask cuDF supports. .. toctree:: :maxdepth: 2 diff --git a/java/ci/Dockerfile.rocky b/java/ci/Dockerfile.rocky index 6b87f3ed34e..152af22f7e4 100644 --- a/java/ci/Dockerfile.rocky +++ b/java/ci/Dockerfile.rocky @@ -28,7 +28,7 @@ ARG TARGETPLATFORM=linux/amd64 FROM --platform=$TARGETPLATFORM nvidia/cuda:$CUDA_VERSION-devel-rockylinux$OS_RELEASE ARG TOOLSET_VERSION=11 ### Install basic requirements -RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERSION} git zlib-devel maven tar wget patch ninja-build +RUN dnf --enablerepo=powertools install -y scl-utils gcc-toolset-${TOOLSET_VERSION} git zlib-devel maven tar wget patch ninja-build boost-devel ## pre-create the CMAKE_INSTALL_PREFIX folder, set writable by any user for Jenkins RUN mkdir /usr/local/rapids /rapids && chmod 777 /usr/local/rapids /rapids diff --git a/java/ci/README.md b/java/ci/README.md index ca8432f9d1e..ccb9efb50b6 100644 --- a/java/ci/README.md +++ b/java/ci/README.md @@ -34,7 +34,7 @@ nvidia-docker run -it cudf-build:11.8.0-devel-rocky8 bash You can download the cuDF repo in the docker container or you can mount it into the container. Here I choose to download again in the container. 
```bash -git clone --recursive https://github.com/rapidsai/cudf.git -b branch-24.08 +git clone --recursive https://github.com/rapidsai/cudf.git -b branch-24.10 ``` ### Build cuDF jar with devtoolset @@ -47,4 +47,4 @@ scl enable gcc-toolset-11 "java/ci/build-in-docker.sh" ### The output -You can find the cuDF jar in java/target/ like cudf-24.08.3-SNAPSHOT-cuda11.jar. +You can find the cuDF jar in java/target/ like cudf-24.10.0-SNAPSHOT-cuda11.jar. diff --git a/java/pom.xml b/java/pom.xml index 837bdc2220d..e4f1cdf64e7 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -1,6 +1,6 @@