Skip to content

Commit

Permalink
fix: errors in existing cuda_kernels (#2877)
Browse files Browse the repository at this point in the history
* fix: order of parameters

* fix: thread_id value

* remove Identities kernels from the list
  • Loading branch information
ManasviGoyal authored Dec 11, 2023
1 parent f161bd1 commit 7687108
Show file tree
Hide file tree
Showing 6 changed files with 7 additions and 13 deletions.
3 changes: 0 additions & 3 deletions dev/generate-kernel-signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
"awkward_ListArray_validity",
"awkward_BitMaskedArray_to_ByteMaskedArray",
"awkward_ListArray_compact_offsets",
"awkward_new_Identities",
"awkward_Identities32_to_Identities64",
"awkward_ListOffsetArray_flatten_offsets",
"awkward_IndexedArray_overlay_mask",
"awkward_IndexedArray_fill_count",
Expand All @@ -36,7 +34,6 @@
"awkward_RegularArray_getitem_next_range_spreadadvanced",
"awkward_RegularArray_getitem_next_array",
"awkward_missing_repeat",
"awkward_Identities_getitem_carry",
"awkward_RegularArray_getitem_jagged_expand",
"awkward_ListArray_getitem_jagged_expand",
"awkward_ListArray_getitem_next_array",
Expand Down
3 changes: 0 additions & 3 deletions dev/generate-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,8 +464,6 @@ def gencpukerneltests(specdict):
"awkward_ListArray_validity",
"awkward_BitMaskedArray_to_ByteMaskedArray",
"awkward_ListArray_compact_offsets",
"awkward_new_Identities",
"awkward_Identities32_to_Identities64",
"awkward_ListOffsetArray_flatten_offsets",
"awkward_IndexedArray_overlay_mask",
"awkward_IndexedArray_fill_count",
Expand All @@ -485,7 +483,6 @@ def gencpukerneltests(specdict):
"awkward_RegularArray_getitem_next_range_spreadadvanced",
"awkward_RegularArray_getitem_next_array",
"awkward_missing_repeat",
"awkward_Identities_getitem_carry",
"awkward_RegularArray_getitem_jagged_expand",
"awkward_ListArray_getitem_jagged_expand",
"awkward_ListArray_getitem_next_array",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
// def f(grid, block, args):
// (tocarry, mask, length, validwhen, invocation_index, err_code) = args
// scan_in_array = cupy.empty(length, dtype=cupy.int64)
// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_getitem_nextcarry_a', tocarry.dtype, mask.dtype]))(grid, block, (tocarry, mask, validwhen, length, scan_in_array, invocation_index, err_code))
// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_getitem_nextcarry_a', tocarry.dtype, mask.dtype]))(grid, block, (tocarry, mask, length, validwhen, scan_in_array, invocation_index, err_code))
// scan_in_array = inclusive_scan(grid, block, (scan_in_array, invocation_index, err_code))
// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_getitem_nextcarry_b', tocarry.dtype, mask.dtype]))(grid, block, (tocarry, mask, validwhen, length, scan_in_array, invocation_index, err_code))
// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_getitem_nextcarry_b', tocarry.dtype, mask.dtype]))(grid, block, (tocarry, mask, length, validwhen, scan_in_array, invocation_index, err_code))
// out["awkward_ByteMaskedArray_getitem_nextcarry_a", {dtype_specializations}] = None
// out["awkward_ByteMaskedArray_getitem_nextcarry_b", {dtype_specializations}] = None
// END PYTHON
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
// def f(grid, block, args):
// (nextcarry, nextparents, outindex, mask, parents, length, validwhen, invocation_index, err_code) = args
// scan_in_array = cupy.empty(length, dtype=cupy.int64)
// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_reduce_next_64_a', nextcarry.dtype, nextparents.dtype, outindex.dtype]))(grid, block, (nextcarry, nextparents, outindex, mask, parents, validwhen, length, scan_in_array, invocation_index, err_code))
// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_reduce_next_64_a', nextcarry.dtype, nextparents.dtype, outindex.dtype]))(grid, block, (nextcarry, nextparents, outindex, mask, parents, length, validwhen, scan_in_array, invocation_index, err_code))
// scan_in_array = inclusive_scan(grid, block, (scan_in_array, length, invocation_index, err_code))
// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_reduce_next_64_b', nextcarry.dtype, nextparents.dtype, outindex.dtype]))(grid, block, (nextcarry, nextparents, outindex, mask, parents, validwhen, length, scan_in_array, invocation_index, err_code))
// cuda_kernel_templates.get_function(fetch_specialization(['awkward_ByteMaskedArray_reduce_next_64_b', nextcarry.dtype, nextparents.dtype, outindex.dtype]))(grid, block, (nextcarry, nextparents, outindex, mask, parents, length, validwhen, scan_in_array, invocation_index, err_code))
// out["awkward_ByteMaskedArray_reduce_next_64_a", {dtype_specializations}] = None
// out["awkward_ByteMaskedArray_reduce_next_64_b", {dtype_specializations}] = None
// END PYTHON
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
// (nextshifts, mask, length, valid_when, invocation_index, err_code) = args
// scan_in_array_k = cupy.empty(length, dtype=cupy.int64)
// scan_in_array_nullsum = cupy.empty(length, dtype=cupy.int64)
// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a", nextshifts.dtype, mask.dtype]))(grid, block, (nextshifts, mask, valid_when, length, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code))
// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a", nextshifts.dtype, mask.dtype]))(grid, block, (nextshifts, mask, length, valid_when, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code))
// scan_in_array_k = inclusive_scan(grid, block, (scan_in_array_k, invocation_index, err_code))
// scan_in_array_nullsum = inclusive_scan(grid, block, (scan_in_array_nullsum, invocation_index, err_code))
// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a", nextshifts.dtype, mask.dtype]))(grid, block, (nextshifts, mask, valid_when, length, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code))
// cuda_kernel_templates.get_function(fetch_specialization(["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_b", nextshifts.dtype, mask.dtype]))(grid, block, (nextshifts, mask, length, valid_when, scan_in_array_k, scan_in_array_nullsum, invocation_index, err_code))
// out["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_a", {dtype_specializations}] = None
// out["awkward_ByteMaskedArray_reduce_next_nonlocal_nextshifts_64_b", {dtype_specializations}] = None
// END PYTHON
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ awkward_ListArray_getitem_jagged_expand(T* multistarts,
uint64_t invocation_index,
uint64_t* err_code) {
if (err_code[0] == NO_ERROR) {
int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) % length;
int64_t thread_id = (blockIdx.x * blockDim.x + threadIdx.x) / jaggedsize;
int64_t thready_id = (blockIdx.x * blockDim.x + threadIdx.x) % jaggedsize;
W start = fromstarts[thread_id];
X stop = fromstops[thread_id];
Expand Down

0 comments on commit 7687108

Please sign in to comment.