Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions vortex-cuda/kernels/src/decimal_cast.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#include "config.cuh"
#include "types.cuh"
#include <stdint.h>
#include <type_traits>

// Arrow decimal schemas fix the physical values buffer width:
// - Decimal128: 16 bytes per value.
// - Decimal256: 32 bytes per value.
//
// Vortex may use narrower decimal storage, so Arrow Device export widens values
// to match the schema-implied physical layout consumed by cuDF and other Arrow
// readers.
// Converts a decimal storage value to Arrow's 128-bit decimal physical representation.
template <typename Input>
__device__ __forceinline__ int128_t decimal_to_i128(Input value) {
if constexpr (std::is_same_v<Input, int128_t>) {
return value;
} else if constexpr (std::is_same_v<Input, int256_t>) {
return int128_t {value.parts[0], value.parts[1]};
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this end up truncating? I think it is currently possible in vortex to do this:

  let array = DecimalArray::from_iter(
      [i256::from_parts(0, 1)], // 2^128, so does not fit into i128
      DecimalDType::new(38, 0), // this is normally i128
  );

when exporting this array we would pick the i256 -> i128 kernel and truncate the values without checking for overflow.

probably the right fix is for vortex to reject constructing such arrays

} else {
const int64_t lo = static_cast<int64_t>(value);
const int64_t hi = value < 0 ? -1 : 0;
return int128_t {lo, hi};
}
}

// Converts one decimal value to the requested Arrow decimal physical representation.
template <typename Output, typename Input>
__device__ __forceinline__ Output decimal_cast_value(Input value) {
if constexpr (std::is_same_v<Output, int256_t> && std::is_same_v<Input, int256_t>) {
return value;
} else if constexpr (std::is_same_v<Output, int128_t>) {
return decimal_to_i128(value);
} else {
static_assert(std::is_same_v<Output, int256_t>);
const int128_t value128 = decimal_to_i128(value);
const int64_t sign = value128.hi < 0 ? -1 : 0;
return int256_t {{value128.lo, value128.hi, sign, sign}};
}
}

// Widens a contiguous decimal values buffer on the device.
template <typename Input, typename Output>
__device__ void
decimal_cast_device(const Input *__restrict input, Output *__restrict output, uint64_t array_len) {
const uint64_t worker = blockIdx.x * blockDim.x + threadIdx.x;
const uint64_t startElem = start_elem(worker, array_len);
const uint64_t stopElem = stop_elem(worker, array_len);

if (startElem >= array_len) {
return;
}

for (uint64_t idx = startElem; idx < stopElem; idx++) {
output[idx] = decimal_cast_value<Output>(input[idx]);
}
}

// Generates Arrow Decimal128 and Decimal256 widening kernels for one input storage type.
#define GENERATE_DECIMAL_CAST_KERNELS(input_suffix, InputType) \
extern "C" __global__ void decimal_cast_##input_suffix##_i128(const InputType *__restrict input, \
int128_t *__restrict output, \
uint64_t array_len) { \
decimal_cast_device(input, output, array_len); \
} \
extern "C" __global__ void decimal_cast_##input_suffix##_i256(const InputType *__restrict input, \
int256_t *__restrict output, \
uint64_t array_len) { \
decimal_cast_device(input, output, array_len); \
}

FOR_EACH_SIGNED_INT(GENERATE_DECIMAL_CAST_KERNELS)
FOR_EACH_LARGE_DECIMAL(GENERATE_DECIMAL_CAST_KERNELS)
Loading
Loading