mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-13 22:18:36 +08:00
replace std::min with min inside CUDA kernel
This commit is contained in:
parent
ca34331e98
commit
6dfa8df479
@@ -612,7 +612,7 @@ static __global__ void oneShotAllReduceKernel(AllReduceParams params)
|
||||
|
||||
// Start and end offsets of the thread
|
||||
size_t const chunk_start = bidx * params.elts_per_block + tidx * PACKED_ELTS;
|
||||
-size_t const chunk_end = std::min((bidx + 1) * params.elts_per_block, params.elts_total);
|
||||
+size_t const chunk_end = min((bidx + 1) * params.elts_per_block, params.elts_total);
|
||||
|
||||
T* buffers[RANKS_PER_NODE];
|
||||
#pragma unroll
|
||||
|
||||
Loading…
Reference in New Issue
Block a user