Mirror of https://github.com/NVIDIA/TensorRT-LLM.git (synced 2026-01-27 22:23:25 +08:00)
Merge branch 'dev-jiaganc-fix-b300-gptoss-trtllm' into 'feat/b300_cu13'
[None][fix] Fix TRTLLM MoE backend error when running gptoss on B300

See merge request ftp/tekit!9680

Signed-off-by: Jiagan Cheng <jiaganc@nvidia.com>
Commit f14c7402c1
@@ -46,8 +46,8 @@ torch::Tensor dtype_mxe2m1_block_scale_moe_runner(torch::Tensor const& routing_l
     std::optional<double> const routed_scaling_factor, int64_t const tile_tokens_dim, int64_t const routing_method_type,
     btg::Dtype const dtype, MoeRunnerType& moe_runner, int64_t moeConfigIndex)
 {
-    auto const sm = tensorrt_llm::common::getSMVersion();
-    TORCH_CHECK(sm == 100, "Only SM100 is supported by FP4 block scale MOE");
+    auto const sm = tensorrt_llm::common::getSMFamily();
+    TORCH_CHECK(sm == 100, "Only SM100f is supported by MXFP4 block scale MOE");
     TORCH_CHECK(tile_tokens_dim == 8 || tile_tokens_dim == 16 || tile_tokens_dim == 32 || tile_tokens_dim == 64,
         "tile_tokens_dim must be 8, 16, 32, 64");
     TORCH_CHECK(routing_logits.scalar_type() == at::ScalarType::Float
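The only functional change is replacing the exact-SM check with a family check: getSMVersion() returns the GPU's precise compute capability, so the old guard `sm == 100` rejected B300, while getSMFamily() maps chips in the same kernel family onto a common value that still compares equal to 100, which is why the MXFP4 block-scale MoE path now works for gptoss on B300. The snippet below is a minimal, self-contained sketch of that distinction; the two helper functions and the assumption that B300 reports compute capability 10.3 (SM103) within the SM100 family are illustrative stand-ins, not the TensorRT-LLM implementation.

#include <cassert>

// Hypothetical stand-ins for tensorrt_llm::common::getSMVersion() and
// getSMFamily(); the real library derives these from the CUDA device
// properties of the current GPU.
int getSMVersion(int major, int minor)
{
    return major * 10 + minor; // e.g. compute capability 10.3 -> 103
}

int getSMFamily(int major, int minor)
{
    int const sm = getSMVersion(major, minor);
    // Assumption for this sketch: SM103 (B300) belongs to the same
    // "SM100f" family as SM100, so it maps to 100.
    return (sm == 103) ? 100 : sm;
}

int main()
{
    // B200-class GPU (compute capability 10.0): both checks pass.
    assert(getSMVersion(10, 0) == 100);
    assert(getSMFamily(10, 0) == 100);

    // B300-class GPU (assumed compute capability 10.3): the old exact check
    // `sm == 100` fails, which produced the reported MoE backend error; the
    // family check still yields 100, so the kernel path is allowed.
    assert(getSMVersion(10, 3) != 100);
    assert(getSMFamily(10, 3) == 100);
    return 0;
}

Guarding on the family rather than the exact SM keeps a single check valid for every chip that shares the SM100 kernel family, instead of enumerating each new compute capability as it ships.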