[https://nvbugs/5880261][fix] fix cacheTransceiver (#11409)

Signed-off-by: Chuang Zhu <111838961+chuangz0@users.noreply.github.com>
This commit is contained in:
Chuang Zhu 2026-02-15 10:40:44 +08:00 committed by GitHub
parent 29e44dd749
commit 0a9ddf8c17
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 15 additions and 17 deletions

View File

@ -179,8 +179,8 @@ void AgentConnection::sendRequestAndBufferInfo(batch_manager::RequestInfo& reque
TLLM_CHECK(deviceId == mAgentConnectionManager->getDeviceId());
for (size_t i = 0; i < preAllocateBuffers.size(); i++)
{
bufferDescs.emplace_back(
reinterpret_cast<uintptr_t>(preAllocateBuffers[i]->data()), preAllocateBuffers[i]->getSize(), deviceId);
bufferDescs.emplace_back(reinterpret_cast<uintptr_t>(preAllocateBuffers[i]->data()),
preAllocateBuffers[i]->getSizeInBytes(), deviceId);
}
std::string address = mAgentConnectionManager->getAgent()->getLocalConnectionInfo();
std::optional<std::string> metadataOpt = std::nullopt;

View File

@ -1606,8 +1606,8 @@ TEST_P(UnexpectedTerminationRaceTest, UnexpectedTerminationRaceTest)
if (mIsContext || mIsGeneration)
{
bool enableDP = mIsContext ? contextDP : generationDP;
setUpCacheManager(
numLayers, numHeads, sizePerHead, tokensPerBlock, dataType, kvFactor, isMLA, enableDP, isWindow);
setUpCacheManager(numLayers, numHeads, sizePerHead, tokensPerBlock, dataType, kvFactor, isMLA, enableDP,
isWindow, isIndexerKCache, indexerDimPerHead, indexerKCacheQuantBlockSize);
setUpCacheTransceiver();
std::vector<std::shared_ptr<WrappedLlmRequest>> requests;
int requestId = 0;

View File

@ -436,11 +436,11 @@ def run_disaggregated_test(example_dir,
cleanup_output_files()
run_env = env.copy()
# on some CI nodes , we set UCX_TLS to "^ib" to avoid the issue that IB equipped but not available.
# On some CI nodes, we set UCX_TLS to "^ib,gdr_copy" to work around IB being installed but not available, and gdr_copy failing to pin buffers.
# We set UCX_MM_ERROR_HANDLING to "y" to work around NIXL being unable to use IB or TCP for notifications on some CI nodes;
# setting it to "y" enables NIXL to use system memory for notifications.
run_env["UCX_TLS"] = "^ib"
run_env["UCX_TLS"] = "^ib,gdr_copy"
run_env["UCX_MM_ERROR_HANDLING"] = "y"
num_ranks, config_file = get_test_config(test_desc, example_dir,
os.path.dirname(__file__))
@ -1260,7 +1260,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx(disaggregated_test_root,
os.symlink(src, dst, target_is_directory=True)
env = llm_venv._new_env.copy()
env["TRTLLM_USE_UCX_KVCACHE"] = "1"
env["UCX_TLS"] = "^ib"
env["UCX_TLS"] = "^ib,gdr_copy"
run_disaggregated_test(disaggregated_example_root,
"deepseek_v3_lite_fp8_ucx",
env=env,
@ -1287,7 +1287,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_nixl(disaggregated_test_root,
os.symlink(src, dst, target_is_directory=True)
env = llm_venv._new_env.copy()
env["TRTLLM_USE_NIXL_KVCACHE"] = "1"
env["UCX_TLS"] = "^ib"
env["UCX_TLS"] = "^ib,gdr_copy"
env["UCX_MM_ERROR_HANDLING"] = "y"
run_disaggregated_test(disaggregated_example_root,
"deepseek_v3_lite_fp8_nixl",
@ -1313,7 +1313,7 @@ def test_disaggregated_deepseek_v3_lite_fp8_ucx_tp1_single_gpu(
os.symlink(src, dst, target_is_directory=True)
env = llm_venv._new_env.copy()
env["TRTLLM_USE_UCX_KVCACHE"] = "1"
env["UCX_TLS"] = "^ib"
env["UCX_TLS"] = "^ib,gdr_copy"
run_disaggregated_test(disaggregated_example_root,
"deepseek_v3_lite_fp8_tp1",
@ -1595,7 +1595,7 @@ def run_disaggregated_benchmark(example_dir,
skip_warmup=False):
"""Run disaggregated test with given configuration."""
run_env = env.copy()
run_env["UCX_TLS"] = "^ib"
run_env["UCX_TLS"] = "^ib,gdr_copy"
run_env["UCX_MM_ERROR_HANDLING"] = "y"
workers_cmd = [
'mpirun', '--allow-run-as-root', '--oversubscribe', '-n',
@ -1776,7 +1776,7 @@ def run_disaggregated_aiperf(config_file,
"""
cleanup_output_files()
run_env = env.copy()
run_env["UCX_TLS"] = "^ib"
run_env["UCX_TLS"] = "^ib,gdr_copy"
run_env["UCX_MM_ERROR_HANDLING"] = "y"
workers_cmd = [
@ -2302,7 +2302,7 @@ def run_disaggregated_cancel_test(example_dir,
"""Run disaggregated test with request cancellation stress test."""
cleanup_output_files()
run_env = env.copy()
run_env["UCX_TLS"] = "^ib"
run_env["UCX_TLS"] = "^ib,gdr_copy"
num_ranks, config_file = get_test_config(test_desc, example_dir,
os.path.dirname(__file__))

View File

@ -186,7 +186,7 @@ def verify_disaggregated(model, generation_overlap, enable_cuda_graph, prompt,
with MPIPoolExecutor(max_workers=2,
env={
"UCX_TLS": "^ib",
"UCX_TLS": "^ib,gdr_copy",
"UCX_MM_ERROR_HANDLING": "y"
}) as executor:
futures = []
@ -336,7 +336,7 @@ def test_disaggregated_llama_context_capacity(model, enable_cuda_graph,
with MPIPoolExecutor(max_workers=2,
env={
"UCX_TLS": "^ib",
"UCX_TLS": "^ib,gdr_copy",
"UCX_MM_ERROR_HANDLING": "y"
}) as executor:
futures = []
@ -447,7 +447,7 @@ def test_disaggregated_spec_dec_batch_slot_limit(model, spec_dec_model_path,
mpi_info.Set("oversubscribe", "true")
with MPIPoolExecutor(max_workers=2,
env={
"UCX_TLS": "^ib",
"UCX_TLS": "^ib,gdr_copy",
"UCX_MM_ERROR_HANDLING": "y",
"OMPI_MCA_rmaps_base_oversubscribe": "1"
},

View File

@ -337,8 +337,6 @@ accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_guided_dec
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=2] SKIP (https://nvbugs/5879620)
accuracy/test_llm_api_pytorch.py::TestNemotronV3Super::test_auto_dtype_4gpus[4-1-False-False-False] SKIP (https://nvbugs/5879625)
accuracy/test_llm_api_pytorch.py::TestNemotronV3Super::test_auto_dtype_4gpus[4-1-True-True-False] SKIP (https://nvbugs/5879625)
cpp/test_multi_gpu.py::test_cache_transceiver[8proc-nixl_kvcache-90] SKIP (https://nvbugs/5880261)
cpp/test_multi_gpu.py::test_cache_transceiver[8proc-ucx_kvcache-90] SKIP (https://nvbugs/5880261)
accuracy/test_llm_api_pytorch.py::TestNemotronV3Super::test_auto_dtype_4gpus[4-4-False-True-False] SKIP (https://nvbugs/5879625)
unittest/_torch/thop/serial/test_moe.py::TestMoeFp4::test_gptoss_style_nvfp4[limitinf-beta0-alpha0.1-RoutingGPTOSS-512-512-1] SKIP (https://nvbugs/5819042)
disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] SKIP (https://nvbugs/5839137)