Mirror of https://github.com/NVIDIA/TensorRT-LLM.git, synced 2026-01-14 06:27:45 +08:00
[https://nvbugs/5457504][fix] fix kv cache event test in disaggregated worker tests (#7028)
Signed-off-by: zhengd-nv <200704041+zhengd-nv@users.noreply.github.com>
This commit is contained in:
parent d8bd8843fc
commit 1a929a1490
@@ -298,7 +298,8 @@ class KvCacheEventWorkerTester(BasicWorkerTester):
         if check_match_count:
             assert ctx_match_count > 0
-            assert gen_match_count >= ctx_match_count
+            assert gen_match_count > 0
+            assert gen_match_count >= ctx_match_count or gen_evicted
         return request["prompt"]
 
     async def test_multi_round_request(self,
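The relaxed assertion accounts for generation-side eviction: if the generation worker evicted some of its cached blocks before they could be reused, its match count can legitimately fall below the context worker's. Below is a minimal sketch of the new check; gen_evicted is assumed to be a flag the tester derives from KV cache eviction events on the generation worker, and the helper name and signature are illustrative, not the actual test code.

def check_kv_cache_match(ctx_match_count: int,
                         gen_match_count: int,
                         gen_evicted: bool) -> None:
    # The context worker must have reused at least one cached block.
    assert ctx_match_count > 0
    # The generation worker must also have matched something ...
    assert gen_match_count > 0
    # ... but it may match fewer blocks than the context worker when some
    # of its cached blocks were evicted before they could be reused.
    assert gen_match_count >= ctx_match_count or gen_evicted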
@@ -310,6 +311,8 @@ class KvCacheEventWorkerTester(BasicWorkerTester):
             for prompt in init_prompts
         ]
         prompts = await asyncio.gather(*chat_threads)
+        # send a request to flush events
+        await self.multi_round_request(session, init_prompts[0], 1, False)
         await asyncio.gather(*[
             self.multi_round_request(session, prompt, 1, True)
             for prompt in prompts
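The two added lines implement a flush: KV cache events appear to be delivered lazily, so one extra, unchecked request is sent first to push any buffered events out before the checked requests assert on match counts. A minimal sketch of the pattern follows; multi_round_request mirrors the (session, prompt, rounds, check_match_count) coroutine shape from the diff, and the stub body is illustrative only.

import asyncio

async def multi_round_request(session, prompt, rounds, check_match_count):
    await asyncio.sleep(0)  # stand-in for the real request loop
    return prompt

async def final_round(session, init_prompts, prompts):
    # One throwaway, unchecked request: its only purpose is to nudge the
    # workers into emitting any KV cache events still sitting in the queue.
    await multi_round_request(session, init_prompts[0], 1, False)
    # The checked requests then observe a flushed event stream, so their
    # match-count assertions run against up-to-date counts.
    await asyncio.gather(*[
        multi_round_request(session, prompt, 1, True)
        for prompt in prompts
    ])

# Usage: asyncio.run(final_round(None, ["warmup"], ["p1", "p2"]))

With the test passing again, the final hunk drops its waiver entry (https://nvbugs/5457504) from the skip list.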
@@ -287,7 +287,6 @@ test_e2e.py::test_ptp_quickstart_advanced[Llama3.1-70B-FP8-llama-3.1-model/Llama
 accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype SKIP (https://nvbugs/5454875)
 examples/test_llm_api_with_mpi.py::test_llm_api_single_gpu_with_mpirun[TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5434372)
 triton_server/test_triton.py::test_gpt_ib[gpt-ib] SKIP (https://nvbugs/5431116)
-disaggregated/test_workers.py::test_workers_kv_cache_events[TinyLlama-1.1B-Chat-v1.0] SKIP (https://nvbugs/5457504)
 accuracy/test_llm_api.py::TestMistralNemo12B::test_fp8 SKIP (https://nvbugs/5413197)
 triton_server/test_triton.py::test_gpt_ib_streaming[gpt-ib-streaming] SKIP (https://nvbugs/5371349)
 triton_server/test_triton.py::test_gpt_ib_ptuning[gpt-ib-ptuning] SKIP (https://nvbugs/5445624)