[https://nvbugs/5458874][fix] Fix Nemotron-H flaky CUDA graph / overlap scheduler test (#6996)

Signed-off-by: Tomer Asida <57313761+tomeras91@users.noreply.github.com>
2026-01-14 06:27:45 +08:00 · 2025-08-19 15:45:06 +03:00 · 2025-08-19 15:45:06 +03:00 · f0bfb49219
commit f0bfb49219
parent a54c53652b
1 changed file with 14 additions and 4 deletions
--- a/tests/unittest/_torch/modeling/test_modeling_nemotron_h.py
+++ b/tests/unittest/_torch/modeling/test_modeling_nemotron_h.py
@@ -247,7 +247,6 @@ def test_nemotron_h_correctness(mamba_ssm_cache_dtype):
        nemotron_h.shutdown()


-@pytest.mark.skip(reason="https://nvbugs/5458874")
 def test_nemotron_h_cuda_graph_overlap_scheduler():
    prompts = [
        "The sky is blue because",
@@ -317,12 +316,23 @@ def test_nemotron_h_cuda_graph_overlap_scheduler():
            f"Prompt {i}: with/without CG (no overlap) logprobs for all selected tokens {x}"
        )

+        # Similar comparison for with / without overlap scheduler, compare logits of first generation step (2nd generated token)
        # overlap scheduler should have no effect on all logits - low tolerance
        torch.testing.assert_close(
-            with_cg_no_overlap.outputs[0].generation_logits,
-            with_cg_with_overlap.outputs[0].generation_logits,
+            with_cg_no_overlap.outputs[0].generation_logits[1, :],
+            with_cg_with_overlap.outputs[0].generation_logits[1, :],
            atol=0.05,
            rtol=0.05,
            msg=lambda x:
-            f"Prompt {i}: with/without overlap (no CG) all generation logits {x}"
+            f"Prompt {i}: with/without overlap scheduler (with CG) logits for first generated step {x}"
+        )
+
+        # compare logprobs of all generated tokens
+        torch.testing.assert_close(
+            extract_decode_logprobs(with_cg_no_overlap),
+            extract_decode_logprobs(with_cg_with_overlap),
+            atol=0.05,
+            rtol=0.05,
+            msg=lambda x:
+            f"Prompt {i}: with/without overlap scheduler (with CG) logprobs for all selected tokens {x}"
        )