clean (#3061)

Signed-off-by: Enwei Zhu <21126786+syuoni@users.noreply.github.com>
Co-authored-by: QI JUN <22017000+QiJune@users.noreply.github.com>
2026-02-03 09:41:30 +08:00 · 2025-03-25 21:55:08 +08:00 · 2025-03-25 21:55:08 +08:00 · f93ac9672e
commit f93ac9672e
parent 8ee840159b
5 changed files with 29 additions and 49 deletions
--- a/tests/integration/defs/accuracy/references/cnn_dailymail.yaml
+++ b/tests/integration/defs/accuracy/references/cnn_dailymail.yaml
@@ -21,17 +21,6 @@ gpt2-medium:
    accuracy: 22.249
 gpt-next:
  - accuracy: 25.516
-EleutherAI/gpt-j-6b:
-  - accuracy: 27.883
-  - dtype: float32
-    accuracy: 26.449
-  - quant_algo: FP8
-    kv_cache_quant_algo: FP8
-    accuracy: 27.883
-  - extra_acc_spec: max_attention_window_size=960
-    accuracy: 27.822
-  - extra_acc_spec: max_attention_window_size=960;beam_width=4
-    accuracy: 0
 microsoft/phi-2:
  - accuracy: 31.255
 microsoft/Phi-3-mini-4k-instruct:
@@ -77,6 +66,8 @@ meta-llama/Llama-2-7b-hf:
    accuracy: 27.999
 TinyLlama/TinyLlama-1.1B-Chat-v1.0:
  - accuracy: 28.328
+  - dtype: float32
+    accuracy: 28.082
  - quant_algo: W8A16
    accuracy: 28.003
  - quant_algo: W8A16
@@ -141,6 +132,10 @@ meta-llama/Llama-3.2-1B:
  - quant_algo: FP8_PER_CHANNEL_PER_TOKEN
    extra_acc_spec: meta_recipe
    accuracy: 27.614
+  - extra_acc_spec: max_attention_window_size=960
+    accuracy: 27.259
+  - extra_acc_spec: max_attention_window_size=960;beam_width=4
+    accuracy: 0
 mistralai/Mixtral-8x7B-v0.1:
  - accuracy: 28.810
  - quant_algo: FP8
--- a/tests/integration/defs/accuracy/test_accuracy.py
+++ b/tests/integration/defs/accuracy/test_accuracy.py
@@ -174,36 +174,6 @@ class TestMinitron4BBase(AccuracyTestHarness):
                 kv_cache_quant_algo=QuantAlgo.FP8)


-class TestGptJ6B(AccuracyTestHarness):
-    MODEL_NAME = "EleutherAI/gpt-j-6b"
-    MODEL_PATH = f"{llm_models_root()}/gpt-j-6b"
-    EXAMPLE_FOLDER = "models/contrib/gptj"
-
-    def test_auto_dtype(self):
-        # float16
-        self.run(dtype='auto')
-
-    def test_float32(self):
-        self.run(dtype='float32')
-
-    @skip_pre_ada
-    def test_fp8(self):
-        self.run(quant_algo=QuantAlgo.FP8, kv_cache_quant_algo=QuantAlgo.FP8)
-
-    @pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5166352")
-    def test_cyclic_kv_cache(self):
-        self.run(extra_acc_spec="max_attention_window_size=960",
-                 extra_summarize_args=["--max_attention_window_size=960"])
-
-    @pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5166352")
-    def test_cyclic_kv_cache_beam_search(self):
-        self.run(extra_acc_spec="max_attention_window_size=960;beam_width=4",
-                 extra_build_args=["--max_beam_width=4"],
-                 extra_summarize_args=[
-                     "--max_attention_window_size=960", "--num_beams=4"
-                 ])
-
-
 class TestPhi2(AccuracyTestHarness):
    MODEL_NAME = "microsoft/phi-2"
    MODEL_PATH = f"{llm_models_root()}/phi-2"
@@ -486,6 +456,9 @@ class TestTinyLlama1_1BChat(AccuracyTestHarness):
    def test_auto_dtype(self):
        self.run(dtype='auto')

+    def test_float32(self):
+        self.run(dtype='float32')
+
    @pytest.mark.parametrize("precision", ["int8", "int4"])
    def test_weight_only(self, precision: str):
        quant_algo = QuantAlgo.W8A16 if precision == "int8" else QuantAlgo.W4A16
@@ -768,6 +741,18 @@ class TestLlama3_2_1B(AccuracyTestHarness):
            self.extra_summarize_args = [f"--gpu_weights_percent={gpu_percent}"]
            self.evaluate()

+    def test_cyclic_kv_cache(self):
+        self.run(extra_acc_spec="max_attention_window_size=960",
+                 extra_summarize_args=["--max_attention_window_size=960"])
+
+    @pytest.mark.skip(reason="https://nvbugspro.nvidia.com/bug/5166352")
+    def test_cyclic_kv_cache_beam_search(self):
+        self.run(extra_acc_spec="max_attention_window_size=960;beam_width=4",
+                 extra_build_args=["--max_beam_width=4"],
+                 extra_summarize_args=[
+                     "--max_attention_window_size=960", "--num_beams=4"
+                 ])
+

 class TestMixtral8x7B(AccuracyTestHarness):
    MODEL_NAME = "mistralai/Mixtral-8x7B-v0.1"
--- a/tests/integration/test_lists/qa/examples_test_list.txt
+++ b/tests/integration/test_lists/qa/examples_test_list.txt
@@ -349,9 +349,6 @@ accuracy/test_accuracy.py::TestStarcoder2_15B::test_smooth_quant_ootb
 accuracy/test_accuracy.py::TestGptNext::test_auto_dtype
 accuracy/test_accuracy.py::TestMinitron4BBase::test_auto_dtype
 accuracy/test_accuracy.py::TestMinitron4BBase::test_fp8
-accuracy/test_accuracy.py::TestGptJ6B::test_float32
-accuracy/test_accuracy.py::TestGptJ6B::test_cyclic_kv_cache
-accuracy/test_accuracy.py::TestGptJ6B::test_cyclic_kv_cache_beam_search
 accuracy/test_accuracy.py::TestPhi2::test_auto_dtype
 accuracy/test_accuracy.py::TestPhi2::test_tp2
 accuracy/test_accuracy.py::TestPhi3Mini4kInstruct::test_auto_dtype
@@ -388,6 +385,7 @@ accuracy/test_accuracy.py::TestLlama2_7B::test_int4_awq_tp2
 accuracy/test_accuracy.py::TestLlama2_7B::test_int4_awq_pre_quantized_tp2
 accuracy/test_accuracy.py::TestLlama2_7B::test_int4_gptq_pre_quantized_tp2
 accuracy/test_accuracy.py::TestLlama2_7B::test_weight_sparsity
+accuracy/test_accuracy.py::TestTinyLlama1_1BChat::test_float32
 accuracy/test_accuracy.py::TestTinyLlama1_1BChat::test_weight_only[int8]
 accuracy/test_accuracy.py::TestTinyLlama1_1BChat::test_weight_only[int4]
 accuracy/test_accuracy.py::TestTinyLlama1_1BChat::test_weight_only_int8_kv_cache[int8]
@@ -422,6 +420,8 @@ accuracy/test_accuracy.py::TestLlama3_2_1B::test_fp8
 accuracy/test_accuracy.py::TestLlama3_2_1B::test_fp8_pp2
 accuracy/test_accuracy.py::TestLlama3_2_1B::test_fp8_rowwise
 accuracy/test_accuracy.py::TestLlama3_2_1B::test_weight_streaming[1.0]
+accuracy/test_accuracy.py::TestLlama3_2_1B::test_cyclic_kv_cache
+accuracy/test_accuracy.py::TestLlama3_2_1B::test_cyclic_kv_cache_beam_search
 accuracy/test_accuracy.py::TestMixtral8x7B::test_fp8_tp2pp2
 accuracy/test_accuracy.py::TestMixtral8x7B::test_fp8_tp2pp2_manage_weights
 accuracy/test_accuracy.py::TestMixtral8x7B::test_nvfp4_pre_quantized
--- a/tests/integration/test_lists/test-db/l0_a10.yml
+++ b/tests/integration/test_lists/test-db/l0_a10.yml
@@ -112,9 +112,9 @@ l0_a10:
  - examples/test_mamba.py::test_llm_mamba_1gpu[mamba2-130m-float16-disable_gemm_plugin]
  - examples/test_mamba.py::test_llm_mamba_1gpu[mamba-codestral-7B-v0.1-float16-disable_gemm_plugin] # 4 mins
  - accuracy/test_accuracy.py::TestGpt2::test_context_fmha_disabled # 1 min
-  - accuracy/test_accuracy.py::TestGptJ6B::test_cyclic_kv_cache
-  - accuracy/test_accuracy.py::TestGptJ6B::test_cyclic_kv_cache_beam_search
  - accuracy/test_accuracy.py::TestLlama7B::test_auto_dtype # 2 mins
+  - accuracy/test_accuracy.py::TestLlama3_2_1B::test_cyclic_kv_cache
+  - accuracy/test_accuracy.py::TestLlama3_2_1B::test_cyclic_kv_cache_beam_search
  - accuracy/test_accuracy.py::TestGpt2::test_attention_ootb
  - accuracy/test_accuracy.py::TestStarcoder2_3B::test_auto_dtype
  - accuracy/test_accuracy.py::TestPhi2::test_auto_dtype # 2 mins
--- a/tests/integration/test_lists/test-db/l0_h100.yml
+++ b/tests/integration/test_lists/test-db/l0_h100.yml
@@ -123,9 +123,6 @@ l0_h100:
  - accuracy/test_accuracy.py::TestGpt2::test_cuda_graph # 1 min
  - accuracy/test_accuracy.py::TestGpt2::test_context_fmha_disabled # 1 min
  - accuracy/test_accuracy.py::TestGptNext::test_auto_dtype # 1.5 mins
-  - accuracy/test_accuracy.py::TestGptJ6B::test_float32 # 4 mins
-  - accuracy/test_accuracy.py::TestGptJ6B::test_cyclic_kv_cache
-  - accuracy/test_accuracy.py::TestGptJ6B::test_cyclic_kv_cache_beam_search
  - accuracy/test_accuracy.py::TestSantacoder::test_auto_dtype # 1.5 mins
  - accuracy/test_accuracy.py::TestMamba130M::test_auto_dtype # 1 min
  - accuracy/test_accuracy.py::TestVicuna7B::test_lookahead # 5 mins
@@ -139,11 +136,14 @@ l0_h100:
  - accuracy/test_accuracy.py::TestGpt2::test_attention_ootb
  - accuracy/test_accuracy.py::TestStarcoder2_3B::test_auto_dtype
  - accuracy/test_accuracy.py::TestMinitron4BBase::test_fp8
+  - accuracy/test_accuracy.py::TestTinyLlama1_1BChat::test_float32
  - accuracy/test_accuracy.py::TestLlama3_8BInstructGradient1048k::test_long_context
  - accuracy/test_accuracy.py::TestLlama3_8BInstructGradient1048k::test_long_context_ppl
  - accuracy/test_accuracy.py::TestLlama3_1_8B::test_fp8
  - accuracy/test_accuracy.py::TestLlama3_1_8B::test_autoq
  - accuracy/test_accuracy.py::TestLlama3_2_1B::test_auto_dtype
+  - accuracy/test_accuracy.py::TestLlama3_2_1B::test_cyclic_kv_cache
+  - accuracy/test_accuracy.py::TestLlama3_2_1B::test_cyclic_kv_cache_beam_search
  - accuracy/test_accuracy.py::TestGemma2_9BIt::test_auto_dtype
  - examples/test_gpt.py::test_llm_minitron_fp8_with_pseudo_loras[4b]
  - examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-disable_weight_only]