[None][chore] Update constraint for release (#8211)

Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com>
This commit is contained in:
Ivy Zhang 2025-10-13 11:14:24 +08:00 committed by GitHub
parent 04bded7c40
commit bca5e29387
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 11 additions and 6 deletions

View File

@ -210,6 +210,7 @@ class TestLlama3_3NemotronSuper49Bv1(CliFlowAccuracyTestHarness):
EXAMPLE_FOLDER = "models/core/nemotron_nas"
@pytest.mark.skip_less_device(2)
@pytest.mark.skip_less_device_memory(80000)
def test_auto_dtype_tp2(self):
self.run(tasks=[MMLU(self.MODEL_NAME)], tp_size=2, dtype='auto')

View File

@ -289,6 +289,8 @@ class TestMixtral8x7BInstruct(LlmapiAccuracyTestHarness):
MODEL_PATH = f"{llm_models_root()}/Mixtral-8x7B-Instruct-v0.1"
@skip_post_blackwell
@pytest.mark.skip_less_device(2)
@pytest.mark.skip_less_device_memory(80000)
def test_awq_tp2(self):
quant_config = QuantConfig(quant_algo=QuantAlgo.W4A16_AWQ)
with LLM(self.MODEL_PATH,

View File

@ -2401,9 +2401,10 @@ class TestLlama3_1NemotronNano8Bv1(LlmapiAccuracyTestHarness):
task.evaluate(llm)
task = GSM8K(self.MODEL_NAME)
task.evaluate(llm)
task = GPQADiamond(self.MODEL_NAME)
task.evaluate(llm,
extra_evaluator_kwargs=dict(apply_chat_template=True))
# Skip the GPQA test because it is too time-consuming.
# task = GPQADiamond(self.MODEL_NAME)
# task.evaluate(llm,
# extra_evaluator_kwargs=dict(apply_chat_template=True))
@skip_pre_hopper
@pytest.mark.skip_device_not_contain(["H100", "B200"])
@ -2415,9 +2416,10 @@ class TestLlama3_1NemotronNano8Bv1(LlmapiAccuracyTestHarness):
task.evaluate(llm)
task = GSM8K(self.MODEL_NAME)
task.evaluate(llm)
task = GPQADiamond(self.MODEL_NAME)
task.evaluate(llm,
extra_evaluator_kwargs=dict(apply_chat_template=True))
# Skip the GPQA test because it is too time-consuming.
# task = GPQADiamond(self.MODEL_NAME)
# task.evaluate(llm,
# extra_evaluator_kwargs=dict(apply_chat_template=True))
class TestNemotronUltra(LlmapiAccuracyTestHarness):