mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
[None][chore] Update constraint for release (#8211)
Signed-off-by: Ivy Zhang <25222398+crazydemo@users.noreply.github.com>
This commit is contained in:
parent
04bded7c40
commit
bca5e29387
@ -210,6 +210,7 @@ class TestLlama3_3NemotronSuper49Bv1(CliFlowAccuracyTestHarness):
|
||||
EXAMPLE_FOLDER = "models/core/nemotron_nas"
|
||||
|
||||
@pytest.mark.skip_less_device(2)
|
||||
@pytest.mark.skip_less_device_memory(80000)
|
||||
def test_auto_dtype_tp2(self):
|
||||
self.run(tasks=[MMLU(self.MODEL_NAME)], tp_size=2, dtype='auto')
|
||||
|
||||
|
||||
@ -289,6 +289,8 @@ class TestMixtral8x7BInstruct(LlmapiAccuracyTestHarness):
|
||||
MODEL_PATH = f"{llm_models_root()}/Mixtral-8x7B-Instruct-v0.1"
|
||||
|
||||
@skip_post_blackwell
|
||||
@pytest.mark.skip_less_device(2)
|
||||
@pytest.mark.skip_less_device_memory(80000)
|
||||
def test_awq_tp2(self):
|
||||
quant_config = QuantConfig(quant_algo=QuantAlgo.W4A16_AWQ)
|
||||
with LLM(self.MODEL_PATH,
|
||||
|
||||
@ -2401,9 +2401,10 @@ class TestLlama3_1NemotronNano8Bv1(LlmapiAccuracyTestHarness):
|
||||
task.evaluate(llm)
|
||||
task = GSM8K(self.MODEL_NAME)
|
||||
task.evaluate(llm)
|
||||
task = GPQADiamond(self.MODEL_NAME)
|
||||
task.evaluate(llm,
|
||||
extra_evaluator_kwargs=dict(apply_chat_template=True))
|
||||
# Skip the GPQA test because it is too time-consuming.
|
||||
# task = GPQADiamond(self.MODEL_NAME)
|
||||
# task.evaluate(llm,
|
||||
# extra_evaluator_kwargs=dict(apply_chat_template=True))
|
||||
|
||||
@skip_pre_hopper
|
||||
@pytest.mark.skip_device_not_contain(["H100", "B200"])
|
||||
@ -2415,9 +2416,10 @@ class TestLlama3_1NemotronNano8Bv1(LlmapiAccuracyTestHarness):
|
||||
task.evaluate(llm)
|
||||
task = GSM8K(self.MODEL_NAME)
|
||||
task.evaluate(llm)
|
||||
task = GPQADiamond(self.MODEL_NAME)
|
||||
task.evaluate(llm,
|
||||
extra_evaluator_kwargs=dict(apply_chat_template=True))
|
||||
# Skip the GPQA test because it is too time-consuming.
|
||||
# task = GPQADiamond(self.MODEL_NAME)
|
||||
# task.evaluate(llm,
|
||||
# extra_evaluator_kwargs=dict(apply_chat_template=True))
|
||||
|
||||
|
||||
class TestNemotronUltra(LlmapiAccuracyTestHarness):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user