diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py index 778b1452a9..918f135c21 100644 --- a/tests/integration/defs/test_e2e.py +++ b/tests/integration/defs/test_e2e.py @@ -1736,8 +1736,10 @@ def test_openai_mmencoder_example(llm_root, llm_venv): str(test_root / "_test_openai_mmencoder.py")]) -@pytest.mark.parametrize( - "model_name", ["meta-llama/Llama-3.1-8B-Instruct", "openai/gpt-oss-120b"]) +@pytest.mark.parametrize("model_name", [ + "meta-llama/Llama-3.1-8B-Instruct", + pytest.param("openai/gpt-oss-120b", marks=skip_pre_hopper) +]) def test_openai_chat_guided_decoding(llm_root, llm_venv, model_name: str): test_root = unittest_path() / "llmapi" / "apps" llm_venv.run_cmd([ diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index d0cf36da90..cce9f52c0a 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -344,7 +344,6 @@ examples/test_llama.py::test_llama_3_x_with_bf16_lora_torch[llama-3.2-1b-instruc accuracy/test_llm_api_autodeploy.py::TestNemotronSuperV3::test_bf16 SKIP (https://nvbugs/5838184) cpp/test_multi_gpu.py::test_cache_transceiver[8proc-mooncake_kvcache-90] SKIP (https://nvbugs/5838199) accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass-auto] SKIP (https://nvbugs/5838211) -test_e2e.py::test_openai_chat_guided_decoding[openai/gpt-oss-120b] SKIP (https://nvbugs/5836594) test_e2e.py::test_ptp_quickstart_advanced_ngram[Llama-3.1-8B-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct] SKIP (https://nvbugs/5843112) accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp] SKIP (https://nvbugs/5839028) unittest/kv_cache_manager_v2_tests/test_kv_cache_manager_v2.py::TestNoBatching::test_naive_0 SKIP (https://nvbugs/5841954) diff --git a/tests/unittest/llmapi/apps/_test_openai_chat_guided_decoding.py b/tests/unittest/llmapi/apps/_test_openai_chat_guided_decoding.py index 86a5cc8a3a..87091660c5 100644 --- a/tests/unittest/llmapi/apps/_test_openai_chat_guided_decoding.py +++ b/tests/unittest/llmapi/apps/_test_openai_chat_guided_decoding.py @@ -9,11 +9,16 @@ import jsonschema import openai import pytest import yaml +from utils.llm_data import llm_datasets_root from ..test_llm import get_model_path from .openai_server import RemoteOpenAIServer pytestmark = pytest.mark.threadleak(enabled=False) +os.environ['TIKTOKEN_RS_CACHE_DIR'] = os.path.join(llm_datasets_root(), + 'tiktoken_vocab') +os.environ['TIKTOKEN_ENCODINGS_BASE'] = os.path.join(llm_datasets_root(), + 'tiktoken_vocab') @pytest.fixture(