diff --git a/jenkins/L0_MergeRequest.groovy b/jenkins/L0_MergeRequest.groovy
index 594adcf168..bcdaba2dd4 100644
--- a/jenkins/L0_MergeRequest.groovy
+++ b/jenkins/L0_MergeRequest.groovy
@@ -524,6 +524,8 @@ def getMultiGpuFileChanged(pipeline, testFilter, globalVars)
         "tests/integration/test_lists/test-db/l0_dgx_h200.yml",
         "tests/unittest/_torch/multi_gpu/",
         "tests/unittest/_torch/multi_gpu_modeling/",
+        "tests/unittest/llmapi/test_llm_pytorch.py",
+        "tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py",
         "jenkins/L0_Test.groovy",
     ]
 
diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy
index 85fa1e8734..786e28d0b0 100644
--- a/jenkins/L0_Test.groovy
+++ b/jenkins/L0_Test.groovy
@@ -1199,6 +1199,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
         "A30-TensorRT-2": ["a30", "l0_a30", 2, 4],
         "A30-TensorRT-3": ["a30", "l0_a30", 3, 4],
         "A30-TensorRT-4": ["a30", "l0_a30", 4, 4],
+        "A100X-PyTorch-1": ["a100x", "l0_a100", 1, 1],
         "A100X-TensorRT-1": ["a100x", "l0_a100", 1, 4],
         "A100X-TensorRT-2": ["a100x", "l0_a100", 2, 4],
         "A100X-TensorRT-3": ["a100x", "l0_a100", 3, 4],
diff --git a/tests/integration/test_lists/test-db/l0_a100.yml b/tests/integration/test_lists/test-db/l0_a100.yml
index f4e87b7771..d4f6cbbda9 100644
--- a/tests/integration/test_lists/test-db/l0_a100.yml
+++ b/tests/integration/test_lists/test-db/l0_a100.yml
@@ -1,5 +1,19 @@
 version: 0.0.1
 l0_a100:
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 1
+        lte: 1
+    wildcards:
+      gpu:
+      - '*a100*'
+      linux_distribution_name: ubuntu*
+    terms:
+      stage: pre_merge
+      backend: "pytorch"
+  tests:
+  - unittest/llmapi/test_llm_pytorch.py
 - condition:
     ranges:
       system_gpu_count:
diff --git a/tests/integration/test_lists/test-db/l0_dgx_h100.yml b/tests/integration/test_lists/test-db/l0_dgx_h100.yml
index 0cfb98a021..cc5e313a9b 100644
--- a/tests/integration/test_lists/test-db/l0_dgx_h100.yml
+++ b/tests/integration/test_lists/test-db/l0_dgx_h100.yml
@@ -16,6 +16,7 @@ l0_dgx_h100:
   # ------------- PyTorch tests ---------------
   - unittest/_torch/multi_gpu
   - unittest/_torch/auto_deploy/unit/multigpu
+  - unittest/llmapi/test_llm_multi_gpu_pytorch.py -m "gpu4"
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=TRTLLM]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp4-attn_backend=TRTLLM-torch_compile]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_bfloat16_4gpus[tp2pp2-attn_backend=TRTLLM]
diff --git a/tests/unittest/llmapi/test_llm.py b/tests/unittest/llmapi/test_llm.py
index f74a6e0074..4ab35dffb6 100644
--- a/tests/unittest/llmapi/test_llm.py
+++ b/tests/unittest/llmapi/test_llm.py
@@ -1099,12 +1099,8 @@ def tinyllama_guided_decoding_test_harness(**llm_kwargs):
 
 @force_ampere
 @pytest.mark.part0
-@pytest.mark.parametrize("backend", ['tensorrt', 'pytorch'])
-def test_tinyllama_guided_decoding(backend: str):
-    llm_kwargs = {}
-    if backend == 'pytorch':
-        llm_kwargs['backend'] = 'pytorch'
-    tinyllama_guided_decoding_test_harness(**llm_kwargs)
+def test_tinyllama_guided_decoding():
+    tinyllama_guided_decoding_test_harness()
 
 
 @pytest.mark.part0
@@ -1766,18 +1762,13 @@ def llm_get_stats_test_harness(tp_size: int = 1,
     assert llm.get_stats(2)
 
 
-@pytest.mark.parametrize("return_context_logits, pytorch_backend, use_overlap",
-                         [
-                             (True, False, False),
-                             (False, False, False),
-                             (False, True, False),
-                             (False, True, True),
-                         ])
-def test_llm_get_stats(return_context_logits, pytorch_backend, use_overlap):
+@pytest.mark.parametrize("return_context_logits", [
+    (True, ),
+    (False, ),
+])
+def test_llm_get_stats(return_context_logits):
     llm_get_stats_test_harness(tp_size=1,
-                               return_context_logits=return_context_logits,
-                               pytorch_backend=pytorch_backend,
-                               use_overlap=use_overlap)
+                               return_context_logits=return_context_logits)
 
 
 def llm_get_stats_async_test_harness(tp_size: int = 1,
@@ -1833,20 +1824,15 @@ def llm_get_stats_async_test_harness(tp_size: int = 1,
     asyncio.run(main())
 
 
-@pytest.mark.parametrize("return_context_logits, pytorch_backend, use_overlap",
-                         [
-                             (True, False, False),
-                             (False, False, False),
-                             (False, True, False),
-                             (False, True, True),
-                         ])
-def test_llm_get_stats_async(return_context_logits, pytorch_backend,
-                             use_overlap):
+@pytest.mark.parametrize("return_context_logits", [
+    (True, ),
+    (False, ),
+])
+def test_llm_get_stats_async(return_context_logits):
     llm_get_stats_async_test_harness(
         tp_size=1,
         return_context_logits=return_context_logits,
-        pytorch_backend=pytorch_backend,
-        use_overlap=use_overlap)
+    )
 
 
 def test_llm_chunked_prefill():
@@ -1986,10 +1972,9 @@ def run_llm_with_postprocess_parallel_and_result_handler(
 
 
 @pytest.mark.parametrize("streaming", [True, False])
-@pytest.mark.parametrize("backend", [None, "pytorch"])
-def test_llm_with_postprocess_parallel_and_result_handler(streaming, backend):
+def test_llm_with_postprocess_parallel_and_result_handler(streaming):
     run_llm_with_postprocess_parallel_and_result_handler(streaming,
-                                                         backend,
+                                                         backend=None,
                                                          tp_size=1)
 
 
@@ -2041,41 +2026,6 @@ def test_llm_abort_request(llm_for_sampling_params,
                           sampling_params=sampling_params)
 
 
-@force_ampere
-@pytest.mark.parametrize(
-    "sampling_params",
-    [
-        SamplingParams()  # pytorch only supports n=1
-    ])
-def test_llm_abort_request_pytorch(sampling_params):
-    from tensorrt_llm._torch import LLM as LLM_torch
-    llm = LLM_torch(model=llama_model_path,
-                    kv_cache_config=global_kvcache_config)
-    run_llm_abort_request(llm=llm, sampling_params=sampling_params)
-
-
-def test_llm_reward_model_pytorch():
-    rm_model_path = get_model_path("Qwen2.5-Math-PRM-7B")
-    tokenizer = TransformersTokenizer.from_pretrained(rm_model_path)
-    tokenized_input = tokenizer(prompts, return_tensors="pt")["input_ids"]
-
-    from tensorrt_llm._torch import LLM as LLM_torch
-    from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
-    llm = LLM_torch(
-        model=rm_model_path,
-        pytorch_backend_config=PyTorchConfig(attn_backend="VANILLA"))
-
-    sampling_params = SamplingParams(return_context_logits=True)
-
-    outputs = llm.generate(prompts, sampling_params)
-    scores = outputs[0].context_logits
-
-    print(scores)
-
-    assert scores.shape == (tokenized_input.shape[1], 2)
-    assert not outputs[0].outputs[0].text
-
-
 def test_llm_sampling_params_n_lt_max_batch_size():
     sampling_params = SamplingParams(n=2, best_of=1)
     build_config = BuildConfig(max_batch_size=1, max_seq_len=1024)
@@ -2117,7 +2067,3 @@ def test_llm_api_draft_target():
         prompt = output.prompt
         generated_text = output.outputs[0].text
         print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-
-
-if __name__ == '__main__':
-    test_llm_with_postprocess_parallel_and_result_handler(True, "pytorch")
diff --git a/tests/unittest/llmapi/test_llm_multi_gpu.py b/tests/unittest/llmapi/test_llm_multi_gpu.py
index e03672e7b3..39f398f359 100644
--- a/tests/unittest/llmapi/test_llm_multi_gpu.py
+++ b/tests/unittest/llmapi/test_llm_multi_gpu.py
@@ -234,16 +234,11 @@ def test_tinyllama_logits_processor_tp2pp2():
 
 @pytest.mark.gpu4
 @pytest.mark.part0
-@pytest.mark.parametrize("backend", ['tensorrt', 'pytorch'])
-def test_tinyllama_guided_decoding_tp2pp2(backend: str):
-    llm_kwargs = {}
-    if backend == 'pytorch':
-        llm_kwargs['backend'] = 'pytorch'
+def test_tinyllama_guided_decoding_tp2pp2():
     tinyllama_guided_decoding_test_harness(
         tensor_parallel_size=2,
         pipeline_parallel_size=2,
-        kv_cache_config=global_kv_cache_config,
-        **llm_kwargs)
+        kv_cache_config=global_kv_cache_config)
 
 
 @pytest.mark.gpu2
diff --git a/tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py b/tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py
new file mode 100644
index 0000000000..9689b765f4
--- /dev/null
+++ b/tests/unittest/llmapi/test_llm_multi_gpu_pytorch.py
@@ -0,0 +1,15 @@
+import pytest
+
+# isort: off
+from .test_llm import (global_kvcache_config,
+                       tinyllama_guided_decoding_test_harness)
+# isort: on
+
+
+@pytest.mark.gpu4
+def test_tinyllama_guided_decoding_tp2pp2():
+    tinyllama_guided_decoding_test_harness(
+        tensor_parallel_size=2,
+        pipeline_parallel_size=2,
+        kv_cache_config=global_kvcache_config,
+        backend='pytorch')
diff --git a/tests/unittest/llmapi/test_llm_pytorch.py b/tests/unittest/llmapi/test_llm_pytorch.py
new file mode 100644
index 0000000000..20ee44a093
--- /dev/null
+++ b/tests/unittest/llmapi/test_llm_pytorch.py
@@ -0,0 +1,84 @@
+import pytest
+
+from tensorrt_llm.llmapi.tokenizer import TransformersTokenizer
+from tensorrt_llm.sampling_params import SamplingParams
+
+# isort: off
+from .test_llm import (get_model_path, global_kvcache_config, llama_model_path,
+                       llm_get_stats_async_test_harness,
+                       llm_get_stats_test_harness, prompts,
+                       run_llm_abort_request,
+                       run_llm_with_postprocess_parallel_and_result_handler,
+                       tinyllama_guided_decoding_test_harness)
+from utils.util import force_ampere
+# isort: on
+
+
+@force_ampere
+def test_tinyllama_guided_decoding():
+    tinyllama_guided_decoding_test_harness(backend="pytorch")
+
+
+@pytest.mark.parametrize("return_context_logits, use_overlap", [
+    (False, False),
+    (False, True),
+])
+def test_llm_get_stats(return_context_logits, use_overlap):
+    llm_get_stats_test_harness(tp_size=1,
+                               return_context_logits=return_context_logits,
+                               pytorch_backend=True,
+                               use_overlap=use_overlap)
+
+
+@pytest.mark.parametrize("return_context_logits, use_overlap", [
+    (False, False),
+    (False, True),
+])
+def test_llm_get_stats_async(return_context_logits, use_overlap):
+    llm_get_stats_async_test_harness(
+        tp_size=1,
+        return_context_logits=return_context_logits,
+        pytorch_backend=True,
+        use_overlap=use_overlap)
+
+
+@force_ampere
+@pytest.mark.parametrize(
+    "sampling_params",
+    [
+        SamplingParams()  # pytorch only supports n=1
+    ])
+def test_llm_abort_request(sampling_params):
+    from tensorrt_llm._torch import LLM as LLM_torch
+    llm = LLM_torch(model=llama_model_path,
+                    kv_cache_config=global_kvcache_config)
+    run_llm_abort_request(llm=llm, sampling_params=sampling_params)
+
+
+def test_llm_reward_model():
+    rm_model_path = get_model_path("Qwen2.5-Math-PRM-7B")
+    tokenizer = TransformersTokenizer.from_pretrained(rm_model_path)
+    tokenized_input = tokenizer(prompts, return_tensors="pt")["input_ids"]
+
+    from tensorrt_llm._torch import LLM as LLM_torch
+    from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
+    llm = LLM_torch(
+        model=rm_model_path,
+        pytorch_backend_config=PyTorchConfig(attn_backend="VANILLA"))
+
+    sampling_params = SamplingParams(return_context_logits=True)
+
+    outputs = llm.generate(prompts, sampling_params)
+    scores = outputs[0].context_logits
+
+    print(scores)
+
+    assert scores.shape == (tokenized_input.shape[1], 2)
+    assert not outputs[0].outputs[0].text
+
+
+@pytest.mark.parametrize("streaming", [True, False])
+def test_llm_with_postprocess_parallel_and_result_handler(streaming):
+    run_llm_with_postprocess_parallel_and_result_handler(streaming,
+                                                         "pytorch",
+                                                         tp_size=1)