TensorRT-LLMs/examples/auto_deploy/.vscode/launch.json

{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "build_and_run_ad.py",
            "type": "debugpy",
            "request": "launch",
            "program": "build_and_run_ad.py",
            "args": [
                "--model=meta-llama/Meta-Llama-3.1-8B-Instruct",
                "--args.world-size=2",
                "--args.runtime=demollm",
                "--args.compile-backend=torch-simple",
                "--args.attn-page-size=16",
                "--args.attn-backend=flashinfer",
                "--args.model-factory=AutoModelForCausalLM",
                "--benchmark.enabled=false",
                "--prompt.batch-size=2",
                "--args.model-kwargs",
                "num_hidden_layers=3,num_attention_heads=32"
            ],
            "console": "integratedTerminal",
            "justMyCode": false,
            "cwd": "${workspaceFolder}/examples/auto_deploy"
        },
        {
            "name": "Python: Debug Tests",
            "type": "debugpy",
            "request": "launch",
            "program": "${file}",
            "purpose": [
                "debug-test"
            ],
            "console": "integratedTerminal",
            "justMyCode": false
        }
    ]
}