TensorRT-LLMs/scripts/gen_cuda_headers_for_xqa.py
2024-08-29 17:25:07 +08:00

143 lines
4.8 KiB
Python
Executable File

#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
#
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
# property and proprietary rights in and to this material, related
# documentation and any modifications thereto. Any use, reproduction,
# disclosure or distribution of this material and related documentation
# without an express license agreement from NVIDIA CORPORATION or
# its affiliates is strictly prohibited.
#
# Embed system CUDA headers in C++ arrays.
import argparse
import os
from collections import namedtuple
from pathlib import Path
def _positive_int(value: str) -> int:
    """argparse ``type`` callback that accepts only integers >= 1.

    A chunk size of zero or less would make the chunking loop in
    convert_to_cpp_raw_str() never advance, so reject it at the command line.
    Non-numeric input raises ValueError from int(), which argparse reports as
    a normal usage error.
    """
    number = int(value)
    if number < 1:
        raise argparse.ArgumentTypeError(
            'expected a positive integer, got {!r}'.format(value))
    return number


# Command-line interface. Parsing happens at module level because the rest of
# the script reads the resulting ``args`` namespace as a global.
parser = argparse.ArgumentParser(
    description='Embed system CUDA headers in cpp arrays',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# Path of the generated C++ header; its parent directory is created on demand.
parser.add_argument('--output_file', help='Output c++ file name', required=True)
# Header names are resolved relative to "<cuda_root>/include".
parser.add_argument(
    '--input_files',
    help='Input CUDA header file name list, separated by ","',
    default=
    'cuda_bf16.h,cuda_bf16.hpp,cuda_fp16.h,cuda_fp16.hpp,cuda_fp8.h,cuda_fp8.hpp,vector_types.h,vector_functions.h'
)
parser.add_argument('--cuda_root',
                    help='CUDA Toolkit path',
                    default='/usr/local/cuda')
# Measured in source characters per emitted string literal, before escaping.
parser.add_argument(
    '--chunk-size',
    type=_positive_int,
    help=
    'Max length for each literal string in the output. Strings would be split into multiple smaller substrings if the length exceeds chunk-size.',
    default=80)
args = parser.parse_args()
# Fixed head of the generated file: the license banner, "#pragma once" and the
# opening of the nested tensorrt_llm / kernels namespaces.
TEMPLATE_PROLOGUE = '''/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#pragma once
namespace tensorrt_llm {
namespace kernels {
'''
# Per-header snippet, filled in with str.format(). Placeholders:
#   {content_var_name} - C++ identifier of the constant holding the header text
#   {content}          - one or more already-quoted C++ string literals; they
#                        directly follow the quoted prefix below, so the C++
#                        compiler concatenates them into a single string
#   {fname_var_name}   - C++ identifier of the constant holding the file name
#   {fname}            - header file name, inserted between the quotes as-is
# Prepend the magic string to disable NVRTC encryption.
TEMPLATE_CONTENT = '''constexpr const char* {content_var_name} = "j3iAA#$)7"{content};
constexpr const char* {fname_var_name} = "{fname}";
'''
# Fixed tail of the generated file: closes the two namespaces opened by
# TEMPLATE_PROLOGUE.
TEMPLATE_EPILOGUE = '''}
}
'''
# Input: "ThisIsAString.h" / "this_is_a_string.h"
# Output: "this_is_a_string_h"
def get_canonized_str(s: str):
    """Convert a file name into a lower-case, underscore-separated identifier.

    The input is cut into words: a word starts at any letter or digit and
    continues through the following lower-case letters and digits, so an
    upper-case letter always begins a new word. Every other character is a
    separator and is dropped. The words are lower-cased and joined with "_".

    Examples: "ThisIsAString.h" and "this_is_a_string.h" both give
    "this_is_a_string_h"; an input without letters or digits gives "".
    """

    def is_word_char(ch):
        return ch.isalpha() or ch.isdigit()

    length = len(s)
    words = []
    pos = 0
    while True:
        # Skip the separators in front of the next word (or at the end).
        while pos < length and not is_word_char(s[pos]):
            pos += 1
        if pos >= length:
            break
        # The first character is taken unconditionally; the word then extends
        # over the lower-case letters and digits that follow it.
        end = pos + 1
        while end < length and (s[end].islower() or s[end].isdigit()):
            end += 1
        words.append(s[pos:end].lower())
        pos = end
    return '_'.join(words)
# Returned string includes the surrounding double quotation marks.
def convert_to_cpp_raw_str(s: str, chunk_size=None):
    """Render ``s`` as one or more double-quoted C++ string literals.

    Despite the name, the result is not a C++ raw string literal: every byte
    of the UTF-8 encoding of ``s`` is written as a three-digit octal escape
    (for example "a" becomes "\\141"), so no character needs special quoting.
    The escapes are always three digits wide, which is the maximum length of
    a C++ octal escape, so an escape cannot absorb the character after it.

    Args:
        s: Text to embed.
        chunk_size: Maximum number of characters of ``s`` placed in a single
            literal. Defaults to the ``--chunk-size`` command-line value
            (``args.chunk_size``) when omitted.

    Returns:
        The literals, including their surrounding double quotation marks,
        separated by newlines. An empty ``s`` yields a single ``""``.

    Raises:
        ValueError: If the chunk size is smaller than 1; such a value could
            never make progress through ``s``.
    """
    if chunk_size is None:
        chunk_size = args.chunk_size
    if chunk_size < 1:
        raise ValueError(
            'chunk_size must be a positive integer, got {!r}'.format(
                chunk_size))

    def to_literal(chunk: str) -> str:
        escaped = ''.join(
            '\\' + format(byte, '03o') for byte in chunk.encode('utf-8'))
        return '"' + escaped + '"'

    # max(..., 1) keeps one (empty) chunk for empty input so the result is
    # still a valid literal.
    starts = range(0, max(len(s), 1), chunk_size)
    return '\n'.join(
        to_literal(s[start:start + chunk_size]) for start in starts)
# Pairs the two generated C++ variable names that belong to one embedded header.
Entry = namedtuple('Entry', ['content_var_name', 'fname_var_name'])
entries = []
# Gather the output as a list of fragments and join once at the end; repeated
# "+=" on a growing string re-copies the already embedded header text.
output_parts = [TEMPLATE_PROLOGUE]
# Tolerate blanks around the commas and a trailing comma in --input_files.
header_names = [
    name.strip() for name in args.input_files.split(',') if name.strip()
]
for input_file in header_names:
    canonized = get_canonized_str(input_file)
    fname_var_name = canonized + '_fname'
    content_var_name = canonized + '_content'
    input_full_path = os.path.join(args.cuda_root, 'include', input_file)
    # Decode explicitly as UTF-8: the text is re-encoded as UTF-8 when it is
    # escaped, so the result must not depend on the locale's default encoding.
    with open(input_full_path, 'r', encoding='utf-8') as f:
        input_content = f.read()
    output_parts.append(
        TEMPLATE_CONTENT.format(
            content_var_name=content_var_name,
            content=convert_to_cpp_raw_str(input_content),
            fname_var_name=fname_var_name,
            fname=input_file))
    entries.append(
        Entry(content_var_name=content_var_name, fname_var_name=fname_var_name))
# Two parallel lookup tables: element i of one array corresponds to element i
# of the other because both are emitted in the order of ``entries``.
output_parts.append("constexpr char const* cuda_headers_content[] = {\n")
output_parts.extend(" " + entry.content_var_name + ",\n" for entry in entries)
output_parts.append("};\n")
output_parts.append("constexpr char const* cuda_headers_name[] = {\n")
output_parts.extend(" " + entry.fname_var_name + ",\n" for entry in entries)
output_parts.append("};\n")
output_parts.append(TEMPLATE_EPILOGUE)
output_content = ''.join(output_parts)
# Create the destination directory if needed, then write the file in one go.
output_dir = os.path.dirname(args.output_file)
Path(output_dir).mkdir(parents=True, exist_ok=True)
with open(args.output_file, 'w', encoding='utf-8') as f:
    f.write(output_content)