mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
143 lines
4.8 KiB
Python
Executable File
143 lines
4.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
# SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
|
|
#
|
|
# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
|
|
# property and proprietary rights in and to this material, related
|
|
# documentation and any modifications thereto. Any use, reproduction,
|
|
# disclosure or distribution of this material and related documentation
|
|
# without an express license agreement from NVIDIA CORPORATION or
|
|
# its affiliates is strictly prohibited.
|
|
#
|
|
# Embed system CUDA headers in C++ arrays.
|
|
|
|
import argparse
|
|
import os
|
|
from collections import namedtuple
|
|
from pathlib import Path
|
|
|
|
# Command-line interface. Arguments are parsed at module level because the
# code further down reads the resulting `args` object directly.
parser = argparse.ArgumentParser(
    description='Embed system CUDA headers in cpp arrays',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument('--output_file', help='Output c++ file name', required=True)
parser.add_argument(
    '--input_files',
    help='Input CUDA header file name list, separated by ","',
    default=
    'cuda_bf16.h,cuda_bf16.hpp,cuda_fp16.h,cuda_fp16.hpp,cuda_fp8.h,cuda_fp8.hpp,vector_types.h,vector_functions.h'
)
parser.add_argument('--cuda_root',
                    help='CUDA Toolkit path',
                    default='/usr/local/cuda')
parser.add_argument(
    '--chunk-size',
    type=int,
    help=
    'Max length for each literal string in the output. Strings would be split into multiple smaller substrings if the length exceeds chunk-size.',
    default=80)

args = parser.parse_args()

# A chunk size of zero or less would make the chunking loop in
# convert_to_cpp_raw_str never advance, so reject it up front with a normal
# usage error instead of hanging.
if args.chunk_size <= 0:
    parser.error('--chunk-size must be a positive integer')
|
|
|
|
# Fixed text emitted at the top of the generated file: license banner,
# `#pragma once`, and the opening of the two nested namespaces. It contains
# literal braces, so it is appended verbatim and never passed to str.format().
TEMPLATE_PROLOGUE = '''/*
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: NVIDIA TensorRT Source Code License Agreement
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/

#pragma once
namespace tensorrt_llm {
namespace kernels {
'''

# Per-header snippet, filled in with str.format(). Placeholders:
#   {content_var_name} - name of the C++ constant holding the header text
#   {content}          - the header text as one or more quoted C++ string
#                        literals (placed right after the magic string so the
#                        adjacent literals form a single string)
#   {fname_var_name}   - name of the C++ constant holding the file name
#   {fname}            - the header file name itself
# Prepend the magic string to disable NVRTC encryption.
TEMPLATE_CONTENT = '''constexpr const char* {content_var_name} = "j3iAA#$)7"{content};
constexpr const char* {fname_var_name} = "{fname}";
'''

# Closes the two namespaces opened by TEMPLATE_PROLOGUE.
TEMPLATE_EPILOGUE = '''}
}
'''
|
|
|
|
|
|
# Input: "ThisIsAString.h" / "this_is_a_string.h"
|
|
# Output: "this_is_a_string_h"
|
|
def get_canonized_str(s: str):
    """Turn a file name into a lower-case, underscore-separated identifier.

    The input is split into words. A word starts at any letter or digit and
    continues over the following lower-case letters and digits, so an
    upper-case letter always begins a new word. Every other character acts
    as a separator and is dropped. Words are lower-cased and joined with '_'.

    Examples:
        "ThisIsAString.h"    -> "this_is_a_string_h"
        "this_is_a_string.h" -> "this_is_a_string_h"
    """
    words = []
    # Index where the word currently being scanned began; None between words.
    start = None
    for pos, ch in enumerate(s):
        if start is not None and (ch.islower() or ch.isdigit()):
            # Still inside the current word.
            continue
        if start is not None:
            # The current word ends just before this character.
            words.append(s[start:pos].lower())
            start = None
        if ch.isalpha() or ch.isdigit():
            # A letter or digit opens a new word; anything else is skipped.
            start = pos
    if start is not None:
        words.append(s[start:].lower())
    return '_'.join(words)
|
|
|
|
|
|
# Returned string includes the surrounding double quotation marks.
|
|
def convert_to_cpp_raw_str(s: str, chunk_size=None):
    """Encode text as one or more quoted C++ string literals.

    Every UTF-8 byte of the input is written as a three-digit octal escape
    (for example "a" becomes "\\141"), so the output contains nothing that
    needs further escaping. Input longer than `chunk_size` characters is cut
    into pieces of at most `chunk_size` characters; each piece becomes its
    own quoted literal and the literals are separated by newlines.

    Args:
        s: Text to encode.
        chunk_size: Maximum number of input characters per literal. When
            omitted, the value of the --chunk-size command-line option
            (`args.chunk_size`) is used.

    Returns:
        The literal text, including the surrounding double quotation marks.

    Raises:
        ValueError: If the chunk size is not positive. Such a value would
            otherwise keep the chunking from ever advancing.
    """
    if chunk_size is None:
        chunk_size = args.chunk_size
    if chunk_size <= 0:
        raise ValueError(
            'chunk size must be a positive integer, got %r' % (chunk_size, ))

    def to_literal(piece: str):
        # One "\ooo" escape per encoded byte, wrapped in double quotes.
        escaped = ''.join('\\' + format(byte, '03o')
                          for byte in piece.encode('utf-8'))
        return '"' + escaped + '"'

    # Short (including empty) input is emitted as a single literal.
    if len(s) <= chunk_size:
        return to_literal(s)

    return '\n'.join(
        to_literal(s[start:start + chunk_size])
        for start in range(0, len(s), chunk_size))
|
|
|
|
|
|
# Names of the two C++ constants generated for each embedded header.
Entry = namedtuple('Entry', ['content_var_name', 'fname_var_name'])
entries = []

# Collect the pieces of the generated file and join them once at the end.
output_parts = [TEMPLATE_PROLOGUE]
for input_file in args.input_files.split(','):
    # Tolerate spaces around the commas and stray empty items (for example a
    # trailing comma); they would otherwise turn into bogus paths.
    input_file = input_file.strip()
    if not input_file:
        continue
    canonized_name = get_canonized_str(input_file)
    fname_var_name = canonized_name + '_fname'
    content_var_name = canonized_name + '_content'
    input_full_path = os.path.join(args.cuda_root, 'include', input_file)
    # The content is re-encoded as UTF-8 when it is turned into a C++
    # literal, so read it as UTF-8 rather than with the locale's default.
    with open(input_full_path, 'r', encoding='utf-8') as f:
        input_content = f.read()
    output_parts.append(
        TEMPLATE_CONTENT.format(
            content_var_name=content_var_name,
            content=convert_to_cpp_raw_str(input_content),
            fname_var_name=fname_var_name,
            fname=input_file))
    entries.append(
        Entry(content_var_name=content_var_name, fname_var_name=fname_var_name))

# Two parallel arrays: entry i of one array belongs to entry i of the other.
output_parts.append("constexpr char const* cuda_headers_content[] = {\n")
output_parts.extend(" " + entry.content_var_name + ",\n" for entry in entries)
output_parts.append("};\n")

output_parts.append("constexpr char const* cuda_headers_name[] = {\n")
output_parts.extend(" " + entry.fname_var_name + ",\n" for entry in entries)
output_parts.append("};\n")

output_parts.append(TEMPLATE_EPILOGUE)
output_content = ''.join(output_parts)

# Create the destination directory if it does not exist yet.
output_dir = os.path.dirname(args.output_file)
Path(output_dir).mkdir(parents=True, exist_ok=True)

with open(args.output_file, 'w', encoding='utf-8') as f:
    f.write(output_content)
|