mirror of
https://github.com/NVIDIA/TensorRT-LLM.git
synced 2026-01-14 06:27:45 +08:00
* Adding two-shot allreduce kernel and mnnvl multicasting buffergit gffe Signed-off-by: Shiyu Li <shili@nvidia.com> Adding comments Signed-off-by: Shiyu Li <shili@nvidia.com> Add unittest of the twoshot kernel. Signed-off-by: Shiyu Li <shili@nvidia.com> Update dispatch logic Signed-off-by: Shiyu Li <shili@nvidia.com> Use cpu barrier instead of GPU at init Signed-off-by: Shiyu Li <shili@nvidia.com> Merge dispatch logic fix Signed-off-by: Shiyu Li <shili@nvidia.com> Update the kernel to use GPU-managed buffer Signed-off-by: Shiyu Li <shili@nvidia.com> * Refine Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Clean code Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Fix compile error Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Fix issue Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Clean up Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Simplify AllReduce interface Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Rename Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Fix warning Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Tidy code Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Rename Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Fix compile error Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Refine Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Skip ut for no_fusion Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> * Refine Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> --------- Signed-off-by: Shiyu Li <shili@nvidia.com> Signed-off-by: Zongfei Jing <20381269+zongfeijing@users.noreply.github.com> Co-authored-by: Shiyu Li <shili@nvidia.com>
83 lines
2.4 KiB
CMake
83 lines
2.4 KiB
CMake
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION &
|
|
# AFFILIATES. All rights reserved. SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
|
# use this file except in compliance with the License. You may obtain a copy of
|
|
# the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations under
|
|
# the License.
|
|
include(FetchContent)
|
|
|
|
# Translation units compiled into the runtime_src object library. Entries are
# listed one per line so additions and removals show up as single-line diffs;
# paths are relative to this directory.
set(SRCS
    # Helpers living under utils/
    utils/mpiUtils.cpp
    utils/numpyUtils.cpp
    utils/runtimeUtils.cpp
    utils/debugUtils.cu
    utils/speculativeChoicesUtils.cpp
    # Sources located directly in this directory
    bufferManager.cpp
    cudaMemPool.cpp
    decodingLayerWorkspace.cpp
    eagleBuffers.cpp
    explicitDraftTokensBuffers.cpp
    lookaheadBuffers.cpp
    layerProfiler.cpp
    loraManager.cpp
    loraUtils.cpp
    loraModule.cpp
    loraCache.cpp
    decodingOutput.cpp
    decoderState.cpp
    gptDecoder.cpp
    gptDecoderBatched.cpp
    gptJsonConfig.cpp
    iBuffer.cpp
    iTensor.cpp
    ipcUtils.cpp
    ipcSocket.cpp
    ipcNvlsMemory.cpp
    mcastDeviceMemory.cpp
    memoryCounters.cpp
    moeLoadBalancer.cpp
    ncclCommunicator.cpp
    promptTuningParams.cpp
    runtimeKernels.cu
    tllmBuffers.cpp
    tllmRuntime.cpp
    tllmStreamReaders.cpp
    tllmLogger.cpp
    workerPool.cpp
    worldConfig.cpp
)
|
|
|
|
# Add ${API_INCLUDE_DIR}/tensorrt_llm/runtime to the include search path.
# NOTE(review): include_directories() is directory-scoped, so this path applies
# to every target created in this directory, not to runtime_src alone; a
# target_include_directories() call placed after add_library() would narrow it.
include_directories(${API_INCLUDE_DIR}/tensorrt_llm/runtime)
|
|
|
|
# Warning configuration appended to CMAKE_CXX_FLAGS (C++ compilation only).
if(WIN32)
  # Windows: warning level 4.
  string(APPEND CMAKE_CXX_FLAGS " /W4")
else()
  # Non-Windows: enable the -Wall warning set.
  #
  # NOTE(review): some derived classes intentionally change the parameters of
  # inherited methods, which can raise overloaded-virtual warnings. No flag
  # suppressing that warning is passed here; only -Wall is added.
  string(APPEND CMAKE_CXX_FLAGS " -Wall")
  if(WARNING_IS_ERROR)
    # Opt-in: promote all warnings to errors.
    message(STATUS "Treating warnings as errors in GCC compilation")
    string(APPEND CMAKE_CXX_FLAGS " -Werror")
  endif()
endif()
|
|
|
|
# Compile the sources listed in SRCS into an OBJECT library named runtime_src.
add_library(runtime_src OBJECT ${SRCS})

# Build position-independent code and let CMake resolve CUDA device symbols for
# this target.
set_target_properties(
  runtime_src
  PROPERTIES POSITION_INDEPENDENT_CODE ON
             CUDA_RESOLVE_DEVICE_SYMBOLS ON)

# MPI include directories are PRIVATE: they are used when building runtime_src
# and are not propagated to targets that consume it.
target_include_directories(runtime_src PRIVATE ${MPI_C_INCLUDE_DIRS})
|
|
|
|
# Link ${NCCL_LIB} only when ENABLE_MULTI_DEVICE is set. The dependency is
# PUBLIC, so it is also part of runtime_src's link interface for consumers.
if(ENABLE_MULTI_DEVICE)
  target_link_libraries(runtime_src PUBLIC ${NCCL_LIB})
endif()
|