TensorRT-LLMs/jenkins/scripts/open_search_query.py
Yiqing Yan 8c88454fa5
[TRTLLM-7101][infra] Reuse passed tests (#6894)
Signed-off-by: Yiqing Yan <yiqingy@nvidia.com>
Co-authored-by: Yanchao Lu <yanchaol@nvidia.com>
2025-12-03 10:07:23 +08:00

125 lines
4.1 KiB
Python

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
# open_search_query.py
#
# This module provides functions to query the OpenSearch database for passed
# test results from previous pipeline runs. It retrieves test names that have
# passed for a given commit ID and stage name, which can be reused to skip
# redundant test execution in subsequent runs.
#
# Main functionality:
# - queryJobEvents: Queries OpenSearch for job events with pagination support
# - getPassedTestList: Retrieves and deduplicates passed test names
# - writeTestListToFile: Writes test list to a file for further processing
#
# =============================================================================
import argparse
import json
import os
import sys
from open_search_db import OpenSearchDB
def queryJobEvents(commitID="", stageName="", onlySuccess=True):
    """Query the OpenSearch test-info index for test records, page by page.

    A ``bool``/``must`` query is built from whichever filters are supplied,
    and results are fetched with ``from``/``size`` pagination until a page
    comes back short or empty, or a request fails.

    Args:
        commitID: Value matched against ``s_trigger_mr_commit``; an empty
            value disables this filter.
        stageName: Value matched against ``s_stage_name``; an empty value
            disables this filter.
        onlySuccess: If True, only records whose ``s_status`` is ``PASSED``
            are returned (default: True).

    Returns:
        List of raw hit dictionaries from every page that was fetched. The
        list may be partial when a request fails part-way through; a message
        is printed in that case.
    """
    mustConditions = []
    if commitID:
        mustConditions.append({"term": {"s_trigger_mr_commit": commitID}})
    if stageName:
        mustConditions.append({"term": {"s_stage_name": stageName}})
    if onlySuccess:
        mustConditions.append({"term": {"s_status": "PASSED"}})

    page_size = 1000
    # Only "from" changes between pages, so the request body is built once.
    # NOTE(review): from/size pagination is limited by the index's
    # max_result_window (10000 by default in OpenSearch); if more records
    # than that are expected, search_after or scroll is needed -- confirm
    # against the cluster settings.
    requestBody = {
        "query": {"bool": {"must": mustConditions}},
        "_source": [
            "s_job_name",
            "s_status",
            "s_build_id",
            "s_turtle_name",
            "s_test_name",
            "s_gpu_type",
        ],
        "size": page_size,
        "from": 0,
    }

    all_results = []
    while True:
        response = OpenSearchDB.queryFromOpenSearchDB(
            json.dumps(requestBody), "swdl-trtllm-infra-ci-prod-test_info"
        )
        if response is None:
            print("Failed to query from OpenSearchDB")
            break

        data = response.json()
        hitsSection = data.get("hits") if isinstance(data, dict) else None
        if not isinstance(hitsSection, dict):
            # An error payload carries no "hits" section; stop with what has
            # been collected instead of raising KeyError.
            print("Failed to query from OpenSearchDB")
            break

        hits = hitsSection.get("hits") or []
        if not hits:
            break

        all_results.extend(hits)
        print(f"Fetched {len(all_results)} records...")

        # A short page means the result set is exhausted, so the extra
        # round-trip that would only return an empty page is skipped.
        if len(hits) < page_size:
            break
        requestBody["from"] += page_size

    return all_results
def writeTestListToFile(testList, fileName):
    """Write every entry of ``testList`` to ``fileName``, one per line.

    Missing parent directories are created first. An existing file at
    ``fileName`` is overwritten.

    Args:
        testList: Iterable of test-name strings.
        fileName: Destination path; may be a bare file name with no
            directory component.
    """
    parentDir = os.path.dirname(fileName)
    # os.makedirs("") raises FileNotFoundError, so directories are created
    # only when the path actually has a directory part.
    if parentDir:
        os.makedirs(parentDir, exist_ok=True)
    with open(fileName, "w") as f:
        f.writelines(test + "\n" for test in testList)
def getPassedTestList(commitID, stageName, outputFile):
    """Write the unique names of passed tests for a commit and stage to a file.

    PASSED records are fetched through ``queryJobEvents``. Each record's
    ``s_turtle_name`` is collected, duplicates are removed, and the names are
    written one per line via ``writeTestListToFile``.

    Args:
        commitID: Commit value forwarded to ``queryJobEvents``.
        stageName: Stage name forwarded to ``queryJobEvents``.
        outputFile: Path of the file that receives the test names.
    """
    hits = queryJobEvents(commitID=commitID, stageName=stageName, onlySuccess=True)

    # The set comprehension removes duplicates. Records with no usable
    # s_turtle_name are skipped rather than raising KeyError, or TypeError
    # later when the name is written.
    candidateNames = ((hit.get("_source") or {}).get("s_turtle_name") for hit in hits)
    testSet = {name for name in candidateNames if name}

    # Set iteration order is arbitrary; sorting makes the output file
    # reproducible from run to run.
    writeTestListToFile(sorted(testSet), outputFile)
if __name__ == "__main__":
    # Command-line entry point: all three options are mandatory.
    cli = argparse.ArgumentParser()
    for flag, description in (
        ("--commit-id", "Commit ID"),
        ("--stage-name", "Stage Name"),
        ("--output-file", "Output File"),
    ):
        cli.add_argument(flag, required=True, help=description)

    options = cli.parse_args(sys.argv[1:])
    getPassedTestList(
        commitID=options.commit_id,
        stageName=options.stage_name,
        outputFile=options.output_file,
    )