#!/bin/bash
#
# Manage one Ollama systemd service per NVIDIA GPU.
# Usage: <script> {create|start|stop|restart|status|logs|delete|help}

# Base configuration shared by the functions below.
SERVICE_NAME="ollama"              # unit name prefix; GPU i gets ${SERVICE_NAME}${i}.service
SERVICE_DIR="/etc/systemd/system"  # directory the unit files are written to
OLLAMA_BIN="/usr/local/bin/ollama" # binary used in ExecStart
BASE_PORT=11434                    # GPU i listens on BASE_PORT + i
# NOTE: USER is normally inherited from the login environment; from here on it
# holds the account the generated units run as, not the invoking user.
USER="ollama"
GROUP="ollama"

# The script writes unit files and drives systemctl, so it must run as root.
if [ "$EUID" -ne 0 ]; then
    echo "请以 root 用户运行此脚本。" >&2
    exit 1
fi

# Abort early when the Ollama binary is not installed at the expected path.
if [ ! -f "$OLLAMA_BIN" ]; then
    echo "错误: $OLLAMA_BIN 不存在,请确认 Ollama 已安装。" >&2
    exit 1
fi

#######################################
# Detect how many NVIDIA GPUs are present.
# Globals:  GPU_COUNT (written)
# Outputs:  the detected count on stdout; the error message on stderr
# Exits:    with status 1 when nvidia-smi is missing, fails, or lists no GPU
#######################################
get_gpu_count() {
    local gpu_list

    # nvidia-smi may print a diagnostic on stdout when it fails, so check its
    # exit status and count only "GPU <n>..." lines, not every output line.
    if ! command -v nvidia-smi > /dev/null 2>&1 \
        || ! gpu_list=$(nvidia-smi --list-gpus 2> /dev/null); then
        GPU_COUNT=0
    else
        GPU_COUNT=$(grep -c '^GPU [0-9]' <<< "$gpu_list")
    fi

    if [ "$GPU_COUNT" -eq 0 ]; then
        echo "未检测到 GPU,请确保 NVIDIA 驱动已安装。" >&2
        exit 1
    fi
    echo "检测到 $GPU_COUNT 个 GPU。"
}

#######################################
# Write one systemd unit file per GPU and reload systemd.
# Unit i is pinned to GPU i via CUDA_VISIBLE_DEVICES and listens on
# BASE_PORT + i.
# Globals:  SERVICE_DIR, SERVICE_NAME, OLLAMA_BIN, BASE_PORT, USER, GROUP
#           (read); GPU_COUNT (set by get_gpu_count)
# Outputs:  progress messages on stdout; write errors on stderr
# Exits:    with status 1 when a unit file cannot be written
#######################################
create_services() {
    local i service_file port

    get_gpu_count
    for ((i = 0; i < GPU_COUNT; i++)); do
        service_file="${SERVICE_DIR}/${SERVICE_NAME}${i}.service"
        port=$((BASE_PORT + i))

        echo "正在创建服务文件: $service_file"

        # The script already requires root, so the file is written directly;
        # a failed write aborts instead of being reported as a success.
        if ! cat > "$service_file" <<EOF
[Unit]
Description=Ollama Service (GPU $i)
After=network-online.target

[Service]
ExecStart=$OLLAMA_BIN serve
User=$USER
Group=$GROUP
Restart=always
RestartSec=3
Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin"
Environment="OLLAMA_NUM_PARALLEL=$GPU_COUNT"
Environment="OLLAMA_HOST=0.0.0.0:$port"
Environment="CUDA_VISIBLE_DEVICES=$i"
Environment="OLLAMA_ORIGINS=*"

[Install]
WantedBy=default.target
EOF
        then
            echo "错误: 无法写入服务文件 $service_file。" >&2
            exit 1
        fi

        echo "服务文件 $service_file 创建成功。"
    done

    # Make systemd pick up the new or changed unit files.
    echo "重新加载 systemd 配置..."
    systemctl daemon-reload
}

#######################################
# Start and enable the per-GPU services.
# Globals:  SERVICE_NAME (read); GPU_COUNT (set by get_gpu_count)
# Outputs:  one progress line per unit on stdout
# Returns:  0 when every start/enable succeeded, 1 otherwise
#######################################
start_services() {
    local i unit failed=0

    get_gpu_count
    for ((i = 0; i < GPU_COUNT; i++)); do
        unit="${SERVICE_NAME}${i}.service"
        echo "正在启动服务: $unit"
        # Keep going on failure so the remaining units are still handled,
        # but remember it for the return status.
        systemctl start "$unit" || failed=1
        systemctl enable "$unit" || failed=1
    done

    return "$failed"
}

#######################################
# Stop and disable the per-GPU services.
# Globals:  SERVICE_NAME (read); GPU_COUNT (set by get_gpu_count)
# Outputs:  one progress line per unit on stdout
# Returns:  0 when every stop/disable succeeded, 1 otherwise
#######################################
stop_services() {
    local i unit failed=0

    get_gpu_count
    for ((i = 0; i < GPU_COUNT; i++)); do
        unit="${SERVICE_NAME}${i}.service"
        echo "正在停止服务: $unit"
        # Keep going on failure so the remaining units are still handled,
        # but remember it for the return status.
        systemctl stop "$unit" || failed=1
        systemctl disable "$unit" || failed=1
    done

    return "$failed"
}

#######################################
# Restart the per-GPU services.
# Globals:  SERVICE_NAME (read); GPU_COUNT (set by get_gpu_count)
# Outputs:  one progress line per unit on stdout
# Returns:  0 when every restart succeeded, 1 otherwise
#######################################
restart_services() {
    local i unit failed=0

    get_gpu_count
    for ((i = 0; i < GPU_COUNT; i++)); do
        unit="${SERVICE_NAME}${i}.service"
        echo "正在重启服务: $unit"
        # Keep going on failure so the remaining units are still restarted,
        # but remember it for the return status.
        systemctl restart "$unit" || failed=1
    done

    return "$failed"
}

#######################################
# Show `systemctl status` for each per-GPU service.
# Globals:  SERVICE_NAME (read); GPU_COUNT (set by get_gpu_count)
# Outputs:  a header line plus the systemctl output for each unit
# Returns:  the status of the last systemctl call
#######################################
status_services() {
    local i unit

    get_gpu_count
    for ((i = 0; i < GPU_COUNT; i++)); do
        unit="${SERVICE_NAME}${i}.service"
        echo "检查服务状态: $unit"
        # --no-pager keeps the output non-interactive.
        systemctl status "$unit" --no-pager
    done
}

#######################################
# Print the last 10 journal entries of each per-GPU service.
# Globals:  SERVICE_NAME (read); GPU_COUNT (set by get_gpu_count)
# Outputs:  a header line plus the journalctl output for each unit
# Returns:  the status of the last journalctl call
#######################################
logs_services() {
    local i unit

    get_gpu_count
    for ((i = 0; i < GPU_COUNT; i++)); do
        unit="${SERVICE_NAME}${i}.service"
        echo "查看服务日志: $unit"
        journalctl -u "$unit" -n 10 --no-pager
    done
}

#######################################
# Stop, disable and remove the per-GPU unit files, then reload systemd.
# Globals:  SERVICE_DIR, SERVICE_NAME (read); GPU_COUNT (set by get_gpu_count)
# Outputs:  progress messages on stdout
# Returns:  the status of the final `systemctl daemon-reload`
#######################################
delete_services() {
    local i unit unit_file

    get_gpu_count
    for ((i = 0; i < GPU_COUNT; i++)); do
        unit="${SERVICE_NAME}${i}.service"
        unit_file="${SERVICE_DIR}/${unit}"
        echo "正在停止并删除服务: $unit"
        # stop/disable are best-effort: their status is not checked, so the
        # unit file is removed even when they fail.
        systemctl stop "$unit"
        systemctl disable "$unit"
        rm -f -- "$unit_file"
    done

    # Make systemd forget the removed unit files.
    echo "重新加载 systemd 配置..."
    systemctl daemon-reload
}

#######################################
# Print the usage summary: the usage line followed by one line per command.
# Outputs:  the help text on stdout
#######################################
show_help() {
    printf '%s\n' \
        "用法: $0 {create|start|stop|restart|status|logs|delete|help}" \
        " create - 创建并启动所有服务" \
        " start - 启动所有服务" \
        " stop - 停止所有服务" \
        " restart - 重启所有服务" \
        " status - 查看所有服务状态" \
        " logs - 查看所有服务日志" \
        " delete - 删除所有服务" \
        " help - 显示帮助信息"
}

# Dispatch on the first command-line argument. A missing or unknown command
# prints the usage text and exits with status 1.
case "${1:-}" in
    create)
        # Write the unit files, then start the services and show their state.
        create_services
        start_services
        status_services
        ;;
    start)
        start_services
        status_services
        ;;
    stop)
        stop_services
        ;;
    restart)
        restart_services
        status_services
        ;;
    status)
        status_services
        ;;
    logs)
        logs_services
        ;;
    delete)
        delete_services
        ;;
    help | -h | --help)
        show_help
        ;;
    *)
        show_help
        exit 1
        ;;
esac

exit 0