easy_llm_setup/8ollama.sh
2025-03-03 13:26:05 +08:00

189 lines
4.3 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# Manage one systemd-supervised Ollama instance per NVIDIA GPU.
# Usage: <script> {create|start|stop|restart|status|logs|delete|help}

# Base settings shared by the functions below.
SERVICE_NAME="ollama"              # unit name prefix: <prefix><index>.service
SERVICE_DIR="/etc/systemd/system"  # where the unit files are written
OLLAMA_BIN="/usr/local/bin/ollama" # binary placed in ExecStart
BASE_PORT=11434                    # instance <index> listens on BASE_PORT + index
USER="ollama"                      # account written into User=
GROUP="ollama"                     # group written into Group=

# Only real sub-commands need root. "help", a missing argument or an unknown
# argument fall through so the usage text can be shown to any user.
case "${1:-}" in
  create | start | stop | restart | status | logs | delete)
    if [ "$EUID" -ne 0 ]; then
      echo "请以 root 用户运行此脚本。" >&2
      exit 1
    fi
    ;;
esac

# Only the sub-commands that launch Ollama need the binary. Stopping,
# inspecting or deleting services must keep working after an uninstall.
case "${1:-}" in
  create | start | restart)
    if [ ! -f "$OLLAMA_BIN" ]; then
      echo "错误: $OLLAMA_BIN 不存在,请确认 Ollama 已安装。" >&2
      exit 1
    fi
    ;;
esac
#######################################
# Detect how many NVIDIA GPUs nvidia-smi lists.
# Globals:   GPU_COUNT (written)
# Arguments: none
# Outputs:   the detected count on stdout; diagnostics on stderr
# Returns:   0 on success. Exits the script with status 1 when nvidia-smi
#            fails or lists no GPU.
#######################################
get_gpu_count() {
  local listing
  GPU_COUNT=0
  if listing=$(nvidia-smi --list-gpus); then
    # Count only device lines ("GPU <n>: ..."), so blank lines, warnings or
    # nested sub-device lines are never mistaken for a GPU.
    GPU_COUNT=$(grep -c '^GPU ' <<<"$listing")
  elif [ -n "$listing" ]; then
    # Whatever a failing nvidia-smi printed on stdout is most likely its error
    # text; forward it as a diagnostic instead of counting its lines.
    printf '%s\n' "$listing" >&2
  fi
  if [ "$GPU_COUNT" -eq 0 ]; then
    echo "未检测到 GPU请确保 NVIDIA 驱动已安装。" >&2
    exit 1
  fi
  echo "检测到 $GPU_COUNT 个 GPU。"
}
#######################################
# Write one systemd unit file per detected GPU, then reload systemd.
# Unit <index> is pinned to GPU <index> through CUDA_VISIBLE_DEVICES and
# listens on BASE_PORT + <index>.
# Globals:   SERVICE_DIR, SERVICE_NAME, OLLAMA_BIN, BASE_PORT, USER, GROUP
#            (read); GPU_COUNT (written by get_gpu_count)
# Arguments: none
# Outputs:   progress messages on stdout; write errors on stderr
# Returns:   exit status of "systemctl daemon-reload". Exits the script with
#            status 1 as soon as a unit file cannot be written.
#######################################
create_services() {
  get_gpu_count
  local idx unit_file port
  for ((idx = 0; idx < GPU_COUNT; idx++)); do
    unit_file="${SERVICE_DIR}/${SERVICE_NAME}${idx}.service"
    port=$((BASE_PORT + idx))
    echo "正在创建服务文件: $unit_file"
    # Unquoted delimiter: the variables below are expanded now, so the unit
    # file contains literal values.
    if ! sudo tee "$unit_file" >/dev/null <<EOF
[Unit]
Description=Ollama Service (GPU $idx)
After=network-online.target
[Service]
ExecStart=$OLLAMA_BIN serve
User=$USER
Group=$GROUP
Restart=always
RestartSec=3
Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin"
Environment="OLLAMA_NUM_PARALLEL=$GPU_COUNT"
Environment="OLLAMA_HOST=0.0.0.0:$port"
Environment="CUDA_VISIBLE_DEVICES=$idx"
Environment="OLLAMA_ORIGINS=*"
[Install]
WantedBy=default.target
EOF
    then
      # Never claim success for a unit file that was not written.
      echo "错误: 无法写入服务文件 $unit_file" >&2
      exit 1
    fi
    echo "服务文件 $unit_file 创建成功。"
  done
  # Make systemd pick up the new unit files.
  echo "重新加载 systemd 配置..."
  sudo systemctl daemon-reload
}
# Start every per-GPU unit and enable it.
# Reads SERVICE_NAME and the GPU_COUNT set by get_gpu_count; the return status
# is that of the last "systemctl enable" call.
start_services() {
  get_gpu_count
  local -a units=()
  local n unit
  # Collect the unit names first, then act on each of them.
  for ((n = 0; n < GPU_COUNT; n++)); do
    units+=("${SERVICE_NAME}${n}.service")
  done
  for unit in "${units[@]}"; do
    printf '正在启动服务: %s\n' "$unit"
    sudo systemctl start "$unit"
    sudo systemctl enable "$unit"
  done
}
# Stop every per-GPU unit and disable it.
# Reads SERVICE_NAME and the GPU_COUNT set by get_gpu_count; the return status
# is that of the last "systemctl disable" call.
stop_services() {
  get_gpu_count
  local idx=0 unit
  while ((idx < GPU_COUNT)); do
    unit="${SERVICE_NAME}${idx}.service"
    # Advance the counter up front so the systemctl call stays the last
    # command of the loop body.
    idx=$((idx + 1))
    printf '%s\n' "正在停止服务: ${unit}"
    sudo systemctl stop "${unit}"
    sudo systemctl disable "${unit}"
  done
}
# Restart every per-GPU unit.
# Reads SERVICE_NAME and the GPU_COUNT set by get_gpu_count; the return status
# is that of the last "systemctl restart" call.
restart_services() {
  get_gpu_count
  local -a targets=()
  local k target
  for ((k = 0; k < GPU_COUNT; k++)); do
    targets+=("${SERVICE_NAME}${k}.service")
  done
  for target in "${targets[@]}"; do
    printf '正在重启服务: %s\n' "$target"
    sudo systemctl restart "$target"
  done
}
# Print "systemctl status" (without pager) for every per-GPU unit.
# Reads SERVICE_NAME and the GPU_COUNT set by get_gpu_count; the return status
# is that of the last "systemctl status" call.
status_services() {
  get_gpu_count
  local idx=0 unit
  while ((idx < GPU_COUNT)); do
    unit="${SERVICE_NAME}${idx}.service"
    # Advance the counter up front so the systemctl call stays the last
    # command of the loop body.
    idx=$((idx + 1))
    printf '%s\n' "检查服务状态: ${unit}"
    sudo systemctl status "${unit}" --no-pager
  done
}
# Print the last 10 journal lines of every per-GPU unit.
# Reads SERVICE_NAME and the GPU_COUNT set by get_gpu_count; the return status
# is that of the last journalctl call.
logs_services() {
  get_gpu_count
  local -a units=()
  local n unit
  for ((n = 0; n < GPU_COUNT; n++)); do
    units+=("${SERVICE_NAME}${n}.service")
  done
  for unit in "${units[@]}"; do
    printf '查看服务日志: %s\n' "$unit"
    sudo journalctl -u "$unit" -n 10 --no-pager
  done
}
#######################################
# Stop, disable and remove every per-GPU unit, then reload systemd.
# The units are discovered from the unit files present in SERVICE_DIR rather
# than from the current GPU count, so units left over from a machine that had
# more GPUs are removed too, and deletion still works when nvidia-smi is not
# available. Only "<SERVICE_NAME><digits>.service" is touched; a plain
# "<SERVICE_NAME>.service" is left alone.
# Globals:   SERVICE_DIR, SERVICE_NAME (read)
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   exit status of "systemctl daemon-reload"
#######################################
delete_services() {
  local unit_file unit found=0
  local name_re="^${SERVICE_NAME}[0-9]+\.service$"
  for unit_file in "${SERVICE_DIR}/${SERVICE_NAME}"[0-9]*.service; do
    unit=${unit_file##*/}
    # Skip the literal pattern left by an unmatched glob, and names such as
    # "<prefix>1-extra.service" that the loose glob lets through.
    [[ -e "$unit_file" || -L "$unit_file" ]] || continue
    [[ "$unit" =~ $name_re ]] || continue
    found=1
    echo "正在停止并删除服务: $unit"
    sudo systemctl stop "$unit"
    sudo systemctl disable "$unit"
    sudo rm -f -- "$unit_file"
  done
  if ((found == 0)); then
    echo "未找到需要删除的服务文件。"
  fi
  # Make systemd forget the removed unit files.
  echo "重新加载 systemd 配置..."
  sudo systemctl daemon-reload
}
# Print the usage line followed by one description line per sub-command.
show_help() {
  printf '%s\n' \
    "用法: $0 {create|start|stop|restart|status|logs|delete|help}" \
    " create - 创建并启动所有服务" \
    " start - 启动所有服务" \
    " stop - 停止所有服务" \
    " restart - 重启所有服务" \
    " status - 查看所有服务状态" \
    " logs - 查看所有服务日志" \
    " delete - 删除所有服务" \
    " help - 显示帮助信息"
}
# Run the sub-command named by the first argument.
# The script's exit status is the status of the requested operation instead of
# an unconditional 0, so callers and automation can detect failures. The
# status overview printed after create/start/restart is informational and does
# not affect the exit status.
rc=0
case "${1:-}" in
  create)
    # Do not try to start instances whose unit files could not be created.
    create_services && start_services
    rc=$?
    status_services
    ;;
  start)
    start_services
    rc=$?
    status_services
    ;;
  stop)
    stop_services
    rc=$?
    ;;
  restart)
    restart_services
    rc=$?
    status_services
    ;;
  status)
    status_services
    rc=$?
    ;;
  logs)
    logs_services
    rc=$?
    ;;
  delete)
    delete_services
    rc=$?
    ;;
  help)
    show_help
    ;;
  *)
    # Missing or unknown sub-command: usage goes to stderr, status is 1.
    show_help >&2
    rc=1
    ;;
esac
exit "$rc"