9 Commits

Author SHA1 Message Date
Harry-zklcdc cbc426c180 [Feat] Unified Entrance of Web Services 2025-02-23 22:14:27 +08:00
Harry-zklcdc 1a3a126388 [Fix] 😅 Typo 2025-02-23 18:57:53 +08:00
Harry-zklcdc 1133650632 [Docs] 📝 Add VideoDemo to README.md 2025-02-23 17:20:29 +08:00
Harry-zklcdc 9d561cb63a [Fix] 🐛 Images Key Init 2025-02-19 02:14:29 +08:00
Harry-zklcdc 1f55ecb649 [Fix] 🐛 Cors Error at FrontEnd 2025-02-16 23:53:14 +08:00
Harry-zklcdc 87995f0572 [Fix] 🐛 GPU Num Restore Error at Some Scene 2025-02-16 22:12:28 +08:00
Harry-zklcdc bb7b60352e [Fix] 🐛 DON'T Delete Container or Volume at Patch/Restart Instance Error 2025-02-16 22:11:36 +08:00
Harry-zklcdc ef93c40361 [Fix] 🐛 Fix GPU Device Error at NoCard Instances #14 2025-02-15 22:00:02 +08:00
Harry-zklcdc 716764a86d [Fix] 🐛 From Status Error at Start Action #12 2025-02-12 23:25:53 +08:00
14 changed files with 66 additions and 51 deletions
+4 -1
View File
@@ -49,9 +49,12 @@
> 查看文档 [**>>> 🚧 正在施工中 <<<**]() > 查看文档 [**>>> 🚧 正在施工中 <<<**]()
## 📌 效果展示 ## 📌 效果展示
### 视频演示
[Bilibili - MEGREZ——你的新一代开源GPU管理系统](https://www.bilibili.com/video/BV1EnfWY9ECC/)
### 登录注册 ### 登录注册
| 登录 | 注册 | | 登录 | 注册 |
+5 -6
View File
@@ -70,12 +70,11 @@
:href="'http://' + data.code_server_address" target="_blank" v-tooltip.top="'VSCode Web'" /> :href="'http://' + data.code_server_address" target="_blank" v-tooltip.top="'VSCode Web'" />
<Button v-else icon="pi pi-code" aria-label="Filter" v-tooltip.top="'VSCode Web'" disabled /> <Button v-else icon="pi pi-code" aria-label="Filter" v-tooltip.top="'VSCode Web'" disabled />
<Button v-if="data.status == statusRunning" severity="info" icon="pi pi-inbox" aria-label="Filter" as="a" <Button v-if="data.status == statusRunning" severity="info" icon="pi pi-inbox" aria-label="Filter" as="a"
:href="'http://' + data.jupyter_address + '/lab'" target="_blank" v-tooltip.top="'Jupter Lab'" /> :href="'http://' + data.jupyter_address" target="_blank" v-tooltip.top="'Jupyter Lab'" />
<Button v-else severity="info" icon="pi pi-inbox" aria-label="Filter" v-tooltip.top="'Jupter Lab'" <Button v-else severity="info" icon="pi pi-inbox" aria-label="Filter" v-tooltip.top="'Jupyter Lab'"
disabled /> disabled />
<Button v-if="data.status == statusRunning" severity="contrast" icon="pi pi-chart-bar" as="a" <Button v-if="data.status == statusRunning" severity="contrast" icon="pi pi-chart-bar" as="a"
:href="'http://' + data.grafana_address + '/public-dashboards/2c510f203876465ba76617510ce3e219'" :href="'http://' + data.grafana_address" target="_blank" v-tooltip.top="'监控'" />
target="_blank" v-tooltip.top="'监控'" />
<Button v-else severity="contrast" icon="pi pi-chart-bar" v-tooltip.top="'监控'" disabled /> <Button v-else severity="contrast" icon="pi pi-chart-bar" v-tooltip.top="'监控'" disabled />
<Button v-if="!isAdmin" icon="pi pi-ellipsis-h" severity="secondary" aria-label="Bookmark" <Button v-if="!isAdmin" icon="pi pi-ellipsis-h" severity="secondary" aria-label="Bookmark"
@click="showMenu($event, data)" /> @click="showMenu($event, data)" />
@@ -122,13 +121,13 @@
</Fieldset> </Fieldset>
<Fieldset legend="GPU"> <Fieldset legend="GPU">
<span v-if="instanceDetail.gpu_count !== 0">{{ instanceDetail.gpu_type }} * {{ instanceDetail.gpu_count <span v-if="instanceDetail.gpu_count !== 0">{{ instanceDetail.gpu_type }} * {{ instanceDetail.gpu_count
}}</span> }}</span>
<span v-else>无卡模式</span> <span v-else>无卡模式</span>
</Fieldset> </Fieldset>
<div class="flex flex-col md:flex-row gap-4"> <div class="flex flex-col md:flex-row gap-4">
<Fieldset class="flex flex-wrap gap-2 w-full" legend="CPU"> <Fieldset class="flex flex-wrap gap-2 w-full" legend="CPU">
<span v-if="instanceDetail.gpu_count !== 0">{{ instanceDetail.cpu_count_per_gpu * instanceDetail.gpu_count <span v-if="instanceDetail.gpu_count !== 0">{{ instanceDetail.cpu_count_per_gpu * instanceDetail.gpu_count
}} }}
</span> </span>
<span v-else>1 </span> <span v-else>1 </span>
</Fieldset> </Fieldset>
+3 -4
View File
@@ -74,12 +74,11 @@
:href="'http://' + data.code_server_address" target="_blank" v-tooltip.top="'VSCode Web'" /> :href="'http://' + data.code_server_address" target="_blank" v-tooltip.top="'VSCode Web'" />
<Button v-else icon="pi pi-code" aria-label="Filter" v-tooltip.top="'VSCode Web'" disabled /> <Button v-else icon="pi pi-code" aria-label="Filter" v-tooltip.top="'VSCode Web'" disabled />
<Button v-if="data.status == statusRunning" severity="info" icon="pi pi-inbox" aria-label="Filter" as="a" <Button v-if="data.status == statusRunning" severity="info" icon="pi pi-inbox" aria-label="Filter" as="a"
:href="'http://' + data.jupyter_address + '/lab'" target="_blank" v-tooltip.top="'Jupter Lab'" /> :href="'http://' + data.jupyter_address" target="_blank" v-tooltip.top="'Jupyter Lab'" />
<Button v-else severity="info" icon="pi pi-inbox" aria-label="Filter" v-tooltip.top="'Jupter Lab'" <Button v-else severity="info" icon="pi pi-inbox" aria-label="Filter" v-tooltip.top="'Jupyter Lab'"
disabled /> disabled />
<Button v-if="data.status == statusRunning" severity="contrast" icon="pi pi-chart-bar" as="a" <Button v-if="data.status == statusRunning" severity="contrast" icon="pi pi-chart-bar" as="a"
:href="'http://' + data.grafana_address + '/public-dashboards/2c510f203876465ba76617510ce3e219'" :href="'http://' + data.grafana_address" target="_blank" v-tooltip.top="'监控'" />
target="_blank" v-tooltip.top="'监控'" />
<Button v-else severity="contrast" icon="pi pi-chart-bar" v-tooltip.top="'监控'" disabled /> <Button v-else severity="contrast" icon="pi pi-chart-bar" v-tooltip.top="'监控'" disabled />
<Button icon="pi pi-ellipsis-h" severity="secondary" aria-label="Bookmark" <Button icon="pi pi-ellipsis-h" severity="secondary" aria-label="Bookmark"
@click="showMenu($event, data)" /> @click="showMenu($event, data)" />
@@ -38,6 +38,8 @@ func forceDeleteHandler(ctx iris.Context) {
if instance.FromAction == models.InstanceActionStop || instance.FromAction == models.InstanceActionPause || instance.FromAction == models.InstanceActionRestart { if instance.FromAction == models.InstanceActionStop || instance.FromAction == models.InstanceActionPause || instance.FromAction == models.InstanceActionRestart {
redis.RawDB.IncrBy(ctx, "remain_gpu:server:"+strconv.Itoa(int(instance.ServerID)), int64(instance.GpuCount)) redis.RawDB.IncrBy(ctx, "remain_gpu:server:"+strconv.Itoa(int(instance.ServerID)), int64(instance.GpuCount))
}
if instance.FromAction != models.InstanceActionCreate {
redis.RawDB.IncrBy(ctx, "remain_volume:server:"+strconv.Itoa(int(instance.ServerID)), int64(instance.VolumeSize+30)) redis.RawDB.IncrBy(ctx, "remain_volume:server:"+strconv.Itoa(int(instance.ServerID)), int64(instance.VolumeSize+30))
} }
+12
View File
@@ -0,0 +1,12 @@
package index
import "github.com/kataras/iris/v12"
// cors is an iris middleware that attaches permissive, read-only CORS
// response headers and then hands control to the next handler in the chain.
//
// Any origin ("*") is allowed to issue GET requests carrying the
// Content-Type and Authorization request headers, and browsers may cache the
// preflight result for 86400 seconds (24 hours).
//
// Access-Control-Allow-Credentials is deliberately not sent: browsers refuse
// a credentialed cross-origin response whose Access-Control-Allow-Origin is
// the "*" wildcard, so the header had no effect next to the wildcard origin.
// If credentialed requests are ever required, echo a vetted request Origin
// instead of "*" before re-adding it.
func cors(ctx iris.Context) {
	ctx.Header("Access-Control-Allow-Origin", "*")
	ctx.Header("Access-Control-Allow-Methods", "GET")
	ctx.Header("Access-Control-Allow-Headers", "Content-Type, Authorization")
	ctx.Header("Access-Control-Max-Age", "86400")
	ctx.Next()
}
+1
View File
@@ -5,6 +5,7 @@ import (
) )
func InitIndex(app *iris.Application) { func InitIndex(app *iris.Application) {
app.Use(cors)
app.HandleDir("/", GetWebFS(), iris.DirOptions{ app.HandleDir("/", GetWebFS(), iris.DirOptions{
IndexName: "/index.html", IndexName: "/index.html",
Compress: true, Compress: true,
+1 -1
View File
@@ -61,7 +61,7 @@ func control(serverID uint, data Data) (err error) {
if err != nil { if err != nil {
ctx := context.Background() ctx := context.Background()
redis.RawDB.IncrBy(ctx, "remain_gpu:server:"+strconv.Itoa(int(serverID)), int64(instance.GpuCount)) redis.RawDB.IncrBy(ctx, "remain_gpu:server:"+strconv.Itoa(int(serverID)), int64(instance.GpuCount))
database.DB.Model(&instance).Update("status", models.InstanceStatusFail).Update("from_action", models.InstanceActionRestart) database.DB.Model(&instance).Update("status", models.InstanceStatusFail).Update("from_action", models.InstanceActionStart)
lc.Error("instance restart error: %v", err) lc.Error("instance restart error: %v", err)
return return
} }
-1
View File
@@ -61,7 +61,6 @@ func modify(serverID uint, data Data) (err error) {
err = instanceController.Patch(&instance, gpuCount, volumeSize, data.CpuOnly) err = instanceController.Patch(&instance, gpuCount, volumeSize, data.CpuOnly)
if err != nil { if err != nil {
ctx := context.Background() ctx := context.Background()
redis.RawDB.IncrBy(ctx, "remain_gpu:server:"+strconv.Itoa(int(serverID)), int64(gpuCount))
redis.RawDB.IncrBy(ctx, "remain_volume:server:"+strconv.Itoa(int(serverID)), int64(volumeSize-oldVolumeSize)) redis.RawDB.IncrBy(ctx, "remain_volume:server:"+strconv.Itoa(int(serverID)), int64(volumeSize-oldVolumeSize))
database.DB.Model(&instance).Update("status", models.InstanceStatusFail).Update("from_action", models.InstanceActionModify) database.DB.Model(&instance).Update("status", models.InstanceStatusFail).Update("from_action", models.InstanceActionModify)
lc.Error("patch instance error: %v", err) lc.Error("patch instance error: %v", err)
+4 -4
View File
@@ -48,7 +48,7 @@ func Create(instance *models.Instances) (containerName, volumeName string, err e
} }
go func() { go func() {
SetJupterPassword(server.IP, server.Port, server.Apikey, containerName, instance.SshPasswd) SetJupyterPassword(server.IP, server.Port, server.Apikey, containerName, instance.SshPasswd)
SetCodeServerPassword(server.IP, server.Port, server.Apikey, containerName, instance.SshPasswd) SetCodeServerPassword(server.IP, server.Port, server.Apikey, containerName, instance.SshPasswd)
}() }()
@@ -64,9 +64,9 @@ func Create(instance *models.Instances) (containerName, volumeName string, err e
instance.SshAddress = server.IP + ":" + portBindings["22"] instance.SshAddress = server.IP + ":" + portBindings["22"]
instance.TensorBoardAddress = server.IP + ":" + portBindings["6007"] instance.TensorBoardAddress = server.IP + ":" + portBindings["6007"]
instance.JupyterAddress = server.IP + ":" + portBindings["8888"] instance.JupyterAddress = server.IP + ":" + portBindings["80"] + "/jupyter"
instance.GrafanaAddress = server.IP + ":" + portBindings["3000"] instance.GrafanaAddress = server.IP + ":" + portBindings["80"] + "/monitor/public-dashboards/2c510f203876465ba76617510ce3e219"
instance.CodeServerAddress = server.IP + ":" + portBindings["8080"] instance.CodeServerAddress = server.IP + ":" + portBindings["80"] + "/code-server/"
instance.Status = 0 instance.Status = 0
result = database.DB.Save(&instance) result = database.DB.Save(&instance)
@@ -40,14 +40,13 @@ func createInstance(ip string, port int, apikey string,
Memory: strconv.Itoa(memorySize) + "GB", Memory: strconv.Itoa(memorySize) + "GB",
ContainerPorts: []string{ ContainerPorts: []string{
"22", // SSH "22", // SSH
"80", // Nginx
"6007", // TensorBoard "6007", // TensorBoard
"8888", // Jupyter Notebook
"3000", // Grafana
"8080", // Code-Server
"34567", // Custom Port "34567", // Custom Port
}, },
Env: []string{ Env: []string{
"NVIDIA_DRIVER_CAPABILITIES=video,compute,utility", "NVIDIA_DRIVER_CAPABILITIES=video,compute,utility",
"NVIDIA_VISIBLE_DEVICES=none",
}, },
} }
+8 -8
View File
@@ -57,9 +57,9 @@ func SetRootPassword(ip string, port int, apikey string,
return nil return nil
} }
func SetJupterPassword(ip string, port int, apikey string, func SetJupyterPassword(ip string, port int, apikey string,
containerName, password string) (err error) { containerName, password string) (err error) {
l.SetFunction("SetJupterPassword") l.SetFunction("SetJupyterPassword")
// Set Jupyter Password // Set Jupyter Password
data := executeReq{ data := executeReq{
@@ -85,8 +85,8 @@ func SetJupterPassword(ip string, port int, apikey string,
c.Do() c.Do()
if c.GetStatusCode() != 200 { if c.GetStatusCode() != 200 {
l.Error("set jupter password error: %d", c.GetStatusCode()) l.Error("set jupyter password error: %d", c.GetStatusCode())
return errors.New("set jupter password request error") return errors.New("set jupyter password request error")
} }
var res resStruct var res resStruct
@@ -97,7 +97,7 @@ func SetJupterPassword(ip string, port int, apikey string,
} }
if res.Code != 200 { if res.Code != 200 {
l.Error("set jupter password code: %d, error: %s", res.Code, res.Msg) l.Error("set jupyter password code: %d, error: %s", res.Code, res.Msg)
return errors.New(res.Msg) return errors.New(res.Msg)
} }
@@ -124,8 +124,8 @@ func SetJupterPassword(ip string, port int, apikey string,
c.Do() c.Do()
if c.GetStatusCode() != 200 { if c.GetStatusCode() != 200 {
l.Error("restart jupter error: %d", c.GetStatusCode()) l.Error("restart jupyter error: %d", c.GetStatusCode())
return errors.New("restart jupter request error") return errors.New("restart jupyter request error")
} }
err = json.Unmarshal(c.GetBody(), &res) err = json.Unmarshal(c.GetBody(), &res)
@@ -135,7 +135,7 @@ func SetJupterPassword(ip string, port int, apikey string,
} }
if res.Code != 200 { if res.Code != 200 {
l.Error("restart jupter code: %d, error: %s", res.Code, res.Msg) l.Error("restart jupyter code: %d, error: %s", res.Code, res.Msg)
return errors.New(res.Msg) return errors.New(res.Msg)
} }
+8 -15
View File
@@ -87,16 +87,12 @@ func Patch(instance *models.Instances, gpuCount, volumeSize int, cpuOnly bool) (
err = SetRootPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd) err = SetRootPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd)
if err != nil { if err != nil {
deleteInstance(server.IP, server.Port, server.Apikey, instance.ContainerName)
if instance.VolumeName != "" {
deleteVolume(server.IP, server.Port, server.Apikey, instance.VolumeName, false)
}
l.Error("set root password error: %v", err) l.Error("set root password error: %v", err)
return err return err
} }
go func() { go func() {
SetJupterPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd) SetJupyterPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd)
SetCodeServerPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd) SetCodeServerPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd)
}() }()
@@ -108,8 +104,9 @@ func Patch(instance *models.Instances, gpuCount, volumeSize int, cpuOnly bool) (
instance.SshAddress = server.IP + ":" + portBindings["22"] instance.SshAddress = server.IP + ":" + portBindings["22"]
instance.TensorBoardAddress = server.IP + ":" + portBindings["6007"] instance.TensorBoardAddress = server.IP + ":" + portBindings["6007"]
instance.JupyterAddress = server.IP + ":" + portBindings["8888"] instance.JupyterAddress = server.IP + ":" + portBindings["80"] + "/jupyter"
instance.GrafanaAddress = server.IP + ":" + portBindings["3000"] instance.GrafanaAddress = server.IP + ":" + portBindings["80"] + "/monitor/public-dashboards/2c510f203876465ba76617510ce3e219"
instance.CodeServerAddress = server.IP + ":" + portBindings["80"] + "/code-server/"
instance.CpuOnly = true instance.CpuOnly = true
instance.GpuCount = 0 instance.GpuCount = 0
@@ -135,16 +132,12 @@ func Patch(instance *models.Instances, gpuCount, volumeSize int, cpuOnly bool) (
err = SetRootPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd) err = SetRootPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd)
if err != nil { if err != nil {
deleteInstance(server.IP, server.Port, server.Apikey, instance.ContainerName)
if instance.VolumeName != "" {
deleteVolume(server.IP, server.Port, server.Apikey, instance.VolumeName, false)
}
l.Error("set root password error: %v", err) l.Error("set root password error: %v", err)
return err return err
} }
go func() { go func() {
SetJupterPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd) SetJupyterPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd)
SetCodeServerPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd) SetCodeServerPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd)
}() }()
@@ -156,9 +149,9 @@ func Patch(instance *models.Instances, gpuCount, volumeSize int, cpuOnly bool) (
instance.SshAddress = server.IP + ":" + portBindings["22"] instance.SshAddress = server.IP + ":" + portBindings["22"]
instance.TensorBoardAddress = server.IP + ":" + portBindings["6007"] instance.TensorBoardAddress = server.IP + ":" + portBindings["6007"]
instance.JupyterAddress = server.IP + ":" + portBindings["8888"] instance.JupyterAddress = server.IP + ":" + portBindings["80"] + "/jupyter"
instance.GrafanaAddress = server.IP + ":" + portBindings["3000"] instance.GrafanaAddress = server.IP + ":" + portBindings["80"] + "/monitor/public-dashboards/2c510f203876465ba76617510ce3e219"
instance.CodeServerAddress = server.IP + ":" + portBindings["8080"] instance.CodeServerAddress = server.IP + ":" + portBindings["80"] + "/code-server/"
instance.CpuOnly = false instance.CpuOnly = false
instance.GpuCount = gpuCount instance.GpuCount = gpuCount
+4 -8
View File
@@ -40,16 +40,12 @@ func Restart(instance *models.Instances) (err error) {
err = SetRootPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd) err = SetRootPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd)
if err != nil { if err != nil {
deleteInstance(server.IP, server.Port, server.Apikey, instance.ContainerName)
if instance.VolumeName != "" {
deleteVolume(server.IP, server.Port, server.Apikey, instance.VolumeName, false)
}
l.Error("set root password error: %v", err) l.Error("set root password error: %v", err)
return err return err
} }
go func() { go func() {
SetJupterPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd) SetJupyterPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd)
SetCodeServerPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd) SetCodeServerPassword(server.IP, server.Port, server.Apikey, instance.ContainerName, instance.SshPasswd)
}() }()
@@ -61,9 +57,9 @@ func Restart(instance *models.Instances) (err error) {
instance.SshAddress = server.IP + ":" + portBindings["22"] instance.SshAddress = server.IP + ":" + portBindings["22"]
instance.TensorBoardAddress = server.IP + ":" + portBindings["6007"] instance.TensorBoardAddress = server.IP + ":" + portBindings["6007"]
instance.JupyterAddress = server.IP + ":" + portBindings["8888"] instance.JupyterAddress = server.IP + ":" + portBindings["80"] + "/jupyter"
instance.GrafanaAddress = server.IP + ":" + portBindings["3000"] instance.GrafanaAddress = server.IP + ":" + portBindings["80"] + "/monitor/public-dashboards/2c510f203876465ba76617510ce3e219"
instance.CodeServerAddress = server.IP + ":" + portBindings["8080"] instance.CodeServerAddress = server.IP + ":" + portBindings["80"] + "/code-server/"
instance.Status = models.InstanceStatusRunning instance.Status = models.InstanceStatusRunning
result = database.DB.Save(&instance) result = database.DB.Save(&instance)
+12
View File
@@ -7,6 +7,8 @@ import (
"megrez/services/database" "megrez/services/database"
) )
// imagesKey is the Key of the models.System record that systemInit creates
// with an initial Value of "{}".
const imagesKey = "images"
func systemInit() (err error) { func systemInit() (err error) {
l.SetFunction("systemInit") l.SetFunction("systemInit")
@@ -35,6 +37,16 @@ func systemInit() (err error) {
return return
} }
st := models.System{
Key: imagesKey,
Value: "{}",
}
result = database.DB.Create(&st)
if result.Error != nil {
l.Error("Create system failed, Error: %v", result.Error)
return
}
l.Info("System init success") l.Info("System init success")
return return