mirror of
https://github.com/datawhalechina/llms-from-scratch-cn.git
synced 2026-05-03 13:02:35 +00:00
@@ -1,10 +1,10 @@
|
|||||||
# Appendix A: Introduction to PyTorch (Part 3)
|
# 附录A:PyTorch的介绍(第三部分)
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from torch.utils.data import Dataset, DataLoader
|
from torch.utils.data import Dataset, DataLoader
|
||||||
|
|
||||||
# NEW imports:
|
# 导入新的库
|
||||||
import os
|
import os
|
||||||
import torch.multiprocessing as mp
|
import torch.multiprocessing as mp
|
||||||
from torch.utils.data.distributed import DistributedSampler
|
from torch.utils.data.distributed import DistributedSampler
|
||||||
@@ -12,22 +12,23 @@ from torch.nn.parallel import DistributedDataParallel as DDP
|
|||||||
from torch.distributed import init_process_group, destroy_process_group
|
from torch.distributed import init_process_group, destroy_process_group
|
||||||
|
|
||||||
|
|
||||||
# NEW: function to initialize a distributed process group (1 process / GPU)
|
# 创建一个新的函数用于初始化一个分布式进程(每个GPU一个进程)
|
||||||
# this allows communication among processes
|
# 该函数允许进程之间的通信
|
||||||
def ddp_setup(rank, world_size):
|
def ddp_setup(rank, world_size):
|
||||||
"""
|
"""
|
||||||
Arguments:
|
参数:
|
||||||
rank: a unique process ID
|
rank:特定的进程编号(进程ID)
|
||||||
world_size: total number of processes in the group
|
world_size:组内的进程总数
|
||||||
"""
|
"""
|
||||||
# rank of machine running rank:0 process
|
|
||||||
# here, we assume all GPUs are on the same machine
|
# 运行 rank 0 进程的机器的地址
|
||||||
|
# 这里的前提是假设所有的GPU在同一台机器上
|
||||||
os.environ["MASTER_ADDR"] = "localhost"
|
os.environ["MASTER_ADDR"] = "localhost"
|
||||||
# any free port on the machine
|
# 机器上任意的空闲端口号
|
||||||
os.environ["MASTER_PORT"] = "12345"
|
os.environ["MASTER_PORT"] = "12345"
|
||||||
|
|
||||||
# initialize process group
|
# 初始化进程组
|
||||||
# Windows users may have to use "gloo" instead of "nccl" as backend
|
# Windows 用户可能需要使用 "gloo" 代替 "nccl" 作为后端
|
||||||
# nccl: NVIDIA Collective Communication Library
|
# nccl: NVIDIA Collective Communication Library
|
||||||
init_process_group(backend="nccl", rank=rank, world_size=world_size)
|
init_process_group(backend="nccl", rank=rank, world_size=world_size)
|
||||||
torch.cuda.set_device(rank)
|
torch.cuda.set_device(rank)
|
||||||
@@ -52,15 +53,15 @@ class NeuralNetwork(torch.nn.Module):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
self.layers = torch.nn.Sequential(
|
self.layers = torch.nn.Sequential(
|
||||||
# 1st hidden layer
|
# 第一个隐藏层
|
||||||
torch.nn.Linear(num_inputs, 30),
|
torch.nn.Linear(num_inputs, 30),
|
||||||
torch.nn.ReLU(),
|
torch.nn.ReLU(),
|
||||||
|
|
||||||
# 2nd hidden layer
|
# 第二个隐藏层
|
||||||
torch.nn.Linear(30, 20),
|
torch.nn.Linear(30, 20),
|
||||||
torch.nn.ReLU(),
|
torch.nn.ReLU(),
|
||||||
|
|
||||||
# output layer
|
# 输出层
|
||||||
torch.nn.Linear(20, num_outputs),
|
torch.nn.Linear(20, num_outputs),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -91,11 +92,11 @@ def prepare_dataset():
|
|||||||
train_loader = DataLoader(
|
train_loader = DataLoader(
|
||||||
dataset=train_ds,
|
dataset=train_ds,
|
||||||
batch_size=2,
|
batch_size=2,
|
||||||
shuffle=False, # NEW: False because of DistributedSampler below
|
shuffle=False, # 这里设置为False
|
||||||
pin_memory=True,
|
pin_memory=True,
|
||||||
drop_last=True,
|
drop_last=True,
|
||||||
# NEW: chunk batches across GPUs without overlapping samples:
|
# 在多个GPU上划分批次,确保批次之间不重叠样本
|
||||||
sampler=DistributedSampler(train_ds) # NEW
|
sampler=DistributedSampler(train_ds)
|
||||||
)
|
)
|
||||||
test_loader = DataLoader(
|
test_loader = DataLoader(
|
||||||
dataset=test_ds,
|
dataset=test_ds,
|
||||||
@@ -105,33 +106,33 @@ def prepare_dataset():
|
|||||||
return train_loader, test_loader
|
return train_loader, test_loader
|
||||||
|
|
||||||
|
|
||||||
# NEW: wrapper
|
# 包装器
|
||||||
def main(rank, world_size, num_epochs):
|
def main(rank, world_size, num_epochs):
|
||||||
|
|
||||||
ddp_setup(rank, world_size) # NEW: initialize process groups
|
ddp_setup(rank, world_size) # 初始化进程组
|
||||||
|
|
||||||
train_loader, test_loader = prepare_dataset()
|
train_loader, test_loader = prepare_dataset()
|
||||||
model = NeuralNetwork(num_inputs=2, num_outputs=2)
|
model = NeuralNetwork(num_inputs=2, num_outputs=2)
|
||||||
model.to(rank)
|
model.to(rank)
|
||||||
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)
|
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)
|
||||||
|
|
||||||
model = DDP(model, device_ids=[rank]) # NEW: wrap model with DDP
|
model = DDP(model, device_ids=[rank]) # 使用分布式数据并行(DDP)将模型进行包装
|
||||||
# the core model is now accessible as model.module
|
# 现在核心模型可以通过 model.module 访问
|
||||||
|
|
||||||
for epoch in range(num_epochs):
|
for epoch in range(num_epochs):
|
||||||
|
|
||||||
model.train()
|
model.train()
|
||||||
for features, labels in enumerate(train_loader):
|
for features, labels in enumerate(train_loader):
|
||||||
|
|
||||||
features, labels = features.to(rank), labels.to(rank) # New: use rank
|
features, labels = features.to(rank), labels.to(rank)
|
||||||
logits = model(features)
|
logits = model(features)
|
||||||
loss = F.cross_entropy(logits, labels) # Loss function
|
loss = F.cross_entropy(logits, labels) # 损失函数
|
||||||
|
|
||||||
optimizer.zero_grad()
|
optimizer.zero_grad()
|
||||||
loss.backward()
|
loss.backward()
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
|
|
||||||
### LOGGING
|
### 日志
|
||||||
print(f"[GPU{rank}] Epoch: {epoch+1:03d}/{num_epochs:03d}"
|
print(f"[GPU{rank}] Epoch: {epoch+1:03d}/{num_epochs:03d}"
|
||||||
f" | Batchsize {labels.shape[0]:03d}"
|
f" | Batchsize {labels.shape[0]:03d}"
|
||||||
f" | Train/Val Loss: {loss:.2f}")
|
f" | Train/Val Loss: {loss:.2f}")
|
||||||
@@ -142,7 +143,7 @@ def main(rank, world_size, num_epochs):
|
|||||||
test_acc = compute_accuracy(model, test_loader, device=rank)
|
test_acc = compute_accuracy(model, test_loader, device=rank)
|
||||||
print(f"[GPU{rank}] Test accuracy", test_acc)
|
print(f"[GPU{rank}] Test accuracy", test_acc)
|
||||||
|
|
||||||
destroy_process_group() # NEW: cleanly exit distributed mode
|
destroy_process_group() # 清理退出分布式模式
|
||||||
|
|
||||||
|
|
||||||
def compute_accuracy(model, dataloader, device):
|
def compute_accuracy(model, dataloader, device):
|
||||||
@@ -169,10 +170,10 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
torch.manual_seed(123)
|
torch.manual_seed(123)
|
||||||
|
|
||||||
# NEW: spawn new processes
|
# 新建进程
|
||||||
# note that spawn will automatically pass the rank
|
# 请注意,spawn 会自动传入 rank(进程编号)作为第一个参数
|
||||||
num_epochs = 3
|
num_epochs = 3
|
||||||
world_size = torch.cuda.device_count()
|
world_size = torch.cuda.device_count()
|
||||||
mp.spawn(main, args=(world_size, num_epochs), nprocs=world_size)
|
mp.spawn(main, args=(world_size, num_epochs), nprocs=world_size)
|
||||||
# nprocs=world_size spawns one process per GPU
|
# nprocs=world_size 会为每个GPU生成一个进程
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
"id": "ca7fc8a0-280c-4979-b0c7-fc3a99b3b785",
|
"id": "ca7fc8a0-280c-4979-b0c7-fc3a99b3b785",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"# Appendix A: Introduction to PyTorch (Part 1)"
|
"# 附件A:PyTorch的介绍(第一部分)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -13,12 +13,12 @@
|
|||||||
"id": "f5bf13d2-8fc2-483e-88cc-6b4310221e68",
|
"id": "f5bf13d2-8fc2-483e-88cc-6b4310221e68",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## A.1 What is PyTorch"
|
"## A.1 什么是PyTorch"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": null,
|
||||||
"id": "96ee5660-5327-48e2-9104-a882b3b2afa4",
|
"id": "96ee5660-5327-48e2-9104-a882b3b2afa4",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -32,13 +32,13 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import torch\n",
|
"import torch\n",
|
||||||
"\n",
|
"# 显示PyTorch的版本\n",
|
||||||
"print(torch.__version__)"
|
"print(torch.__version__)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": null,
|
||||||
"id": "f73ad4e4-7ec6-4467-a9e9-0cdf6d195264",
|
"id": "f73ad4e4-7ec6-4467-a9e9-0cdf6d195264",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@@ -51,6 +51,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 显示PyTorch是否是GPU版本,False表示CPU版本,True表示GPU版本\n",
|
||||||
"print(torch.cuda.is_available())"
|
"print(torch.cuda.is_available())"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -59,7 +60,7 @@
|
|||||||
"id": "2100cf2e-7459-4ab3-92a8-43e86ab35a9b",
|
"id": "2100cf2e-7459-4ab3-92a8-43e86ab35a9b",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## A.2 Understanding tensors"
|
"## A.2 向量"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -67,7 +68,7 @@
|
|||||||
"id": "26d7f785-e048-42bc-9182-a556af6bb7f4",
|
"id": "26d7f785-e048-42bc-9182-a556af6bb7f4",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### A.2.1 Scalars, vectors, matrices, and tensors"
|
"### A.2.1 标量、向量、矩阵和张量\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -80,22 +81,22 @@
|
|||||||
"import torch\n",
|
"import torch\n",
|
||||||
"import numpy as np\n",
|
"import numpy as np\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# create a 0D tensor (scalar) from a Python integer\n",
|
"# 用Python整数创建一个0维张量\n",
|
||||||
"tensor0d = torch.tensor(1)\n",
|
"tensor0d = torch.tensor(1)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# create a 1D tensor (vector) from a Python list\n",
|
"# 用Python列表创建一个1维张量(向量)\n",
|
||||||
"tensor1d = torch.tensor([1, 2, 3])\n",
|
"tensor1d = torch.tensor([1, 2, 3])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# create a 2D tensor from a nested Python list\n",
|
"# 用Python列表创建一个2维张量(向量)\n",
|
||||||
"tensor2d = torch.tensor([[1, 2], [3, 4]])\n",
|
"tensor2d = torch.tensor([[1, 2], [3, 4]])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# create a 3D tensor from a nested Python list\n",
|
"# 用嵌套的Python列表创建一个3维张量\n",
|
||||||
"tensor3d_1 = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n",
|
"tensor3d_1 = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# create a 3D tensor from NumPy array\n",
|
"# 从NumPy数组创建一个3维张量\n",
|
||||||
"ary3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n",
|
"ary3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n",
|
||||||
"tensor3d_2 = torch.tensor(ary3d) # Copies NumPy array\n",
|
"tensor3d_2 = torch.tensor(ary3d) # 复制NumPy数组\n",
|
||||||
"tensor3d_3 = torch.from_numpy(ary3d) # Shares memory with NumPy array"
|
"tensor3d_3 = torch.from_numpy(ary3d) # 与NumPy数组共享内存"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -118,7 +119,7 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"ary3d[0, 0, 0] = 999\n",
|
"ary3d[0, 0, 0] = 999\n",
|
||||||
"print(tensor3d_2) # remains unchanged"
|
"print(tensor3d_2) # 保持不变"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -140,7 +141,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"print(tensor3d_3) # changes because of memory sharing"
|
"print(tensor3d_3) # 由于内存共享需要改变"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -148,7 +149,7 @@
|
|||||||
"id": "63dec48d-2b60-41a2-ac06-fef7e718605a",
|
"id": "63dec48d-2b60-41a2-ac06-fef7e718605a",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### A.2.2 Tensor data types"
|
"### A.2.2 向量的数据类型"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -213,7 +214,7 @@
|
|||||||
"id": "2020deb5-aa02-4524-b311-c010f4ad27ff",
|
"id": "2020deb5-aa02-4524-b311-c010f4ad27ff",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### A.2.3 Common PyTorch tensor operations"
|
"### A.2.3 PyTorch中常见的张量操作"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -257,7 +258,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"tensor2d.shape"
|
"tensor2d.shape # 张量形状"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -280,7 +281,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"tensor2d.reshape(3, 2)"
|
"tensor2d.reshape(3, 2) # 修改形状"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -303,7 +304,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"tensor2d.view(3, 2)"
|
"tensor2d.view(3, 2) # 查看张量"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -326,7 +327,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"tensor2d.T"
|
"tensor2d.T # 转置张量"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -348,7 +349,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"tensor2d.matmul(tensor2d.T)"
|
"tensor2d.matmul(tensor2d.T) # 张量乘法:tensor2d与其转置相乘"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -370,7 +371,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"tensor2d @ tensor2d.T"
|
"tensor2d @ tensor2d.T # 张量乘法的另一种实现方式:tensor2d与其转置相乘"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -378,7 +379,7 @@
|
|||||||
"id": "4c15bdeb-78e2-4870-8a4f-a9f591666f38",
|
"id": "4c15bdeb-78e2-4870-8a4f-a9f591666f38",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## A.3 Seeing models as computation graphs"
|
"## A.3 把模型作为计算图"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -398,13 +399,13 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"import torch.nn.functional as F\n",
|
"import torch.nn.functional as F\n",
|
||||||
"\n",
|
"\n",
|
||||||
"y = torch.tensor([1.0]) # true label\n",
|
"y = torch.tensor([1.0]) # 真实样本\n",
|
||||||
"x1 = torch.tensor([1.1]) # input feature\n",
|
"x1 = torch.tensor([1.1]) # 输入特征\n",
|
||||||
"w1 = torch.tensor([2.2]) # weight parameter\n",
|
"w1 = torch.tensor([2.2]) # 权重变量\n",
|
||||||
"b = torch.tensor([0.0]) # bias unit\n",
|
"b = torch.tensor([0.0]) # 偏置单元\n",
|
||||||
"\n",
|
"\n",
|
||||||
"z = x1 * w1 + b # net input\n",
|
"z = x1 * w1 + b # 网络输入\n",
|
||||||
"a = torch.sigmoid(z) # activation & output\n",
|
"a = torch.sigmoid(z) # 激活函数 & 输出\n",
|
||||||
"\n",
|
"\n",
|
||||||
"loss = F.binary_cross_entropy(a, y)\n",
|
"loss = F.binary_cross_entropy(a, y)\n",
|
||||||
"print(loss)"
|
"print(loss)"
|
||||||
@@ -415,7 +416,7 @@
|
|||||||
"id": "f9424f26-2bac-47e7-b834-92ece802247c",
|
"id": "f9424f26-2bac-47e7-b834-92ece802247c",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## A.4 Automatic differentiation made easy"
|
"## A.4 自动求导"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -470,7 +471,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"loss.backward()\n",
|
"loss.backward()# 反向传播\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print(w1.grad)\n",
|
"print(w1.grad)\n",
|
||||||
"print(b.grad)"
|
"print(b.grad)"
|
||||||
@@ -481,7 +482,7 @@
|
|||||||
"id": "f53bdd7d-44e6-40ab-8a5a-4eef74ef35dc",
|
"id": "f53bdd7d-44e6-40ab-8a5a-4eef74ef35dc",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## A.5 Implementing multilayer neural networks"
|
"## A.5 多层神经网络的实现"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -497,15 +498,15 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" self.layers = torch.nn.Sequential(\n",
|
" self.layers = torch.nn.Sequential(\n",
|
||||||
" \n",
|
" \n",
|
||||||
" # 1st hidden layer\n",
|
" # 第一个隐藏层\n",
|
||||||
" torch.nn.Linear(num_inputs, 30),\n",
|
" torch.nn.Linear(num_inputs, 30),\n",
|
||||||
" torch.nn.ReLU(),\n",
|
" torch.nn.ReLU(),\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # 2nd hidden layer\n",
|
" # 第二个隐藏层\n",
|
||||||
" torch.nn.Linear(30, 20),\n",
|
" torch.nn.Linear(30, 20),\n",
|
||||||
" torch.nn.ReLU(),\n",
|
" torch.nn.ReLU(),\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # output layer\n",
|
" # 输出层\n",
|
||||||
" torch.nn.Linear(20, num_outputs),\n",
|
" torch.nn.Linear(20, num_outputs),\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -566,7 +567,7 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
|
"num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
|
||||||
"print(\"Total number of trainable model parameters:\", num_params)"
|
"print(\"Total number of trainable model parameters:\", num_params)# 打印训练模型的参数"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -592,7 +593,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"print(model.layers[0].weight)"
|
"print(model.layers[0].weight) # 打印神经网络模型的第一层的权重"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -618,9 +619,13 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 设置随机数种子,以确保可复现性\n",
|
||||||
"torch.manual_seed(123)\n",
|
"torch.manual_seed(123)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# 假设 NeuralNetwork 是一个神经网络类,且其构造函数接受两个参数,分别为输入特征的维度和输出特征的维度\n",
|
||||||
"model = NeuralNetwork(50, 3)\n",
|
"model = NeuralNetwork(50, 3)\n",
|
||||||
|
"\n",
|
||||||
|
"# 打印神经网络模型的第一层的权重\n",
|
||||||
"print(model.layers[0].weight)"
|
"print(model.layers[0].weight)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -639,6 +644,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 打印神经网络模型的第一层权重的形状\n",
|
||||||
"print(model.layers[0].weight.shape)"
|
"print(model.layers[0].weight.shape)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -657,10 +663,16 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 设置随机数种子,以确保可复现性\n",
|
||||||
"torch.manual_seed(123)\n",
|
"torch.manual_seed(123)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# 模型输入特征的维度为 50\n",
|
||||||
"X = torch.rand((1, 50))\n",
|
"X = torch.rand((1, 50))\n",
|
||||||
|
"\n",
|
||||||
|
"# 使用模型进行前向传播计算输出\n",
|
||||||
"out = model(X)\n",
|
"out = model(X)\n",
|
||||||
|
"\n",
|
||||||
|
"# 打印输出结果\n",
|
||||||
"print(out)"
|
"print(out)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -679,6 +691,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 使用 torch.no_grad() 上下文管理器,以便在推断时不计算梯度\n",
|
||||||
"with torch.no_grad():\n",
|
"with torch.no_grad():\n",
|
||||||
" out = model(X)\n",
|
" out = model(X)\n",
|
||||||
"print(out)"
|
"print(out)"
|
||||||
@@ -709,7 +722,7 @@
|
|||||||
"id": "19858180-0f26-43a8-b2c3-7ed40abf9f85",
|
"id": "19858180-0f26-43a8-b2c3-7ed40abf9f85",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## A.6 Setting up efficient data loaders"
|
"## A.6 建立高效的数据加载器"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -719,6 +732,7 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 定义输入特征张量 X_train\n",
|
||||||
"X_train = torch.tensor([\n",
|
"X_train = torch.tensor([\n",
|
||||||
" [-1.2, 3.1],\n",
|
" [-1.2, 3.1],\n",
|
||||||
" [-0.9, 2.9],\n",
|
" [-0.9, 2.9],\n",
|
||||||
@@ -727,6 +741,7 @@
|
|||||||
" [2.7, -1.5]\n",
|
" [2.7, -1.5]\n",
|
||||||
"])\n",
|
"])\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# 定义对应的标签张量 y_train\n",
|
||||||
"y_train = torch.tensor([0, 0, 0, 1, 1])"
|
"y_train = torch.tensor([0, 0, 0, 1, 1])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -756,18 +771,19 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"class ToyDataset(Dataset):\n",
|
"class ToyDataset(Dataset):\n",
|
||||||
|
" # 初始化 ToyDataset 类\n",
|
||||||
" def __init__(self, X, y):\n",
|
" def __init__(self, X, y):\n",
|
||||||
" self.features = X\n",
|
" self.features = X\n",
|
||||||
" self.labels = y\n",
|
" self.labels = y\n",
|
||||||
"\n",
|
" # 获取指定索引的数据\n",
|
||||||
" def __getitem__(self, index):\n",
|
" def __getitem__(self, index):\n",
|
||||||
" one_x = self.features[index]\n",
|
" one_x = self.features[index]\n",
|
||||||
" one_y = self.labels[index] \n",
|
" one_y = self.labels[index] \n",
|
||||||
" return one_x, one_y\n",
|
" return one_x, one_y\n",
|
||||||
"\n",
|
" # 获取数据集的长度\n",
|
||||||
" def __len__(self):\n",
|
" def __len__(self):\n",
|
||||||
" return self.labels.shape[0]\n",
|
" return self.labels.shape[0]\n",
|
||||||
"\n",
|
"# 创建训练数据集和测试数据集实例\n",
|
||||||
"train_ds = ToyDataset(X_train, y_train)\n",
|
"train_ds = ToyDataset(X_train, y_train)\n",
|
||||||
"test_ds = ToyDataset(X_test, y_test)"
|
"test_ds = ToyDataset(X_test, y_test)"
|
||||||
]
|
]
|
||||||
@@ -804,6 +820,11 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"torch.manual_seed(123)\n",
|
"torch.manual_seed(123)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# 创建训练数据加载器 train_loader\n",
|
||||||
|
"# dataset 参数传入了您定义的 ToyDataset 类的实例 train_ds\n",
|
||||||
|
"# batch_size 参数指定了每个批次包含的样本数量\n",
|
||||||
|
"# shuffle 参数指定是否在每个 epoch 之前对数据进行洗牌\n",
|
||||||
|
"# num_workers 参数指定用于数据加载的子进程数量\n",
|
||||||
"train_loader = DataLoader(\n",
|
"train_loader = DataLoader(\n",
|
||||||
" dataset=train_ds,\n",
|
" dataset=train_ds,\n",
|
||||||
" batch_size=2,\n",
|
" batch_size=2,\n",
|
||||||
@@ -821,6 +842,11 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"test_ds = ToyDataset(X_test, y_test)\n",
|
"test_ds = ToyDataset(X_test, y_test)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# 创建测试数据加载器 test_loader\n",
|
||||||
|
"# dataset 参数传入了您定义的 ToyDataset 类的实例 test_ds\n",
|
||||||
|
"# batch_size 参数指定了每个批次包含的样本数量\n",
|
||||||
|
"# shuffle 参数指定是否在每个 epoch 之前对数据进行洗牌,这里设为 False 表示不洗牌\n",
|
||||||
|
"# num_workers 参数指定用于数据加载的子进程数量\n",
|
||||||
"test_loader = DataLoader(\n",
|
"test_loader = DataLoader(\n",
|
||||||
" dataset=test_ds,\n",
|
" dataset=test_ds,\n",
|
||||||
" batch_size=2,\n",
|
" batch_size=2,\n",
|
||||||
@@ -848,7 +874,9 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 迭代训练数据加载器 train_loader\n",
|
||||||
"for idx, (x, y) in enumerate(train_loader):\n",
|
"for idx, (x, y) in enumerate(train_loader):\n",
|
||||||
|
" # 打印每个批次的索引、输入特征和对应的标签\n",
|
||||||
" print(f\"Batch {idx+1}:\", x, y)"
|
" print(f\"Batch {idx+1}:\", x, y)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -884,7 +912,7 @@
|
|||||||
"id": "d904ca82-e50f-4f3d-a3ac-fc6ca53dd00e",
|
"id": "d904ca82-e50f-4f3d-a3ac-fc6ca53dd00e",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## A.7 A typical training loop"
|
"## A.7 一个示例训练轮次"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -923,19 +951,19 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" logits = model(features)\n",
|
" logits = model(features)\n",
|
||||||
" \n",
|
" \n",
|
||||||
" loss = F.cross_entropy(logits, labels) # Loss function\n",
|
" loss = F.cross_entropy(logits, labels) # 损失函数\n",
|
||||||
" \n",
|
" \n",
|
||||||
" optimizer.zero_grad()\n",
|
" optimizer.zero_grad()\n",
|
||||||
" loss.backward()\n",
|
" loss.backward()\n",
|
||||||
" optimizer.step()\n",
|
" optimizer.step()\n",
|
||||||
" \n",
|
" \n",
|
||||||
" ### LOGGING\n",
|
" ### 日志\n",
|
||||||
" print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",
|
" print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",
|
||||||
" f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",
|
" f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",
|
||||||
" f\" | Train/Val Loss: {loss:.2f}\")\n",
|
" f\" | Train/Val Loss: {loss:.2f}\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
" model.eval()\n",
|
" model.eval()\n",
|
||||||
" # Optional model evaluation"
|
" # 可选的模型评估指标"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -985,10 +1013,16 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 设置 PyTorch 的打印选项,以关闭科学计数法\n",
|
||||||
"torch.set_printoptions(sci_mode=False)\n",
|
"torch.set_printoptions(sci_mode=False)\n",
|
||||||
|
"\n",
|
||||||
|
"# 假设 outputs 是模型的输出张量\n",
|
||||||
|
"\n",
|
||||||
|
"# 对模型的输出进行 softmax 操作,计算类别概率\n",
|
||||||
"probas = torch.softmax(outputs, dim=1)\n",
|
"probas = torch.softmax(outputs, dim=1)\n",
|
||||||
"print(probas)\n",
|
"print(probas)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# 获取模型的预测结果,即具有最大概率的类别\n",
|
||||||
"predictions = torch.argmax(outputs, dim=1)\n",
|
"predictions = torch.argmax(outputs, dim=1)\n",
|
||||||
"print(predictions)"
|
"print(predictions)"
|
||||||
]
|
]
|
||||||
@@ -1008,6 +1042,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 使用 torch.argmax() 函数沿着 dim=1 维度获取每个样本最大值的索引,即模型的预测结果\n",
|
||||||
"predictions = torch.argmax(outputs, dim=1)\n",
|
"predictions = torch.argmax(outputs, dim=1)\n",
|
||||||
"print(predictions)"
|
"print(predictions)"
|
||||||
]
|
]
|
||||||
@@ -1062,21 +1097,36 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def compute_accuracy(model, dataloader):\n",
|
"def compute_accuracy(model, dataloader):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" 计算模型在给定数据加载器上的准确率。\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" 参数:\n",
|
||||||
|
" model (torch.nn.Module): 待评估的模型。\n",
|
||||||
|
" dataloader (torch.utils.data.DataLoader): 包含输入数据的数据加载器。\n",
|
||||||
|
"\n",
|
||||||
|
" 返回:\n",
|
||||||
|
" float: 准确率值。\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" # 将模型设为评估模式\n",
|
||||||
" model = model.eval()\n",
|
" model = model.eval()\n",
|
||||||
" correct = 0.0\n",
|
" correct = 0.0\n",
|
||||||
" total_examples = 0\n",
|
" total_examples = 0\n",
|
||||||
" \n",
|
" \n",
|
||||||
|
" # 遍历数据加载器\n",
|
||||||
" for idx, (features, labels) in enumerate(dataloader):\n",
|
" for idx, (features, labels) in enumerate(dataloader):\n",
|
||||||
" \n",
|
" \n",
|
||||||
|
" # 使用 no_grad 上下文,以便不跟踪梯度\n",
|
||||||
" with torch.no_grad():\n",
|
" with torch.no_grad():\n",
|
||||||
|
" # 使用模型进行前向传播获取预测结果\n",
|
||||||
" logits = model(features)\n",
|
" logits = model(features)\n",
|
||||||
" \n",
|
" \n",
|
||||||
|
" # 获取预测结果并计算正确预测的数量\n",
|
||||||
" predictions = torch.argmax(logits, dim=1)\n",
|
" predictions = torch.argmax(logits, dim=1)\n",
|
||||||
" compare = labels == predictions\n",
|
" compare = labels == predictions\n",
|
||||||
" correct += torch.sum(compare)\n",
|
" correct += torch.sum(compare)\n",
|
||||||
" total_examples += len(compare)\n",
|
" total_examples += len(compare)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" # 计算并返回准确率\n",
|
||||||
" return (correct / total_examples).item()"
|
" return (correct / total_examples).item()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -1127,7 +1177,7 @@
|
|||||||
"id": "4d5cd469-3a45-4394-944b-3ce543f41dac",
|
"id": "4d5cd469-3a45-4394-944b-3ce543f41dac",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## A.8 Saving and loading models"
|
"## A.8 保存并加载模型"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1158,7 +1208,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"model = NeuralNetwork(2, 2) # needs to match the original model exactly\n",
|
"model = NeuralNetwork(2, 2) # 需要与原始模型完全匹配\n",
|
||||||
"model.load_state_dict(torch.load(\"model.pth\"))"
|
"model.load_state_dict(torch.load(\"model.pth\"))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -1167,7 +1217,7 @@
|
|||||||
"id": "f891c013-43da-4a05-973d-997be313d2d8",
|
"id": "f891c013-43da-4a05-973d-997be313d2d8",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## A.9 Optimizing training performance with GPUs"
|
"## A.9 使用GPU来优化训练性能"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1175,7 +1225,7 @@
|
|||||||
"id": "e68ae888-cabf-49c9-bad6-ecdce774db57",
|
"id": "e68ae888-cabf-49c9-bad6-ecdce774db57",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### A.9.1 PyTorch computations on GPU devices"
|
"### A.9.1 在GPU上进行 PyTorch 的运算"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1183,7 +1233,7 @@
|
|||||||
"id": "141c845f-efe3-4614-b376-b8b7a9a2c887",
|
"id": "141c845f-efe3-4614-b376-b8b7a9a2c887",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"See [code-part2.ipynb](code-part2.ipynb)"
|
"See [code-part2.ipynb](code-part2.ipynb) "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1191,7 +1241,7 @@
|
|||||||
"id": "99811829-b817-42ea-b03e-d35374debcc0",
|
"id": "99811829-b817-42ea-b03e-d35374debcc0",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### A.9.2 Single-GPU training"
|
"### A.9.2 单个GPU的训练"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -1207,7 +1257,7 @@
|
|||||||
"id": "db6eb2d1-a341-4489-b04b-635c26945333",
|
"id": "db6eb2d1-a341-4489-b04b-635c26945333",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### A.9.3 Training with multiple GPUs"
|
"### A.9.3 多GPU的训练"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
"id": "O9i6kzBsZVaZ"
|
"id": "O9i6kzBsZVaZ"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"# Appendix A: Introduction to PyTorch (Part 2)"
|
"# 附件A:PyTorch的介绍(第二部分)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -15,7 +15,7 @@
|
|||||||
"id": "ppbG5d-NZezH"
|
"id": "ppbG5d-NZezH"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## A.9 Optimizing training performance with GPUs"
|
"## A.9 使用GPU优化训练性能"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -24,7 +24,7 @@
|
|||||||
"id": "6jH0J_DPZhbn"
|
"id": "6jH0J_DPZhbn"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"### A.9.1 PyTorch computations on GPU devices"
|
"### A.9.1 在GPU上进行 PyTorch 计算"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -48,13 +48,13 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import torch\n",
|
"import torch\n",
|
||||||
"\n",
|
"# 显示PyTorch的版本\n",
|
||||||
"print(torch.__version__)"
|
"print(torch.__version__)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": null,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"colab": {
|
"colab": {
|
||||||
"base_uri": "https://localhost:8080/"
|
"base_uri": "https://localhost:8080/"
|
||||||
@@ -72,6 +72,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 显示PyTorch是否支持GPU\n",
|
||||||
"print(torch.cuda.is_available())"
|
"print(torch.cuda.is_available())"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -124,6 +125,7 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 将两个张量移动到CUDA设备上\n",
|
||||||
"tensor_1 = tensor_1.to(\"cuda\")\n",
|
"tensor_1 = tensor_1.to(\"cuda\")\n",
|
||||||
"tensor_2 = tensor_2.to(\"cuda\")\n",
|
"tensor_2 = tensor_2.to(\"cuda\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -165,7 +167,7 @@
|
|||||||
"id": "c8j1cWDcWAMf"
|
"id": "c8j1cWDcWAMf"
|
||||||
},
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## A.9.2 Single-GPU training"
|
"## A.9.2 单GPU训练"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -264,15 +266,15 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" self.layers = torch.nn.Sequential(\n",
|
" self.layers = torch.nn.Sequential(\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # 1st hidden layer\n",
|
" # 第一个隐藏层\n",
|
||||||
" torch.nn.Linear(num_inputs, 30),\n",
|
" torch.nn.Linear(num_inputs, 30),\n",
|
||||||
" torch.nn.ReLU(),\n",
|
" torch.nn.ReLU(),\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # 2nd hidden layer\n",
|
" # 第二个隐藏层\n",
|
||||||
" torch.nn.Linear(30, 20),\n",
|
" torch.nn.Linear(30, 20),\n",
|
||||||
" torch.nn.ReLU(),\n",
|
" torch.nn.ReLU(),\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # output layer\n",
|
" # 输出层\n",
|
||||||
" torch.nn.Linear(20, num_outputs),\n",
|
" torch.nn.Linear(20, num_outputs),\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -309,14 +311,22 @@
|
|||||||
"import torch.nn.functional as F\n",
|
"import torch.nn.functional as F\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# 设置随机数种子,以确保可复现性\n",
|
||||||
"torch.manual_seed(123)\n",
|
"torch.manual_seed(123)\n",
|
||||||
|
"\n",
|
||||||
|
"# 创建神经网络模型\n",
|
||||||
"model = NeuralNetwork(num_inputs=2, num_outputs=2)\n",
|
"model = NeuralNetwork(num_inputs=2, num_outputs=2)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # NEW\n",
|
"# 根据设备可用情况选择设备\n",
|
||||||
"model = model.to(device) # NEW\n",
|
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# 将模型移动到所选设备上\n",
|
||||||
|
"model = model.to(device)\n",
|
||||||
|
"\n",
|
||||||
|
"# 定义优化器,使用随机梯度下降 (SGD)\n",
|
||||||
"optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n",
|
"optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# 定义训练循环的 epoch 数量\n",
|
||||||
"num_epochs = 3\n",
|
"num_epochs = 3\n",
|
||||||
"\n",
|
"\n",
|
||||||
"for epoch in range(num_epochs):\n",
|
"for epoch in range(num_epochs):\n",
|
||||||
@@ -324,21 +334,21 @@
|
|||||||
" model.train()\n",
|
" model.train()\n",
|
||||||
" for batch_idx, (features, labels) in enumerate(train_loader):\n",
|
" for batch_idx, (features, labels) in enumerate(train_loader):\n",
|
||||||
"\n",
|
"\n",
|
||||||
" features, labels = features.to(device), labels.to(device) # NEW\n",
|
" features, labels = features.to(device), labels.to(device) \n",
|
||||||
" logits = model(features)\n",
|
" logits = model(features)\n",
|
||||||
" loss = F.cross_entropy(logits, labels) # Loss function\n",
|
" loss = F.cross_entropy(logits, labels) # 损失函数\n",
|
||||||
"\n",
|
"\n",
|
||||||
" optimizer.zero_grad()\n",
|
" optimizer.zero_grad()\n",
|
||||||
" loss.backward()\n",
|
" loss.backward()\n",
|
||||||
" optimizer.step()\n",
|
" optimizer.step()\n",
|
||||||
"\n",
|
"\n",
|
||||||
" ### LOGGING\n",
|
" ### 训练日志\n",
|
||||||
" print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",
|
" print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",
|
||||||
" f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",
|
" f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",
|
||||||
" f\" | Train/Val Loss: {loss:.2f}\")\n",
|
" f\" | Train/Val Loss: {loss:.2f}\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
" model.eval()\n",
|
" model.eval()\n",
|
||||||
" # Optional model evaluation"
|
"    # 可选的模型评估"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -349,6 +359,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 使用accuracy(准确率)作为指标\n",
|
||||||
"def compute_accuracy(model, dataloader, device):\n",
|
"def compute_accuracy(model, dataloader, device):\n",
|
||||||
"\n",
|
"\n",
|
||||||
" model = model.eval()\n",
|
" model = model.eval()\n",
|
||||||
@@ -356,17 +367,17 @@
|
|||||||
" total_examples = 0\n",
|
" total_examples = 0\n",
|
||||||
"\n",
|
"\n",
|
||||||
" for idx, (features, labels) in enumerate(dataloader):\n",
|
" for idx, (features, labels) in enumerate(dataloader):\n",
|
||||||
"\n",
|
" # 将数据移动到指定的设备上\n",
|
||||||
" features, labels = features.to(device), labels.to(device) # New\n",
|
" features, labels = features.to(device), labels.to(device) # New\n",
|
||||||
"\n",
|
"\n",
|
||||||
" with torch.no_grad():\n",
|
" with torch.no_grad():\n",
|
||||||
" logits = model(features)\n",
|
" logits = model(features)\n",
|
||||||
"\n",
|
" # 获取预测结果并计算准确数量\n",
|
||||||
" predictions = torch.argmax(logits, dim=1)\n",
|
" predictions = torch.argmax(logits, dim=1)\n",
|
||||||
" compare = labels == predictions\n",
|
" compare = labels == predictions\n",
|
||||||
" correct += torch.sum(compare)\n",
|
" correct += torch.sum(compare)\n",
|
||||||
" total_examples += len(compare)\n",
|
" total_examples += len(compare)\n",
|
||||||
"\n",
|
" # 计算并返回准确率\n",
|
||||||
" return (correct / total_examples).item()"
|
" return (correct / total_examples).item()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Exercise A.3"
|
"## 练习 A.3"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -21,15 +21,15 @@
|
|||||||
"\n",
|
"\n",
|
||||||
" self.layers = torch.nn.Sequential(\n",
|
" self.layers = torch.nn.Sequential(\n",
|
||||||
" \n",
|
" \n",
|
||||||
" # 1st hidden layer\n",
|
" # 第一个隐藏层\n",
|
||||||
" torch.nn.Linear(num_inputs, 30),\n",
|
" torch.nn.Linear(num_inputs, 30),\n",
|
||||||
" torch.nn.ReLU(),\n",
|
" torch.nn.ReLU(),\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # 2nd hidden layer\n",
|
" # 第二个隐藏层\n",
|
||||||
" torch.nn.Linear(30, 20),\n",
|
" torch.nn.Linear(30, 20),\n",
|
||||||
" torch.nn.ReLU(),\n",
|
" torch.nn.ReLU(),\n",
|
||||||
"\n",
|
"\n",
|
||||||
" # output layer\n",
|
" # 输出层\n",
|
||||||
" torch.nn.Linear(20, num_outputs),\n",
|
" torch.nn.Linear(20, num_outputs),\n",
|
||||||
" )\n",
|
" )\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -62,7 +62,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Exercise A.4"
|
"## 练习 A.4"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -74,7 +74,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import torch\n",
|
"import torch\n",
|
||||||
"\n",
|
"# 创建两个随机矩阵\n",
|
||||||
"a = torch.rand(100, 200)\n",
|
"a = torch.rand(100, 200)\n",
|
||||||
"b = torch.rand(200, 300)"
|
"b = torch.rand(200, 300)"
|
||||||
]
|
]
|
||||||
@@ -99,6 +99,9 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 使用 @ 符号进行矩阵相乘,并计算执行时间\n",
|
||||||
|
"# %timeit 是 IPython 提供的魔术命令,用于多次执行代码以获取平均执行时间\n",
|
||||||
|
"# 它会自动选择执行次数以确保结果的准确性\n",
|
||||||
"%timeit a @ b"
|
"%timeit a @ b"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -110,6 +113,7 @@
|
|||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"# 将 a 和 b 移动到 CUDA 设备上以利用 GPU 加速计算\n",
|
||||||
"a, b = a.to(\"cuda\"), b.to(\"cuda\")"
|
"a, b = a.to(\"cuda\"), b.to(\"cuda\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -135,15 +139,6 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"%timeit a @ b"
|
"%timeit a @ b"
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"id": "Zqqa-To2L749"
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
@@ -168,7 +163,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.6"
|
"version": "3.11.5"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
Reference in New Issue
Block a user