diff --git a/appendix-A/03_main-chapter-code/DDP-script.py b/appendix-A/03_main-chapter-code/DDP-script.py index 89323c0..fdcf4df 100644 --- a/appendix-A/03_main-chapter-code/DDP-script.py +++ b/appendix-A/03_main-chapter-code/DDP-script.py @@ -1,10 +1,10 @@ -# Appendix A: Introduction to PyTorch (Part 3) +# 附录A:PyTorch的介绍(第三部分) import torch import torch.nn.functional as F from torch.utils.data import Dataset, DataLoader -# NEW imports: +# 导入新的库 import os import torch.multiprocessing as mp from torch.utils.data.distributed import DistributedSampler @@ -12,22 +12,23 @@ from torch.nn.parallel import DistributedDataParallel as DDP from torch.distributed import init_process_group, destroy_process_group -# NEW: function to initialize a distributed process group (1 process / GPU) -# this allows communication among processes +# 新增:用于初始化分布式进程组的函数(每个GPU一个进程) +# 该函数允许进程之间的通信 def ddp_setup(rank, world_size): """ - Arguments: - rank: a unique process ID - world_size: total number of processes in the group + 参数: + rank:唯一的进程编号(进程ID) + world_size:组内的进程总数 """ - # rank of machine running rank:0 process - # here, we assume all GPUs are on the same machine + + # 运行 rank 0 进程的机器地址 + # 这里的前提是假设所有的GPU在同一台机器上 os.environ["MASTER_ADDR"] = "localhost" - # any free port on the machine + # 机器上任意的空闲端口号 os.environ["MASTER_PORT"] = "12345" - # initialize process group - # Windows users may have to use "gloo" instead of "nccl" as backend + # 初始化进程组 + # Windows 用户可能需要使用"gloo"代替"nccl"作为后端 # nccl: NVIDIA Collective Communication Library init_process_group(backend="nccl", rank=rank, world_size=world_size) torch.cuda.set_device(rank) @@ -52,15 +53,15 @@ class NeuralNetwork(torch.nn.Module): super().__init__() self.layers = torch.nn.Sequential( - # 1st hidden layer + # 第一个隐藏层 torch.nn.Linear(num_inputs, 30), torch.nn.ReLU(), - # 2nd hidden layer + # 第二个隐藏层 torch.nn.Linear(30, 20), torch.nn.ReLU(), - # output layer + # 输出层 torch.nn.Linear(20, num_outputs), ) @@ -91,11 +92,11 @@ def prepare_dataset(): 
train_loader = DataLoader( dataset=train_ds, batch_size=2, - shuffle=False, # NEW: False because of DistributedSampler below + shuffle=False, # 这里设置为False pin_memory=True, drop_last=True, - # NEW: chunk batches across GPUs without overlapping samples: - sampler=DistributedSampler(train_ds) # NEW + # 在多个GPU上划分批次,确保批次之间不重叠样本 + sampler=DistributedSampler(train_ds) ) test_loader = DataLoader( dataset=test_ds, @@ -105,33 +106,33 @@ def prepare_dataset(): return train_loader, test_loader -# NEW: wrapper +# 包装器 def main(rank, world_size, num_epochs): - ddp_setup(rank, world_size) # NEW: initialize process groups + ddp_setup(rank, world_size) # train_loader, test_loader = prepare_dataset() model = NeuralNetwork(num_inputs=2, num_outputs=2) model.to(rank) optimizer = torch.optim.SGD(model.parameters(), lr=0.5) - model = DDP(model, device_ids=[rank]) # NEW: wrap model with DDP - # the core model is now accessible as model.module + model = DDP(model, device_ids=[rank]) # 使用分布式数据并行(DDP)将模型进行包装 + # 现在核心模型可以通过 model.module 访问 for epoch in range(num_epochs): model.train() for features, labels in enumerate(train_loader): - features, labels = features.to(rank), labels.to(rank) # New: use rank + features, labels = features.to(rank), labels.to(rank) logits = model(features) - loss = F.cross_entropy(logits, labels) # Loss function + loss = F.cross_entropy(logits, labels) # 损失函数 optimizer.zero_grad() loss.backward() optimizer.step() - ### LOGGING + ### 日志 print(f"[GPU{rank}] Epoch: {epoch+1:03d}/{num_epochs:03d}" f" | Batchsize {labels.shape[0]:03d}" f" | Train/Val Loss: {loss:.2f}") @@ -142,7 +143,7 @@ def main(rank, world_size, num_epochs): test_acc = compute_accuracy(model, test_loader, device=rank) print(f"[GPU{rank}] Test accuracy", test_acc) - destroy_process_group() # NEW: cleanly exit distributed mode + destroy_process_group() # 清理退出分布式模式 def compute_accuracy(model, dataloader, device): @@ -169,10 +170,10 @@ if __name__ == "__main__": torch.manual_seed(123) - # NEW: spawn new 
processes - # note that spawn will automatically pass the rank + # 新建进程 + # 请注意,spawn会自动传入rank(进程编号) num_epochs = 3 world_size = torch.cuda.device_count() mp.spawn(main, args=(world_size, num_epochs), nprocs=world_size) - # nprocs=world_size spawns one process per GPU + # nprocs=world_size 会为每个GPU生成一个进程 diff --git a/appendix-A/03_main-chapter-code/code-part1.ipynb b/appendix-A/03_main-chapter-code/code-part1.ipynb index 71490fa..435909e 100644 --- a/appendix-A/03_main-chapter-code/code-part1.ipynb +++ b/appendix-A/03_main-chapter-code/code-part1.ipynb @@ -5,7 +5,7 @@ "id": "ca7fc8a0-280c-4979-b0c7-fc3a99b3b785", "metadata": {}, "source": [ - "# Appendix A: Introduction to PyTorch (Part 1)" + "# 附录A:PyTorch的介绍(第一部分)" ] }, { @@ -13,12 +13,12 @@ "id": "f5bf13d2-8fc2-483e-88cc-6b4310221e68", "metadata": {}, "source": [ - "## A.1 What is PyTorch" + "## A.1 什么是PyTorch" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "96ee5660-5327-48e2-9104-a882b3b2afa4", "metadata": {}, "outputs": [ @@ -32,13 +32,13 @@ ], "source": [ "import torch\n", - "\n", + "# 显示PyTorch的版本\n", "print(torch.__version__)" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "f73ad4e4-7ec6-4467-a9e9-0cdf6d195264", "metadata": {}, "outputs": [ @@ -51,6 +51,7 @@ } ], "source": [ + "# 检查是否有可用的CUDA GPU:True表示可用,False表示不可用\n", "print(torch.cuda.is_available())" ] }, @@ -59,7 +60,7 @@ "id": "2100cf2e-7459-4ab3-92a8-43e86ab35a9b", "metadata": {}, "source": [ - "## A.2 Understanding tensors" + "## A.2 理解张量" ] }, { @@ -67,7 +68,7 @@ "id": "26d7f785-e048-42bc-9182-a556af6bb7f4", "metadata": {}, "source": [ - "### A.2.1 Scalars, vectors, matrices, and tensors" + "### A.2.1 标量、向量、矩阵和张量\n" ] }, { @@ -80,22 +81,22 @@ "import torch\n", "import numpy as np\n", "\n", - "# create a 0D tensor (scalar) from a Python integer\n", + "# 用Python整数创建一个0维张量(标量)\n", "tensor0d = torch.tensor(1)\n", "\n", - "# create a 1D tensor (vector) from a Python list\n", + "# 
用Python列表创建一个1维张量(向量)\n", "tensor1d = torch.tensor([1, 2, 3])\n", "\n", - "# create a 2D tensor from a nested Python list\n", + "# 用嵌套的Python列表创建一个2维张量(矩阵)\n", "tensor2d = torch.tensor([[1, 2], [3, 4]])\n", "\n", - "# create a 3D tensor from a nested Python list\n", + "# 用嵌套的Python列表创建一个3维张量\n", "tensor3d_1 = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n", "\n", - "# create a 3D tensor from NumPy array\n", + "# 从NumPy数组创建一个3维张量\n", "ary3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n", - "tensor3d_2 = torch.tensor(ary3d) # Copies NumPy array\n", - "tensor3d_3 = torch.from_numpy(ary3d) # Shares memory with NumPy array" + "tensor3d_2 = torch.tensor(ary3d) # 复制NumPy数组\n", + "tensor3d_3 = torch.from_numpy(ary3d) # 与NumPy数组共享内存" ] }, { @@ -118,7 +119,7 @@ ], "source": [ "ary3d[0, 0, 0] = 999\n", - "print(tensor3d_2) # remains unchanged" + "print(tensor3d_2) # 保持不变" ] }, { @@ -140,7 +141,7 @@ } ], "source": [ - "print(tensor3d_3) # changes because of memory sharing" + "print(tensor3d_3) # 由于内存共享而发生改变" ] }, { @@ -148,7 +149,7 @@ "id": "63dec48d-2b60-41a2-ac06-fef7e718605a", "metadata": {}, "source": [ - "### A.2.2 Tensor data types" + "### A.2.2 张量的数据类型" ] }, { @@ -213,7 +214,7 @@ "id": "2020deb5-aa02-4524-b311-c010f4ad27ff", "metadata": {}, "source": [ - "### A.2.3 Common PyTorch tensor operations" + "### A.2.3 PyTorch中常见的张量操作" ] }, { @@ -257,7 +258,7 @@ } ], "source": [ - "tensor2d.shape" + "tensor2d.shape # 张量形状" ] }, { @@ -280,7 +281,7 @@ } ], "source": [ - "tensor2d.reshape(3, 2)" + "tensor2d.reshape(3, 2) # 修改形状" ] }, { @@ -303,7 +304,7 @@ } ], "source": [ - "tensor2d.view(3, 2)" + "tensor2d.view(3, 2) # 以视图(view)方式改变形状" ] }, { @@ -326,7 +327,7 @@ } ], "source": [ - "tensor2d.T" + "tensor2d.T # 转置张量" ] }, { @@ -348,7 +349,7 @@ } ], "source": [ - "tensor2d.matmul(tensor2d.T)" + "tensor2d.matmul(tensor2d.T) # 张量乘法:tensor2d与其转置相乘" ] }, { @@ -370,7 +371,7 @@ } ], "source": [ - "tensor2d @ tensor2d.T" + "tensor2d @ tensor2d.T # 张量乘法的另一种实现方式:tensor2d与其转置相乘" ] }, { @@ -378,7 
+379,7 @@ "id": "4c15bdeb-78e2-4870-8a4f-a9f591666f38", "metadata": {}, "source": [ - "## A.3 Seeing models as computation graphs" + "## A.3 将模型视为计算图" ] }, { @@ -398,13 +399,13 @@ "source": [ "import torch.nn.functional as F\n", "\n", - "y = torch.tensor([1.0]) # true label\n", - "x1 = torch.tensor([1.1]) # input feature\n", - "w1 = torch.tensor([2.2]) # weight parameter\n", - "b = torch.tensor([0.0]) # bias unit\n", + "y = torch.tensor([1.0]) # 真实标签\n", + "x1 = torch.tensor([1.1]) # 输入特征\n", + "w1 = torch.tensor([2.2]) # 权重参数\n", + "b = torch.tensor([0.0]) # 偏置单元\n", "\n", - "z = x1 * w1 + b # net input\n", - "a = torch.sigmoid(z) # activation & output\n", + "z = x1 * w1 + b # 净输入\n", + "a = torch.sigmoid(z) # 激活函数 & 输出\n", "\n", "loss = F.binary_cross_entropy(a, y)\n", "print(loss)" @@ -415,7 +416,7 @@ "id": "f9424f26-2bac-47e7-b834-92ece802247c", "metadata": {}, "source": [ - "## A.4 Automatic differentiation made easy" + "## A.4 自动求导" ] }, { @@ -470,7 +471,7 @@ } ], "source": [ - "loss.backward()\n", + "loss.backward() # 反向传播\n", "\n", "print(w1.grad)\n", "print(b.grad)" @@ -481,7 +482,7 @@ "id": "f53bdd7d-44e6-40ab-8a5a-4eef74ef35dc", "metadata": {}, "source": [ - "## A.5 Implementing multilayer neural networks" + "## A.5 多层神经网络的实现" ] }, { @@ -497,15 +498,15 @@ "\n", " self.layers = torch.nn.Sequential(\n", " \n", - " # 1st hidden layer\n", + " # 第一个隐藏层\n", " torch.nn.Linear(num_inputs, 30),\n", " torch.nn.ReLU(),\n", "\n", - " # 2nd hidden layer\n", + " # 第二个隐藏层\n", " torch.nn.Linear(30, 20),\n", " torch.nn.ReLU(),\n", "\n", - " # output layer\n", + " # 输出层\n", " torch.nn.Linear(20, num_outputs),\n", " )\n", "\n", @@ -566,7 +567,7 @@ ], "source": [ "num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n", - "print(\"Total number of trainable model parameters:\", num_params)" + "print(\"Total number of trainable model parameters:\", num_params) # 打印可训练参数的总数" ] }, { @@ -592,7 +593,7 @@ } ], "source": [ - "print(model.layers[0].weight)" + 
"print(model.layers[0].weight) # 打印神经网络模型的第一层的权重" ] }, { @@ -618,9 +619,13 @@ } ], "source": [ + "# 设置随机数种子,以确保可复现性\n", "torch.manual_seed(123)\n", "\n", + "# 假设 NeuralNetwork 是一个神经网络类,且其构造函数接受两个参数,分别为输入特征的维度和输出特征的维度\n", "model = NeuralNetwork(50, 3)\n", + "\n", + "# 打印神经网络模型的第一层的权重\n", "print(model.layers[0].weight)" ] }, @@ -639,6 +644,7 @@ } ], "source": [ + "# 打印神经网络模型的第一层权重的形状\n", "print(model.layers[0].weight.shape)" ] }, @@ -657,10 +663,16 @@ } ], "source": [ + "# 设置随机数种子,以确保可复现性\n", "torch.manual_seed(123)\n", "\n", + "# 模型输入特征的维度为 50\n", "X = torch.rand((1, 50))\n", + "\n", + "# 使用模型进行前向传播计算输出\n", "out = model(X)\n", + "\n", + "# 打印输出结果\n", "print(out)" ] }, @@ -679,6 +691,7 @@ } ], "source": [ + "# 使用 torch.no_grad() 上下文管理器,以便在推断时不计算梯度\n", "with torch.no_grad():\n", " out = model(X)\n", "print(out)" @@ -709,7 +722,7 @@ "id": "19858180-0f26-43a8-b2c3-7ed40abf9f85", "metadata": {}, "source": [ - "## A.6 Setting up efficient data loaders" + "## A.6 建立高效的数据加载器" ] }, { @@ -719,6 +732,7 @@ "metadata": {}, "outputs": [], "source": [ + "# 定义输入特征张量 X_train\n", "X_train = torch.tensor([\n", " [-1.2, 3.1],\n", " [-0.9, 2.9],\n", @@ -727,6 +741,7 @@ " [2.7, -1.5]\n", "])\n", "\n", + "# 定义对应的标签张量 y_train\n", "y_train = torch.tensor([0, 0, 0, 1, 1])" ] }, @@ -756,18 +771,19 @@ "\n", "\n", "class ToyDataset(Dataset):\n", + " # 初始化 ToyDataset 类\n", " def __init__(self, X, y):\n", " self.features = X\n", " self.labels = y\n", - "\n", + " # 获取指定索引的数据\n", " def __getitem__(self, index):\n", " one_x = self.features[index]\n", " one_y = self.labels[index] \n", " return one_x, one_y\n", - "\n", + " # 获取数据集的长度\n", " def __len__(self):\n", " return self.labels.shape[0]\n", - "\n", + "# 创建训练数据集和测试数据集实例\n", "train_ds = ToyDataset(X_train, y_train)\n", "test_ds = ToyDataset(X_test, y_test)" ] @@ -804,6 +820,11 @@ "\n", "torch.manual_seed(123)\n", "\n", + "# 创建训练数据加载器 train_loader\n", + "# dataset 参数传入了您定义的 ToyDataset 类的实例 train_ds\n", + "# batch_size 参数指定了每个批次包含的样本数量\n", + "# 
shuffle 参数指定是否在每个 epoch 之前对数据进行洗牌\n", + "# num_workers 参数指定用于数据加载的子进程数量\n", "train_loader = DataLoader(\n", " dataset=train_ds,\n", " batch_size=2,\n", @@ -821,6 +842,11 @@ "source": [ "test_ds = ToyDataset(X_test, y_test)\n", "\n", + "# 创建测试数据加载器 test_loader\n", + "# dataset 参数传入了您定义的 ToyDataset 类的实例 test_ds\n", + "# batch_size 参数指定了每个批次包含的样本数量\n", + "# shuffle 参数指定是否在每个 epoch 之前对数据进行洗牌,这里设为 False 表示不洗牌\n", + "# num_workers 参数指定用于数据加载的子进程数量\n", "test_loader = DataLoader(\n", " dataset=test_ds,\n", " batch_size=2,\n", @@ -848,7 +874,9 @@ } ], "source": [ + "# 迭代训练数据加载器 train_loader\n", "for idx, (x, y) in enumerate(train_loader):\n", + " # 打印每个批次的索引、输入特征和对应的标签\n", " print(f\"Batch {idx+1}:\", x, y)" ] }, @@ -884,7 +912,7 @@ "id": "d904ca82-e50f-4f3d-a3ac-fc6ca53dd00e", "metadata": {}, "source": [ - "## A.7 A typical training loop" + "## A.7 典型的训练循环" ] }, { @@ -923,19 +951,19 @@ "\n", " logits = model(features)\n", " \n", - " loss = F.cross_entropy(logits, labels) # Loss function\n", + " loss = F.cross_entropy(logits, labels) # 损失函数\n", " \n", " optimizer.zero_grad()\n", " loss.backward()\n", " optimizer.step()\n", " \n", - " ### LOGGING\n", + " ### 日志\n", " print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n", " f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n", " f\" | Train/Val Loss: {loss:.2f}\")\n", "\n", " model.eval()\n", - " # Optional model evaluation" + " # 可选的模型评估" ] }, { @@ -985,10 +1013,16 @@ } ], "source": [ + "# 设置 PyTorch 的打印选项,以关闭科学计数法\n", "torch.set_printoptions(sci_mode=False)\n", + "\n", + "# 假设 outputs 是模型的输出张量\n", + "\n", + "# 对模型的输出进行 softmax 操作,计算类别概率\n", "probas = torch.softmax(outputs, dim=1)\n", "print(probas)\n", "\n", + "# 获取模型的预测结果,即具有最大概率的类别\n", "predictions = torch.argmax(outputs, dim=1)\n", "print(predictions)" ] @@ -1008,6 +1042,7 @@ } ], "source": [ + "# 使用 torch.argmax() 函数沿着 dim=1 维度获取每个样本最大值的索引,即模型的预测结果\n", "predictions = torch.argmax(outputs, dim=1)\n", "print(predictions)" ] @@ -1062,21 +1097,36 @@ "outputs": [], 
"source": [ "def compute_accuracy(model, dataloader):\n", + " \"\"\"\n", + " 计算模型在给定数据加载器上的准确率。\n", "\n", + " 参数:\n", + " model (torch.nn.Module): 待评估的模型。\n", + " dataloader (torch.utils.data.DataLoader): 包含输入数据的数据加载器。\n", + "\n", + " 返回:\n", + " float: 准确率值。\n", + " \"\"\"\n", + " # 将模型设为评估模式\n", " model = model.eval()\n", " correct = 0.0\n", " total_examples = 0\n", " \n", + " # 遍历数据加载器\n", " for idx, (features, labels) in enumerate(dataloader):\n", " \n", + " # 使用 no_grad 上下文,以便不跟踪梯度\n", " with torch.no_grad():\n", + " # 使用模型进行前向传播获取预测结果\n", " logits = model(features)\n", " \n", + " # 获取预测结果并计算正确预测的数量\n", " predictions = torch.argmax(logits, dim=1)\n", " compare = labels == predictions\n", " correct += torch.sum(compare)\n", " total_examples += len(compare)\n", "\n", + " # 计算并返回准确率\n", " return (correct / total_examples).item()" ] }, @@ -1127,7 +1177,7 @@ "id": "4d5cd469-3a45-4394-944b-3ce543f41dac", "metadata": {}, "source": [ - "## A.8 Saving and loading models" + "## A.8 保存并加载模型" ] }, { @@ -1158,7 +1208,7 @@ } ], "source": [ - "model = NeuralNetwork(2, 2) # needs to match the original model exactly\n", + "model = NeuralNetwork(2, 2) # 需要与原始模型完全匹配\n", "model.load_state_dict(torch.load(\"model.pth\"))" ] }, @@ -1167,7 +1217,7 @@ "id": "f891c013-43da-4a05-973d-997be313d2d8", "metadata": {}, "source": [ - "## A.9 Optimizing training performance with GPUs" + "## A.9 使用GPU来优化训练性能" ] }, { @@ -1175,7 +1225,7 @@ "id": "e68ae888-cabf-49c9-bad6-ecdce774db57", "metadata": {}, "source": [ - "### A.9.1 PyTorch computations on GPU devices" + "### A.9.1 在GPU上进行 PyTorch 的运算" ] }, { @@ -1183,7 +1233,7 @@ "id": "141c845f-efe3-4614-b376-b8b7a9a2c887", "metadata": {}, "source": [ - "See [code-part2.ipynb](code-part2.ipynb)" + "See [code-part2.ipynb](code-part2.ipynb) " ] }, { @@ -1191,7 +1241,7 @@ "id": "99811829-b817-42ea-b03e-d35374debcc0", "metadata": {}, "source": [ - "### A.9.2 Single-GPU training" + "### A.9.2 单个GPU的训练" ] }, { @@ -1207,7 +1257,7 @@ "id": 
"db6eb2d1-a341-4489-b04b-635c26945333", "metadata": {}, "source": [ - "### A.9.3 Training with multiple GPUs" + "### A.9.3 多GPU的训练" ] }, { diff --git a/appendix-A/03_main-chapter-code/code-part2.ipynb b/appendix-A/03_main-chapter-code/code-part2.ipynb index 8a11b20..03a59df 100644 --- a/appendix-A/03_main-chapter-code/code-part2.ipynb +++ b/appendix-A/03_main-chapter-code/code-part2.ipynb @@ -6,7 +6,7 @@ "id": "O9i6kzBsZVaZ" }, "source": [ - "# Appendix A: Introduction to PyTorch (Part 2)" + "# 附件A:PyTorch的介绍(第二部分)" ] }, { @@ -15,7 +15,7 @@ "id": "ppbG5d-NZezH" }, "source": [ - "## A.9 Optimizing training performance with GPUs" + "## A.9 使用GPU优化训练性能" ] }, { @@ -24,7 +24,7 @@ "id": "6jH0J_DPZhbn" }, "source": [ - "### A.9.1 PyTorch computations on GPU devices" + "### A.9.1 在GPU上进行 PyTorch 计算" ] }, { @@ -48,13 +48,13 @@ ], "source": [ "import torch\n", - "\n", + "# 显示PyTorch的版本\n", "print(torch.__version__)" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -72,6 +72,7 @@ } ], "source": [ + "# 显示PyTorch是否支持GPU\n", "print(torch.cuda.is_available())" ] }, @@ -124,6 +125,7 @@ } ], "source": [ + "# 将两个张量移动到CUDA设备上\n", "tensor_1 = tensor_1.to(\"cuda\")\n", "tensor_2 = tensor_2.to(\"cuda\")\n", "\n", @@ -165,7 +167,7 @@ "id": "c8j1cWDcWAMf" }, "source": [ - "## A.9.2 Single-GPU training" + "## A.9.2 单GPU训练" ] }, { @@ -264,15 +266,15 @@ "\n", " self.layers = torch.nn.Sequential(\n", "\n", - " # 1st hidden layer\n", + " # 第一个隐藏层\n", " torch.nn.Linear(num_inputs, 30),\n", " torch.nn.ReLU(),\n", "\n", - " # 2nd hidden layer\n", + " # 第二个隐藏层\n", " torch.nn.Linear(30, 20),\n", " torch.nn.ReLU(),\n", "\n", - " # output layer\n", + " # 输出层\n", " torch.nn.Linear(20, num_outputs),\n", " )\n", "\n", @@ -309,14 +311,22 @@ "import torch.nn.functional as F\n", "\n", "\n", + "# 设置随机数种子,以确保可复现性\n", "torch.manual_seed(123)\n", + "\n", + "# 创建神经网络模型\n", "model = NeuralNetwork(num_inputs=2, 
num_outputs=2)\n", "\n", - "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # NEW\n", - "model = model.to(device) # NEW\n", + "# 根据设备可用情况选择设备\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", + "# 将模型移动到所选设备上\n", + "model = model.to(device)\n", + "\n", + "# 定义优化器,使用随机梯度下降 (SGD)\n", "optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n", "\n", + "# 定义训练循环的 epoch 数量\n", "num_epochs = 3\n", "\n", "for epoch in range(num_epochs):\n", @@ -324,21 +334,21 @@ " model.train()\n", " for batch_idx, (features, labels) in enumerate(train_loader):\n", "\n", - " features, labels = features.to(device), labels.to(device) # NEW\n", + " features, labels = features.to(device), labels.to(device) \n", " logits = model(features)\n", - " loss = F.cross_entropy(logits, labels) # Loss function\n", + " loss = F.cross_entropy(logits, labels) # 损失函数\n", "\n", " optimizer.zero_grad()\n", " loss.backward()\n", " optimizer.step()\n", "\n", - " ### LOGGING\n", + " ### 训练日志\n", " print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n", " f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n", " f\" | Train/Val Loss: {loss:.2f}\")\n", "\n", " model.eval()\n", - " # Optional model evaluation" + " # 可选的模型评估" ] }, { @@ -349,6 +359,7 @@ }, "outputs": [], "source": [ + "# 使用accuracy(准确率)作为指标\n", "def compute_accuracy(model, dataloader, device):\n", "\n", " model = model.eval()\n", @@ -356,17 +367,17 @@ " total_examples = 0\n", "\n", " for idx, (features, labels) in enumerate(dataloader):\n", - "\n", + " # 将数据移动到指定的设备上\n", " features, labels = features.to(device), labels.to(device) # New\n", "\n", " with torch.no_grad():\n", " logits = model(features)\n", - "\n", + " # 获取预测结果并计算准确数量\n", " predictions = torch.argmax(logits, dim=1)\n", " compare = labels == predictions\n", " correct += torch.sum(compare)\n", " total_examples += len(compare)\n", - "\n", + " # 计算并返回准确率\n", " return (correct / total_examples).item()" ] }, diff --git 
a/appendix-A/03_main-chapter-code/exercise-solutions.ipynb b/appendix-A/03_main-chapter-code/exercise-solutions.ipynb index f934b2d..4c49997 100644 --- a/appendix-A/03_main-chapter-code/exercise-solutions.ipynb +++ b/appendix-A/03_main-chapter-code/exercise-solutions.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Exercise A.3" + "## 练习 A.3" ] }, { @@ -21,15 +21,15 @@ "\n", " self.layers = torch.nn.Sequential(\n", " \n", - " # 1st hidden layer\n", + " # 第一个隐藏层\n", " torch.nn.Linear(num_inputs, 30),\n", " torch.nn.ReLU(),\n", "\n", - " # 2nd hidden layer\n", + " # 第二个隐藏层\n", " torch.nn.Linear(30, 20),\n", " torch.nn.ReLU(),\n", "\n", - " # output layer\n", + " # 输出层\n", " torch.nn.Linear(20, num_outputs),\n", " )\n", "\n", @@ -62,7 +62,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Exercise A.4" + "## 练习 A.4" ] }, { @@ -74,7 +74,7 @@ "outputs": [], "source": [ "import torch\n", - "\n", + "# 创建随机向量\n", "a = torch.rand(100, 200)\n", "b = torch.rand(200, 300)" ] @@ -99,6 +99,9 @@ } ], "source": [ + "# 使用 @ 符号进行矩阵相乘,并计算执行时间\n", + "# %timeit 是 IPython 提供的魔术命令,用于多次执行代码以获取平均执行时间\n", + "# 它会自动选择执行次数以确保结果的准确性\n", "%timeit a @ b" ] }, @@ -110,6 +113,7 @@ }, "outputs": [], "source": [ + "# 将 a 和 b 移动到 CUDA 设备上以利用 GPU 加速计算\n", "a, b = a.to(\"cuda\"), b.to(\"cuda\")" ] }, @@ -135,15 +139,6 @@ "source": [ "%timeit a @ b" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Zqqa-To2L749" - }, - "outputs": [], - "source": [] } ], "metadata": { @@ -168,7 +163,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.11.5" } }, "nbformat": 4,