From e0678d43bfd922ea2af950672c519ad29eabbbef Mon Sep 17 00:00:00 2001
From: Beyondzjl <84648701+Beyondzjl@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:23:02 +0800
Subject: [PATCH 1/8] Delete appendix-A/03_main-chapter-code/DDP-script.py

---
 appendix-A/03_main-chapter-code/DDP-script.py | 178 ------------------
 1 file changed, 178 deletions(-)
 delete mode 100644 appendix-A/03_main-chapter-code/DDP-script.py

diff --git a/appendix-A/03_main-chapter-code/DDP-script.py b/appendix-A/03_main-chapter-code/DDP-script.py
deleted file mode 100644
index 89323c0..0000000
--- a/appendix-A/03_main-chapter-code/DDP-script.py
+++ /dev/null
@@ -1,178 +0,0 @@
-# Appendix A: Introduction to PyTorch (Part 3)
-
-import torch
-import torch.nn.functional as F
-from torch.utils.data import Dataset, DataLoader
-
-# NEW imports:
-import os
-import torch.multiprocessing as mp
-from torch.utils.data.distributed import DistributedSampler
-from torch.nn.parallel import DistributedDataParallel as DDP
-from torch.distributed import init_process_group, destroy_process_group
-
-
-# NEW: function to initialize a distributed process group (1 process / GPU)
-# this allows communication among processes
-def ddp_setup(rank, world_size):
-    """
-    Arguments:
-        rank: a unique process ID
-        world_size: total number of processes in the group
-    """
-    # rank of machine running rank:0 process
-    # here, we assume all GPUs are on the same machine
-    os.environ["MASTER_ADDR"] = "localhost"
-    # any free port on the machine
-    os.environ["MASTER_PORT"] = "12345"
-
-    # initialize process group
-    # Windows users may have to use "gloo" instead of "nccl" as backend
-    # nccl: NVIDIA Collective Communication Library
-    init_process_group(backend="nccl", rank=rank, world_size=world_size)
-    torch.cuda.set_device(rank)
-
-
-class ToyDataset(Dataset):
-    def __init__(self, X, y):
-        self.features = X
-        self.labels = y
-
-    def __getitem__(self, index):
-        one_x = self.features[index]
-        one_y = self.labels[index]
-        return one_x, one_y
-
-    def __len__(self):
-        return self.labels.shape[0]
-
-
-class NeuralNetwork(torch.nn.Module):
-    def __init__(self, num_inputs, num_outputs):
-        super().__init__()
-
-        self.layers = torch.nn.Sequential(
-            # 1st hidden layer
-            torch.nn.Linear(num_inputs, 30),
-            torch.nn.ReLU(),
-
-            # 2nd hidden layer
-            torch.nn.Linear(30, 20),
-            torch.nn.ReLU(),
-
-            # output layer
-            torch.nn.Linear(20, num_outputs),
-        )
-
-    def forward(self, x):
-        logits = self.layers(x)
-        return logits
-
-
-def prepare_dataset():
-    X_train = torch.tensor([
-        [-1.2, 3.1],
-        [-0.9, 2.9],
-        [-0.5, 2.6],
-        [2.3, -1.1],
-        [2.7, -1.5]
-    ])
-    y_train = torch.tensor([0, 0, 0, 1, 1])
-
-    X_test = torch.tensor([
-        [-0.8, 2.8],
-        [2.6, -1.6],
-    ])
-    y_test = torch.tensor([0, 1])
-
-    train_ds = ToyDataset(X_train, y_train)
-    test_ds = ToyDataset(X_test, y_test)
-
-    train_loader = DataLoader(
-        dataset=train_ds,
-        batch_size=2,
-        shuffle=False, # NEW: False because of DistributedSampler below
-        pin_memory=True,
-        drop_last=True,
-        # NEW: chunk batches across GPUs without overlapping samples:
-        sampler=DistributedSampler(train_ds) # NEW
-    )
-    test_loader = DataLoader(
-        dataset=test_ds,
-        batch_size=2,
-        shuffle=False,
-    )
-    return train_loader, test_loader
-
-
-# NEW: wrapper
-def main(rank, world_size, num_epochs):
-
-    ddp_setup(rank, world_size) # NEW: initialize process groups
-
-    train_loader, test_loader = prepare_dataset()
-    model = NeuralNetwork(num_inputs=2, num_outputs=2)
-    model.to(rank)
-    optimizer = torch.optim.SGD(model.parameters(), lr=0.5)
-
-    model = DDP(model, device_ids=[rank]) # NEW: wrap model with DDP
-    # the core model is now accessible as model.module
-    
-    for epoch in range(num_epochs):
-    
-        model.train()
-        for features, labels in enumerate(train_loader):
-    
-            features, labels = features.to(rank), labels.to(rank) # New: use rank
-            logits = model(features)
-            loss = F.cross_entropy(logits, labels) # Loss function
-    
-            optimizer.zero_grad()
-            loss.backward()
-            optimizer.step()
-    
-            ### LOGGING
-            print(f"[GPU{rank}] Epoch: {epoch+1:03d}/{num_epochs:03d}"
-                  f" | Batchsize {labels.shape[0]:03d}"
-                  f" | Train/Val Loss: {loss:.2f}")
-    
-    model.eval()
-    train_acc = compute_accuracy(model, train_loader, device=rank)
-    print(f"[GPU{rank}] Training accuracy", train_acc)
-    test_acc = compute_accuracy(model, test_loader, device=rank)
-    print(f"[GPU{rank}] Test accuracy", test_acc)
-
-    destroy_process_group() # NEW: cleanly exit distributed mode
-
-
-def compute_accuracy(model, dataloader, device):
-    model = model.eval()
-    correct = 0.0
-    total_examples = 0
-
-    for idx, (features, labels) in enumerate(dataloader):
-        features, labels = features.to(device), labels.to(device)
-
-        with torch.no_grad():
-            logits = model(features)
-        predictions = torch.argmax(logits, dim=1)
-        compare = labels == predictions
-        correct += torch.sum(compare)
-        total_examples += len(compare)
-    return (correct / total_examples).item()
-
-
-if __name__ == "__main__":
-    print("PyTorch version:", torch.__version__)
-    print("CUDA available:", torch.cuda.is_available())
-    print("Number of GPUs available:", torch.cuda.device_count())
-
-    torch.manual_seed(123)
-
-    # NEW: spawn new processes
-    # note that spawn will automatically pass the rank
-    num_epochs = 3
-    world_size = torch.cuda.device_count()
-    mp.spawn(main, args=(world_size, num_epochs), nprocs=world_size)
-    # nprocs=world_size spawns one process per GPU
-

From 9c08e88d13618e038833c789162446a2f798498e Mon Sep 17 00:00:00 2001
From: Beyondzjl <84648701+Beyondzjl@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:23:29 +0800
Subject: [PATCH 2/8] Add files via upload

---
 appendix-A/03_main-chapter-code/DDP-script.py | 179 ++++++++++++++++++
 1 file changed, 179 insertions(+)
 create mode 100644 appendix-A/03_main-chapter-code/DDP-script.py

diff --git a/appendix-A/03_main-chapter-code/DDP-script.py b/appendix-A/03_main-chapter-code/DDP-script.py
new file mode 100644
index 0000000..fdcf4df
--- /dev/null
+++ b/appendix-A/03_main-chapter-code/DDP-script.py
@@ -0,0 +1,179 @@
+# Appendix A: Introduction to PyTorch (Part 3)
+
+import torch
+import torch.nn.functional as F
+from torch.utils.data import Dataset, DataLoader
+
+# additional imports needed for distributed training:
+import os
+import torch.multiprocessing as mp
+from torch.utils.data.distributed import DistributedSampler
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.distributed import init_process_group, destroy_process_group
+
+
+# function to initialize a distributed process group (1 process per GPU);
+# this allows communication among the processes
+def ddp_setup(rank, world_size):
+    """
+    Arguments:
+        rank: a unique process ID
+        world_size: total number of processes in the group
+    """
+
+    # address of the machine hosting the rank-0 process;
+    # here, we assume all GPUs are on the same machine
+    os.environ["MASTER_ADDR"] = "localhost"
+    # any free port on the machine may be used
+    os.environ["MASTER_PORT"] = "12345"
+
+    # initialize the process group
+    # Windows users may have to use "gloo" instead of "nccl" as the backend
+    # nccl: NVIDIA Collective Communication Library
+    init_process_group(backend="nccl", rank=rank, world_size=world_size)
+    torch.cuda.set_device(rank)  # make GPU `rank` this process's current CUDA device
+
+
+class ToyDataset(Dataset):  # in-memory dataset pairing features[i] with labels[i]
+    def __init__(self, X, y):
+        self.features = X
+        self.labels = y
+
+    def __getitem__(self, index):  # return the (feature, label) pair stored at `index`
+        one_x = self.features[index]
+        one_y = self.labels[index]
+        return one_x, one_y
+
+    def __len__(self):  # number of samples = size of the labels' first dimension
+        return self.labels.shape[0]
+
+
+class NeuralNetwork(torch.nn.Module):  # fully connected net: num_inputs -> 30 -> 20 -> num_outputs
+    def __init__(self, num_inputs, num_outputs):
+        super().__init__()
+
+        self.layers = torch.nn.Sequential(
+            # 1st hidden layer
+            torch.nn.Linear(num_inputs, 30),
+            torch.nn.ReLU(),
+
+            # 2nd hidden layer
+            torch.nn.Linear(30, 20),
+            torch.nn.ReLU(),
+
+            # output layer
+            torch.nn.Linear(20, num_outputs),
+        )
+
+    def forward(self, x):  # returns the output layer's raw logits; no softmax is applied here
+        logits = self.layers(x)
+        return logits
+
+
+def prepare_dataset():  # build the toy train/test tensors and return (train_loader, test_loader)
+    X_train = torch.tensor([
+        [-1.2, 3.1],
+        [-0.9, 2.9],
+        [-0.5, 2.6],
+        [2.3, -1.1],
+        [2.7, -1.5]
+    ])
+    y_train = torch.tensor([0, 0, 0, 1, 1])
+
+    X_test = torch.tensor([
+        [-0.8, 2.8],
+        [2.6, -1.6],
+    ])
+    y_test = torch.tensor([0, 1])
+
+    train_ds = ToyDataset(X_train, y_train)
+    test_ds = ToyDataset(X_test, y_test)
+
+    train_loader = DataLoader(
+        dataset=train_ds,
+        batch_size=2,
+        shuffle=False,  # False because the DistributedSampler below controls the sample order
+        pin_memory=True,
+        drop_last=True,
+        # give each process its own shard of the training set:
+        sampler=DistributedSampler(train_ds)
+    )
+    test_loader = DataLoader(
+        dataset=test_ds,
+        batch_size=2,
+        shuffle=False,
+    )
+    return train_loader, test_loader
+
+
+# wrapper: per-process entry point; mp.spawn supplies `rank` as the first argument
+def main(rank, world_size, num_epochs):
+    """Train and evaluate the toy classifier on GPU `rank` under DDP (one process per GPU)."""
+    ddp_setup(rank, world_size)  # initialize the process group
+
+    train_loader, test_loader = prepare_dataset()
+    model = NeuralNetwork(num_inputs=2, num_outputs=2)
+    model.to(rank)
+    optimizer = torch.optim.SGD(model.parameters(), lr=0.5)
+
+    model = DDP(model, device_ids=[rank])  # wrap the model with DistributedDataParallel
+    # the core model is now accessible as model.module
+
+    for epoch in range(num_epochs):
+        train_loader.sampler.set_epoch(epoch)  # vary the sampler's shuffle order across epochs
+        model.train()
+        for features, labels in train_loader:
+            # each batch yielded by the loader is a (features, labels) pair
+            features, labels = features.to(rank), labels.to(rank)
+            logits = model(features)
+            loss = F.cross_entropy(logits, labels)  # loss function
+
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+            ### LOGGING
+            print(f"[GPU{rank}] Epoch: {epoch+1:03d}/{num_epochs:03d}"
+                  f" | Batchsize {labels.shape[0]:03d}"
+                  f" | Train/Val Loss: {loss:.2f}")
+
+    model.eval()
+    train_acc = compute_accuracy(model, train_loader, device=rank)
+    print(f"[GPU{rank}] Training accuracy", train_acc)
+    test_acc = compute_accuracy(model, test_loader, device=rank)
+    print(f"[GPU{rank}] Test accuracy", test_acc)
+
+    destroy_process_group()  # cleanly exit distributed mode
+
+
+def compute_accuracy(model, dataloader, device):  # fraction of samples whose argmax prediction equals the label
+    model = model.eval()
+    correct = 0.0
+    total_examples = 0
+
+    for idx, (features, labels) in enumerate(dataloader):
+        features, labels = features.to(device), labels.to(device)
+
+        with torch.no_grad():  # forward pass only; gradient tracking is disabled
+            logits = model(features)
+        predictions = torch.argmax(logits, dim=1)  # predicted class = index of the largest logit
+        compare = labels == predictions
+        correct += torch.sum(compare)  # `correct` is a tensor from the first batch onward
+        total_examples += len(compare)
+    return (correct / total_examples).item()  # NOTE(review): an empty dataloader divides by zero here
+
+
+if __name__ == "__main__":
+    print("PyTorch version:", torch.__version__)
+    print("CUDA available:", torch.cuda.is_available())
+    print("Number of GPUs available:", torch.cuda.device_count())
+
+    torch.manual_seed(123)
+
+    # spawn new processes
+    # note that spawn automatically passes the rank as the first argument to main
+    num_epochs = 3
+    world_size = torch.cuda.device_count()  # NOTE(review): presumably 0 without a visible CUDA device — confirm handling
+    mp.spawn(main, args=(world_size, num_epochs), nprocs=world_size)
+    # nprocs=world_size spawns one process per GPU
+

From e788c49ea082e7957151b3c03d93c8c45720f4d1 Mon Sep 17 00:00:00 2001
From: Beyondzjl <84648701+Beyondzjl@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:24:08 +0800
Subject: [PATCH 3/8] Delete appendix-A/03_main-chapter-code/code-part1.ipynb

---
 .../03_main-chapter-code/code-part1.ipynb     | 1251 -----------------
 1 file changed, 1251 deletions(-)
 delete mode 100644 appendix-A/03_main-chapter-code/code-part1.ipynb

diff --git a/appendix-A/03_main-chapter-code/code-part1.ipynb b/appendix-A/03_main-chapter-code/code-part1.ipynb
deleted file mode 100644
index 71490fa..0000000
--- a/appendix-A/03_main-chapter-code/code-part1.ipynb
+++ /dev/null
@@ -1,1251 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "ca7fc8a0-280c-4979-b0c7-fc3a99b3b785",
-   "metadata": {},
-   "source": [
-    "# Appendix A: Introduction to PyTorch (Part 1)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f5bf13d2-8fc2-483e-88cc-6b4310221e68",
-   "metadata": {},
-   "source": [
-    "## A.1 What is PyTorch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "96ee5660-5327-48e2-9104-a882b3b2afa4",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2.0.1\n"
-     ]
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "\n",
-    "print(torch.__version__)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "f73ad4e4-7ec6-4467-a9e9-0cdf6d195264",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "False\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(torch.cuda.is_available())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "2100cf2e-7459-4ab3-92a8-43e86ab35a9b",
-   "metadata": {},
-   "source": [
-    "## A.2 Understanding tensors"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "26d7f785-e048-42bc-9182-a556af6bb7f4",
-   "metadata": {},
-   "source": [
-    "### A.2.1 Scalars, vectors, matrices, and tensors"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "a3a464d6-cec8-4363-87bd-ea4f900baced",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "import numpy as np\n",
-    "\n",
-    "# create a 0D tensor (scalar) from a Python integer\n",
-    "tensor0d = torch.tensor(1)\n",
-    "\n",
-    "# create a 1D tensor (vector) from a Python list\n",
-    "tensor1d = torch.tensor([1, 2, 3])\n",
-    "\n",
-    "# create a 2D tensor from a nested Python list\n",
-    "tensor2d = torch.tensor([[1, 2], [3, 4]])\n",
-    "\n",
-    "# create a 3D tensor from a nested Python list\n",
-    "tensor3d_1 = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n",
-    "\n",
-    "# create a 3D tensor from NumPy array\n",
-    "ary3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n",
-    "tensor3d_2 = torch.tensor(ary3d)  # Copies NumPy array\n",
-    "tensor3d_3 = torch.from_numpy(ary3d)  # Shares memory with NumPy array"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "dbe14c47-499a-4d48-b354-a0e6fd957872",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[[1, 2],\n",
-      "         [3, 4]],\n",
-      "\n",
-      "        [[5, 6],\n",
-      "         [7, 8]]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "ary3d[0, 0, 0] = 999\n",
-    "print(tensor3d_2) # remains unchanged"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "e3e4c23a-cdba-46f5-a2dc-5fb32bf9117b",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[[999,   2],\n",
-      "         [  3,   4]],\n",
-      "\n",
-      "        [[  5,   6],\n",
-      "         [  7,   8]]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(tensor3d_3) # changes because of memory sharing"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "63dec48d-2b60-41a2-ac06-fef7e718605a",
-   "metadata": {},
-   "source": [
-    "### A.2.2 Tensor data types"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "3f48c014-e1a2-4a53-b5c5-125812d4034c",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.int64\n"
-     ]
-    }
-   ],
-   "source": [
-    "tensor1d = torch.tensor([1, 2, 3])\n",
-    "print(tensor1d.dtype)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "5429a086-9de2-4ac7-9f14-d087a7507394",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.float32\n"
-     ]
-    }
-   ],
-   "source": [
-    "floatvec = torch.tensor([1.0, 2.0, 3.0])\n",
-    "print(floatvec.dtype)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "a9a438d1-49bb-481c-8442-7cc2bb3dd4af",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.float32\n"
-     ]
-    }
-   ],
-   "source": [
-    "floatvec = tensor1d.to(torch.float32)\n",
-    "print(floatvec.dtype)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "2020deb5-aa02-4524-b311-c010f4ad27ff",
-   "metadata": {},
-   "source": [
-    "### A.2.3 Common PyTorch tensor operations"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "c02095f2-8a48-4953-b3c9-5313d4362ce7",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[1, 2, 3],\n",
-       "        [4, 5, 6]])"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tensor2d = torch.tensor([[1, 2, 3], [4, 5, 6]])\n",
-    "tensor2d"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "f33e1d45-5b2c-4afe-b4b2-66ac4099fd1a",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "torch.Size([2, 3])"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tensor2d.shape"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "id": "f3a4129d-f870-4e03-9c32-cd8521cb83fe",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[1, 2],\n",
-       "        [3, 4],\n",
-       "        [5, 6]])"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tensor2d.reshape(3, 2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "589ac0a7-adc7-41f3-b721-155f580e9369",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[1, 2],\n",
-       "        [3, 4],\n",
-       "        [5, 6]])"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tensor2d.view(3, 2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "344e307f-ba5d-4f9a-a791-2c75a3d1417e",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[1, 4],\n",
-       "        [2, 5],\n",
-       "        [3, 6]])"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tensor2d.T"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "19a75030-6a41-4ca8-9aae-c507ae79225c",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[14, 32],\n",
-       "        [32, 77]])"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tensor2d.matmul(tensor2d.T)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "id": "e7c950bc-d640-4203-b210-3ac8932fe4d4",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([[14, 32],\n",
-       "        [32, 77]])"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tensor2d @ tensor2d.T"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "4c15bdeb-78e2-4870-8a4f-a9f591666f38",
-   "metadata": {},
-   "source": [
-    "## A.3 Seeing models as computation graphs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "id": "22af61e9-0443-4705-94d7-24c21add09c7",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor(0.0852)\n"
-     ]
-    }
-   ],
-   "source": [
-    "import torch.nn.functional as F\n",
-    "\n",
-    "y = torch.tensor([1.0])  # true label\n",
-    "x1 = torch.tensor([1.1]) # input feature\n",
-    "w1 = torch.tensor([2.2]) # weight parameter\n",
-    "b = torch.tensor([0.0])  # bias unit\n",
-    "\n",
-    "z = x1 * w1 + b          # net input\n",
-    "a = torch.sigmoid(z)     # activation & output\n",
-    "\n",
-    "loss = F.binary_cross_entropy(a, y)\n",
-    "print(loss)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f9424f26-2bac-47e7-b834-92ece802247c",
-   "metadata": {},
-   "source": [
-    "## A.4 Automatic differentiation made easy"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "id": "ebf5cef7-48d6-4d2a-8ab0-0fb10bdd7d1a",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(tensor([-0.0898]),)\n",
-      "(tensor([-0.0817]),)\n"
-     ]
-    }
-   ],
-   "source": [
-    "import torch.nn.functional as F\n",
-    "from torch.autograd import grad\n",
-    "\n",
-    "y = torch.tensor([1.0])\n",
-    "x1 = torch.tensor([1.1])\n",
-    "w1 = torch.tensor([2.2], requires_grad=True)\n",
-    "b = torch.tensor([0.0], requires_grad=True)\n",
-    "\n",
-    "z = x1 * w1 + b \n",
-    "a = torch.sigmoid(z)\n",
-    "\n",
-    "loss = F.binary_cross_entropy(a, y)\n",
-    "\n",
-    "grad_L_w1 = grad(loss, w1, retain_graph=True)\n",
-    "grad_L_b = grad(loss, b, retain_graph=True)\n",
-    "\n",
-    "print(grad_L_w1)\n",
-    "print(grad_L_b)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "id": "93c5875d-f6b2-492c-b5ef-7e132f93a4e0",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([-0.0898])\n",
-      "tensor([-0.0817])\n"
-     ]
-    }
-   ],
-   "source": [
-    "loss.backward()\n",
-    "\n",
-    "print(w1.grad)\n",
-    "print(b.grad)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f53bdd7d-44e6-40ab-8a5a-4eef74ef35dc",
-   "metadata": {},
-   "source": [
-    "## A.5 Implementing multilayer neural networks"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "id": "84b749e1-7768-4cfe-94d6-a08c7feff4a1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class NeuralNetwork(torch.nn.Module):\n",
-    "    def __init__(self, num_inputs, num_outputs):\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.layers = torch.nn.Sequential(\n",
-    "                \n",
-    "            # 1st hidden layer\n",
-    "            torch.nn.Linear(num_inputs, 30),\n",
-    "            torch.nn.ReLU(),\n",
-    "\n",
-    "            # 2nd hidden layer\n",
-    "            torch.nn.Linear(30, 20),\n",
-    "            torch.nn.ReLU(),\n",
-    "\n",
-    "            # output layer\n",
-    "            torch.nn.Linear(20, num_outputs),\n",
-    "        )\n",
-    "\n",
-    "    def forward(self, x):\n",
-    "        logits = self.layers(x)\n",
-    "        return logits"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "id": "c5b59e2e-1930-456d-93b9-f69263e3adbe",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "model = NeuralNetwork(50, 3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "id": "39d02a21-33e7-4879-8fd2-d6309faf2f8d",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "NeuralNetwork(\n",
-      "  (layers): Sequential(\n",
-      "    (0): Linear(in_features=50, out_features=30, bias=True)\n",
-      "    (1): ReLU()\n",
-      "    (2): Linear(in_features=30, out_features=20, bias=True)\n",
-      "    (3): ReLU()\n",
-      "    (4): Linear(in_features=20, out_features=3, bias=True)\n",
-      "  )\n",
-      ")\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(model)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "id": "94535738-de02-4c2a-9b44-1cd186fa990a",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Total number of trainable model parameters: 2213\n"
-     ]
-    }
-   ],
-   "source": [
-    "num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
-    "print(\"Total number of trainable model parameters:\", num_params)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "id": "2c394106-ad71-4ccb-a3c9-9b60af3fa748",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Parameter containing:\n",
-      "tensor([[-0.0064,  0.0004, -0.0903,  ..., -0.1316,  0.0910,  0.0363],\n",
-      "        [ 0.1354,  0.1124, -0.0476,  ...,  0.0578,  0.1014,  0.0008],\n",
-      "        [ 0.0975, -0.0478,  0.0298,  ...,  0.0416,  0.0849,  0.1314],\n",
-      "        ...,\n",
-      "        [ 0.0118,  0.0240,  0.0420,  ..., -0.1305, -0.0517, -0.0826],\n",
-      "        [-0.0323,  0.1073,  0.0215,  ..., -0.1264, -0.1100,  0.1232],\n",
-      "        [ 0.0861,  0.0403, -0.0545,  ...,  0.1352,  0.0817, -0.0938]],\n",
-      "       requires_grad=True)\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(model.layers[0].weight)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "id": "b201882b-9285-4db9-bb63-43afe6a2ff9e",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Parameter containing:\n",
-      "tensor([[-0.0577,  0.0047, -0.0702,  ...,  0.0222,  0.1260,  0.0865],\n",
-      "        [ 0.0502,  0.0307,  0.0333,  ...,  0.0951,  0.1134, -0.0297],\n",
-      "        [ 0.1077, -0.1108,  0.0122,  ...,  0.0108, -0.1049, -0.1063],\n",
-      "        ...,\n",
-      "        [-0.0787,  0.1259,  0.0803,  ...,  0.1218,  0.1303, -0.1351],\n",
-      "        [ 0.1359,  0.0175, -0.0673,  ...,  0.0674,  0.0676,  0.1058],\n",
-      "        [ 0.0790,  0.1343, -0.0293,  ...,  0.0344, -0.0971, -0.0509]],\n",
-      "       requires_grad=True)\n"
-     ]
-    }
-   ],
-   "source": [
-    "torch.manual_seed(123)\n",
-    "\n",
-    "model = NeuralNetwork(50, 3)\n",
-    "print(model.layers[0].weight)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "id": "1da9a35e-44f3-460c-90fe-304519736fd6",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.Size([30, 50])\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(model.layers[0].weight.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "id": "57eadbae-90fe-43a3-a33f-c23a095ba42a",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[-0.1262,  0.1080, -0.1792]], grad_fn=<AddmmBackward0>)\n"
-     ]
-    }
-   ],
-   "source": [
-    "torch.manual_seed(123)\n",
-    "\n",
-    "X = torch.rand((1, 50))\n",
-    "out = model(X)\n",
-    "print(out)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "id": "48d720cb-ef73-4b7b-92e0-8198a072defd",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[-0.1262,  0.1080, -0.1792]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "with torch.no_grad():\n",
-    "    out = model(X)\n",
-    "print(out)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "id": "10df3640-83c3-4061-a74d-08f07a5cc6ac",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[0.3113, 0.3934, 0.2952]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "with torch.no_grad():\n",
-    "    out = torch.softmax(model(X), dim=1)\n",
-    "print(out)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "19858180-0f26-43a8-b2c3-7ed40abf9f85",
-   "metadata": {},
-   "source": [
-    "## A.6 Setting up efficient data loaders"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "id": "b9dc2745-8be8-4344-80ef-325f02cda7b7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X_train = torch.tensor([\n",
-    "    [-1.2, 3.1],\n",
-    "    [-0.9, 2.9],\n",
-    "    [-0.5, 2.6],\n",
-    "    [2.3, -1.1],\n",
-    "    [2.7, -1.5]\n",
-    "])\n",
-    "\n",
-    "y_train = torch.tensor([0, 0, 0, 1, 1])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "id": "88283948-5fca-461a-98a1-788b6be191d5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X_test = torch.tensor([\n",
-    "    [-0.8, 2.8],\n",
-    "    [2.6, -1.6],\n",
-    "])\n",
-    "\n",
-    "y_test = torch.tensor([0, 1])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "id": "edf323e2-1789-41a0-8e44-f3cab16e5f5d",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from torch.utils.data import Dataset\n",
-    "\n",
-    "\n",
-    "class ToyDataset(Dataset):\n",
-    "    def __init__(self, X, y):\n",
-    "        self.features = X\n",
-    "        self.labels = y\n",
-    "\n",
-    "    def __getitem__(self, index):\n",
-    "        one_x = self.features[index]\n",
-    "        one_y = self.labels[index]        \n",
-    "        return one_x, one_y\n",
-    "\n",
-    "    def __len__(self):\n",
-    "        return self.labels.shape[0]\n",
-    "\n",
-    "train_ds = ToyDataset(X_train, y_train)\n",
-    "test_ds = ToyDataset(X_test, y_test)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "id": "b7014705-1fdc-4f72-b892-d8db8bebc331",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "5"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "len(train_ds)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "id": "3ec6627a-4c3f-481a-b794-d2131be95eaf",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from torch.utils.data import DataLoader\n",
-    "\n",
-    "torch.manual_seed(123)\n",
-    "\n",
-    "train_loader = DataLoader(\n",
-    "    dataset=train_ds,\n",
-    "    batch_size=2,\n",
-    "    shuffle=True,\n",
-    "    num_workers=0\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "id": "8c9446de-5e4b-44fa-bf9a-a63e2661027e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test_ds = ToyDataset(X_test, y_test)\n",
-    "\n",
-    "test_loader = DataLoader(\n",
-    "    dataset=test_ds,\n",
-    "    batch_size=2,\n",
-    "    shuffle=False,\n",
-    "    num_workers=0\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "id": "99d4404c-9884-419f-979c-f659742d86ef",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Batch 1: tensor([[ 2.3000, -1.1000],\n",
-      "        [-0.9000,  2.9000]]) tensor([1, 0])\n",
-      "Batch 2: tensor([[-1.2000,  3.1000],\n",
-      "        [-0.5000,  2.6000]]) tensor([0, 0])\n",
-      "Batch 3: tensor([[ 2.7000, -1.5000]]) tensor([1])\n"
-     ]
-    }
-   ],
-   "source": [
-    "for idx, (x, y) in enumerate(train_loader):\n",
-    "    print(f\"Batch {idx+1}:\", x, y)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "id": "9d003f7e-7a80-40bf-a7fb-7a0d7dbba9db",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "train_loader = DataLoader(\n",
-    "    dataset=train_ds,\n",
-    "    batch_size=2,\n",
-    "    shuffle=True,\n",
-    "    num_workers=0,\n",
-    "    drop_last=True\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4db4d7f4-82da-44a4-b94e-ee04665d9c3c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for idx, (x, y) in enumerate(train_loader):\n",
-    "    print(f\"Batch {idx+1}:\", x, y)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d904ca82-e50f-4f3d-a3ac-fc6ca53dd00e",
-   "metadata": {},
-   "source": [
-    "## A.7 A typical training loop"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "id": "93f1791a-d887-4fc5-a307-5e5bde9e06f6",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch: 001/003 | Batch 000/002 | Train/Val Loss: 0.75\n",
-      "Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.65\n",
-      "Epoch: 002/003 | Batch 000/002 | Train/Val Loss: 0.44\n",
-      "Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.13\n",
-      "Epoch: 003/003 | Batch 000/002 | Train/Val Loss: 0.03\n",
-      "Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.00\n"
-     ]
-    }
-   ],
-   "source": [
-    "import torch.nn.functional as F\n",
-    "\n",
-    "\n",
-    "torch.manual_seed(123)\n",
-    "model = NeuralNetwork(num_inputs=2, num_outputs=2)\n",
-    "optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n",
-    "\n",
-    "num_epochs = 3\n",
-    "\n",
-    "for epoch in range(num_epochs):\n",
-    "    \n",
-    "    model.train()\n",
-    "    for batch_idx, (features, labels) in enumerate(train_loader):\n",
-    "\n",
-    "        logits = model(features)\n",
-    "        \n",
-    "        loss = F.cross_entropy(logits, labels) # Loss function\n",
-    "        \n",
-    "        optimizer.zero_grad()\n",
-    "        loss.backward()\n",
-    "        optimizer.step()\n",
-    "    \n",
-    "        ### LOGGING\n",
-    "        print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",
-    "              f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",
-    "              f\" | Train/Val Loss: {loss:.2f}\")\n",
-    "\n",
-    "    model.eval()\n",
-    "    # Optional model evaluation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "id": "00dcf57f-6a7e-4af7-aa5a-df2cb0866fa5",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[ 2.8569, -4.1618],\n",
-      "        [ 2.5382, -3.7548],\n",
-      "        [ 2.0944, -3.1820],\n",
-      "        [-1.4814,  1.4816],\n",
-      "        [-1.7176,  1.7342]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "model.eval()\n",
-    "\n",
-    "with torch.no_grad():\n",
-    "    outputs = model(X_train)\n",
-    "\n",
-    "print(outputs)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 40,
-   "id": "19be7390-18b8-43f9-9841-d7fb1919f6fd",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[    0.9991,     0.0009],\n",
-      "        [    0.9982,     0.0018],\n",
-      "        [    0.9949,     0.0051],\n",
-      "        [    0.0491,     0.9509],\n",
-      "        [    0.0307,     0.9693]])\n",
-      "tensor([0, 0, 0, 1, 1])\n"
-     ]
-    }
-   ],
-   "source": [
-    "torch.set_printoptions(sci_mode=False)\n",
-    "probas = torch.softmax(outputs, dim=1)\n",
-    "print(probas)\n",
-    "\n",
-    "predictions = torch.argmax(outputs, dim=1)\n",
-    "print(predictions)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 41,
-   "id": "07e7e530-f8d3-429c-9f5e-cf8078078c0e",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([0, 0, 0, 1, 1])\n"
-     ]
-    }
-   ],
-   "source": [
-    "predictions = torch.argmax(outputs, dim=1)\n",
-    "print(predictions)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 42,
-   "id": "5f756f0d-63c8-41b5-a5d8-01baa847e026",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([True, True, True, True, True])"
-      ]
-     },
-     "execution_count": 42,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "predictions == y_train"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "id": "da274bb0-f11c-4c81-a880-7a031fbf2943",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor(5)"
-      ]
-     },
-     "execution_count": 43,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "torch.sum(predictions == y_train)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 44,
-   "id": "16d62314-8dee-45b0-8f55-9e5aae2b24f4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def compute_accuracy(model, dataloader):\n",
-    "\n",
-    "    model = model.eval()\n",
-    "    correct = 0.0\n",
-    "    total_examples = 0\n",
-    "    \n",
-    "    for idx, (features, labels) in enumerate(dataloader):\n",
-    "        \n",
-    "        with torch.no_grad():\n",
-    "            logits = model(features)\n",
-    "        \n",
-    "        predictions = torch.argmax(logits, dim=1)\n",
-    "        compare = labels == predictions\n",
-    "        correct += torch.sum(compare)\n",
-    "        total_examples += len(compare)\n",
-    "\n",
-    "    return (correct / total_examples).item()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 45,
-   "id": "4f6c9c17-2a5f-46c0-804b-873f169b729a",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1.0"
-      ]
-     },
-     "execution_count": 45,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "compute_accuracy(model, train_loader)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "id": "311ed864-e21e-4aac-97c7-c6086caef27a",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1.0"
-      ]
-     },
-     "execution_count": 46,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "compute_accuracy(model, test_loader)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "4d5cd469-3a45-4394-944b-3ce543f41dac",
-   "metadata": {},
-   "source": [
-    "## A.8 Saving and loading models"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 47,
-   "id": "b013127d-a2c3-4b04-9fb3-a6a7c88d83c5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "torch.save(model.state_dict(), \"model.pth\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "id": "b2b428c2-3a44-4d91-97c4-8298cf2b51eb",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<All keys matched successfully>"
-      ]
-     },
-     "execution_count": 48,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "model = NeuralNetwork(2, 2) # needs to match the original  model exactly\n",
-    "model.load_state_dict(torch.load(\"model.pth\"))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "f891c013-43da-4a05-973d-997be313d2d8",
-   "metadata": {},
-   "source": [
-    "## A.9 Optimizing training performance with GPUs"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "e68ae888-cabf-49c9-bad6-ecdce774db57",
-   "metadata": {},
-   "source": [
-    "### A.9.1 PyTorch computations on GPU devices"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "141c845f-efe3-4614-b376-b8b7a9a2c887",
-   "metadata": {},
-   "source": [
-    "See [code-part2.ipynb](code-part2.ipynb)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "99811829-b817-42ea-b03e-d35374debcc0",
-   "metadata": {},
-   "source": [
-    "### A.9.2 Single-GPU training"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "0b21456c-4af7-440f-9e78-37770277b5bc",
-   "metadata": {},
-   "source": [
-    "See [code-part2.ipynb](code-part2.ipynb)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "db6eb2d1-a341-4489-b04b-635c26945333",
-   "metadata": {},
-   "source": [
-    "### A.9.3 Training with multiple GPUs"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "9d049a81-5fb0-49b5-9d6a-17a9976d8520",
-   "metadata": {},
-   "source": [
-    "See [DDP-script.py](DDP-script.py)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b32db94f-f159-4aa3-91cc-5b937eef7fc7",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

From 9ff373bc350c6dfd8f0022ca55014db8976fd91e Mon Sep 17 00:00:00 2001
From: Beyondzjl <84648701+Beyondzjl@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:24:18 +0800
Subject: [PATCH 4/8] Delete appendix-A/03_main-chapter-code/code-part2.ipynb

---
 .../03_main-chapter-code/code-part2.ipynb     | 452 ------------------
 1 file changed, 452 deletions(-)
 delete mode 100644 appendix-A/03_main-chapter-code/code-part2.ipynb

diff --git a/appendix-A/03_main-chapter-code/code-part2.ipynb b/appendix-A/03_main-chapter-code/code-part2.ipynb
deleted file mode 100644
index 8a11b20..0000000
--- a/appendix-A/03_main-chapter-code/code-part2.ipynb
+++ /dev/null
@@ -1,452 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "O9i6kzBsZVaZ"
-   },
-   "source": [
-    "# Appendix A: Introduction to PyTorch (Part 2)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "ppbG5d-NZezH"
-   },
-   "source": [
-    "## A.9 Optimizing training performance with GPUs"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "6jH0J_DPZhbn"
-   },
-   "source": [
-    "### A.9.1 PyTorch computations on GPU devices"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "RM7kGhwMF_nO",
-    "outputId": "ac60b048-b81f-4bb0-90fa-1ca474f04e9a"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2.0.1+cu118\n"
-     ]
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "\n",
-    "print(torch.__version__)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "OXLCKXhiUkZt",
-    "outputId": "39fe5366-287e-47eb-cc34-3508d616c4f9"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "True\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(torch.cuda.is_available())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "MTTlfh53Va-T",
-    "outputId": "f31d8bbe-577f-4db4-9939-02e66b9f96d1"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "tensor([5., 7., 9.])"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "tensor_1 = torch.tensor([1., 2., 3.])\n",
-    "tensor_2 = torch.tensor([4., 5., 6.])\n",
-    "\n",
-    "print(tensor_1 + tensor_2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "Z4LwTNw7Vmmb",
-    "outputId": "1c025c6a-e3ed-4c7c-f5fd-86c14607036e"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([5., 7., 9.], device='cuda:0')\n"
-     ]
-    }
-   ],
-   "source": [
-    "tensor_1 = tensor_1.to(\"cuda\")\n",
-    "tensor_2 = tensor_2.to(\"cuda\")\n",
-    "\n",
-    "print(tensor_1 + tensor_2)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 184
-    },
-    "id": "tKT6URN1Vuft",
-    "outputId": "e6f01e7f-d9cf-44cb-cc6d-46fc7907d5c0"
-   },
-   "outputs": [
-    {
-     "ename": "RuntimeError",
-     "evalue": "ignored",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-7-4ff3c4d20fc3>\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mtensor_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtensor_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;31mRuntimeError\u001b[0m: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"
-     ]
-    }
-   ],
-   "source": [
-    "tensor_1 = tensor_1.to(\"cpu\")\n",
-    "print(tensor_1 + tensor_2)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "c8j1cWDcWAMf"
-   },
-   "source": [
-    "## A.9.2 Single-GPU training"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {
-    "id": "GyY59cjieitv"
-   },
-   "outputs": [],
-   "source": [
-    "X_train = torch.tensor([\n",
-    "    [-1.2, 3.1],\n",
-    "    [-0.9, 2.9],\n",
-    "    [-0.5, 2.6],\n",
-    "    [2.3, -1.1],\n",
-    "    [2.7, -1.5]\n",
-    "])\n",
-    "\n",
-    "y_train = torch.tensor([0, 0, 0, 1, 1])\n",
-    "\n",
-    "X_test = torch.tensor([\n",
-    "    [-0.8, 2.8],\n",
-    "    [2.6, -1.6],\n",
-    "])\n",
-    "\n",
-    "y_test = torch.tensor([0, 1])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {
-    "id": "v41gKqEJempa"
-   },
-   "outputs": [],
-   "source": [
-    "from torch.utils.data import Dataset\n",
-    "\n",
-    "\n",
-    "class ToyDataset(Dataset):\n",
-    "    def __init__(self, X, y):\n",
-    "        self.features = X\n",
-    "        self.labels = y\n",
-    "\n",
-    "    def __getitem__(self, index):\n",
-    "        one_x = self.features[index]\n",
-    "        one_y = self.labels[index]\n",
-    "        return one_x, one_y\n",
-    "\n",
-    "    def __len__(self):\n",
-    "        return self.labels.shape[0]\n",
-    "\n",
-    "train_ds = ToyDataset(X_train, y_train)\n",
-    "test_ds = ToyDataset(X_test, y_test)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {
-    "id": "UPGVRuylep8Y"
-   },
-   "outputs": [],
-   "source": [
-    "from torch.utils.data import DataLoader\n",
-    "\n",
-    "torch.manual_seed(123)\n",
-    "\n",
-    "train_loader = DataLoader(\n",
-    "    dataset=train_ds,\n",
-    "    batch_size=2,\n",
-    "    shuffle=True,\n",
-    "    num_workers=1,\n",
-    "    drop_last=True\n",
-    ")\n",
-    "\n",
-    "test_loader = DataLoader(\n",
-    "    dataset=test_ds,\n",
-    "    batch_size=2,\n",
-    "    shuffle=False,\n",
-    "    num_workers=1\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {
-    "id": "drhg6IXofAXh"
-   },
-   "outputs": [],
-   "source": [
-    "class NeuralNetwork(torch.nn.Module):\n",
-    "    def __init__(self, num_inputs, num_outputs):\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.layers = torch.nn.Sequential(\n",
-    "\n",
-    "            # 1st hidden layer\n",
-    "            torch.nn.Linear(num_inputs, 30),\n",
-    "            torch.nn.ReLU(),\n",
-    "\n",
-    "            # 2nd hidden layer\n",
-    "            torch.nn.Linear(30, 20),\n",
-    "            torch.nn.ReLU(),\n",
-    "\n",
-    "            # output layer\n",
-    "            torch.nn.Linear(20, num_outputs),\n",
-    "        )\n",
-    "\n",
-    "    def forward(self, x):\n",
-    "        logits = self.layers(x)\n",
-    "        return logits"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "7jaS5sqPWCY0",
-    "outputId": "84c74615-38f2-48b8-eeda-b5912fed1d3a"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Epoch: 001/003 | Batch 000/002 | Train/Val Loss: 0.75\n",
-      "Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.65\n",
-      "Epoch: 002/003 | Batch 000/002 | Train/Val Loss: 0.44\n",
-      "Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.13\n",
-      "Epoch: 003/003 | Batch 000/002 | Train/Val Loss: 0.03\n",
-      "Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.00\n"
-     ]
-    }
-   ],
-   "source": [
-    "import torch.nn.functional as F\n",
-    "\n",
-    "\n",
-    "torch.manual_seed(123)\n",
-    "model = NeuralNetwork(num_inputs=2, num_outputs=2)\n",
-    "\n",
-    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\") # NEW\n",
-    "model = model.to(device) # NEW\n",
-    "\n",
-    "optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n",
-    "\n",
-    "num_epochs = 3\n",
-    "\n",
-    "for epoch in range(num_epochs):\n",
-    "\n",
-    "    model.train()\n",
-    "    for batch_idx, (features, labels) in enumerate(train_loader):\n",
-    "\n",
-    "        features, labels = features.to(device), labels.to(device) # NEW\n",
-    "        logits = model(features)\n",
-    "        loss = F.cross_entropy(logits, labels) # Loss function\n",
-    "\n",
-    "        optimizer.zero_grad()\n",
-    "        loss.backward()\n",
-    "        optimizer.step()\n",
-    "\n",
-    "        ### LOGGING\n",
-    "        print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",
-    "              f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",
-    "              f\" | Train/Val Loss: {loss:.2f}\")\n",
-    "\n",
-    "    model.eval()\n",
-    "    # Optional model evaluation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {
-    "id": "4qrlmnPPe7FO"
-   },
-   "outputs": [],
-   "source": [
-    "def compute_accuracy(model, dataloader, device):\n",
-    "\n",
-    "    model = model.eval()\n",
-    "    correct = 0.0\n",
-    "    total_examples = 0\n",
-    "\n",
-    "    for idx, (features, labels) in enumerate(dataloader):\n",
-    "\n",
-    "        features, labels = features.to(device), labels.to(device) # New\n",
-    "\n",
-    "        with torch.no_grad():\n",
-    "            logits = model(features)\n",
-    "\n",
-    "        predictions = torch.argmax(logits, dim=1)\n",
-    "        compare = labels == predictions\n",
-    "        correct += torch.sum(compare)\n",
-    "        total_examples += len(compare)\n",
-    "\n",
-    "    return (correct / total_examples).item()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "1_-BfkfEf4HX",
-    "outputId": "473bf21d-5880-4de3-fc8a-051d75315b94"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1.0"
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "compute_accuracy(model, train_loader, device=device)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "iYtXKBGEgKss",
-    "outputId": "508edd84-3fb7-4d04-cb23-9df0c3d24170"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1.0"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "compute_accuracy(model, test_loader, device=device)"
-   ]
-  }
- ],
- "metadata": {
-  "accelerator": "GPU",
-  "colab": {
-   "gpuType": "T4",
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}

From f99f5da0bca994f7cdc44b8527a61908563ef8a8 Mon Sep 17 00:00:00 2001
From: Beyondzjl <84648701+Beyondzjl@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:24:26 +0800
Subject: [PATCH 5/8] Delete
 appendix-A/03_main-chapter-code/exercise-solutions.ipynb

---
 .../exercise-solutions.ipynb                  | 176 ------------------
 1 file changed, 176 deletions(-)
 delete mode 100644 appendix-A/03_main-chapter-code/exercise-solutions.ipynb

diff --git a/appendix-A/03_main-chapter-code/exercise-solutions.ipynb b/appendix-A/03_main-chapter-code/exercise-solutions.ipynb
deleted file mode 100644
index f934b2d..0000000
--- a/appendix-A/03_main-chapter-code/exercise-solutions.ipynb
+++ /dev/null
@@ -1,176 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Exercise A.3"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "\n",
-    "class NeuralNetwork(torch.nn.Module):\n",
-    "    def __init__(self, num_inputs, num_outputs):\n",
-    "        super().__init__()\n",
-    "\n",
-    "        self.layers = torch.nn.Sequential(\n",
-    "                \n",
-    "            # 1st hidden layer\n",
-    "            torch.nn.Linear(num_inputs, 30),\n",
-    "            torch.nn.ReLU(),\n",
-    "\n",
-    "            # 2nd hidden layer\n",
-    "            torch.nn.Linear(30, 20),\n",
-    "            torch.nn.ReLU(),\n",
-    "\n",
-    "            # output layer\n",
-    "            torch.nn.Linear(20, num_outputs),\n",
-    "        )\n",
-    "\n",
-    "    def forward(self, x):\n",
-    "        logits = self.layers(x)\n",
-    "        return logits"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Total number of trainable model parameters: 752\n"
-     ]
-    }
-   ],
-   "source": [
-    "model = NeuralNetwork(2, 2)\n",
-    "\n",
-    "num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
-    "print(\"Total number of trainable model parameters:\", num_params)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Exercise A.4"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "id": "qGgnamiyLJxp"
-   },
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "\n",
-    "a = torch.rand(100, 200)\n",
-    "b = torch.rand(200, 300)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "CvGvIeVkLzXE",
-    "outputId": "44d027be-0787-4348-9c06-4e559d94d0e1"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "63.8 µs ± 8.7 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
-     ]
-    }
-   ],
-   "source": [
-    "%timeit a @ b"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "id": "OmRtZLa9L2ZG"
-   },
-   "outputs": [],
-   "source": [
-    "a, b = a.to(\"cuda\"), b.to(\"cuda\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "duLEhXDPL6k0",
-    "outputId": "3486471d-fd62-446f-9855-2d01f41fd101"
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "13.8 µs ± 425 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
-     ]
-    }
-   ],
-   "source": [
-    "%timeit a @ b"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "Zqqa-To2L749"
-   },
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "accelerator": "GPU",
-  "colab": {
-   "gpuType": "V100",
-   "machine_shape": "hm",
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.6"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}

From 279931120faf0bc747f634384119fbb55afb10b4 Mon Sep 17 00:00:00 2001
From: Beyondzjl <84648701+Beyondzjl@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:24:41 +0800
Subject: [PATCH 6/8] Add files via upload

---
 .../03_main-chapter-code/code-part1.ipynb     | 1301 +++++++++++++++++
 1 file changed, 1301 insertions(+)
 create mode 100644 appendix-A/03_main-chapter-code/code-part1.ipynb

diff --git a/appendix-A/03_main-chapter-code/code-part1.ipynb b/appendix-A/03_main-chapter-code/code-part1.ipynb
new file mode 100644
index 0000000..435909e
--- /dev/null
+++ b/appendix-A/03_main-chapter-code/code-part1.ipynb
@@ -0,0 +1,1301 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ca7fc8a0-280c-4979-b0c7-fc3a99b3b785",
+   "metadata": {},
+   "source": [
+    "# 附录A：PyTorch简介（第一部分）"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f5bf13d2-8fc2-483e-88cc-6b4310221e68",
+   "metadata": {},
+   "source": [
+    "## A.1 什么是PyTorch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "96ee5660-5327-48e2-9104-a882b3b2afa4",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.0.1\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "# 显示PyTorch的版本\n",
+    "print(torch.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f73ad4e4-7ec6-4467-a9e9-0cdf6d195264",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "False\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 检查当前环境中CUDA（GPU）是否可用：True表示可用，False表示不可用\n",
+    "print(torch.cuda.is_available())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2100cf2e-7459-4ab3-92a8-43e86ab35a9b",
+   "metadata": {},
+   "source": [
+    "## A.2 理解张量"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "26d7f785-e048-42bc-9182-a556af6bb7f4",
+   "metadata": {},
+   "source": [
+    "### A.2.1 标量、向量、矩阵和张量\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "a3a464d6-cec8-4363-87bd-ea4f900baced",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import numpy as np\n",
+    "\n",
+    "# 用Python整数创建一个0维张量\n",
+    "tensor0d = torch.tensor(1)\n",
+    "\n",
+    "# 用Python列表创建一个1维张量（向量）\n",
+    "tensor1d = torch.tensor([1, 2, 3])\n",
+    "\n",
+    "# 用嵌套的Python列表创建一个2维张量（矩阵）\n",
+    "tensor2d = torch.tensor([[1, 2], [3, 4]])\n",
+    "\n",
+    "# 用嵌套的Python列表创建一个3维张量\n",
+    "tensor3d_1 = torch.tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n",
+    "\n",
+    "# 从NumPy数组创建一个3维张量\n",
+    "ary3d = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])\n",
+    "tensor3d_2 = torch.tensor(ary3d)  # 复制NumPy数组\n",
+    "tensor3d_3 = torch.from_numpy(ary3d)  # 与NumPy数组共享内存"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "dbe14c47-499a-4d48-b354-a0e6fd957872",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[1, 2],\n",
+      "         [3, 4]],\n",
+      "\n",
+      "        [[5, 6],\n",
+      "         [7, 8]]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "ary3d[0, 0, 0] = 999\n",
+    "print(tensor3d_2) # 保持不变"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "e3e4c23a-cdba-46f5-a2dc-5fb32bf9117b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[[999,   2],\n",
+      "         [  3,   4]],\n",
+      "\n",
+      "        [[  5,   6],\n",
+      "         [  7,   8]]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(tensor3d_3) # 由于与NumPy数组共享内存，张量的值随之改变"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "63dec48d-2b60-41a2-ac06-fef7e718605a",
+   "metadata": {},
+   "source": [
+    "### A.2.2 张量的数据类型"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "3f48c014-e1a2-4a53-b5c5-125812d4034c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.int64\n"
+     ]
+    }
+   ],
+   "source": [
+    "tensor1d = torch.tensor([1, 2, 3])\n",
+    "print(tensor1d.dtype)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "5429a086-9de2-4ac7-9f14-d087a7507394",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.float32\n"
+     ]
+    }
+   ],
+   "source": [
+    "floatvec = torch.tensor([1.0, 2.0, 3.0])\n",
+    "print(floatvec.dtype)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "a9a438d1-49bb-481c-8442-7cc2bb3dd4af",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.float32\n"
+     ]
+    }
+   ],
+   "source": [
+    "floatvec = tensor1d.to(torch.float32)\n",
+    "print(floatvec.dtype)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2020deb5-aa02-4524-b311-c010f4ad27ff",
+   "metadata": {},
+   "source": [
+    "### A.2.3 PyTorch中常见的张量操作"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "c02095f2-8a48-4953-b3c9-5313d4362ce7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[1, 2, 3],\n",
+       "        [4, 5, 6]])"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d = torch.tensor([[1, 2, 3], [4, 5, 6]])\n",
+    "tensor2d"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "f33e1d45-5b2c-4afe-b4b2-66ac4099fd1a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "torch.Size([2, 3])"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d.shape # 张量形状"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "f3a4129d-f870-4e03-9c32-cd8521cb83fe",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[1, 2],\n",
+       "        [3, 4],\n",
+       "        [5, 6]])"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d.reshape(3, 2) # 修改形状"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "589ac0a7-adc7-41f3-b721-155f580e9369",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[1, 2],\n",
+       "        [3, 4],\n",
+       "        [5, 6]])"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d.view(3, 2) # 以视图方式改变形状（与原张量共享底层数据）"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "344e307f-ba5d-4f9a-a791-2c75a3d1417e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[1, 4],\n",
+       "        [2, 5],\n",
+       "        [3, 6]])"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d.T # 转置张量"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "19a75030-6a41-4ca8-9aae-c507ae79225c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[14, 32],\n",
+       "        [32, 77]])"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d.matmul(tensor2d.T) # 张量乘法：tensor2d与其转置相乘"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "e7c950bc-d640-4203-b210-3ac8932fe4d4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[14, 32],\n",
+       "        [32, 77]])"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor2d @ tensor2d.T # 张量乘法的另一种实现方式：tensor2d与其转置相乘"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4c15bdeb-78e2-4870-8a4f-a9f591666f38",
+   "metadata": {},
+   "source": [
+    "## A.3 将模型视为计算图"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "22af61e9-0443-4705-94d7-24c21add09c7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor(0.0852)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch.nn.functional as F\n",
+    "\n",
+    "y = torch.tensor([1.0])  # 真实标签\n",
+    "x1 = torch.tensor([1.1]) # 输入特征\n",
+    "w1 = torch.tensor([2.2]) # 权重变量\n",
+    "b = torch.tensor([0.0])  # 偏置单元\n",
+    "\n",
+    "z = x1 * w1 + b          # 网络输入\n",
+    "a = torch.sigmoid(z)     # 激活函数 & 输出\n",
+    "\n",
+    "loss = F.binary_cross_entropy(a, y)\n",
+    "print(loss)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f9424f26-2bac-47e7-b834-92ece802247c",
+   "metadata": {},
+   "source": [
+    "## A.4 自动求导"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "ebf5cef7-48d6-4d2a-8ab0-0fb10bdd7d1a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(tensor([-0.0898]),)\n",
+      "(tensor([-0.0817]),)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch.nn.functional as F\n",
+    "from torch.autograd import grad\n",
+    "\n",
+    "y = torch.tensor([1.0])\n",
+    "x1 = torch.tensor([1.1])\n",
+    "w1 = torch.tensor([2.2], requires_grad=True)\n",
+    "b = torch.tensor([0.0], requires_grad=True)\n",
+    "\n",
+    "z = x1 * w1 + b \n",
+    "a = torch.sigmoid(z)\n",
+    "\n",
+    "loss = F.binary_cross_entropy(a, y)\n",
+    "\n",
+    "grad_L_w1 = grad(loss, w1, retain_graph=True)\n",
+    "grad_L_b = grad(loss, b, retain_graph=True)\n",
+    "\n",
+    "print(grad_L_w1)\n",
+    "print(grad_L_b)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "93c5875d-f6b2-492c-b5ef-7e132f93a4e0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([-0.0898])\n",
+      "tensor([-0.0817])\n"
+     ]
+    }
+   ],
+   "source": [
+    "loss.backward()# 反向传播\n",
+    "\n",
+    "print(w1.grad)\n",
+    "print(b.grad)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f53bdd7d-44e6-40ab-8a5a-4eef74ef35dc",
+   "metadata": {},
+   "source": [
+    "## A.5 多层神经网络的实现"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "84b749e1-7768-4cfe-94d6-a08c7feff4a1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class NeuralNetwork(torch.nn.Module):\n",
+    "    def __init__(self, num_inputs, num_outputs):\n",
+    "        super().__init__()\n",
+    "\n",
+    "        self.layers = torch.nn.Sequential(\n",
+    "                \n",
+    "            # 第一个隐藏层\n",
+    "            torch.nn.Linear(num_inputs, 30),\n",
+    "            torch.nn.ReLU(),\n",
+    "\n",
+    "            # 第二个隐藏层\n",
+    "            torch.nn.Linear(30, 20),\n",
+    "            torch.nn.ReLU(),\n",
+    "\n",
+    "            # 输出层\n",
+    "            torch.nn.Linear(20, num_outputs),\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        logits = self.layers(x)\n",
+    "        return logits"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "c5b59e2e-1930-456d-93b9-f69263e3adbe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = NeuralNetwork(50, 3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "39d02a21-33e7-4879-8fd2-d6309faf2f8d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "NeuralNetwork(\n",
+      "  (layers): Sequential(\n",
+      "    (0): Linear(in_features=50, out_features=30, bias=True)\n",
+      "    (1): ReLU()\n",
+      "    (2): Linear(in_features=30, out_features=20, bias=True)\n",
+      "    (3): ReLU()\n",
+      "    (4): Linear(in_features=20, out_features=3, bias=True)\n",
+      "  )\n",
+      ")\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(model)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "94535738-de02-4c2a-9b44-1cd186fa990a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total number of trainable model parameters: 2213\n"
+     ]
+    }
+   ],
+   "source": [
+    "num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
+    "print(\"Total number of trainable model parameters:\", num_params)# 打印模型可训练参数的总数"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "2c394106-ad71-4ccb-a3c9-9b60af3fa748",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Parameter containing:\n",
+      "tensor([[-0.0064,  0.0004, -0.0903,  ..., -0.1316,  0.0910,  0.0363],\n",
+      "        [ 0.1354,  0.1124, -0.0476,  ...,  0.0578,  0.1014,  0.0008],\n",
+      "        [ 0.0975, -0.0478,  0.0298,  ...,  0.0416,  0.0849,  0.1314],\n",
+      "        ...,\n",
+      "        [ 0.0118,  0.0240,  0.0420,  ..., -0.1305, -0.0517, -0.0826],\n",
+      "        [-0.0323,  0.1073,  0.0215,  ..., -0.1264, -0.1100,  0.1232],\n",
+      "        [ 0.0861,  0.0403, -0.0545,  ...,  0.1352,  0.0817, -0.0938]],\n",
+      "       requires_grad=True)\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(model.layers[0].weight) # 打印神经网络模型的第一层的权重"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "b201882b-9285-4db9-bb63-43afe6a2ff9e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Parameter containing:\n",
+      "tensor([[-0.0577,  0.0047, -0.0702,  ...,  0.0222,  0.1260,  0.0865],\n",
+      "        [ 0.0502,  0.0307,  0.0333,  ...,  0.0951,  0.1134, -0.0297],\n",
+      "        [ 0.1077, -0.1108,  0.0122,  ...,  0.0108, -0.1049, -0.1063],\n",
+      "        ...,\n",
+      "        [-0.0787,  0.1259,  0.0803,  ...,  0.1218,  0.1303, -0.1351],\n",
+      "        [ 0.1359,  0.0175, -0.0673,  ...,  0.0674,  0.0676,  0.1058],\n",
+      "        [ 0.0790,  0.1343, -0.0293,  ...,  0.0344, -0.0971, -0.0509]],\n",
+      "       requires_grad=True)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 设置随机数种子，以确保可复现性\n",
+    "torch.manual_seed(123)\n",
+    "\n",
+    "# 重新实例化前面定义的 NeuralNetwork：输入特征维度为 50，输出维度为 3\n",
+    "model = NeuralNetwork(50, 3)\n",
+    "\n",
+    "# 打印神经网络模型的第一层的权重\n",
+    "print(model.layers[0].weight)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "id": "1da9a35e-44f3-460c-90fe-304519736fd6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([30, 50])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 打印神经网络模型的第一层权重的形状\n",
+    "print(model.layers[0].weight.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "57eadbae-90fe-43a3-a33f-c23a095ba42a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[-0.1262,  0.1080, -0.1792]], grad_fn=<AddmmBackward0>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 设置随机数种子，以确保可复现性\n",
+    "torch.manual_seed(123)\n",
+    "\n",
+    "# 模型输入特征的维度为 50\n",
+    "X = torch.rand((1, 50))\n",
+    "\n",
+    "# 使用模型进行前向传播计算输出\n",
+    "out = model(X)\n",
+    "\n",
+    "# 打印输出结果\n",
+    "print(out)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "48d720cb-ef73-4b7b-92e0-8198a072defd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[-0.1262,  0.1080, -0.1792]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 使用 torch.no_grad() 上下文管理器，以便在推断时不计算梯度\n",
+    "with torch.no_grad():\n",
+    "    out = model(X)\n",
+    "print(out)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "10df3640-83c3-4061-a74d-08f07a5cc6ac",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[0.3113, 0.3934, 0.2952]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "with torch.no_grad():\n",
+    "    out = torch.softmax(model(X), dim=1)\n",
+    "print(out)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "19858180-0f26-43a8-b2c3-7ed40abf9f85",
+   "metadata": {},
+   "source": [
+    "## A.6 建立高效的数据加载器"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "b9dc2745-8be8-4344-80ef-325f02cda7b7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 定义输入特征张量 X_train\n",
+    "X_train = torch.tensor([\n",
+    "    [-1.2, 3.1],\n",
+    "    [-0.9, 2.9],\n",
+    "    [-0.5, 2.6],\n",
+    "    [2.3, -1.1],\n",
+    "    [2.7, -1.5]\n",
+    "])\n",
+    "\n",
+    "# 定义对应的标签张量 y_train\n",
+    "y_train = torch.tensor([0, 0, 0, 1, 1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "88283948-5fca-461a-98a1-788b6be191d5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X_test = torch.tensor([\n",
+    "    [-0.8, 2.8],\n",
+    "    [2.6, -1.6],\n",
+    "])\n",
+    "\n",
+    "y_test = torch.tensor([0, 1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "edf323e2-1789-41a0-8e44-f3cab16e5f5d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from torch.utils.data import Dataset\n",
+    "\n",
+    "\n",
+    "class ToyDataset(Dataset):\n",
+    "    # 初始化 ToyDataset 类\n",
+    "    def __init__(self, X, y):\n",
+    "        self.features = X\n",
+    "        self.labels = y\n",
+    "    #  获取指定索引的数据\n",
+    "    def __getitem__(self, index):\n",
+    "        one_x = self.features[index]\n",
+    "        one_y = self.labels[index]        \n",
+    "        return one_x, one_y\n",
+    "    # 获取数据集的长度\n",
+    "    def __len__(self):\n",
+    "        return self.labels.shape[0]\n",
+    "# 创建训练数据集和测试数据集实例\n",
+    "train_ds = ToyDataset(X_train, y_train)\n",
+    "test_ds = ToyDataset(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "b7014705-1fdc-4f72-b892-d8db8bebc331",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "5"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(train_ds)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "3ec6627a-4c3f-481a-b794-d2131be95eaf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from torch.utils.data import DataLoader\n",
+    "\n",
+    "torch.manual_seed(123)\n",
+    "\n",
+    "# 创建训练数据加载器 train_loader\n",
+    "# dataset 参数传入了您定义的 ToyDataset 类的实例 train_ds\n",
+    "# batch_size 参数指定了每个批次包含的样本数量\n",
+    "# shuffle 参数指定是否在每个 epoch 之前对数据进行洗牌\n",
+    "# num_workers 参数指定用于数据加载的子进程数量\n",
+    "train_loader = DataLoader(\n",
+    "    dataset=train_ds,\n",
+    "    batch_size=2,\n",
+    "    shuffle=True,\n",
+    "    num_workers=0\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "8c9446de-5e4b-44fa-bf9a-a63e2661027e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_ds = ToyDataset(X_test, y_test)\n",
+    "\n",
+    "# 创建测试数据加载器 test_loader\n",
+    "# dataset 参数传入了您定义的 ToyDataset 类的实例 test_ds\n",
+    "# batch_size 参数指定了每个批次包含的样本数量\n",
+    "# shuffle 参数指定是否在每个 epoch 之前对数据进行洗牌，这里设为 False 表示不洗牌\n",
+    "# num_workers 参数指定用于数据加载的子进程数量\n",
+    "test_loader = DataLoader(\n",
+    "    dataset=test_ds,\n",
+    "    batch_size=2,\n",
+    "    shuffle=False,\n",
+    "    num_workers=0\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "99d4404c-9884-419f-979c-f659742d86ef",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Batch 1: tensor([[ 2.3000, -1.1000],\n",
+      "        [-0.9000,  2.9000]]) tensor([1, 0])\n",
+      "Batch 2: tensor([[-1.2000,  3.1000],\n",
+      "        [-0.5000,  2.6000]]) tensor([0, 0])\n",
+      "Batch 3: tensor([[ 2.7000, -1.5000]]) tensor([1])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 迭代训练数据加载器 train_loader\n",
+    "for idx, (x, y) in enumerate(train_loader):\n",
+    "    # 打印每个批次的索引、输入特征和对应的标签\n",
+    "    print(f\"Batch {idx+1}:\", x, y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "9d003f7e-7a80-40bf-a7fb-7a0d7dbba9db",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_loader = DataLoader(\n",
+    "    dataset=train_ds,\n",
+    "    batch_size=2,\n",
+    "    shuffle=True,\n",
+    "    num_workers=0,\n",
+    "    drop_last=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4db4d7f4-82da-44a4-b94e-ee04665d9c3c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for idx, (x, y) in enumerate(train_loader):\n",
+    "    print(f\"Batch {idx+1}:\", x, y)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d904ca82-e50f-4f3d-a3ac-fc6ca53dd00e",
+   "metadata": {},
+   "source": [
+    "## A.7 典型的训练循环"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "93f1791a-d887-4fc5-a307-5e5bde9e06f6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch: 001/003 | Batch 000/002 | Train/Val Loss: 0.75\n",
+      "Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.65\n",
+      "Epoch: 002/003 | Batch 000/002 | Train/Val Loss: 0.44\n",
+      "Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.13\n",
+      "Epoch: 003/003 | Batch 000/002 | Train/Val Loss: 0.03\n",
+      "Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.00\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch.nn.functional as F\n",
+    "\n",
+    "\n",
+    "torch.manual_seed(123)\n",
+    "model = NeuralNetwork(num_inputs=2, num_outputs=2)\n",
+    "optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n",
+    "\n",
+    "num_epochs = 3\n",
+    "\n",
+    "for epoch in range(num_epochs):\n",
+    "    \n",
+    "    model.train()\n",
+    "    for batch_idx, (features, labels) in enumerate(train_loader):\n",
+    "\n",
+    "        logits = model(features)\n",
+    "        \n",
+    "        loss = F.cross_entropy(logits, labels) # 损失函数\n",
+    "        \n",
+    "        optimizer.zero_grad()\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "    \n",
+    "        ### 日志\n",
+    "        print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",
+    "              f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",
+    "              f\" | Train/Val Loss: {loss:.2f}\")\n",
+    "\n",
+    "    model.eval()\n",
+    "    # 可选的模型评估指标"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "00dcf57f-6a7e-4af7-aa5a-df2cb0866fa5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[ 2.8569, -4.1618],\n",
+      "        [ 2.5382, -3.7548],\n",
+      "        [ 2.0944, -3.1820],\n",
+      "        [-1.4814,  1.4816],\n",
+      "        [-1.7176,  1.7342]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "model.eval()\n",
+    "\n",
+    "with torch.no_grad():\n",
+    "    outputs = model(X_train)\n",
+    "\n",
+    "print(outputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "id": "19be7390-18b8-43f9-9841-d7fb1919f6fd",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[    0.9991,     0.0009],\n",
+      "        [    0.9982,     0.0018],\n",
+      "        [    0.9949,     0.0051],\n",
+      "        [    0.0491,     0.9509],\n",
+      "        [    0.0307,     0.9693]])\n",
+      "tensor([0, 0, 0, 1, 1])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 设置 PyTorch 的打印选项，以关闭科学计数法\n",
+    "torch.set_printoptions(sci_mode=False)\n",
+    "\n",
+    "# outputs 是上一个单元格中模型对 X_train 的输出（logits）\n",
+    "\n",
+    "# 对模型的输出进行 softmax 操作，计算类别概率\n",
+    "probas = torch.softmax(outputs, dim=1)\n",
+    "print(probas)\n",
+    "\n",
+    "# 获取模型的预测结果，即具有最大概率的类别\n",
+    "predictions = torch.argmax(outputs, dim=1)\n",
+    "print(predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "id": "07e7e530-f8d3-429c-9f5e-cf8078078c0e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([0, 0, 0, 1, 1])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 使用 torch.argmax() 函数沿着 dim=1 维度获取每个样本最大值的索引，即模型的预测结果\n",
+    "predictions = torch.argmax(outputs, dim=1)\n",
+    "print(predictions)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "id": "5f756f0d-63c8-41b5-a5d8-01baa847e026",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([True, True, True, True, True])"
+      ]
+     },
+     "execution_count": 42,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "predictions == y_train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "da274bb0-f11c-4c81-a880-7a031fbf2943",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor(5)"
+      ]
+     },
+     "execution_count": 43,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "torch.sum(predictions == y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "16d62314-8dee-45b0-8f55-9e5aae2b24f4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def compute_accuracy(model, dataloader):\n",
+    "    \"\"\"\n",
+    "    计算模型在给定数据加载器上的准确率。\n",
+    "\n",
+    "    参数：\n",
+    "        model (torch.nn.Module): 待评估的模型。\n",
+    "        dataloader (torch.utils.data.DataLoader): 包含输入数据的数据加载器。\n",
+    "\n",
+    "    返回：\n",
+    "        float: 准确率值。\n",
+    "    \"\"\"\n",
+    "    # 将模型设为评估模式\n",
+    "    model = model.eval()\n",
+    "    correct = 0.0\n",
+    "    total_examples = 0\n",
+    "    \n",
+    "    # 遍历数据加载器\n",
+    "    for idx, (features, labels) in enumerate(dataloader):\n",
+    "        \n",
+    "        # 使用 no_grad 上下文，以便不跟踪梯度\n",
+    "        with torch.no_grad():\n",
+    "            # 使用模型进行前向传播获取预测结果\n",
+    "            logits = model(features)\n",
+    "        \n",
+    "        # 获取预测结果并计算正确预测的数量\n",
+    "        predictions = torch.argmax(logits, dim=1)\n",
+    "        compare = labels == predictions\n",
+    "        correct += torch.sum(compare)\n",
+    "        total_examples += len(compare)\n",
+    "\n",
+    "    # 计算并返回准确率\n",
+    "    return (correct / total_examples).item()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "4f6c9c17-2a5f-46c0-804b-873f169b729a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compute_accuracy(model, train_loader)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "id": "311ed864-e21e-4aac-97c7-c6086caef27a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 46,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compute_accuracy(model, test_loader)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4d5cd469-3a45-4394-944b-3ce543f41dac",
+   "metadata": {},
+   "source": [
+    "## A.8 保存并加载模型"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "id": "b013127d-a2c3-4b04-9fb3-a6a7c88d83c5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "torch.save(model.state_dict(), \"model.pth\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "id": "b2b428c2-3a44-4d91-97c4-8298cf2b51eb",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<All keys matched successfully>"
+      ]
+     },
+     "execution_count": 48,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = NeuralNetwork(2, 2) # 需要与原始模型完全匹配\n",
+    "model.load_state_dict(torch.load(\"model.pth\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f891c013-43da-4a05-973d-997be313d2d8",
+   "metadata": {},
+   "source": [
+    "## A.9 使用GPU优化训练性能"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e68ae888-cabf-49c9-bad6-ecdce774db57",
+   "metadata": {},
+   "source": [
+    "### A.9.1 在GPU上进行 PyTorch 计算"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "141c845f-efe3-4614-b376-b8b7a9a2c887",
+   "metadata": {},
+   "source": [
+    "See [code-part2.ipynb](code-part2.ipynb)  "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "99811829-b817-42ea-b03e-d35374debcc0",
+   "metadata": {},
+   "source": [
+    "### A.9.2 单个GPU的训练"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0b21456c-4af7-440f-9e78-37770277b5bc",
+   "metadata": {},
+   "source": [
+    "See [code-part2.ipynb](code-part2.ipynb)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "db6eb2d1-a341-4489-b04b-635c26945333",
+   "metadata": {},
+   "source": [
+    "### A.9.3 多GPU的训练"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9d049a81-5fb0-49b5-9d6a-17a9976d8520",
+   "metadata": {},
+   "source": [
+    "See [DDP-script.py](DDP-script.py)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b32db94f-f159-4aa3-91cc-5b937eef7fc7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From 7a08b3ef4f75236ea9bb4ae2183150a935eddf55 Mon Sep 17 00:00:00 2001
From: Beyondzjl <84648701+Beyondzjl@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:25:03 +0800
Subject: [PATCH 7/8] Add files via upload

---
 .../03_main-chapter-code/code-part2.ipynb     | 463 ++++++++++++++++++
 1 file changed, 463 insertions(+)
 create mode 100644 appendix-A/03_main-chapter-code/code-part2.ipynb

diff --git a/appendix-A/03_main-chapter-code/code-part2.ipynb b/appendix-A/03_main-chapter-code/code-part2.ipynb
new file mode 100644
index 0000000..03a59df
--- /dev/null
+++ b/appendix-A/03_main-chapter-code/code-part2.ipynb
@@ -0,0 +1,463 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "O9i6kzBsZVaZ"
+   },
+   "source": [
+    "# 附录A：PyTorch的介绍（第二部分）"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "ppbG5d-NZezH"
+   },
+   "source": [
+    "## A.9 使用GPU优化训练性能"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "6jH0J_DPZhbn"
+   },
+   "source": [
+    "### A.9.1 在GPU上进行 PyTorch 计算"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "RM7kGhwMF_nO",
+    "outputId": "ac60b048-b81f-4bb0-90fa-1ca474f04e9a"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.0.1+cu118\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "# 显示PyTorch的版本\n",
+    "print(torch.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "OXLCKXhiUkZt",
+    "outputId": "39fe5366-287e-47eb-cc34-3508d616c4f9"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "True\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 显示PyTorch是否支持GPU\n",
+    "print(torch.cuda.is_available())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "MTTlfh53Va-T",
+    "outputId": "f31d8bbe-577f-4db4-9939-02e66b9f96d1"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([5., 7., 9.])"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tensor_1 = torch.tensor([1., 2., 3.])\n",
+    "tensor_2 = torch.tensor([4., 5., 6.])\n",
+    "\n",
+    "print(tensor_1 + tensor_2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "Z4LwTNw7Vmmb",
+    "outputId": "1c025c6a-e3ed-4c7c-f5fd-86c14607036e"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([5., 7., 9.], device='cuda:0')\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 将两个张量移动到CUDA设备上\n",
+    "tensor_1 = tensor_1.to(\"cuda\")\n",
+    "tensor_2 = tensor_2.to(\"cuda\")\n",
+    "\n",
+    "print(tensor_1 + tensor_2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 184
+    },
+    "id": "tKT6URN1Vuft",
+    "outputId": "e6f01e7f-d9cf-44cb-cc6d-46fc7907d5c0"
+   },
+   "outputs": [
+    {
+     "ename": "RuntimeError",
+     "evalue": "ignored",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-7-4ff3c4d20fc3>\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mtensor_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtensor_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mRuntimeError\u001b[0m: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"
+     ]
+    }
+   ],
+   "source": [
+    "tensor_1 = tensor_1.to(\"cpu\")\n",
+    "print(tensor_1 + tensor_2)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "c8j1cWDcWAMf"
+   },
+   "source": [
+    "### A.9.2 单GPU训练"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "id": "GyY59cjieitv"
+   },
+   "outputs": [],
+   "source": [
+    "X_train = torch.tensor([\n",
+    "    [-1.2, 3.1],\n",
+    "    [-0.9, 2.9],\n",
+    "    [-0.5, 2.6],\n",
+    "    [2.3, -1.1],\n",
+    "    [2.7, -1.5]\n",
+    "])\n",
+    "\n",
+    "y_train = torch.tensor([0, 0, 0, 1, 1])\n",
+    "\n",
+    "X_test = torch.tensor([\n",
+    "    [-0.8, 2.8],\n",
+    "    [2.6, -1.6],\n",
+    "])\n",
+    "\n",
+    "y_test = torch.tensor([0, 1])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "id": "v41gKqEJempa"
+   },
+   "outputs": [],
+   "source": [
+    "from torch.utils.data import Dataset\n",
+    "\n",
+    "\n",
+    "class ToyDataset(Dataset):\n",
+    "    def __init__(self, X, y):\n",
+    "        self.features = X\n",
+    "        self.labels = y\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        one_x = self.features[index]\n",
+    "        one_y = self.labels[index]\n",
+    "        return one_x, one_y\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return self.labels.shape[0]\n",
+    "\n",
+    "train_ds = ToyDataset(X_train, y_train)\n",
+    "test_ds = ToyDataset(X_test, y_test)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {
+    "id": "UPGVRuylep8Y"
+   },
+   "outputs": [],
+   "source": [
+    "from torch.utils.data import DataLoader\n",
+    "\n",
+    "torch.manual_seed(123)\n",
+    "\n",
+    "train_loader = DataLoader(\n",
+    "    dataset=train_ds,\n",
+    "    batch_size=2,\n",
+    "    shuffle=True,\n",
+    "    num_workers=1,\n",
+    "    drop_last=True\n",
+    ")\n",
+    "\n",
+    "test_loader = DataLoader(\n",
+    "    dataset=test_ds,\n",
+    "    batch_size=2,\n",
+    "    shuffle=False,\n",
+    "    num_workers=1\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {
+    "id": "drhg6IXofAXh"
+   },
+   "outputs": [],
+   "source": [
+    "class NeuralNetwork(torch.nn.Module):\n",
+    "    def __init__(self, num_inputs, num_outputs):\n",
+    "        super().__init__()\n",
+    "\n",
+    "        self.layers = torch.nn.Sequential(\n",
+    "\n",
+    "            # 第一个隐藏层\n",
+    "            torch.nn.Linear(num_inputs, 30),\n",
+    "            torch.nn.ReLU(),\n",
+    "\n",
+    "            # 第二个隐藏层\n",
+    "            torch.nn.Linear(30, 20),\n",
+    "            torch.nn.ReLU(),\n",
+    "\n",
+    "            # 输出层\n",
+    "            torch.nn.Linear(20, num_outputs),\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        logits = self.layers(x)\n",
+    "        return logits"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "7jaS5sqPWCY0",
+    "outputId": "84c74615-38f2-48b8-eeda-b5912fed1d3a"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch: 001/003 | Batch 000/002 | Train/Val Loss: 0.75\n",
+      "Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.65\n",
+      "Epoch: 002/003 | Batch 000/002 | Train/Val Loss: 0.44\n",
+      "Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.13\n",
+      "Epoch: 003/003 | Batch 000/002 | Train/Val Loss: 0.03\n",
+      "Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.00\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch.nn.functional as F\n",
+    "\n",
+    "\n",
+    "# 设置随机数种子，以确保可复现性\n",
+    "torch.manual_seed(123)\n",
+    "\n",
+    "# 创建神经网络模型\n",
+    "model = NeuralNetwork(num_inputs=2, num_outputs=2)\n",
+    "\n",
+    "# 根据设备可用情况选择设备\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "\n",
+    "# 将模型移动到所选设备上\n",
+    "model = model.to(device)\n",
+    "\n",
+    "# 定义优化器，使用随机梯度下降 (SGD)\n",
+    "optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n",
+    "\n",
+    "# 定义训练循环的 epoch 数量\n",
+    "num_epochs = 3\n",
+    "\n",
+    "for epoch in range(num_epochs):\n",
+    "\n",
+    "    model.train()\n",
+    "    for batch_idx, (features, labels) in enumerate(train_loader):\n",
+    "\n",
+    "        features, labels = features.to(device), labels.to(device) \n",
+    "        logits = model(features)\n",
+    "        loss = F.cross_entropy(logits, labels) # 损失函数\n",
+    "\n",
+    "        optimizer.zero_grad()\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "\n",
+    "        ### 训练日志\n",
+    "        print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",
+    "              f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",
+    "              f\" | Train/Val Loss: {loss:.2f}\")\n",
+    "\n",
+    "    model.eval()\n",
+    "    # 可选的模型评估"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "id": "4qrlmnPPe7FO"
+   },
+   "outputs": [],
+   "source": [
+    "# 使用accuracy（准确率）作为指标\n",
+    "def compute_accuracy(model, dataloader, device):\n",
+    "\n",
+    "    model = model.eval()\n",
+    "    correct = 0.0\n",
+    "    total_examples = 0\n",
+    "\n",
+    "    for idx, (features, labels) in enumerate(dataloader):\n",
+    "        # 将数据移动到指定的设备上\n",
+    "        features, labels = features.to(device), labels.to(device) # New\n",
+    "\n",
+    "        with torch.no_grad():\n",
+    "            logits = model(features)\n",
+    "        # 获取预测结果并计算准确数量\n",
+    "        predictions = torch.argmax(logits, dim=1)\n",
+    "        compare = labels == predictions\n",
+    "        correct += torch.sum(compare)\n",
+    "        total_examples += len(compare)\n",
+    "    # 计算并返回准确率\n",
+    "    return (correct / total_examples).item()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "1_-BfkfEf4HX",
+    "outputId": "473bf21d-5880-4de3-fc8a-051d75315b94"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compute_accuracy(model, train_loader, device=device)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "iYtXKBGEgKss",
+    "outputId": "508edd84-3fb7-4d04-cb23-9df0c3d24170"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "1.0"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "compute_accuracy(model, test_loader, device=device)"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "T4",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

From ec12a321cc4b3cd9ebb0651a8bc36f9018e5da2e Mon Sep 17 00:00:00 2001
From: Beyondzjl <84648701+Beyondzjl@users.noreply.github.com>
Date: Sun, 3 Mar 2024 15:25:26 +0800
Subject: [PATCH 8/8] Add files via upload

---
 .../exercise-solutions.ipynb                  | 171 ++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 appendix-A/03_main-chapter-code/exercise-solutions.ipynb

diff --git a/appendix-A/03_main-chapter-code/exercise-solutions.ipynb b/appendix-A/03_main-chapter-code/exercise-solutions.ipynb
new file mode 100644
index 0000000..4c49997
--- /dev/null
+++ b/appendix-A/03_main-chapter-code/exercise-solutions.ipynb
@@ -0,0 +1,171 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 练习 A.3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "\n",
+    "class NeuralNetwork(torch.nn.Module):\n",
+    "    def __init__(self, num_inputs, num_outputs):\n",
+    "        super().__init__()\n",
+    "\n",
+    "        self.layers = torch.nn.Sequential(\n",
+    "                \n",
+    "            # 第一个隐藏层\n",
+    "            torch.nn.Linear(num_inputs, 30),\n",
+    "            torch.nn.ReLU(),\n",
+    "\n",
+    "            # 第二个隐藏层\n",
+    "            torch.nn.Linear(30, 20),\n",
+    "            torch.nn.ReLU(),\n",
+    "\n",
+    "            # 输出层\n",
+    "            torch.nn.Linear(20, num_outputs),\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        logits = self.layers(x)\n",
+    "        return logits"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Total number of trainable model parameters: 752\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = NeuralNetwork(2, 2)\n",
+    "\n",
+    "num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
+    "print(\"Total number of trainable model parameters:\", num_params)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 练习 A.4"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "id": "qGgnamiyLJxp"
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "# 创建两个随机矩阵（形状分别为 100×200 和 200×300）\n",
+    "a = torch.rand(100, 200)\n",
+    "b = torch.rand(200, 300)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "CvGvIeVkLzXE",
+    "outputId": "44d027be-0787-4348-9c06-4e559d94d0e1"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "63.8 µs ± 8.7 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 使用 @ 符号进行矩阵相乘，并计算执行时间\n",
+    "# %timeit 是 IPython 提供的魔术命令，用于多次执行代码以获取平均执行时间\n",
+    "# 它会自动选择执行次数以确保结果的准确性\n",
+    "%timeit a @ b"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "id": "OmRtZLa9L2ZG"
+   },
+   "outputs": [],
+   "source": [
+    "# 将 a 和 b 移动到 CUDA 设备上以利用 GPU 加速计算\n",
+    "a, b = a.to(\"cuda\"), b.to(\"cuda\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "duLEhXDPL6k0",
+    "outputId": "3486471d-fd62-446f-9855-2d01f41fd101"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "13.8 µs ± 425 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%timeit a @ b"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "V100",
+   "machine_shape": "hm",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}