llms-from-scratch-cn/Codes/appendix-A/03_main-chapter-code/code-part2.ipynb
2024-06-10 17:00:23 +08:00

464 lines
11 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "O9i6kzBsZVaZ"
},
"source": [
"# 附件APyTorch的介绍第二部分"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ppbG5d-NZezH"
},
"source": [
"## A.9 使用GPU优化训练性能"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6jH0J_DPZhbn"
},
"source": [
"### A.9.1 在GPU上进行 PyTorch 计算"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "RM7kGhwMF_nO",
"outputId": "ac60b048-b81f-4bb0-90fa-1ca474f04e9a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2.0.1+cu118\n"
]
}
],
"source": [
"import torch\n",
"# 显示PyTorch的版本\n",
"print(torch.__version__)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "OXLCKXhiUkZt",
"outputId": "39fe5366-287e-47eb-cc34-3508d616c4f9"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n"
]
}
],
"source": [
"# 显示PyTorch是否支持GPU\n",
"print(torch.cuda.is_available())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "MTTlfh53Va-T",
"outputId": "f31d8bbe-577f-4db4-9939-02e66b9f96d1"
},
"outputs": [
{
"data": {
"text/plain": [
"tensor([5., 7., 9.])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tensor_1 = torch.tensor([1., 2., 3.])\n",
"tensor_2 = torch.tensor([4., 5., 6.])\n",
"\n",
"print(tensor_1 + tensor_2)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Z4LwTNw7Vmmb",
"outputId": "1c025c6a-e3ed-4c7c-f5fd-86c14607036e"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([5., 7., 9.], device='cuda:0')\n"
]
}
],
"source": [
"# 将两个张量移动到CUDA设备上\n",
"tensor_1 = tensor_1.to(\"cuda\")\n",
"tensor_2 = tensor_2.to(\"cuda\")\n",
"\n",
"print(tensor_1 + tensor_2)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 184
},
"id": "tKT6URN1Vuft",
"outputId": "e6f01e7f-d9cf-44cb-cc6d-46fc7907d5c0"
},
"outputs": [
{
"ename": "RuntimeError",
"evalue": "ignored",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-7-4ff3c4d20fc3>\u001b[0m in \u001b[0;36m<cell line: 2>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mtensor_1\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtensor_1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor_1\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mtensor_2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!"
]
}
],
"source": [
"tensor_1 = tensor_1.to(\"cpu\")\n",
"print(tensor_1 + tensor_2)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "c8j1cWDcWAMf"
},
"source": [
"## A.9.2 单GPU训练"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"id": "GyY59cjieitv"
},
"outputs": [],
"source": [
"X_train = torch.tensor([\n",
" [-1.2, 3.1],\n",
" [-0.9, 2.9],\n",
" [-0.5, 2.6],\n",
" [2.3, -1.1],\n",
" [2.7, -1.5]\n",
"])\n",
"\n",
"y_train = torch.tensor([0, 0, 0, 1, 1])\n",
"\n",
"X_test = torch.tensor([\n",
" [-0.8, 2.8],\n",
" [2.6, -1.6],\n",
"])\n",
"\n",
"y_test = torch.tensor([0, 1])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"id": "v41gKqEJempa"
},
"outputs": [],
"source": [
"from torch.utils.data import Dataset\n",
"\n",
"\n",
"class ToyDataset(Dataset):\n",
" def __init__(self, X, y):\n",
" self.features = X\n",
" self.labels = y\n",
"\n",
" def __getitem__(self, index):\n",
" one_x = self.features[index]\n",
" one_y = self.labels[index]\n",
" return one_x, one_y\n",
"\n",
" def __len__(self):\n",
" return self.labels.shape[0]\n",
"\n",
"train_ds = ToyDataset(X_train, y_train)\n",
"test_ds = ToyDataset(X_test, y_test)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"id": "UPGVRuylep8Y"
},
"outputs": [],
"source": [
"from torch.utils.data import DataLoader\n",
"\n",
"torch.manual_seed(123)\n",
"\n",
"train_loader = DataLoader(\n",
" dataset=train_ds,\n",
" batch_size=2,\n",
" shuffle=True,\n",
" num_workers=1,\n",
" drop_last=True\n",
")\n",
"\n",
"test_loader = DataLoader(\n",
" dataset=test_ds,\n",
" batch_size=2,\n",
" shuffle=False,\n",
" num_workers=1\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"id": "drhg6IXofAXh"
},
"outputs": [],
"source": [
"class NeuralNetwork(torch.nn.Module):\n",
" def __init__(self, num_inputs, num_outputs):\n",
" super().__init__()\n",
"\n",
" self.layers = torch.nn.Sequential(\n",
"\n",
" # 第一个隐藏层\n",
" torch.nn.Linear(num_inputs, 30),\n",
" torch.nn.ReLU(),\n",
"\n",
" # 第二个隐藏层\n",
" torch.nn.Linear(30, 20),\n",
" torch.nn.ReLU(),\n",
"\n",
" # 输出层\n",
" torch.nn.Linear(20, num_outputs),\n",
" )\n",
"\n",
" def forward(self, x):\n",
" logits = self.layers(x)\n",
" return logits"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "7jaS5sqPWCY0",
"outputId": "84c74615-38f2-48b8-eeda-b5912fed1d3a"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch: 001/003 | Batch 000/002 | Train/Val Loss: 0.75\n",
"Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.65\n",
"Epoch: 002/003 | Batch 000/002 | Train/Val Loss: 0.44\n",
"Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.13\n",
"Epoch: 003/003 | Batch 000/002 | Train/Val Loss: 0.03\n",
"Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.00\n"
]
}
],
"source": [
"import torch.nn.functional as F\n",
"\n",
"\n",
"# 设置随机数种子,以确保可复现性\n",
"torch.manual_seed(123)\n",
"\n",
"# 创建神经网络模型\n",
"model = NeuralNetwork(num_inputs=2, num_outputs=2)\n",
"\n",
"# 根据设备可用情况选择设备\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"\n",
"# 将模型移动到所选设备上\n",
"model = model.to(device)\n",
"\n",
"# 定义优化器,使用随机梯度下降 (SGD)\n",
"optimizer = torch.optim.SGD(model.parameters(), lr=0.5)\n",
"\n",
"# 定义训练循环的 epoch 数量\n",
"num_epochs = 3\n",
"\n",
"for epoch in range(num_epochs):\n",
"\n",
" model.train()\n",
" for batch_idx, (features, labels) in enumerate(train_loader):\n",
"\n",
" features, labels = features.to(device), labels.to(device) \n",
" logits = model(features)\n",
" loss = F.cross_entropy(logits, labels) # 损失函数\n",
"\n",
" optimizer.zero_grad()\n",
" loss.backward()\n",
" optimizer.step()\n",
"\n",
" ### 训练日志\n",
" print(f\"Epoch: {epoch+1:03d}/{num_epochs:03d}\"\n",
" f\" | Batch {batch_idx:03d}/{len(train_loader):03d}\"\n",
" f\" | Train/Val Loss: {loss:.2f}\")\n",
"\n",
" model.eval()\n",
" # 可选的模型参数"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"id": "4qrlmnPPe7FO"
},
"outputs": [],
"source": [
"# 使用accuracy准确率作为指标\n",
"def compute_accuracy(model, dataloader, device):\n",
"\n",
" model = model.eval()\n",
" correct = 0.0\n",
" total_examples = 0\n",
"\n",
" for idx, (features, labels) in enumerate(dataloader):\n",
" # 将数据移动到指定的设备上\n",
" features, labels = features.to(device), labels.to(device) # New\n",
"\n",
" with torch.no_grad():\n",
" logits = model(features)\n",
" # 获取预测结果并计算准确数量\n",
" predictions = torch.argmax(logits, dim=1)\n",
" compare = labels == predictions\n",
" correct += torch.sum(compare)\n",
" total_examples += len(compare)\n",
" # 计算并返回准确率\n",
" return (correct / total_examples).item()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1_-BfkfEf4HX",
"outputId": "473bf21d-5880-4de3-fc8a-051d75315b94"
},
"outputs": [
{
"data": {
"text/plain": [
"1.0"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compute_accuracy(model, train_loader, device=device)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iYtXKBGEgKss",
"outputId": "508edd84-3fb7-4d04-cb23-9df0c3d24170"
},
"outputs": [
{
"data": {
"text/plain": [
"1.0"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"compute_accuracy(model, test_loader, device=device)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}