diff --git a/Codes/ch03/01_main-chapter-code/ch03.ipynb b/Codes/ch03/01_main-chapter-code/ch03.ipynb index a08fba0..47dfd0b 100644 --- a/Codes/ch03/01_main-chapter-code/ch03.ipynb +++ b/Codes/ch03/01_main-chapter-code/ch03.ipynb @@ -1288,7 +1288,7 @@ " \n", " # 使用 softmax 函数和缩放因子归一化注意力分数\n", " # 注意这里的 dim=1,表示沿着键向量的维度进行归一化\n", - " attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=1)\n", + " attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=-1)\n", "\n", " # 使用归一化的注意力权重和值向量计算上下文向量\n", " context_vec = attn_weights @ values\n", @@ -1380,7 +1380,7 @@ "keys = sa_v2.W_key(inputs) \n", "attn_scores = queries @ keys.T\n", "# 此处的注意力权重和上一节中的一致\n", - "attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=1)\n", + "attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=-1)\n", "print(attn_weights)" ] }, @@ -1759,7 +1759,7 @@ " attn_scores.masked_fill_( # New, _ ops are in-place\n", " self.mask.bool()[:num_tokens, :num_tokens], -torch.inf)\n", " # 经过 softmax \n", - " attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=1)\n", + " attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=-1)\n", " # 进行 dropout\n", " attn_weights = self.dropout(attn_weights) # New\n", " # 得到最后结果\n", diff --git a/Codes/ch03/01_main-chapter-code/exercise-solutions.ipynb b/Codes/ch03/01_main-chapter-code/exercise-solutions.ipynb index cc47754..fd254eb 100644 --- a/Codes/ch03/01_main-chapter-code/exercise-solutions.ipynb +++ b/Codes/ch03/01_main-chapter-code/exercise-solutions.ipynb @@ -92,7 +92,7 @@ " values = self.W_value(x)\n", " \n", " attn_scores = queries @ keys.T\n", - " attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=1)\n", + " attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=-1)\n", "\n", " context_vec = attn_weights @ values\n", " return context_vec\n", diff --git a/Codes/ch03/01_main-chapter-code/multihead-attention.ipynb b/Codes/ch03/01_main-chapter-code/multihead-attention.ipynb index 9677f78..94afcca 100644 --- a/Codes/ch03/01_main-chapter-code/multihead-attention.ipynb +++ b/Codes/ch03/01_main-chapter-code/multihead-attention.ipynb @@ -230,7 +230,7 @@ " # 使用掩码将未来位置的注意力分数置为负无穷,实现因果自注意力\n", " attn_scores.masked_fill_(self.mask.bool()[:n_tokens, :n_tokens], -torch.inf)\n", " # 归一化注意力分数\n", - " attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=1)\n", + " attn_weights = torch.softmax(attn_scores / keys.shape[-1]**0.5, dim=-1)\n", " # 应用dropout\n", " attn_weights = self.dropout(attn_weights)\n", "\n", diff --git a/README.md b/README.md index 7167311..9a929c7 100644 --- a/README.md +++ b/README.md @@ -149,7 +149,7 @@ | 姓名 | 职责 | 简介 | GitHub | | :-----:| :----------:| :-----------:|:------:| -| 陈可为 | 项目负责人 | 华中科技大学 |[@Ethan-Chen-plus](https://github.com/Ethan-Chen-plus)| +| 陈可为 | 项目负责人 | 中国科学院大学 |[@Ethan-Chen-plus](https://github.com/Ethan-Chen-plus)| | 王训志 | 第2章贡献者 | 南开大学 |[@aJupyter](https://github.com/aJupyter)| | 汪健麟 | 第2章贡献者 | || | Aria | 第2章贡献者 | |[@ariafyy](https://github.com/ariafyy)| diff --git a/Translated_Book/ch02/2.1理解词嵌入.ipynb b/Translated_Book/ch02/2.1理解词嵌入.ipynb index 0bf5450..c2ce205 100644 --- a/Translated_Book/ch02/2.1理解词嵌入.ipynb +++ b/Translated_Book/ch02/2.1理解词嵌入.ipynb @@ -46,7 +46,7 @@ "id": "490fa60b", "metadata": {}, "source": [ - "![fig2.2](https://github.com/datawhalechina/llms-from-scratch-cn/blob/main/Translated_Book/img/fig-2-2.jpg?raw=true)" + "![fig2.2](../img/fig-2-2.jpg)" ] }, { @@ -92,7 +92,7 @@ "id": "92e1e8d6", "metadata": {}, "source": [ - "![fig2.3](https://github.com/datawhalechina/llms-from-scratch-cn/blob/main/Translated_Book/img/fig-2-3.jpg?raw=true)" + "![fig2.3](../img/fig-2-3.jpg)" ] }, { diff --git a/Translated_Book/ch02/2.2文本分词(序列化).ipynb b/Translated_Book/ch02/2.2文本分词(序列化).ipynb index 3ce6a60..9c7b5cf 100644 --- a/Translated_Book/ch02/2.2文本分词(序列化).ipynb +++ b/Translated_Book/ch02/2.2文本分词(序列化).ipynb @@ -32,7 +32,7 @@ "id": "f2df060a", "metadata": {}, "source": [ - "![fig2.4](https://github.com/datawhalechina/llms-from-scratch-cn/blob/main/Translated_Book/img/fig-2-4.jpg?raw=true)" + "![fig2.4](../img/fig-2-4.jpg)" ] }, { @@ -377,7 +377,7 @@ "id": "f85ecff5", "metadata": {}, "source": [ - "![fig2.5](https://github.com/datawhalechina/llms-from-scratch-cn/blob/main/Translated_Book/img/fig-2-5.jpg?raw=true)" + "![fig2.5](../img/fig-2-5.jpg)" ] }, { diff --git a/Translated_Book/ch02/2.3将令牌转换为令牌 ID.ipynb b/Translated_Book/ch02/2.3将令牌转换为令牌 ID.ipynb index 0ad714c..4ee9a41 100644 --- a/Translated_Book/ch02/2.3将令牌转换为令牌 ID.ipynb +++ b/Translated_Book/ch02/2.3将令牌转换为令牌 ID.ipynb @@ -43,7 +43,7 @@ "id": "e843aae2", "metadata": {}, "source": [ - "![fig2.6](https://github.com/datawhalechina/llms-from-scratch-cn/blob/main/Translated_Book/img/fig-2-6.jpg?raw=true)" + "![fig2.6](../img/fig-2-6.jpg)" ] }, { @@ -220,7 +220,7 @@ "id": "187ca144", "metadata": {}, "source": [ - "![fig2.7](https://github.com/datawhalechina/llms-from-scratch-cn/blob/main/Translated_Book/img/fig-2-7.jpg?raw=true)" + "![fig2.7](../img/fig-2-7.jpg)" ] }, { @@ -305,7 +305,7 @@ "id": "cdae01fd", "metadata": {}, "source": [ - "![fig2.8](https://github.com/datawhalechina/llms-from-scratch-cn/blob/main/Translated_Book/img/fig-2-8.jpg?raw=true)" + "![fig2.8](../img/fig-2-8.jpg)" ] }, { diff --git a/Translated_Book/ch02/2.4添加特殊上下文tokens.ipynb b/Translated_Book/ch02/2.4添加特殊上下文tokens.ipynb index 7eb7119..0181c22 100644 --- a/Translated_Book/ch02/2.4添加特殊上下文tokens.ipynb +++ b/Translated_Book/ch02/2.4添加特殊上下文tokens.ipynb @@ -45,7 +45,7 @@ "id": "490fa60b", "metadata": {}, "source": [ - "![fig2.20](https://github.com/datawhalechina/llms-from-scratch-cn/blob/main/Translated_Book/img/fig-2-20.jpg?raw=true)" + "![fig2.20](../img/fig-2-20.png)" ] }, { @@ -70,7 +70,7 @@ "id": "acc76cd1", "metadata": {}, "source": [ - "![fig2.21](https://github.com/datawhalechina/llms-from-scratch-cn/blob/main/Translated_Book/img/fig-2-21.jpg?raw=true)" + "![fig2.21](../img/fig-2-21.png)" ] }, { diff --git a/Translated_Book/ch02/2.6使用滑动窗口进行数据采样.ipynb b/Translated_Book/ch02/2.6使用滑动窗口进行数据采样.ipynb index c8d29f4..a1ba229 100644 --- a/Translated_Book/ch02/2.6使用滑动窗口进行数据采样.ipynb +++ b/Translated_Book/ch02/2.6使用滑动窗口进行数据采样.ipynb @@ -37,7 +37,7 @@ "id": "e843aae2", "metadata": {}, "source": [ - "![fig2.12](https://github.com/Pr04Ark/llms-from-scratch-cn/blob/trans01/Translated_Book/img/fig-2-12.jpg?raw=true)" + "![fig2.12](../img/fig-2-12.jpg)" ] }, { @@ -308,7 +308,7 @@ "id": "70af4d55", "metadata": {}, "source": [ - "![fig2.13](https://github.com/Pr04Ark/llms-from-scratch-cn/blob/trans01/Translated_Book/img/fig-2-13.jpg?raw=true)" + "![fig2.13](../img/fig-2-13.jpg)" ] }, { @@ -535,7 +535,7 @@ "id": "3b942805", "metadata": {}, "source": [ - "![fig2.14](https://github.com/Pr04Ark/llms-from-scratch-cn/blob/trans01/Translated_Book/img/fig-2-14.jpg?raw=true)" + "![fig2.14](../img/fig-2-14.jpg)" ] }, { diff --git a/Translated_Book/ch02/2.7 构建词符嵌入.ipynb b/Translated_Book/ch02/2.7 构建词符嵌入.ipynb index 43f7142..9667439 100644 --- a/Translated_Book/ch02/2.7 构建词符嵌入.ipynb +++ b/Translated_Book/ch02/2.7 构建词符嵌入.ipynb @@ -23,7 +23,7 @@ "id": "e85089aa-8671-4e5f-a2b3-ef252004ee4c", "metadata": {}, "source": [ - "" + "" ] }, { @@ -178,7 +178,7 @@ "id": "f33c2741-bf1b-4c60-b7fd-61409d556646", "metadata": {}, "source": [ - "" + "" ] }, { diff --git a/Translated_Book/ch02/2.文本数据处理.ipynb b/Translated_Book/ch02/2.文本数据处理.ipynb index b78d734..91a0262 100644 --- a/Translated_Book/ch02/2.文本数据处理.ipynb +++ b/Translated_Book/ch02/2.文本数据处理.ipynb @@ -61,7 +61,7 @@ "id": "972f6e5a", "metadata": {}, "source": [ - "![fig2.1](https://github.com/datawhalechina/llms-from-scratch-cn/blob/main/Translated_Book/img/fig-2-1.jpg?raw=true)" + "![fig2.1](../img/fig-2-1.jpg)" ] }, {