mirror of
https://github.com/datawhalechina/llms-from-scratch-cn.git
synced 2026-04-25 08:58:17 +08:00
commit
4d81949de8
@ -75,7 +75,7 @@
|
||||
"import tiktoken\n",
|
||||
"\n",
|
||||
"tokenizer = tiktoken.get_encoding(\"gpt2\")\n",
|
||||
"with open(\"/Users/zhihu123/Project/other/llms-from-scratch-cn/ch02/01_main-chapter-code/the-verdict.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
||||
"with open(\"the-verdict.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
||||
" raw_text = f.read()\n",
|
||||
"enc_text = tokenizer.encode(raw_text)\n",
|
||||
"print(len(enc_text))"
|
||||
@ -441,7 +441,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"with open(\"/Users/zhihu123/Project/other/llms-from-scratch-cn/ch02/01_main-chapter-code/the-verdict.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
||||
"with open(\"the-verdict.txt\", \"r\", encoding=\"utf-8\") as f:\n",
|
||||
" raw_text = f.read()\n",
|
||||
" dataloader = create_dataloader_v1(\n",
|
||||
" raw_text, batch_size=1, max_length=4, stride=1, shuffle=False)\n",
|
||||
|
||||
1950
Translated_Book/ch05/5.1 在未标记的数据上进行预训练.ipynb
Normal file
1950
Translated_Book/ch05/5.1 在未标记的数据上进行预训练.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user