mirror of
https://github.com/datawhalechina/llms-from-scratch-cn.git
synced 2026-02-19 17:24:43 +08:00
286 lines
8.7 KiB
Plaintext
286 lines
8.7 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "dd05f32c-a90f-4122-b6d7-a5ec7b3b9ba0",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"env: HF_ENDPOINT=https://hf-mirror.com\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"%env HF_ENDPOINT=https://hf-mirror.com"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"id": "744c6db7-53f9-4911-adcb-4f0618693071",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "3fb1d88bb7d54d8d8681ab3862aa0590",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"config.json: 0%| | 0.00/476 [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "325be298ec084bfb8c18d3fc60f78dc5",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"model.safetensors.index.json: 0%| | 0.00/832 [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "05ade4e1f4d248ddbc14fe8c4431d765",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "452ca2bec6cb4fbf8fca383c43c4cc6b",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"model-00001-of-00002.safetensors: 0%| | 0.00/4.97G [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "483d2013cfd748d4a092c33159acaa99",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"model-00002-of-00002.safetensors: 0%| | 0.00/2.67G [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "68ec36a444a14750b81c4df4168722e5",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "1098c0ff0d974d5caa88b0010c82ed92",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"generation_config.json: 0%| | 0.00/172 [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "73bd79df1db44f738fbbdc9632f45342",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"tokenizer_config.json: 0%| | 0.00/546 [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "cd27d44e6f8143778af56b46f496f2b9",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"tokenizer.model: 0%| | 0.00/500k [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "6247143bea8044b3af8cf4ea30b03ec8",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"tokenizer.json: 0.00B [00:00, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "d8fe960d384c4f49bb71b14d654d268a",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"added_tokens.json: 0%| | 0.00/293 [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"data": {
|
||
"application/vnd.jupyter.widget-view+json": {
|
||
"model_id": "3a870f288de84e1f97fcd2b1b2bf3bd5",
|
||
"version_major": 2,
|
||
"version_minor": 0
|
||
},
|
||
"text/plain": [
|
||
"special_tokens_map.json: 0%| | 0.00/143 [00:00<?, ?B/s]"
|
||
]
|
||
},
|
||
"metadata": {},
|
||
"output_type": "display_data"
|
||
},
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
|
||
"You are not running the flash-attention implementation, expect numerical differences.\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.\\n'"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"from transformers import AutoTokenizer\n",
"from modeling_phi3 import Phi3ForCausalLM\n",
"\n",
"# One checkpoint id shared by the model and the tokenizer so the two cannot drift apart.\n",
"MODEL_ID = \"microsoft/phi-3-mini-4k-instruct\"\n",
"\n",
"# Phi3ForCausalLM is taken from the modeling_phi3 module imported above, not from transformers.\n",
"model = Phi3ForCausalLM.from_pretrained(MODEL_ID)\n",
"tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n",
"\n",
"# A complete sentence: this run checks what the model does when there is nothing obvious left to finish.\n",
"prompt = '\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.'\n",
"inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
"\n",
"# Generate. The tokenizer's attention_mask is forwarded together with input_ids instead of being\n",
"# dropped; max_length bounds the prompt plus the newly generated tokens.\n",
"generate_ids = model.generate(inputs.input_ids, attention_mask=inputs.attention_mask, max_length=300)\n",
"\n",
"# Decode the only sequence in the batch; as the last expression it becomes the cell output.\n",
"tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"id": "060b86f9-fda5-4d9f-8292-4d9464c7b2ef",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"\"\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial \\nintelligence. We provide a variety of online courses for different skill levels and goals. Our courses are designed to be \\nengaging, interactive, and effective, with a focus on practical application and real-world problem-solving. Whether you're \\na beginner looking to get started in AI or an experienced professional looking to expand your skills, we have something \\nfor everyone.\\n\\nOur courses cover a wide range of topics, including but not limited to:\\n\\n1. Introduction to Artificial Intelligence: Learn the basics of AI, including its history, key concepts, and real-world applications.\\n2. Machine Learning: Explore the fundamentals of machine learning, including supervised and unsupervised learning, and popular \\nalgorithms such as linear regression, decision trees, and neural networks.\\n3. Deep Learning: Dive into the world of deep learning, including neural networks, convolutional neural networks (CNNs), and \\nrecurrent neural networks (RNNs).\\n4. Natural Language Processing (NLP): Learn how to build AI systems that can understand and generate human language, including \\nsentiment analysis, language translation, and chatbots.\\n5. Computer Vision: Discover how to teach computers\""
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Same sentence as the previous cell, cut just before its last word (trailing space kept on purpose)\n",
"# so the model has to continue the text. Relies on `model` and `tokenizer` created in the previous cell.\n",
"prompt = '\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial '\n",
"inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
"\n",
"# Generate. The tokenizer's attention_mask is forwarded together with input_ids instead of being\n",
"# dropped; max_length bounds the prompt plus the newly generated tokens.\n",
"generate_ids = model.generate(inputs.input_ids, attention_mask=inputs.attention_mask, max_length=300)\n",
"\n",
"# Decode the only sequence in the batch; as the last expression it becomes the cell output.\n",
"tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "raw",
|
||
"id": "6c0f8954-aca3-496b-86e4-843cdb00b104",
|
||
"metadata": {},
|
||
"source": [
|
"Phi-3 的回复，感觉还比较贴合 Datawhale 的实际情况，哈哈。"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "kewei-ai",
|
||
"language": "python",
|
||
"name": "kewei-ai"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.5"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|