llms-from-scratch-cn/Model_Architecture_Discussions/phi-3/phi-3.ipynb
2024-06-01 14:52:18 +08:00

286 lines
8.7 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "dd05f32c-a90f-4122-b6d7-a5ec7b3b9ba0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"env: HF_ENDPOINT=https://hf-mirror.com\n"
]
}
],
"source": [
"%env HF_ENDPOINT=https://hf-mirror.com"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "744c6db7-53f9-4911-adcb-4f0618693071",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3fb1d88bb7d54d8d8681ab3862aa0590",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"config.json: 0%| | 0.00/476 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "325be298ec084bfb8c18d3fc60f78dc5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"model.safetensors.index.json: 0%| | 0.00/832 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "05ade4e1f4d248ddbc14fe8c4431d765",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "452ca2bec6cb4fbf8fca383c43c4cc6b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"model-00001-of-00002.safetensors: 0%| | 0.00/4.97G [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "483d2013cfd748d4a092c33159acaa99",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"model-00002-of-00002.safetensors: 0%| | 0.00/2.67G [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "68ec36a444a14750b81c4df4168722e5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1098c0ff0d974d5caa88b0010c82ed92",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"generation_config.json: 0%| | 0.00/172 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "73bd79df1db44f738fbbdc9632f45342",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/546 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "cd27d44e6f8143778af56b46f496f2b9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer.model: 0%| | 0.00/500k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6247143bea8044b3af8cf4ea30b03ec8",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer.json: 0.00B [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d8fe960d384c4f49bb71b14d654d268a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"added_tokens.json: 0%| | 0.00/293 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "3a870f288de84e1f97fcd2b1b2bf3bd5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"special_tokens_map.json: 0%| | 0.00/143 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
"You are not running the flash-attention implementation, expect numerical differences.\n"
]
},
{
"data": {
"text/plain": [
"'\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.\\n'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoTokenizer\n",
"from modeling_phi3 import Phi3ForCausalLM\n",
"\n",
"# Single source of truth for the checkpoint so the model and tokenizer cannot drift apart.\n",
"MODEL_ID = \"microsoft/phi-3-mini-4k-instruct\"\n",
"\n",
"# Phi3ForCausalLM is imported from modeling_phi3 (presumably the modeling_phi3.py kept\n",
"# alongside this notebook -- verify), not from the transformers package itself.\n",
"model = Phi3ForCausalLM.from_pretrained(MODEL_ID)\n",
"tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)\n",
"\n",
"prompt = '\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.'\n",
"inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
"\n",
"# Generate. Pass the tokenizer's attention mask together with the ids instead of leaving\n",
"# generate() to infer one; max_length caps prompt tokens + newly generated tokens.\n",
"generate_ids = model.generate(\n",
"    inputs.input_ids,\n",
"    attention_mask=inputs.attention_mask,\n",
"    max_length=300,\n",
")\n",
"tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "060b86f9-fda5-4d9f-8292-4d9464c7b2ef",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\"\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial \\nintelligence. We provide a variety of online courses for different skill levels and goals. Our courses are designed to be \\nengaging, interactive, and effective, with a focus on practical application and real-world problem-solving. Whether you're \\na beginner looking to get started in AI or an experienced professional looking to expand your skills, we have something \\nfor everyone.\\n\\nOur courses cover a wide range of topics, including but not limited to:\\n\\n1. Introduction to Artificial Intelligence: Learn the basics of AI, including its history, key concepts, and real-world applications.\\n2. Machine Learning: Explore the fundamentals of machine learning, including supervised and unsupervised learning, and popular \\nalgorithms such as linear regression, decision trees, and neural networks.\\n3. Deep Learning: Dive into the world of deep learning, including neural networks, convolutional neural networks (CNNs), and \\nrecurrent neural networks (RNNs).\\n4. Natural Language Processing (NLP): Learn how to build AI systems that can understand and generate human language, including \\nsentiment analysis, language translation, and chatbots.\\n5. Computer Vision: Discover how to teach computers\""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same sentence as the prompt in the loading cell, but cut off before its final word so\n",
"# the model has to continue it. The trailing space is part of the prompt as written.\n",
"prompt = '\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial '\n",
"inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
"\n",
"# Generate. Relies on `model` and `tokenizer` created in the loading cell above, so that\n",
"# cell must have been run first. The attention mask is passed explicitly alongside the\n",
"# ids; max_length caps prompt tokens + newly generated tokens.\n",
"generate_ids = model.generate(\n",
"    inputs.input_ids,\n",
"    attention_mask=inputs.attention_mask,\n",
"    max_length=300,\n",
")\n",
"tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
]
},
{
"cell_type": "markdown",
"id": "6c0f8954-aca3-496b-86e4-843cdb00b104",
"metadata": {},
"source": [
"Phi-3 的回复感觉还比较贴合 Datawhale 的实际情况,哈哈。"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "kewei-ai",
"language": "python",
"name": "kewei-ai"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}