llms-from-scratch-cn/Model_Architecture_Discussions/phi-3/phi-3.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "dd05f32c-a90f-4122-b6d7-a5ec7b3b9ba0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "env: HF_ENDPOINT=https://hf-mirror.com\n"
     ]
    }
   ],
   "source": [
    "%env HF_ENDPOINT=https://hf-mirror.com"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "744c6db7-53f9-4911-adcb-4f0618693071",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3fb1d88bb7d54d8d8681ab3862aa0590",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json:   0%|          | 0.00/476 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "325be298ec084bfb8c18d3fc60f78dc5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors.index.json:   0%|          | 0.00/832 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "05ade4e1f4d248ddbc14fe8c4431d765",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "452ca2bec6cb4fbf8fca383c43c4cc6b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "483d2013cfd748d4a092c33159acaa99",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "68ec36a444a14750b81c4df4168722e5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1098c0ff0d974d5caa88b0010c82ed92",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "generation_config.json:   0%|          | 0.00/172 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "73bd79df1db44f738fbbdc9632f45342",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/546 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cd27d44e6f8143778af56b46f496f2b9",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6247143bea8044b3af8cf4ea30b03ec8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json: 0.00B [00:00, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d8fe960d384c4f49bb71b14d654d268a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3a870f288de84e1f97fcd2b1b2bf3bd5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "special_tokens_map.json:   0%|          | 0.00/143 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "You are not running the flash-attention implementation, expect numerical differences.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.\\n'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from transformers import AutoTokenizer\n",
    "from modeling_phi3 import Phi3ForCausalLM\n",
    "\n",
    "model = Phi3ForCausalLM.from_pretrained(\"microsoft/phi-3-mini-4k-instruct\")\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/phi-3-mini-4k-instruct\")\n",
    "\n",
    "prompt = '\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.'\n",
    "inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
    "\n",
    "# Generate\n",
    "generate_ids = model.generate(inputs.input_ids, max_length=300)\n",
    "tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "060b86f9-fda5-4d9f-8292-4d9464c7b2ef",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial \\nintelligence. We provide a variety of online courses for different skill levels and goals. Our courses are designed to be \\nengaging, interactive, and effective, with a focus on practical application and real-world problem-solving. Whether you're \\na beginner looking to get started in AI or an experienced professional looking to expand your skills, we have something \\nfor everyone.\\n\\nOur courses cover a wide range of topics, including but not limited to:\\n\\n1. Introduction to Artificial Intelligence: Learn the basics of AI, including its history, key concepts, and real-world applications.\\n2. Machine Learning: Explore the fundamentals of machine learning, including supervised and unsupervised learning, and popular \\nalgorithms such as linear regression, decision trees, and neural networks.\\n3. Deep Learning: Dive into the world of deep learning, including neural networks, convolutional neural networks (CNNs), and \\nrecurrent neural networks (RNNs).\\n4. Natural Language Processing (NLP): Learn how to build AI systems that can understand and generate human language, including \\nsentiment analysis, language translation, and chatbots.\\n5. Computer Vision: Discover how to teach computers\""
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prompt = '\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial '\n",
    "inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
    "\n",
    "# Generate\n",
    "generate_ids = model.generate(inputs.input_ids, max_length=300)\n",
    "tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
   ]
  },
  {
   "cell_type": "raw",
   "id": "6c0f8954-aca3-496b-86e4-843cdb00b104",
   "metadata": {},
   "source": [
    "phi3的回复，感觉还比较贴合datawhale的实际情况哈哈"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "kewei-ai",
   "language": "python",
   "name": "kewei-ai"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}