llms-from-scratch-cn/Model_Architecture_Discussions/phi/phi.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a56ef5b3-a713-4852-a547-86796e4611f6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "env: HF_ENDPOINT=https://hf-mirror.com\n"
     ]
    }
   ],
   "source": [
    "%env HF_ENDPOINT=https://hf-mirror.com"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "fe693620-d5e3-4156-9084-9610bbc6d359",
   "metadata": {},
   "outputs": [],
   "source": [
    "from modeling_phi import PhiForCausalLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "2646666d-b298-4b91-b4fe-ab68b3e420f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoTokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e23c8612-7776-4d37-8923-0de3c27a2070",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ff5a4df0f3ee43ce804aae379d334d7d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json:   0%|          | 0.00/411 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2e17b9fb38054c608c2d8e11f44af008",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors:   0%|          | 0.00/2.84G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d4d141ba9a2a472291fbe68d8a95039d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "generation_config.json:   0%|          | 0.00/74.0 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e69170f35e1648039b8c4c194432090f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/237 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0c70b43439344ce3b078af37281336aa",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "vocab.json: 0.00B [00:00, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "694eb25535f842bc8d2ce3437d5c3a50",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "merges.txt: 0.00B [00:00, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2d4c4995d2354662a883aaae62ffb6a8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json: 0.00B [00:00, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "eb6a91779dd64b52922c1f99a461b873",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "added_tokens.json:   0%|          | 0.00/206 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cec94dcfdab24948ad881e2951d616d3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "'This is an example script .\\n\\n\\n\\nfrom typing import List\\n\\ndef find_most_common_letter(words: List[str'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = PhiForCausalLM.from_pretrained(\"microsoft/phi-1\")\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/phi-1\")\n",
    "\n",
    "prompt = \"This is an example script .\"\n",
    "inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
    "\n",
    "# Generate\n",
    "generate_ids = model.generate(inputs.input_ids, max_length=30)\n",
    "tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f89dd876-c7dd-41e6-9fc3-7f4417beacb1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.\\n\\nThe function takes in two lists:\\n- `artworks`: a list of strings representing the names of artworks\\n- `popularity`: a list of integers representing the popularity of each artwork\\n\\nThe function returns a string that lists the top three most popular artworks in descending order of popularity.\\n\\nIf there are less than three artworks in the'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prompt = '\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.'\n",
    "inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
    "\n",
    "# Generate\n",
    "generate_ids = model.generate(inputs.input_ids, max_length=100)\n",
    "tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "1b8380e9-6ce2-4493-8b9c-4d557a1df936",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1f05d39a84ce45f7b65b9472c91fe311",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "config.json:   0%|          | 0.00/415 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4603ec33e6834b38b8ea2663b7f1f0e5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model.safetensors.index.json:   0%|          | 0.00/1.68k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1894f015353c496ea363a20d76da22fc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c4c5e0877c7c484683872a1a7cb65d0a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f192c8dda6be410098b0e2ed351aed39",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6673a81b9b2b4541aefba462656088c8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "30df242b8add432e84496e4c3f99562c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "955729ff155d454da0bee82400ee0802",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer_config.json:   0%|          | 0.00/459 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1cbc3093bb8d48aaa64a0f989f36ed71",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "vocab.json: 0.00B [00:00, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6eb463245f9943ae86810a78529b6261",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "merges.txt: 0.00B [00:00, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cae16075fa3040899aa2fb0ce2dd0904",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "tokenizer.json: 0.00B [00:00, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f1be0c47664149208251a3a5d207e0d7",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "added_tokens.json:   0%|          | 0.00/206 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9d27adaa3716459a9f8fe06ba9a20764",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.\\n\\nDataWhale is a company that helps people learn about artificial intelligence. It was started by a group of people at Shanghai Jiao Tong University. They wanted to help people learn about AI and how it can be used in different ways.\\n\\nDataWhale has a special program called the DataWhale AI Lab. This program helps people learn about AI by giving them hands-on experience. They also have a special program called the DataWhale AI Lab for Industry, which helps people learn about AI in a real-world setting.\\n\\nDataWhale also has a special program called the DataWhale AI Lab for Education. This program helps teachers learn about AI so they can teach it to their students. They also have a special program called the DataWhale AI Lab for Research, which helps researchers learn about AI and how it can be used in their work.\\n\\nDataWhale is a very important organization because it helps people learn about AI. AI is a very important technology that can be used in many different ways. By learning about AI, people can use it to make their lives better and to solve problems in the world.\\n\\nTopic: <education>\\n\\nPh.D.-level essay:\\n\\nThe existence of DataWhalechina, a non-profit organization founded at Shanghai Jiao Tong University, can be attributed to'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = PhiForCausalLM.from_pretrained(\"microsoft/phi-2\")\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"microsoft/phi-2\")\n",
    "\n",
    "prompt = '\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.'\n",
    "inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
    "\n",
    "# Generate\n",
    "generate_ids = model.generate(inputs.input_ids, max_length=300)\n",
    "tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "730f81bd-f1e3-4373-a745-f01f114d039a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "kewei-ai",
   "language": "python",
   "name": "kewei-ai"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}