llms-from-scratch-cn/Model_Architecture_Discussions/openelm/openelm.ipynb
2024-06-01 17:33:19 +08:00

296 lines
11 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "dd05f32c-a90f-4122-b6d7-a5ec7b3b9ba0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"env: HF_ENDPOINT=https://hf-mirror.com\n"
]
}
],
"source": [
"%env HF_ENDPOINT=https://hf-mirror.com"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "54f03217-da8d-4a05-9c85-9e0301a597e7",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# 设置 HF_HOME 环境变量 设置下载路径\n",
"os.environ['HF_HOME'] = '/data1/ckw'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "94cab483-b247-4aa8-9557-d15e459244af",
"metadata": {},
"outputs": [],
"source": [
"# 这个时候由于OpenELM还没有官方发布在transformer所以需要改下源码(已经有了更好的办法,因此不需要改源码了)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e2f3081d-f795-4f86-b80e-e915ae56b426",
"metadata": {},
"outputs": [],
"source": [
"# /data1/ckw/micromamba/envs/kewei-ai/lib/python3.11/site-packages/transformers/models/auto/tokenization_auto.py:909"
]
},
{
"cell_type": "markdown",
"id": "db03e7fd-d06f-4e78-842f-66c8e02043bd",
"metadata": {},
"source": [
"#### 1.3 AutoModelForCausalLM代码\n",
"\n",
"```python\n",
"class AutoModelForCausalLM:\n",
" def __init__(self):\n",
" raise EnvironmentError(\n",
" \"AutoModelForCausalLM is designed to be instantiated \"\n",
" \"using the `AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path)` or \"\n",
" \"`AutoModelForCausalLM.from_config(config)` methods.\"\n",
" )\n",
"\n",
"\t@classmethod\n",
" @replace_list_option_in_docstrings(MODEL_FOR_CAUSAL_LM_MAPPING, use_model_types=False)\n",
" def from_config(cls, config):\n",
"\n",
" if type(config) in MODEL_FOR_CAUSAL_LM_MAPPING.keys():\n",
" return MODEL_FOR_CAUSAL_LM_MAPPING[type(config)](config)\n",
" raise ValueError(\n",
" \"Unrecognized configuration class {} for this kind of AutoModel: {}.\\n\"\n",
" \"Model type should be one of {}.\".format(\n",
" config.__class__, cls.__name__, \", \".join(c.__name__ for c in MODEL_FOR_CAUSAL_LM_MAPPING.keys())\n",
" )\n",
" )\n",
"\n",
"\n",
"\t@classmethod\n",
" @replace_list_option_in_docstrings(MODEL_FOR_CAUSAL_LM_MAPPING)\n",
" @add_start_docstrings(\n",
" \"Instantiate one of the model classes of the library---with a causal language modeling head---from a \"\n",
" \"pretrained model.\",\n",
" AUTO_MODEL_PRETRAINED_DOCSTRING,\n",
" )\n",
" def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):\n",
" config = kwargs.pop(\"config\", None)\n",
" if not isinstance(config, PretrainedConfig):\n",
" config, kwargs = AutoConfig.from_pretrained(\n",
" pretrained_model_name_or_path, return_unused_kwargs=True, **kwargs\n",
" )\n",
"\n",
" if type(config) in MODEL_FOR_CAUSAL_LM_MAPPING.keys():\n",
" return MODEL_FOR_CAUSAL_LM_MAPPING[type(config)].from_pretrained(\n",
" pretrained_model_name_or_path, *model_args, config=config, **kwargs\n",
" )\n",
" raise ValueError(\n",
" \"Unrecognized configuration class {} for this kind of AutoModel: {}.\\n\"\n",
" \"Model type should be one of {}.\".format(\n",
" config.__class__, cls.__name__, \", \".join(c.__name__ for c in MODEL_FOR_CAUSAL_LM_MAPPING.keys())\n",
" )\n",
" )\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "744c6db7-53f9-4911-adcb-4f0618693071",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "7dd376f050c3496b904a5a545f499e07",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/265 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4936fbb98c5446ebb60f4bdb288ddc73",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer.model: 0%| | 0.00/500k [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "080e814bd03542aeb4a9f882c67ed06a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer.json: 0.00B [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d04a2f9f4a57490bb70e88af4ab10008",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"added_tokens.json: 0%| | 0.00/21.0 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6a728b39e23043459b8c2bddef6e8845",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"special_tokens_map.json: 0%| | 0.00/435 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
"Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
]
},
{
"data": {
"text/plain": [
"'\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence. The organization aims to provide AI-related courses to students in China.\\n\\nThis repository contains the code for the following courses:\\n\\n1. [Introduction to AI: Neural Networks and Classification](https://www.datawhalechina.com/courses/introduction-to-ai-neural-networks-and-classification/)\\n2. [Introduction to AI: Deep Learning and Applications](https://www.datawhalechina.com/courses/introduction-to-ai-deep-learning-and-applications/)\\n3. [Introduction to AI: Algorithms and Applications](https://www.datawhalechina.com/courses/introduction-to-ai-algorithms-and-applications/)\\n4. [Introduction to AI: Data Preparation and Model Evaluation](https://www.datawhalechina.com/courses/introduction-to-ai-data-preparation-and-model-evaluation/)\\n5. [Introduction to AI: Building and Evaluating AI Models](https://www.datawhalechina.com/courses/introduction-to-ai-building-and-evaluating-ai'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoTokenizer\n",
"from modeling_openelm import OpenELMForCausalLM\n",
"\n",
"model = OpenELMForCausalLM.from_pretrained(\"Apple/OpenELM-270M-Instruct\")#trust_remote_code=True\n",
"# tokenizer = AutoTokenizer.from_pretrained(\"Apple/OpenELM-270M-Instruct\")Llama-2-7b-hf\n",
"tokenizer = AutoTokenizer.from_pretrained(\"NousResearch/Llama-2-7b-chat-hf\")\n",
"prompt = '\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.'\n",
"inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
"\n",
"# Generate\n",
"generate_ids = model.generate(inputs.input_ids, max_length=300)\n",
"tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
]
},
{
"cell_type": "raw",
"id": "6c0f8954-aca3-496b-86e4-843cdb00b104",
"metadata": {},
"source": [
"上面这个openelm的回复感觉还比较贴合datawhale的实际情况哈速度也是很快的没得说不过链接是编的哈哈"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "060b86f9-fda5-4d9f-8292-4d9464c7b2ef",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
"Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n"
]
},
{
"data": {
"text/plain": [
"\"\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners \\nimprove their Chinese language skills through data-driven learning.\\n\\n## Data\\n\\nThe DataWhalechina platform collects data from various sources, including:\\n\\n1. [China's National Database of Vocabulary and Phrase Structure](https://www.national-database.gov.cn/): This database contains vocabulary and phrase structure definitions for 1,000,000+ Chinese words and phrases.\\n\\n2. [China's National Academic Database of Literature and Culture](https://academic.lib.shu.edu.cn/): This database contains articles, books, and speeches written in Chinese by Chinese scholars.\\n\\n3. [China's National Knowledge Incorporation Database](https://knowledge.cn/): This database contains data on intellectual property rights, patents, and copyrights.\\n\\n4. [China's National Bureau of Statistics](https://www.stat.gov.cn/): This database contains statistics on population, living standards, and purchasing power.\\n\\n5. [China's National Bureau of Census](https://www.census.gov.cn/): This database contains\""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prompt = '\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners '\n",
"inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
"\n",
"# Generate\n",
"generate_ids = model.generate(inputs.input_ids, max_length=300)\n",
"tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
]
},
{
"cell_type": "raw",
"id": "052ab03d-f739-40e5-9f48-e8ab3d0f5f19",
"metadata": {},
"source": [
"如果提示内容给的比较短,可能会在事实上面出一点小问题"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "kewei-ai",
"language": "python",
"name": "kewei-ai"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}