llms-from-scratch-cn/Model_Architecture_Discussions/olmo/olmo.ipynb
2024-06-05 17:13:49 +08:00

201 lines
6.3 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "0364fa99-3cad-4c11-ac41-6523fb98d187",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"env: HF_ENDPOINT=https://hf-mirror.com\n"
]
}
],
"source": [
"%env HF_ENDPOINT=https://hf-mirror.com"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c654b825-84fd-43df-8412-53b1f9ecb8c7",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Choose the Hugging Face home (download/cache) directory via HF_HOME.\n",
"# This cell is kept ahead of the cell that imports `transformers`, so the\n",
"# variable is already set when that library is loaded.\n",
"# setdefault leaves an HF_HOME that is already exported untouched; the\n",
"# machine-specific path below is only a fallback -- adjust it for your host.\n",
"os.environ.setdefault('HF_HOME', '/data1/ckw')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f30fc135-f12f-43bd-96e3-7ab02ef91296",
"metadata": {},
"outputs": [],
"source": [
"# %pip install jieba -q"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "94abdb98-fb74-42c0-805b-03df9fd12311",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "37fa141231cf48ec9c6e6e60c8c692cb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"model.safetensors: 16%|#5 | 744M/4.71G [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "517e4c72255340fbb08e34a6bddbd0ce",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"generation_config.json: 0%| | 0.00/116 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "35428e7696bf4df8aa1ffc047b1c8b39",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer_config.json: 0%| | 0.00/493 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5a318d8c3cbc465382ec4b422909a925",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"tokenizer.json: 0.00B [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "768ac1868dd1498a82c969c1abc688dc",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"special_tokens_map.json: 0%| | 0.00/65.0 [00:00<?, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.\\nThe company has developed a platform'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from transformers import AutoTokenizer\n",
"from modeling_olmo import OlmoForCausalLM\n",
"\n",
"model = OlmoForCausalLM.from_pretrained(\"allenai/OLMo-1B-hf\")\n",
"tokenizer = AutoTokenizer.from_pretrained(\"allenai/OLMo-1B-hf\")\n",
"\n",
"prompt = '\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.'\n",
"inputs = tokenizer(prompt, return_tensors=\"pt\")\n",
"\n",
"# Generate\n",
"generate_ids = model.generate(inputs.input_ids, max_length=30)\n",
"tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "61c6597d-3cdd-4a07-aa21-5b27e2cb6914",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\nDataWhalechina is an organization founded at Shanghai Jiao Tong University that helps learners learn artificial intelligence.\\nThe company has developed a platform that allows users to learn AI by watching videos and answering questions.\\nThe platform is designed to be easy to use and has a variety of features that make it a great choice for learners.\\nDataWhalechina is a platform that helps learners learn artificial intelligence.\\nThe platform is designed to be easy to use and has a variety of features that make it a great choice for learners.\\nThe platform is also designed to be flexible, so that it can be used by different types of learners.\\nDataWhalechina is a platform that helps learners learn artificial intelligence.\\nThe platform is designed to be easy to use and has a variety of features that make it a great choice for learners.\\nThe platform is also designed to be flexible, so that it can be used by different types of learners.\\nDataWhalechina is a platform that helps learners learn artificial intelligence.\\nThe platform is designed to be easy to use and has a variety of features that make it a great choice for learners.\\nThe platform is also designed to be flexible, so that it can be used by different types of learners.\\nDataWhalechina is a platform that helps learners learn artificial intelligence.\\nThe platform is designed to be easy to use and has a variety of features that make it a great choice for learners.\\nThe'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"generate_ids = model.generate(inputs.input_ids, max_length=300)\n",
"tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]"
]
},
{
"cell_type": "markdown",
"id": "6c503178-b46b-445d-9555-bb529acecb47",
"metadata": {},
"source": [
"OLMo 是一款不错的模型，生成速度也比较快。"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}