NLP_tutorial/3-NLP_services/src/OpenNRE/Untitled0.ipynb
2025-04-09 09:39:40 +03:30

480 lines
16 KiB
Plaintext

{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "apMnCeBi0kAa",
"executionInfo": {
"status": "ok",
"timestamp": 1668340169194,
"user_tz": -210,
"elapsed": 26751,
"user": {
"displayName": "Mohammad Ebrahimi",
"userId": "10407139745331958037"
}
},
"outputId": "852beb1d-3b81-4f1e-8b41-511ee62a6458"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/drive\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "juTvp3cXy0vT",
"executionInfo": {
"status": "ok",
"timestamp": 1668340312085,
"user_tz": -210,
"elapsed": 2,
"user": {
"displayName": "Mohammad Ebrahimi",
"userId": "10407139745331958037"
}
},
"outputId": "76fc9722-9631-4573-b68c-f2d460ebaf96"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"/content/drive/MyDrive/OpenNRE-master\n"
]
}
],
"source": [
"%cd /content/drive/MyDrive/OpenNRE-master"
]
},
{
"cell_type": "code",
"source": [
"!pip install -r requirements.txt"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Zw-L_bBWy3q6",
"executionInfo": {
"status": "ok",
"timestamp": 1668340295967,
"user_tz": -210,
"elapsed": 119014,
"user": {
"displayName": "Mohammad Ebrahimi",
"userId": "10407139745331958037"
}
},
"outputId": "c73fdd46-9d49-4643-a588-3021e6395c32"
},
"execution_count": 5,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"ERROR: Could not find a version that satisfies the requirement torch==1.6.0 (from versions: 1.7.1, 1.8.0, 1.8.1, 1.9.0, 1.9.1, 1.10.0, 1.10.1, 1.10.2, 1.11.0, 1.12.0, 1.12.1, 1.13.0, 1.13.1)\n",
"ERROR: No matching distribution found for torch==1.6.0\n",
"\n",
"[notice] A new release of pip available: 22.3.1 -> 23.0\n",
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pip in e:\\hamed\\work\\5\\opennre-master\\venv\\lib\\site-packages (22.3.1)\n",
"Collecting pip\n",
" Using cached pip-23.0-py3-none-any.whl (2.1 MB)\n",
"Installing collected packages: pip\n",
" Attempting uninstall: pip\n",
" Found existing installation: pip 22.3.1\n",
" Uninstalling pip-22.3.1:\n",
" Successfully uninstalled pip-22.3.1\n",
"Successfully installed pip-23.0\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# %%shell\n",
"!python train_supervised_bert.py \\\n",
" --pretrain_path HooshvareLab/bert-base-parsbert-uncased \\\n",
" --dataset none \\\n",
" --train_file ./Perlex/Perlex_train.txt \\\n",
" --val_file ./Perlex/Perlex_val.txt \\\n",
" --test_file ./Perlex/Perlex_test.txt \\\n",
" --rel2id_file ./Perlex/Perlex_rel2id.json \\\n",
" --max_epoch 20"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "aEiTLWpPzV3J",
"executionInfo": {
"status": "ok",
"timestamp": 1665308620117,
"user_tz": -210,
"elapsed": 7519388,
"user": {
"displayName": "arian ebrahimi",
"userId": "00418818321983401320"
}
},
"outputId": "90c72478-d09d-4f96-9417-7c2ad805a66b"
},
"execution_count": 3,
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Traceback (most recent call last):\n",
" File \"E:\\Hamed\\Work\\5\\OpenNRE-master\\train_supervised_bert.py\", line 2, in <module>\n",
" import torch\n",
"ModuleNotFoundError: No module named 'torch'\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"%%shell\n",
"python train_supervised_bert.py \\\n",
" --pretrain_path HooshvareLab/bert-base-parsbert-uncased \\\n",
" --dataset none \\\n",
" --train_file ./Perlex/Perlex_train.txt \\\n",
" --val_file ./Perlex/Perlex_val.txt \\\n",
" --test_file ./Perlex/Perlex_test.txt \\\n",
" --rel2id_file ./Perlex/Perlex_rel2id.json \\\n",
" --max_epoch 20"
],
"metadata": {
"id": "zhYjakhv13c8",
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"status": "ok",
"timestamp": 1665318000439,
"user_tz": -210,
"elapsed": 7490397,
"user": {
"displayName": "arian ebrahimi",
"userId": "00418818321983401320"
}
},
"outputId": "d2d9d494-0222-425f-ff3a-50e01b33419f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"100% 242/242 [05:39<00:00, 1.40s/it, acc=0.285, loss=2.38]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.627]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.643, loss=1.2]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.718]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.739, loss=0.848]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.742]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.797, loss=0.675]\n",
"100% 47/47 [00:25<00:00, 1.83it/s, acc=0.742]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.842, loss=0.544]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.752]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.872, loss=0.452]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.742]\n",
"100% 242/242 [05:45<00:00, 1.43s/it, acc=0.901, loss=0.354]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.749]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.926, loss=0.292]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.746]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.941, loss=0.236]\n",
"100% 47/47 [00:25<00:00, 1.81it/s, acc=0.747]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.951, loss=0.195]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.748]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.962, loss=0.162]\n",
"100% 47/47 [00:25<00:00, 1.83it/s, acc=0.746]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.969, loss=0.14]\n",
"100% 47/47 [00:25<00:00, 1.83it/s, acc=0.744]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.974, loss=0.118]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.746]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.978, loss=0.102]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.746]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.981, loss=0.0912]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.74]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.985, loss=0.081]\n",
"100% 47/47 [00:25<00:00, 1.83it/s, acc=0.742]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.986, loss=0.0736]\n",
"100% 47/47 [00:25<00:00, 1.83it/s, acc=0.74]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.986, loss=0.0724]\n",
"100% 47/47 [00:25<00:00, 1.81it/s, acc=0.742]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.99, loss=0.065]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.742]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.99, loss=0.0624]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.739]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.744]\n",
"Test set results:\n",
"Accuracy: 0.7444963308872582\n",
"Micro precision: 0.7831612390786339\n",
"Micro recall: 0.7919678714859437\n",
"Micro F1: 0.7875399361022364\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": []
},
"metadata": {},
"execution_count": 3
}
]
},
{
"cell_type": "code",
"source": [
"%%shell\n",
"python train_supervised_bert.py \\\n",
" --pretrain_path HooshvareLab/bert-base-parsbert-uncased \\\n",
" --dataset none \\\n",
" --train_file ./Perlex/Perlex_train.txt \\\n",
" --val_file ./Perlex/Perlex_val.txt \\\n",
" --test_file ./Perlex/Perlex_test.txt \\\n",
" --rel2id_file ./Perlex/Perlex_rel2id.json \\\n",
" --max_epoch 20 \\\n",
" --lr 15e-6"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "bcdc6f55-f9b9-40b0-a661-777145267ede",
"id": "zUa7CDEeFBGJ"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"100% 242/242 [05:38<00:00, 1.40s/it, acc=0.444, loss=1.84]\n",
"100% 47/47 [00:25<00:00, 1.83it/s, acc=0.704]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.733, loss=0.842]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.753]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.871, loss=0.432]\n",
"100% 47/47 [00:25<00:00, 1.83it/s, acc=0.764]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.942, loss=0.208]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.747]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.979, loss=0.0991]\n",
"100% 47/47 [00:25<00:00, 1.81it/s, acc=0.75]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.991, loss=0.0488]\n",
"100% 47/47 [00:25<00:00, 1.81it/s, acc=0.737]\n",
"100% 242/242 [05:46<00:00, 1.43s/it, acc=0.998, loss=0.0243]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.746]\n",
"100% 242/242 [05:45<00:00, 1.43s/it, acc=0.999, loss=0.0157]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.747]\n",
"100% 242/242 [05:45<00:00, 1.43s/it, acc=1, loss=0.0098]\n",
"100% 47/47 [00:25<00:00, 1.83it/s, acc=0.753]\n",
"100% 242/242 [05:45<00:00, 1.43s/it, acc=0.999, loss=0.00806]\n",
"100% 47/47 [00:25<00:00, 1.82it/s, acc=0.751]\n",
" 65% 157/242 [03:45<02:01, 1.43s/it, acc=0.999, loss=0.00759]"
]
}
]
},
{
"cell_type": "code",
"source": [
"%%shell\n",
"python train_supervised_bert.py \\\n",
" --pretrain_path HooshvareLab/bert-base-parsbert-uncased \\\n",
" --dataset none \\\n",
" --train_file ./Perlex/Perlex_train.txt \\\n",
" --val_file ./Perlex/Perlex_val.txt \\\n",
" --test_file ./Perlex/Perlex_test.txt \\\n",
" --rel2id_file ./Perlex/Perlex_rel2id.json \\\n",
" --max_epoch 20 \\\n",
" --lr 15e-6"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "spUwXoWnPchu",
"outputId": "b1b389c8-c1b5-41bf-ee81-7dfc2cb5f8a0"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Downloading: 100% 654M/654M [00:09<00:00, 66.7MB/s]\n",
"Downloading: 100% 1.22M/1.22M [00:01<00:00, 872kB/s]\n",
"100% 242/242 [05:18<00:00, 1.32s/it, acc=0.444, loss=1.84]\n",
"100% 47/47 [00:23<00:00, 1.99it/s, acc=0.704]\n",
"100% 242/242 [05:29<00:00, 1.36s/it, acc=0.733, loss=0.842]\n",
"100% 47/47 [00:23<00:00, 1.98it/s, acc=0.753]\n",
"100% 242/242 [05:29<00:00, 1.36s/it, acc=0.871, loss=0.432]\n",
"100% 47/47 [00:23<00:00, 1.99it/s, acc=0.764]\n",
"100% 242/242 [05:29<00:00, 1.36s/it, acc=0.942, loss=0.208]\n",
"100% 47/47 [00:23<00:00, 1.99it/s, acc=0.747]\n",
"100% 242/242 [05:28<00:00, 1.36s/it, acc=0.979, loss=0.0991]\n",
"100% 47/47 [00:23<00:00, 1.99it/s, acc=0.75]\n",
"100% 242/242 [05:29<00:00, 1.36s/it, acc=0.991, loss=0.0488]\n",
"100% 47/47 [00:23<00:00, 2.00it/s, acc=0.737]\n",
" 31% 75/242 [01:43<03:47, 1.36s/it, acc=0.998, loss=0.0257]"
]
}
]
},
{
"cell_type": "code",
"source": [
"%%shell\n",
"python train_supervised_bert.py \\\n",
" --pretrain_path HooshvareLab/bert-base-parsbert-uncased \\\n",
" --dataset none \\\n",
" --train_file ./Perlex/Perlex_train.txt \\\n",
" --val_file ./Perlex/Perlex_val.txt \\\n",
" --test_file ./Perlex/Perlex_test.txt \\\n",
" --rel2id_file ./Perlex/Perlex_rel2id.json \\\n",
" --max_epoch 20 \\\n",
" --lr 15e-6"
],
"metadata": {
"id": "_lWuRi4EuuV4",
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"status": "ok",
"timestamp": 1668347083563,
"user_tz": -210,
"elapsed": 441647,
"user": {
"displayName": "Mohammad Ebrahimi",
"userId": "10407139745331958037"
}
},
"outputId": "4080fb9e-cf2c-498c-a412-a8ba1b44ad74"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"100% 101/101 [02:00<00:00, 1.20s/it, acc=0.278, loss=2.4]\n",
"100% 25/25 [00:12<00:00, 2.06it/s, acc=0.541]\n",
"100% 101/101 [02:11<00:00, 1.30s/it, acc=0.667, loss=1.15]\n",
"100% 25/25 [00:12<00:00, 1.97it/s, acc=0.615]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=0.784, loss=0.721]\n",
"100% 25/25 [00:12<00:00, 1.99it/s, acc=0.66]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=0.879, loss=0.422]\n",
"100% 25/25 [00:12<00:00, 2.00it/s, acc=0.656]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=0.947, loss=0.209]\n",
"100% 25/25 [00:12<00:00, 1.98it/s, acc=0.659]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=0.981, loss=0.0976]\n",
"100% 25/25 [00:12<00:00, 1.97it/s, acc=0.655]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=0.99, loss=0.0528]\n",
"100% 25/25 [00:12<00:00, 1.98it/s, acc=0.655]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=0.997, loss=0.0308]\n",
"100% 25/25 [00:12<00:00, 1.96it/s, acc=0.659]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=0.998, loss=0.0199]\n",
"100% 25/25 [00:12<00:00, 1.98it/s, acc=0.657]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=0.999, loss=0.0145]\n",
"100% 25/25 [00:12<00:00, 1.93it/s, acc=0.659]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=1, loss=0.0111]\n",
"100% 25/25 [00:12<00:00, 1.97it/s, acc=0.653]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=1, loss=0.00921]\n",
"100% 25/25 [00:12<00:00, 1.98it/s, acc=0.658]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=1, loss=0.00797]\n",
"100% 25/25 [00:13<00:00, 1.89it/s, acc=0.659]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=1, loss=0.00623]\n",
"100% 25/25 [00:12<00:00, 1.97it/s, acc=0.655]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=1, loss=0.00641]\n",
"100% 25/25 [00:12<00:00, 1.97it/s, acc=0.65]\n",
"100% 101/101 [02:11<00:00, 1.31s/it, acc=1, loss=0.00551]\n",
"100% 25/25 [00:12<00:00, 1.99it/s, acc=0.655]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=1, loss=0.00564]\n",
"100% 25/25 [00:12<00:00, 1.99it/s, acc=0.655]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=1, loss=0.00474]\n",
"100% 25/25 [00:12<00:00, 1.93it/s, acc=0.654]\n",
"100% 101/101 [02:12<00:00, 1.31s/it, acc=0.999, loss=0.00528]\n",
"100% 25/25 [00:12<00:00, 1.95it/s, acc=0.655]\n",
"100% 101/101 [02:11<00:00, 1.31s/it, acc=1, loss=0.00506]\n",
"100% 25/25 [00:12<00:00, 1.98it/s, acc=0.653]\n",
"100% 43/43 [00:21<00:00, 2.04it/s, acc=0.725]\n",
"Test set results:\n",
"Accuracy: 0.7245949926362297\n",
"Micro precision: 0.7602262837249782\n",
"Micro recall: 0.7723253757736517\n",
"Micro F1: 0.7662280701754387\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": []
},
"metadata": {},
"execution_count": 3
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "K8ArBYUg1gmV"
},
"execution_count": null,
"outputs": []
}
]
}