{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Verb stem scraper\n",
    "\n",
    "For every verb listed in `Data/verbs_noor.txt` this notebook\n",
    "\n",
    "1. queries `http://search.dadegan.ir/` and caches the returned page under `Data/htmls_noor/`,\n",
    "2. extracts the past and present stem from the cached page,\n",
    "3. writes the collected stems to `Data/verbs_noor.json`.\n",
    "\n",
    "The notebook is meant to be run top to bottom (Restart Kernel → Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from pathlib import Path\n",
    "\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All paths are resolved against the current working directory of the kernel.\n",
    "this_dir_path = Path().resolve()\n",
    "data_path = this_dir_path / \"Data\"\n",
    "input_verb_path = data_path / \"verbs_noor.txt\"\n",
    "htmls_path = data_path / \"htmls_noor\"\n",
    "output_verb_path = data_path / \"verbs_noor.json\"\n",
    "\n",
    "SEARCH_URL = \"http://search.dadegan.ir/\"\n",
    "REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled request blocks the loop forever\n",
    "REFRESH_CACHE = False  # set to True to re-download pages that are already cached\n",
    "\n",
    "# Files are read and written with an explicit encoding so the results do not\n",
    "# depend on the platform's default encoding.\n",
    "ENCODING = \"utf-8\"\n",
    "\n",
    "# The download step writes into this directory, so make sure it exists.\n",
    "htmls_path.mkdir(parents=True, exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the verb list\n",
    "\n",
    "One verb per line; blank lines are ignored."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(input_verb_path, \"r\", encoding=ENCODING) as f:\n",
    "    # Strip only the line terminator: slicing off the last character would\n",
    "    # truncate the final verb when the file does not end with a newline.\n",
    "    lines = [line.rstrip(\"\\n\") for line in f]\n",
    "\n",
    "# A blank line would otherwise become an empty query and a file named \".html\".\n",
    "verbs = [line for line in lines if line]\n",
    "len(verbs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download the result pages\n",
    "\n",
    "Pages that are already present in `Data/htmls_noor/` are skipped unless `REFRESH_CACHE` is set, so re-running the notebook does not repeat every request."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for verb in tqdm(verbs, desc=\"downloading\"):\n",
    "    html_path = htmls_path / (verb + \".html\")\n",
    "    if html_path.exists() and not REFRESH_CACHE:\n",
    "        continue\n",
    "\n",
    "    response = requests.get(SEARCH_URL, params={\"q\": verb}, timeout=REQUEST_TIMEOUT)\n",
    "    # Fail loudly instead of caching an HTTP error page as if it were a result.\n",
    "    response.raise_for_status()\n",
    "    with open(html_path, \"w\", encoding=ENCODING) as f:\n",
    "        f.write(response.text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extract the stems"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_stems(html):\n",
    "    \"\"\"Return ``(past_stem, present_stem)`` parsed from a result page, or ``None``.\n",
    "\n",
    "    The stems are the text of the second and third ``td`` cells with class\n",
    "    ``c3``; the first such cell is skipped (presumably a header cell -- verify\n",
    "    against a real page). ``None`` is returned when the page contains fewer\n",
    "    than three of these cells.\n",
    "    \"\"\"\n",
    "    cells = BeautifulSoup(html, \"html.parser\").find_all(\"td\", {\"class\": \"c3\"})\n",
    "    if len(cells) < 3:\n",
    "        return None\n",
    "    return cells[1].text, cells[2].text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = {}\n",
    "unparsed_verbs = []\n",
    "\n",
    "for verb in tqdm(verbs, desc=\"parsing\"):\n",
    "    with open(htmls_path / (verb + \".html\"), \"r\", encoding=ENCODING) as f:\n",
    "        stems = extract_stems(f.read())\n",
    "\n",
    "    if stems is None:\n",
    "        # Record the verb and keep going instead of stopping mid-run on an\n",
    "        # IndexError that does not say which page was at fault.\n",
    "        unparsed_verbs.append(verb)\n",
    "        continue\n",
    "\n",
    "    past_stem, present_stem = stems\n",
    "    result[verb] = {\"past_stem\": past_stem, \"present_stem\": present_stem}\n",
    "\n",
    "print(f\"parsed {len(result)} verbs; {len(unparsed_verbs)} pages had no stems\")\n",
    "unparsed_verbs[:20]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Save the result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(output_verb_path, \"w\", encoding=ENCODING) as f:\n",
    "    # ensure_ascii=False writes non-ASCII characters as-is instead of \\uXXXX escapes.\n",
    "    json.dump(result, f, ensure_ascii=False, indent=2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}