{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Exercises\n", "#### String Manipulation" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "import re\n", "import requests\n", "from bs4 import BeautifulSoup" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Download the English text of Genesis; the timeout keeps a stalled connection from hanging the kernel.\n", "page = requests.get(\"http://www.vatican.va/archive/bible/genesis/documents/bible_genesis_en.html\", timeout=30)\n", "page.raise_for_status()  # fail here on an HTTP error instead of parsing an error page\n", "soup = BeautifulSoup(page.content, 'html.parser')\n", "\n", "# Drop <script> and <style> elements so that get_text() only returns readable text.\n", "for script in soup([\"script\", \"style\"]):\n", "    script.decompose()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "genesis = soup.get_text()\n", "# Collapse every run of consecutive newlines into a single newline.\n", "genesis = re.sub(r'[\\n]+','\\n', genesis)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "The Book of Genesis\n", "THE BOOK\n", " OF GENESIS\n", "1, 2,\n", "3, 4, 5,\n", "6, 7, 8,\n", "9, 10, 11,\n", "12, 13, 14,\n", "15, 16, 17,\n", "18, 19, 20,\n", "21, 22, 23,\n", "24, 25, 26,\n", "27, 28, 29,\n", "30, 31, 32,\n", "33, 34, 35,\n", "36, 37, 38,\n", "39, 40, 41,\n", "42, 43, 44,\n", "45, 46, 47,\n", "48, 49, 50 \n", "Chapter 1\n", "[1:1] In the beginning when God created\n" ] } ], "source": [ "print(genesis[0:300])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Questions:\n", "\n", "\n", "#### 1. How many times does the word 'God' appear (as an isolated word)? " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 2. What are the 5 most common words? " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 3. What are the words that appear only once ([hapaxes](https://en.wikipedia.org/wiki/Hapax_legomenon))? 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }