diff --git a/Assignments/EN/Assignment_5.ipynb b/Assignments/EN/Assignment_5.ipynb index f842b189..31406937 100644 --- a/Assignments/EN/Assignment_5.ipynb +++ b/Assignments/EN/Assignment_5.ipynb @@ -31,8 +31,10 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, + "execution_count": 157, + "metadata": { + "scrolled": false + }, "outputs": [ { "name": "stdout", @@ -100,35 +102,147 @@ "#### 1. Count words in a text\n", "\n", "a. Output a list of words in the file along with their frequency counts (ignoring case). \n", - "a. Count how many unique words there are (ignoring case). \n", + "b. Count how many unique words there are (ignoring case). \n", "c. Check how common are all different sequences of vowels (e.g. the sequences \"ieu\" or just \"e\" in \"lieutenant\")?" ] }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], + "execution_count": 158, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the 22\n", + "and 13\n", + "in 12\n", + "of 11\n", + "to 11\n", + "a 8\n", + "that 7\n", + "on 5\n", + "projects 5\n", + "from 5\n", + "kbr 4\n", + "said 4\n", + "could 4\n", + "is 4\n", + "customers 4\n", + "or 4\n", + "are 4\n", + "he 4\n", + "much 4\n", + "cents 4\n", + "oil 4\n", + "have 4\n", + "so 3\n", + "has 3\n", + "business 3\n" + ] + } + ], "source": [ - "# a)" + "# a)\n", + "\n", + "text_lc = text.lower()\n", + "words_lc = text_lc.split()\n", + "word_counts = {}\n", + "\n", + "for word in words_lc[:500]:\n", + " if word in word_counts:\n", + " word_counts[word] += 1\n", + " else:\n", + " word_counts[word] = 1\n", + "\n", + "word_counts_list = sorted(word_counts.items(), key=lambda x: x[1], reverse=True)\n", + "\n", + "for word, count in word_counts_list[:25]:\n", + " print(word, count)" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 159, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "326\n" + ] + } + ], "source": [ - "# b)" + "# b)\n", + "\n", + "unique_words = set()\n", + "\n", + "for word in words[:500]:\n", + " unique_words.add(word)\n", + "\n", + "num_unique_words = len(unique_words)\n", + "\n", + "print(num_unique_words)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 149, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a 142\n", + "ai 12\n", + "au 2\n", + "e 277\n", + "ea 12\n", + "ee 6\n", + "ei 1\n", + "eo 1\n", + "i 145\n", + "ia 6\n", + "ie 4\n", + "io 15\n", + "o 128\n", + "oi 6\n", + "oo 5\n", + "ou 13\n", + "u 64\n", + "ua 3\n", + "ue 1\n", + "ui 3\n" + ] + } + ], "source": [ - "# c)" + "# c)\n", + "\n", + "import re\n", + "\n", + "vowel_regex = re.compile('[aeiou]+', re.IGNORECASE)\n", + "vowel_counts = {}\n", + "\n", + "with open(\"../../Data/txt/nyt_200811.txt\", \"r\") as f:\n", + " words = f.read().split()[:500]\n", + " data = ' '.join(words)\n", + "\n", + "for match in vowel_regex.findall(data):\n", + " vowel_sequence = match.lower()\n", + " if vowel_sequence in vowel_counts:\n", + " vowel_counts[vowel_sequence] += 1\n", + " else:\n", + " vowel_counts[vowel_sequence] = 1\n", + "\n", + "for vowel_sequence, count in sorted(vowel_counts.items()):\n", + " print(vowel_sequence, count)" ] }, { @@ -145,38 +259,137 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 150, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\"With the economic outlook remaining uncertain, it is possible\n", + "could be in jeopardy if the headwinds persist into next year.\n", + "customers may cancel or delay projects that are under way,\" said\n", + "had\n", + "KBR said Friday the global economic downturn so far has\n", + "little effect on its business but warned some projects on its books\n", + "that\n", + "Utt, chief executive of the Houston-based engineering and\n", + "William\n" + ] + } + ], "source": [ - "# a)" + "# a)\n", + "\n", + "with open(\"../../Data/txt/nyt_200811.txt\", \"r\") as f:\n", + " lines = f.readlines()[:10] # read first 10 lines\n", + "\n", + "alphabetically_sorted_lines = sorted(lines, key=lambda x: x.lower()[0])\n", + "\n", + "for line in alphabetically_sorted_lines:\n", + " print(line.strip())" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 151, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\"With the economic outlook remaining uncertain, it is possible\n", + "KBR said Friday the global economic downturn so far has\n", + "Utt, chief executive of the Houston-based engineering and\n", + "William\n", + "could be in jeopardy if the headwinds persist into next year.\n", + "customers may cancel or delay projects that are under way,\" said\n", + "had\n", + "little effect on its business but warned some projects on its books\n", + "that\n" + ] + } + ], "source": [ - "# b)" + "# b)\n", + "\n", + "with open(\"../../Data/txt/nyt_200811.txt\", \"r\") as f:\n", + " lines = f.readlines()[:10]\n", + " \n", + "numerically_sorted_lines = sorted(lines, key=lambda line: line[0])\n", + "\n", + "for line in numerically_sorted_lines:\n", + " print(line.strip())" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 154, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "William\n", + "Utt, chief executive of the Houston-based engineering and\n", + "that\n", + "little effect on its business but warned some projects on its books\n", + "KBR said Friday the global economic downturn so far has\n", + "had\n", + "could be in jeopardy if the headwinds persist into next year.\n", + "customers may cancel or delay projects that are under way,\" said\n", + "\"With the economic outlook remaining uncertain, it is possible\n", + "\n" + ] + } + ], "source": [ - "# c)" + "# c)\n", + "with open(\"../../Data/txt/nyt_200811.txt\", \"r\") as f:\n", + " lines = f.readlines()[:10]\n", + "\n", + "alphabetically_sorted_lines_reverse = sorted(lines, key=lambda x: x.lower()[0], reverse=True)\n", + "\n", + "for line in alphabetically_sorted_lines_reverse:\n", + " print(line.strip())" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 153, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "that\n", + "little effect on its business but warned some projects on its books\n", + "had\n", + "could be in jeopardy if the headwinds persist into next year.\n", + "customers may cancel or delay projects that are under way,\" said\n", + "William\n", + "Utt, chief executive of the Houston-based engineering and\n", + "KBR said Friday the global economic downturn so far has\n", + "\"With the economic outlook remaining uncertain, it is possible\n", + "\n" + ] + } + ], "source": [ - "# d)" + "# d)\n", + "\n", + "with open(\"../../Data/txt/nyt_200811.txt\", \"r\") as f:\n", + " lines = f.readlines()[:10]\n", + "\n", + "numerically_sorted_lines_reverse = sorted(lines, key=lambda line: line[0], reverse=True)\n", + "\n", + "for line in numerically_sorted_lines_reverse:\n", + " print(line.strip())" ] }, { @@ -196,61 +409,233 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the 22\n", + "and 13\n", + "in 12\n", + "of 11\n", + "to 11\n", + "a 8\n", + "that 7\n", + "on 5\n", + "projects 5\n", + "from 5\n", + "kbr 4\n", + "said 4\n", + "could 4\n", + "is 4\n", + "customers 4\n", + "or 4\n", + "are 4\n", + "he 4\n", + "much 4\n", + "cents 4\n", + "oil 4\n", + "have 4\n", + "so 3\n", + "has 3\n", + "business 3\n", + "but 3\n", + "be 3\n", + "into 3\n", + "which 3\n", + "industry 3\n", + "more 3\n", + "prices 3\n", + "economic 2\n", + "its 2\n", + "if 2\n", + "next 2\n", + "it 2\n", + "delay 2\n", + "utt, 2\n", + "engineering 2\n", + "government 2\n", + "company's 2\n", + "kbr's 2\n", + "third-quarter 2\n", + "financial 2\n", + "period 2\n", + "2007. 2\n", + "co. 2\n", + "year, 2\n", + "analyst 2\n" + ] + } + ], "source": [ - "# a)" + "# a)\n", + "\n", + "with open(\"../../Data/txt/nyt_200811.txt\", \"r\") as f:\n", + " text = f.read().lower()\n", + "\n", + "words = text.split()\n", + "word_freq = {}\n", + "\n", + "for word in words[:500]:\n", + " if word in word_freq:\n", + " word_freq[word] += 1\n", + " else:\n", + " word_freq[word] = 1\n", + "\n", + "most_common = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:50]\n", + "\n", + "for word, count in most_common:\n", + " print(word, count)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'buzz', 'pizazz', 'jazz'}\n" + ] + } + ], "source": [ - "# b)" + "# b)\n", + "\n", + "text_lc = text.lower()\n", + "words = text_lc.split()\n", + "\n", + "unique_words_ending_in_zz = set()\n", + "\n", + "for word in words:\n", + " if word.lower().endswith(\"zz\"):\n", + " unique_words_ending_in_zz.add(word)\n", + "\n", + "print(unique_words_ending_in_zz)\n" ] }, { "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], + "execution_count": 35, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Lines: 70335\n", + "Words: 509851\n", + "Characters: 3052299\n" + ] + } + ], "source": [ - "# c)" + "# c)\n", + "\n", + "number_lines = text.count(\"\\n\") + 1\n", + "number_words = len(text.split())\n", + "number_characters = len(text)\n", + "\n", + "print(f\"Lines: {number_lines}\")\n", + "print(f\"Words: {number_words}\")\n", + "print(f\"Characters: {number_characters}\")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 31, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6952\n" + ] + } + ], "source": [ - "# d)" + "# d)\n", + "\n", + "uppercase_counter = 0\n", + "\n", + "with open(\"../../Data/txt/nyt_200811.txt\", \"r\") as f:\n", + " for line in f:\n", + " uppercase_words = line.strip().split()\n", + " for word in uppercase_words:\n", + " if word.isupper():\n", + " uppercase_counter += 1\n", + "\n", + "print(uppercase_counter)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "79536\n" + ] + } + ], "source": [ - "# e)" + "# e)\n", + "\n", + "text_lc = text.lower()\n", + "words = text_lc.split()\n", + "four_letter_count = 0\n", + "\n", + "with open(\"../../Data/txt/nyt_200811.txt\", \"r\") as f:\n", + " for word in words:\n", + " if len(word) == 4:\n", + " four_letter_count += 1\n", + "\n", + "print(four_letter_count)" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 36, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3262\n" + ] + } + ], "source": [ - "# f)" + "# f)\n", + "\n", + "no_vowels_set = set()\n", + "\n", + "with open(\"../../Data/txt/nyt_200811.txt\", \"r\") as f:\n", + " for line in f:\n", + " words = line.strip().split()\n", + " for word in words:\n", + " if all(char not in 'aeiouAEIOU' for char in word):\n", + " no_vowels_set.add(word)\n", + "\n", + "no_vowels_count = len(no_vowels_set)\n", + "\n", + "print(no_vowels_count)" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -269,20 +654,30 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 168, "metadata": {}, - "outputs": [], - "source": [ - "# a)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(('of', 'the'), 3091), (('in', 'the'), 2499), (('to', 'the'), 1171), (('on', 'the'), 1082), (('for', 'the'), 882), (('and', 'the'), 803), (('in', 'a'), 751), (('to', 'be'), 709), (('at', 'the'), 675), (('with', 'the'), 561)]\n" + ] + } + ], "source": [ - "# b)" + "# a)\n", + "\n", + "def find_bigrams(input_list):\n", + " return zip(input_list, input_list[1:])\n", + "\n", + "with open(\"../../Data/txt/nyt_200811.txt\", \"r\") as f:\n", + " content = f.read()\n", + " words = content.split()\n", + " bigrams = find_bigrams(words)\n", + " bigram_counts = Counter(bigrams)\n", + " top_10_bigrams = bigram_counts.most_common(10)\n", + " print(top_10_bigrams)" ] }, { @@ -298,11 +693,220 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 57, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Line 1: KBR said Friday the global economic downturn so far has\n", + "Line 6: \"With the economic outlook remaining uncertain, it is\n", + "Line 148: KBR said Friday the global economic downturn so far has\n", + "Line 153: \"With the economic outlook remaining uncertain, it is\n", + "Line 867: the new world economic disorder has been whether sovereign\n", + "Line 906: Britain, who has written an economic history of Abu\n", + "Line 1367: have more economic independence. From her very first\n", + "Line 1398: How do you deal with economic uncertainty in emerging\n", + "Line 2232: spur economic growth. \"Any use of these\n", + "Line 3587: economic issues in the Southland.\n", + "Line 3667: economic reasons - realizing that maybe\n", + "Line 5029: economic issues in the Southland.\n", + "Line 5109: economic reasons -- realizing that maybe\n", + "Line 5301: through the weighty economic theories concerning this latest\n", + "Line 5378: The world is in economic turmoil -- much of it\n", + "Line 5818: more swayed by economic and bread-and-butter issues,\" said Amandi.\n", + "Line 6452: No one disputes that Iceland's economic troubles are\n", + "Line 6473: In a volatile economic climate, in which appearance matters\n", + "Line 6619: describes himself as an economic conservative and said he had\n", + "Line 6927: decades since then, despite periodic economic\n", + "Line 7197: Political and Economic Studies in Washington, said the\n", + "Line 7362: economic turmoil. Far from being isolated\n", + "Line 7376: could freeze Germany's economic system.\n", + "Line 8949: takes on both economic and foreign affairs. What we\n", + "Line 9109: As serious as Iceland's economic situation already was,\n", + "Line 9507: moderate -- and the mounting economic crisis.\n", + "Line 9777: energetically on an economic theme, telling audiences that\n", + "Line 9786: and voters, the shift toward economic\n", + "Line 9914: the only economic bright spot (of sorts) in\n", + "Line 10042: economic turmoil. Far from being isolated\n", + "Line 10056: could freeze Germany's economic system.\n", + "Line 10177: field, but also in the economic and financial fields and especially\n", + "Line 10195: provides to Iraq in security, economic and\n", + "Line 10250: field, but also in the economic and financial fields and especially\n", + "Line 10746: decades since then, despite periodic economic\n", + "Line 10926: describes himself as an economic conservative and said he had\n", + "Line 11566: also that the economic and other crises don't overwhelm\n", + "Line 12113: economic incentive, said Ryan Lamppa, a\n", + "Line 12443: high-end collectibles may withstand the economic\n", + "Line 12739: is best able to handle economic issues.\n", + "Line 12986: the economic downturn. The attendance at Atlanta\n", + "Line 13251: is best able to handle economic issues.\n", + "Line 13766: After eight years of economic policies custom-made for the hedge\n", + "Line 13767: is great to hear the economic interests of the middle\n", + "Line 15238: conference call to extol McCain's economic plans, Sen. Mel\n", + "Line 15765: agility. But facing the worst economic crisis since\n", + "Line 16248: get a two-fer: George Bush's economic policy and Dick Cheney's\n", + "Line 16376: Although McCain's proposed economic plan offers greater tax\n", + "Line 16695: Pressing economic demands will land on the\n", + "Line 16705: \"McCain could not govern on economic or other issues from the\n", + "Line 16730: and yet can't do big economic things because\n", + "Line 17206: appointments were related to economic policy, said Martha Joynt\n", + "Line 17285: been through three years of economic\n", + "Line 17750: phenomenon and the economic crisis that sent McCain's campaign\n", + "Line 18705: most voters, economic concerns fill that slot for\n", + "Line 18791: Congress reconvenes to craft another economic stimulus package. In\n", + "Line 18798: treasury -- (and) appointing an economic\n", + "Line 18917: are deeply concerned about the economic crisis. But what's\n", + "Line 18951: the issue to economic troubles and national security concerns.\n", + "Line 18978: \"Cheap labor is economic cocaine for the business community,\"\n", + "Line 18990: the hardest hit by the economic\n", + "Line 20321: economic crisis, measuring the seconds required\n", + "Line 20900: and economic experts simply \"did not forecast\"\n", + "Line 20907: economic disaster. It's just that the\n", + "Line 20933: home prices and economic confidence, and tended to come\n", + "Line 20956: economic advisory panel of the Federal\n", + "Line 21002: economic events. Behavioral economists are still\n", + "Line 21043: consumers are weighed down with economic worries,\n", + "Line 21409: time. During periods of economic growth, such inefficiencies are\n", + "Line 21420: economic crisis. That's the reason I\n", + "Line 21493: was shriveling the economic outlook.\n", + "Line 21512: The current economic downturn comes as no surprise\n", + "Line 21517: \"Creativity doesn't care about economic downturns,\" Lieberman\n", + "Line 21519: economic downturn, both Apple and Microsoft\n", + "Line 22822: election campaign now dominated by economic\n", + "Line 22927: economic development officials scrambled to attract\n", + "Line 25019: first election during a deep economic crisis since\n", + "Line 25025: during economic slumps and only once, in\n", + "Line 25031: severe economic downturns that affect millions of\n", + "Line 25060: unpopular war, a severe economic crisis and it's unusual for\n", + "Line 26017: site. Other factors included appealing economic terms, access\n", + "Line 28569: economic\n", + "Line 28574: approach that chokes off economic growth.\n", + "Line 28597: noted that Americans want more economic\n", + "Line 28621: the economic uncertainty will lead to a\n", + "Line 28821: middle-class Republicans question their party's economic\n", + "Line 29165: economic\n", + "Line 29247: foreign rivals and the economic downturn now threatening the\n", + "Line 29254: economic engine for the country is\n", + "Line 30076: as tough economic times make it difficult to\n", + "Line 30274: as well as the current economic\n", + "Line 30293: four-decades-old economic embargo. McCain says he would\n", + "Line 31014: the brush fires of the economic crisis.\n", + "Line 31045: an economic stimulus plan for Germany that\n", + "Line 31068: the economic crisis was raising the risk\n", + "Line 31072: the government's need to promote economic growth was\n", + "Line 31667: get blamed for the world's economic\n", + "Line 31957: ECONOMIC SIGNALS\n", + "Line 32589: have contributed to the nation's economic\n", + "Line 32635: East Lansing-based Anderson Economic Group. Senior consultant Ilhan\n", + "Line 32766: economic ruin.\n", + "Line 33279: the evidence,\" arguing that Obama's economic proposals were far\n", + "Line 34060: Trying to capitalize on economic uncertainty, House Democrats\n", + "Line 34084: emphasizing Republican culpability for the economic decline, a\n", + "Line 34119: president and an economic collapse.\n", + "Line 35151: As the economic downturn forces more workers like\n", + "Line 36690: would continue his foreign and economic policies. He\n", + "Line 37376: would continue his foreign and economic policies. He\n", + "Line 37633: on Nov. 14. Given the economic downturn, the election is ending\n", + "Line 37664: the brush fires of the economic crisis.\n", + "Line 37698: an economic stimulus plan for Germany that\n", + "Line 37722: the economic crisis was raising the risk\n", + "Line 37726: the government's need to promote economic growth was\n", + "Line 39448: deftness, Republican missteps and the economic crisis.\n", + "Line 39500: President Bush's unpopularity in threatening economic times\n", + "Line 40508: economic powers need to air their\n", + "Line 40708: Given the global economic meltdown and other crises, it\n", + "Line 42209: But they no longer make economic sense flying\n", + "Line 44382: reaching agreements on transportation and economic deals.\n", + "Line 44408: economic performance has been lackluster, and\n", + "Line 44438: discuss economic cooperation as a way to\n", + "Line 44658: tightened credit markets and an economic slowdown\n", + "Line 44662: a year ago amid \"an economic gauntlet, the likes of\n", + "Line 44739: dislocation, but also from the economic downturn,\" Mamoun Tazi, a\n", + "Line 44836: Weak economic data in Australia were also\n", + "Line 45212: and toward the stability of economic\n", + "Line 45215: number of households falling into economic jeopardy\n", + "Line 45812: problems, particularly during tough economic times when the\n", + "Line 47417: The reality of the economic game is that enormous changes\n", + "Line 47418: happening. When the senators plotted economic strategy, no one\n", + "Line 48536: found that during recessions or economic\n", + "Line 48544: to help stem the local economic\n", + "Line 49666: and an economic slowdown kept consumers away from\n", + "Line 53046: juggernaut of urbanization shifts the economic and demographic\n", + "Line 53146: welcomed the new economic connections. \"It can only help\n", + "Line 53454: use part of their $600 economic stimulus check to\n", + "Line 54112: and an economic slowdown kept consumers away from\n", + "Line 54426: been hit by a sharp economic downturn. And the troubles\n", + "Line 54432: economic times the chill is shocking.\n", + "Line 54451: Madrid, the government announced an economic stimulus program\n", + "Line 54453: summer, the nation's first quarterly economic\n", + "Line 54517: something of an economic miracle for this region, which\n", + "Line 54561: But Alberto Larraz, Aragon's economic minister, said he expected\n", + "Line 54907: higher fuel costs and the economic\n", + "Line 54926: economic slowdown has lowered demand for\n", + "Line 55161: \"continue to shoulder substantial economic burdens.\"\n", + "Line 55185: commensurate with its economic importance, its leadership cannot be\n", + "Line 55805: The economic crisis, for all its pain,\n", + "Line 55811: about the economic crisis on Friday night, she\n", + "Line 55843: American and a woman, an economic\n", + "Line 56696: interaction -- the intercourse of economic life -- had nearly\n", + "Line 56733: any kind of social and economic interchange is\n", + "Line 56737: the brink of the modern economic world the issues become even\n", + "Line 56882: Baumohl, managing director of the Economic Outlook Group, wrote in\n", + "Line 59814: economic era, a political era and\n", + "Line 60077: markets suffering because of serious economic\n", + "Line 60087: The nation's economic problems did not disappear by\n", + "Line 60106: still be looking at real economic problems\n", + "Line 60128: economic ills, people will rally behind\n", + "Line 60192: departed from Washington under ideal economic circumstances.\n", + "Line 60219: are bracing for a worsening economic downturn.\n", + "Line 60229: prepared for a fairly dramatic economic slowdown,\" said\n", + "Line 60255: fallout from an economic crisis originating in the housing\n", + "Line 60393: Weak economic data in Australia were also\n", + "Line 61305: during one of the worst economic conflagrations in a\n", + "Line 61352: economic policy during perilous financial times.\n", + "Line 61354: a renewed battle over which economic\n", + "Line 61513: Weak economic data in Australia were also\n", + "Line 61536: to keep up on global economic affairs. \"After the safety\n", + "Line 61710: economic turmoil.\n", + "Line 61773: economic conditions into their new-car orders.\n", + "Line 63754: their budget even with the economic\n", + "Line 64974: economic issues with his Taiwanese counterpart,\n", + "Line 65002: economic performance has been lackluster, and\n", + "Line 65028: discuss economic cooperation as a way to\n", + "Line 65688: in the middle of an economic downturn.\n", + "Line 65693: The economic downturn is making life tougher\n", + "Line 65727: deteriorating economic outlook and the banks' more\n", + "Line 65769: economic competence between Britain's two major\n", + "Line 65794: cope with \"the economic downturn made in Britain and\n", + "Line 67315: to cope with a growing economic\n", + "Line 67438: as both struggle to overcome economic slowdowns.\n", + "Line 67453: Taiwan's economic growth has lagged behind China's\n", + "Line 67516: channels for cross-straits economic exchanges,\" Chen said on\n", + "Line 68149: economic meltdown, the course of the\n", + "Line 68190: fiscal crisis created by the economic meltdown, then, the\n", + "Line 69164: economic crisis. By Nazila Fathi.\n", + "Line 69171: ECON-RISK (Undated) -- Today's economic turmoil, it seems, is an\n" + ] + } + ], "source": [ - "# a)" + "# a)\n", + "\n", + "def concordance_display(file_path, word):\n", + " with open(\"../../Data/txt/nyt_200811.txt\", \"r\") as f:\n", + " lines = f.readlines()\n", + " for i, line in enumerate(lines):\n", + " words = line.strip().split()\n", + " for j, w in enumerate(words):\n", + " if w.lower() == word.lower():\n", + " start = max(j-5, 0)\n", + " end = min(j+6, len(words))\n", + " context = ' '.join(words[start:end])\n", + " print(f\"Line {i+1}: {context}\")\n", + "\n", + "concordance_display('example.txt', 'economic')" ] }, { @@ -334,10 +938,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 93, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the\n", + "best\n" + ] + } + ], + "source": [ + "# a)\n", + "\n", + "with open(\"../../Data/txt/secret_ec.txt\", \"r\") as secret:\n", + " secret_text = secret.read()\n", + " \n", + "e_words = re.findall(r\"\\b\\w*e\\w*\\b\", secret_text)\n", + "\n", + "word_freq = Counter(e_words)\n", + "\n", + "most_common = word_freq.most_common(2)\n", + "\n", + "for word, count in most_common:\n", + " print(f\"{word}\")" + ] }, { "cell_type": "markdown", @@ -350,10 +977,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 167, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(('part', 'about'), 6), (('unix', 'is'), 4)]\n" + ] + } + ], + "source": [ + "# b)\n", + "\n", + "def find_new_bigrams(words):\n", + " bigrams = []\n", + " for i in range(len(words)-1):\n", + " if words[i+1][-1] in string.ascii_lowercase.translate(str.maketrans('', '', 'aeiou')):\n", + " bigrams.append((words[i], words[i+1]))\n", + " return bigrams\n", + "\n", + "with open(\"../../Data/txt/secret_ec.txt\", \"r\") as secret:\n", + " words = secret.read().lower().split()\n", + "\n", + "bigram_counts = Counter(find_new_bigrams(words))\n", + "common_bigrams = bigram_counts.most_common(2)\n", + "print(common_bigrams)" + ] }, { "cell_type": "markdown", @@ -368,10 +1019,61 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 78, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['every', 'thing']\n" + ] + } + ], + "source": [ + "with open(\"../../Data/txt/secret_ec.txt\", \"r\") as secret:\n", + " words = Counter(secret.read().split())\n", + " five_letter_words = [word for word, count in words.items() if len(word) == 5 and count == 1]\n", + " print(five_letter_words)" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('the', 22), ('best', 11)]\n", + "[(('part', 'about'), 6), (('unix', 'is'), 4)]\n", + "['every', 'thing']\n" + ] + } + ], + "source": [ + " print(most_common_words)\n", + " print(most_common_bigrams)\n", + " print(five_letter_words)" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This is the secret message: \"The best part about unix is everything\".\n" + ] + } + ], + "source": [ + "print('This is the secret message: \"The best part about unix is everything\".')" + ] } ], "metadata": { @@ -390,7 +1092,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.9.13" } }, "nbformat": 4, diff --git a/Assignments/EN/Assignment_6.ipynb b/Assignments/EN/Assignment_6.ipynb index effa8fa8..7a826e6c 100644 --- a/Assignments/EN/Assignment_6.ipynb +++ b/Assignments/EN/Assignment_6.ipynb @@ -39,29 +39,102 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ - "# a)" + "# a)\n", + "\n", + "import numpy as np\n", + "\n", + "def biased_dice():\n", + " probabilities = np.arange(1, 9)\n", + " probabilities = probabilities / probabilities.sum()\n", + " while True:\n", + " yield np.random.choice(8, p=probabilities) + 1" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAGwCAYAAABIC3rIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAob0lEQVR4nO3dfXSU9Z3//9eYhAmEMJpAMmQZIspdIAEFXAhaA+U2GtDCEZWWBaGoKwIxUBXYXVOPBnUrsAstFQ4CGhDPVqF0qxFoBaTcR1IB80WUtBNqQgyGCYEwwXD9/vDnnI0BRJzhSubzfJxzneNc88nk/ak9h6fXXDM4LMuyBAAAYLDr7B4AAADAbgQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIwXafcAzcWFCxf0+eefKzY2Vg6Hw+5xAADAFbAsS6dPn1ZSUpKuu+7S14EIoiv0+eefy+Px2D0GAAC4CqWlperQocMlnyeIrlBsbKykr/8HbdOmjc3TAACAK1FdXS2PxxP4c/xSCKIr9M3bZG3atCGIAABoZr7rdhduqgYAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEi7R4AAAB8N6/Xq8rKSrvHCJm2bduqY8eOtv1+gggAgCbO6/WqW/cUnas9a/coIRPdspWO/L9i26KIIAIAoImrrKzUudqzis+apah4j93jBN35k6U6+b8vq7KykiACAACXFxXvkdPd2e4xwhI3VQMAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwnq1BNH/+fN12222KjY1VQkKC7r33Xh05cqTBGsuylJubq6SkJLVs2VKDBg3S4cOHG6zx+/2aPn262rZtq5iYGI0ePVrHjx9vsKaqqkoTJkyQy+WSy+XShAkTdOrUqVBvEQAANAO2BtG2bds0bdo07d69W5s3b9ZXX32l4cOH68yZM4E1L730khYsWKAlS5Zo3759crvdGjZsmE6fPh1Yk52drfXr12vdunXasWOHampqlJWVpfr6+sCa8ePHq6ioSAUFBSooKFBRUZEmTJhwTfcLAACapkg7f3lBQUGDxytXrlRCQoIKCwt15513yrIsLVq0SPPmzdOYMWMkSatXr1ZiYqLWrl2rRx55RD6fTytWrNDrr7+uoUOHSpLy8/Pl8Xi0ZcsWjRgxQsXFxSooKNDu3bvVv39/SdLy5cuVnp6uI0eOqFu3btd24wAAoElpUvcQ+Xw+SVJcXJwkqaSkROXl5Ro+fHhgjdPpVEZGhnbu3ClJKiws1Pnz5xusSUpKUmpqamDNrl275HK5AjEkSQMGDJDL5Qqs+Ta/36/q6uoGBwAACE9NJogsy1JOTo7uuOMOpaamSpLKy8slSYmJiQ3WJiYmBp4rLy9XixYtdMMNN1x2TUJCQqPfmZCQEFjzbfPnzw/cb+RyueTxeH7YBgEAQJPVZILo8ccf10cffaQ33nij0XMOh6PBY8uyGp37tm+vudj6y73OnDlz5PP5AkdpaemVbAMAADRDTSKIpk+fro0bN+r9999Xhw4dAufdbrckNbqKU1FREbhq5Ha7VVdXp6qqqsuuOXHiRKPf+8UXXzS6+vQNp9OpNm3aNDgAAEB4sjWILMvS448/rrffflt//vOf1alTpwbPd+rUSW63W5s3bw6cq6ur07Zt2zRw4EBJUt++fRUVFdVgTVlZmQ4dOhRYk56eLp/Pp7179wbW7NmzRz6fL7AGAACYy9ZPmU2bNk1r167V73//e8XGxgauBLlcLrVs2VIOh0PZ2dnKy8tTly5d1KVLF+Xl5alVq1YaP358YO2UKVM0a9YsxcfHKy4uTrNnz1ZaWlrgU2cpKSkaOXKkpk6dqldeeUWS9PDDDysrK4tPmAFAmPB6vaqsrLR7jJAoLi62e4SwZ2sQLV26VJI0aNCgBudXrlypSZMmSZKefPJJ1dbW6rHHHlNVVZX69++vTZs2KTY2NrB+4cKFioyM1Lhx41RbW6shQ4Zo1apVioiICKxZs2aNZsyYEfg02ujRo7VkyZLQbhAAcE14vV51656ic7Vn7R4FzZTDsizL7iGag+rqarlcLvl8Pu4nAoAm5sMPP1Tfvn0VnzVLUfHh96ng2mP75fsgX+6Ji+R0d7Z7nKDzl3+q8tXZKiwsVJ8+fYL62lf657etV4gAAAimqHhPWAbD+ZN80jnUmsSnzAAAAOxEEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADBepN0DAABCz+v1qrKy0u4xQqa4uNjuEdDMEUQAEOa8Xq+6dU/Rudqzdo8CNFkEEQCEucrKSp2rPav4rFmKivfYPU5I1B7bL98H+XaPgWaMIAIAQ0TFe+R0d7Z7jJA4f7LU7hHQzHFTNQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMZ2sQbd++XaNGjVJSUpIcDoc2bNjQ4PlJkybJ4XA0OAYMGNBgjd/v1/Tp09W2bVvFxMRo9OjROn78eIM1VVVVmjBhglwul1wulyZMmKBTp06FeHcAAKC5sDWIzpw5o969e2vJkiWXXDNy5EiVlZUFjnfeeafB89nZ2Vq/fr3WrVunHTt2qKamRllZWaqvrw+sGT9+vIqKilRQUKCCggIVFRVpwoQJIdsXAABoXiLt/OWZmZnKzMy87Bqn0ym3233R53w+n1asWKHXX39dQ4cOlSTl5+fL4/Foy5YtGjFihIqLi1VQUKDdu3erf//+kqTly5crPT1dR44cUbdu3YK7KQAA0Ow0+XuItm7dqoSEBHXt2lVTp05VRUVF4LnCwkKdP39ew4cPD5xLSkpSamqqdu7cKUnatWuXXC5XIIYkacCAAXK5XIE1F+P3+1VdXd3gAAAA4alJB1FmZqbWrFmjP//5z3r55Ze1b98+/fjHP5bf75cklZeXq0WLFrrhhhsa/FxiYqLKy8sDaxISEhq9dkJCQmDNxcyfPz9wz5HL5ZLH4wnizgAAQFNi61tm3+X+++8P/HNqaqr69eun5ORk/fGPf9SYMWMu+XOWZcnhcAQe/99/vtSab5szZ45ycnICj6urq4kiAADCVJO+QvRt7du3V3Jyso4ePSpJcrvdqqurU1VVVYN1FRUVSkxMDKw5ceJEo9f64osvAmsuxul0qk2bNg0OAAAQnppVEJ08eVKlpaVq3769JKlv376KiorS5s2bA2vKysp06NAhDRw4UJKUnp4un8+nvXv3Btbs2bNHPp8vsAYAAJjN1rfMampq9OmnnwYel5SUqKioSHFxcYqLi1Nubq7Gjh2r9u3b629/+5vmzp2rtm3b6ic/+YkkyeVyacqUKZo1a5bi4+MVFxen2bNnKy0tLfCps5SUFI0cOVJTp07VK6+8Ikl6+OGHlZWVxSfMAACAJJuDaP/+/Ro8eHDg8Tf37EycOFFLly7VwYMH9dprr+nUqVNq3769Bg8erDfffFOxsbGBn1m4cKEiIyM1btw41dbWasiQIVq1apUiIiICa9asWaMZM2YEPo02evToy373EQAAMIutQTRo0CBZlnXJ5997773vfI3o6GgtXrxYixcvvuSauLg45efnX9WMAAAg/DWre4gAAABCgSACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMZr0n+XGQBcK16vV5WVlXaPERLFxcV2jwA0eQQRAON5vV51656ic7Vn7R4FgE0IIgDGq6ys1Lnas4rPmqWoeI/d4wRd7bH98n3Al9MCl0MQAcD/LyreI6e7s91jBN35k6V2jwA0edxUDQAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjXVUQFRQUaMeOHYHHv/71r3XLLbdo/PjxqqqqCtpwAAAA18JVBdEvfvELVVdXS5IOHjyoWbNm6a677tKxY8eUk5MT1AEBAABCLfJqfqikpEQ9evSQJL311lvKyspSXl6ePvzwQ911111BHRAAACDUruoKUYsWLXT27FlJ0pYtWzR8+HBJUlxcXODKEQAAQHNxVVeI7rjjDuXk5Oj222/X3r179eabb0qSPvnkE3Xo0CGoAwIAAITaVV0hWrJkiSIjI/W73/1OS5cu1T/90z9Jkt59912NHDkyqAMCAACE2lVdIerYsaP+93//t9H5hQsX/uCBAAAArrUrDqLvc29QmzZtrmoYAAAAO1xxEF1//fVyOByXXWNZlhwOh+rr63/wYAAAANfKFQfR+++/H8o5AAAAbHPFQZSRkRHKOQA0YV6vV5WVlXaPETLFxcV2jwDAZlccRB999NEVv2ivXr2uahgATY/X61W37ik6V3vW7lEAIGSuOIhuueUWORwOWZZ12XXcQwSEl8rKSp2rPav4rFmKivfYPU5I1B7bL98H+XaPAcBGVxxEJSUloZwDQBMXFe+R093Z7jFC4vzJUrtHAGCzKw6i5OTkUM4BAABgm6v6YkZJ+uyzz7Ro0SIVFxfL4XAoJSVFM2fO1M033xzM+QAAAELuqv7qjvfee089evTQ3r171atXL6WmpmrPnj3q2bOnNm/eHOwZAQAAQuqqrhA9/fTTeuKJJ/TCCy80Ov/UU09p2LBhQRkOAADgWriqK0TFxcWaMmVKo/OTJ0/Wxx9//IOHAgAAuJauKojatWunoqKiRueLioqUkJDwQ2cCAAC4pq7qLbOpU6fq4Ycf1rFjxzRw4EA5HA7t2LFDL7zwgmbPnh3sGQEAAELqqoLo3//93xUbG6uXX35Zc+bMkSQlJSXp2Wef1U9+8pOgDggAABBqV/WWmcPh0BNPPKHjx4/L5/PJ5/Np3759Onr0qLp27RrsGQEAAELqewXRqVOn9NOf/lTt2rVTUlKS/vu//1sxMTH61a9+pc6dO2v37t169dVXQzUrAABASHyvt8zmzp2r7du3a+LEiSooKNATTzyhgoICnTt3Tu+8844yMjJCNScAAEDIfK8g+uMf/6iVK1dq6NCheuyxx9S5c2d17dpVixYtCtF4AAAAofe93jL7/PPP1aNHD0nSTTfdpOjoaP385z8PyWAAAADXyvcKogsXLigqKirwOCIiQjExMUEfCgAA4Fr6Xm+ZWZalSZMmyel0SpLOnTunRx99tFEUvf3228GbEAAAIMS+VxBNnDixweOf/exnQR0GAADADt8riFauXBnUX759+3b953/+pwoLC1VWVqb169fr3nvvDTxvWZZ++ctfatmyZaqqqlL//v3161//Wj179gys8fv9mj17tt544w3V1tZqyJAh+s1vfqMOHToE1lRVVWnGjBnauHGjJGn06NFavHixrr/++qDuBwAANE9X9cWMwXLmzBn17t1bS5YsuejzL730khYsWKAlS5Zo3759crvdGjZsmE6fPh1Yk52drfXr12vdunXasWOHampqlJWVpfr6+sCa8ePHq6ioSAUFBSooKFBRUZEmTJgQ8v0BAIDm4ar+6o5gyczMVGZm5kWfsyxLixYt0rx58zRmzBhJ0urVq5WYmKi1a9fqkUcekc/n04oVK/T6669r6NChkqT8/Hx5PB5t2bJFI0aMUHFxsQoKCrR79271799fkrR8+XKlp6fryJEj6tat27XZLAAAaLJsvUJ0OSUlJSovL9fw4cMD55xOpzIyMrRz505JUmFhoc6fP99gTVJSklJTUwNrdu3aJZfLFYghSRowYIBcLldgzcX4/X5VV1c3OAAAQHhqskFUXl4uSUpMTGxwPjExMfBceXm5WrRooRtuuOGyaxISEhq9fkJCQmDNxcyfP18ulytweDyeH7QfAADQdDXZIPqGw+Fo8NiyrEbnvu3bay62/rteZ86cOYG/uNbn86m0tPR7Tg4AAJqLJhtEbrdbkhpdxamoqAhcNXK73aqrq1NVVdVl15w4caLR63/xxReNrj79X06nU23atGlwAACA8NRkg6hTp05yu93avHlz4FxdXZ22bdumgQMHSpL69u2rqKioBmvKysp06NChwJr09HT5fD7t3bs3sGbPnj3y+XyBNQAAwGy2fsqspqZGn376aeBxSUmJioqKFBcXp44dOyo7O1t5eXnq0qWLunTpory8PLVq1Urjx4+XJLlcLk2ZMkWzZs1SfHy84uLiNHv2bKWlpQU+dZaSkqKRI0dq6tSpeuWVVyRJDz/8sLKysviEGQAAkGRzEO3fv1+DBw8OPM7JyZH09Tdir1q1Sk8++aRqa2v12GOPBb6YcdOmTYqNjQ38zMKFCxUZGalx48YFvphx1apVioiICKxZs2aNZsyYEfg02ujRoy/53UcAAMA8tgbRoEGDZFnWJZ93OBzKzc1Vbm7uJddER0dr8eLFWrx48SXXxMXFKT8//4eMCgAAwliTvYcIAADgWiGIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMaLtHsAIBx4vV5VVlbaPUZIFBcX2z0CAIQcQQT8QF6vV926p+hc7Vm7RwEAXCWCCPiBKisrda72rOKzZikq3mP3OEFXe2y/fB/k2z0GAIQUQQQESVS8R053Z7vHCLrzJ0vtHgEAQo6bqgEAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYLxIuweAGbxeryorK+0eIySKi4vtHgEA8AMRRAg5r9erbt1TdK72rN2jAABwUQQRQq6yslLnas8qPmuWouI9do8TdLXH9sv3Qb7dYwAAfgCCCNdMVLxHTndnu8cIuvMnS+0eAQDwA3FTNQAAMB5BBAAAjEcQAQAA4xFEAADAeE06iHJzc+VwOBocbrc78LxlWcrNzVVSUpJatmypQYMG6fDhww1ew+/3a/r06Wrbtq1iYmI0evRoHT9+/FpvBQAANGFNOogkqWfPniorKwscBw8eDDz30ksvacGCBVqyZIn27dsnt9utYcOG6fTp04E12dnZWr9+vdatW6cdO3aopqZGWVlZqq+vt2M7AACgCWryH7uPjIxscFXoG5ZladGiRZo3b57GjBkjSVq9erUSExO1du1aPfLII/L5fFqxYoVef/11DR06VJKUn58vj8ejLVu2aMSIEZf8vX6/X36/P/C4uro6yDsDAABNRZO/QnT06FElJSWpU6dOeuCBB3Ts2DFJUklJicrLyzV8+PDAWqfTqYyMDO3cuVOSVFhYqPPnzzdYk5SUpNTU1MCaS5k/f75cLlfg8HjC7wsFAQDA15p0EPXv31+vvfaa3nvvPS1fvlzl5eUaOHCgTp48qfLycklSYmJig59JTEwMPFdeXq4WLVrohhtuuOSaS5kzZ458Pl/gKC3ly/cAAAhXTfots8zMzMA/p6WlKT09XTfffLNWr16tAQMGSJIcDkeDn7Esq9G5b7uSNU6nU06n8yonBwAAzUmTvkL0bTExMUpLS9PRo0cD9xV9+0pPRUVF4KqR2+1WXV2dqqqqLrkGAACgWQWR3+9XcXGx2rdvr06dOsntdmvz5s2B5+vq6rRt2zYNHDhQktS3b19FRUU1WFNWVqZDhw4F1gAAADTpt8xmz56tUaNGqWPHjqqoqNBzzz2n6upqTZw4UQ6HQ9nZ2crLy1OXLl3UpUsX5eXlqVWrVho/frwkyeVyacqUKZo1a5bi4+MVFxen2bNnKy0tLfCpMwAAgCYdRMePH9eDDz6oyspKtWvXTgMGDNDu3buVnJwsSXryySdVW1urxx57TFVVVerfv782bdqk2NjYwGssXLhQkZGRGjdunGprazVkyBCtWrVKERERdm0LAAA0MU06iNatW3fZ5x0Oh3Jzc5Wbm3vJNdHR0Vq8eLEWL14c5OkAAEC4aFb3EAEAAIQCQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHgEEQAAMB5BBAAAjEcQAQAA4xFEAADAeAQRAAAwHkEEAACMRxABAADjEUQAAMB4BBEAADAeQQQAAIxHEAEAAOMRRAAAwHiRdg8Ayev1qrKy0u4xQqa4uNjuEQAAuCyCyGZer1fduqfoXO1Zu0cBAMBYBJHNKisrda72rOKzZikq3mP3OCFRe2y/fB/k2z0GAACXRBA1EVHxHjndne0eIyTOnyy1ewQAAC6Lm6oBAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIAAMYjiAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8o4LoN7/5jTp16qTo6Gj17dtXH3zwgd0jAQCAJsCYIHrzzTeVnZ2tefPm6cCBA/rRj36kzMxMeb1eu0cDAAA2MyaIFixYoClTpujnP/+5UlJStGjRInk8Hi1dutTu0QAAgM0i7R7gWqirq1NhYaGefvrpBueHDx+unTt3XvRn/H6//H5/4LHP55MkVVdXB3W2mpqar39f+ae6UHcuqK/dVJw/WSopfPfI/pq/cN9juO9PCv89hv3+vjwu6es/E4P95+w3r2dZ1uUXWgb4xz/+YUmy/vKXvzQ4//zzz1tdu3a96M8888wzliQODg4ODg6OMDhKS0sv2wpGXCH6hsPhaPDYsqxG574xZ84c5eTkBB5fuHBBX375peLj4y/5M1ejurpaHo9HpaWlatOmTdBetykJ9z2yv+Yv3PcY7vuTwn+P4b6/ULIsS6dPn1ZSUtJl1xkRRG3btlVERITKy8sbnK+oqFBiYuJFf8bpdMrpdDY4d/3114dqRLVp0ybs/08e7ntkf81fuO8x3Pcnhf8ew31/oeJyub5zjRE3Vbdo0UJ9+/bV5s2bG5zfvHmzBg4caNNUAACgqTDiCpEk5eTkaMKECerXr5/S09O1bNkyeb1ePfroo3aPBgAAbGZMEN1///06efKknn32WZWVlSk1NVXvvPOOkpOTbZ3L6XTqmWeeafT2XDgJ9z2yv+Yv3PcY7vuTwn+P4b6/psBhWd/1OTQAAIDwZsQ9RAAAAJdDEAEAAOMRRAAAwHgEEQAAMB5BZJPt27dr1KhRSkpKksPh0IYNG+weKajmz5+v2267TbGxsUpISNC9996rI0eO2D1WUC1dulS9evUKfFFaenq63n33XbvHCpn58+fL4XAoOzvb7lGCIjc3Vw6Ho8HhdrvtHivo/vGPf+hnP/uZ4uPj1apVK91yyy0qLCy0e6yguPHGGxv9O3Q4HJo2bZrdowXFV199pX/7t39Tp06d1LJlS91000169tlndeHCBbtHC0vGfOy+qTlz5ox69+6thx56SGPHjrV7nKDbtm2bpk2bpttuu01fffWV5s2bp+HDh+vjjz9WTEyM3eMFRYcOHfTCCy+oc+fOkqTVq1frnnvu0YEDB9SzZ0+bpwuuffv2admyZerVq5fdowRVz549tWXLlsDjiIgIG6cJvqqqKt1+++0aPHiw3n33XSUkJOizzz4L6bfuX0v79u1TfX194PGhQ4c0bNgw3XfffTZOFTwvvviifvvb32r16tXq2bOn9u/fr4ceekgul0szZ860e7ywQxDZJDMzU5mZmXaPETIFBQUNHq9cuVIJCQkqLCzUnXfeadNUwTVq1KgGj59//nktXbpUu3fvDqsgqqmp0U9/+lMtX75czz33nN3jBFVkZGRYXhX6xosvviiPx6OVK1cGzt144432DRRk7dq1a/D4hRde0M0336yMjAybJgquXbt26Z577tHdd98t6et/d2+88Yb2799v82ThibfMcE34fD5JUlxcnM2ThEZ9fb3WrVunM2fOKD093e5xgmratGm6++67NXToULtHCbqjR48qKSlJnTp10gMPPKBjx47ZPVJQbdy4Uf369dN9992nhIQE3XrrrVq+fLndY4VEXV2d8vPzNXny5KD+Bdx2uuOOO/SnP/1Jn3zyiSTpr3/9q3bs2KG77rrL5snCE1eIEHKWZSknJ0d33HGHUlNT7R4nqA4ePKj09HSdO3dOrVu31vr169WjRw+7xwqadevW6cMPP9S+ffvsHiXo+vfvr9dee01du3bViRMn9Nxzz2ngwIE6fPiw4uPj7R4vKI4dO6alS5cqJydHc+fO1d69ezVjxgw5nU79y7/8i93jBdWGDRt06tQpTZo0ye5Rguapp56Sz+dT9+7dFRERofr6ej3//PN68MEH7R4tLBFECLnHH39cH330kXbs2GH3KEHXrVs3FRUV6dSpU3rrrbc0ceJEbdu2LSyiqLS0VDNnztSmTZsUHR1t9zhB93/fsk5LS1N6erpuvvlmrV69Wjk5OTZOFjwXLlxQv379lJeXJ0m69dZbdfjwYS1dujTsgmjFihXKzMxUUlKS3aMEzZtvvqn8/HytXbtWPXv2VFFRkbKzs5WUlKSJEyfaPV7YIYgQUtOnT9fGjRu1fft2dejQwe5xgq5FixaBm6r79eunffv26b/+67/0yiuv2DzZD1dYWKiKigr17ds3cK6+vl7bt2/XkiVL5Pf7w+om5JiYGKWlpeno0aN2jxI07du3bxTnKSkpeuutt2yaKDT+/ve/a8uWLXr77bftHiWofvGLX+jpp5/WAw88IOnrcP/73/+u+fPnE0QhQBAhJCzL0vTp07V+/Xpt3bpVnTp1snuka8KyLPn9frvHCIohQ4bo4MGDDc499NBD6t69u5566qmwiiFJ8vv9Ki4u1o9+9CO7Rwma22+/vdHXXXzyySe2/6XWwfbNhza+ufk4XJw9e1bXXdfwVt+IiAg+dh8iBJFNampq9OmnnwYel5SUqKioSHFxcerYsaONkwXHtGnTtHbtWv3+979XbGysysvLJUkul0stW7a0ebrgmDt3rjIzM+XxeHT69GmtW7dOW7dubfQJu+YqNja20T1fMTExio+PD4t7wWbPnq1Ro0apY8eOqqio0HPPPafq6uqw+i/vJ554QgMHDlReXp7GjRunvXv3atmyZVq2bJndowXNhQsXtHLlSk2cOFGRkeH1R9qoUaP0/PPPq2PHjurZs6cOHDigBQsWaPLkyXaPFp4s2OL999+3JDU6Jk6caPdoQXGxvUmyVq5cafdoQTN58mQrOTnZatGihdWuXTtryJAh1qZNm+weK6QyMjKsmTNn2j1GUNx///1W+/btraioKCspKckaM2aMdfjwYbvHCro//OEPVmpqquV0Oq3u3btby5Yts3ukoHrvvfcsSdaRI0fsHiXoqqurrZkzZ1odO3a0oqOjrZtuusmaN2+e5ff77R4tLDksy7LsSTEAAICmge8hAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABiPIAIQ9hwOhzZs2HDJ5//2t7/J4XCoqKjoms0EoGkhiAA0exUVFXrkkUfUsWNHOZ1Oud1ujRgxQrt27ZIklZWVKTMz0+YpATRl4fU34QEw0tixY3X+/HmtXr1aN910k06cOKE//elP+vLLLyVJbrfb5gkBNHVcIQLQrJ06dUo7duzQiy++qMGDBys5OVn//M//rDlz5ujuu++W1Pgts7179+rWW29VdHS0+vXrpwMHDjR63Y8//lh33XWXWrdurcTERE2YMEGVlZWB53/3u98pLS1NLVu2VHx8vIYOHaozZ86EfL8AQoMgAtCstW7dWq1bt9aGDRvk9/u/c/2ZM2eUlZWlbt26qbCwULm5uZo9e3aDNWVlZcrIyNAtt9yi/fv3q6CgQCdOnNC4ceMCzz/44IOaPHmyiouLtXXrVo0ZM0b8XdlA88VbZgCatcjISK1atUpTp07Vb3/7W/Xp00cZGRl64IEH1KtXr0br16xZo/r6er366qtq1aqVevbsqePHj+tf//VfA2uWLl2qPn36KC8vL3Du1Vdflcfj0SeffKKamhp99dVXGjNmjJKTkyVJaWlpod8sgJDhChGAZm/s2LH6/PPPtXHjRo0YMUJbt25Vnz59tGrVqkZri4uL1bt3b7Vq1SpwLj09vcGawsJCvf/++4GrT61bt1b37t0lSZ999pl69+6tIUOGKC0tTffdd5+WL1+uqqqqkO4RQGgRRADCQnR0tIYNG6b/+I//0M6dOzVp0iQ988wzjdZdydtaFy5c0KhRo1RUVNTgOHr0qO68805FRERo8+bNevfdd9WjRw8tXrxY3bp1U0lJSSi2BuAaIIgAhKUePXpc9CbnHj166K9//atqa2sD53bv3t1gTZ8+fXT48GHdeOON6ty5c4MjJiZG0tc3at9+++365S9/qQMHDqhFixZav359aDcFIGQIIgDN2smTJ/XjH/9Y+fn5+uijj1RSUqL/+Z//0UsvvaR77rmn0frx48fruuuu05QpU/Txxx/rnXfe0a9+9asGa6ZNm6Yvv/xSDz74oPbu3atjx45p06ZNmjx5surr67Vnzx7l5eVp//798nq9evvtt/XFF18oJSXlWm0bQJBxUzWAZq1169bq37+/Fi5cqM8++0znz5+Xx+PR1KlTNXfu3Iuu/8Mf/qBHH31Ut956q3r06KEXX3xRY8eODaxJSkrSX/7yFz311FMaMWKE/H6/kpOTNXLkSF133XVq06aNtm/frkWLFqm6ulrJycl6+eWX+fJHoBlzWHxOFAAAGI63zAAAgPEIIgAAYDyCCAAAGI8gAgAAxiOIAACA8QgiAABgPIIIAAAYjyACAADGI4gAAIDxCCIAAGA8gggAABjv/wMiesUXyNBGKgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "<Figure size 640x480 with 1 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "# b)" + "# b)\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "probabilities = np.arange(1, 9)\n", + "probabilities = probabilities / probabilities.sum()\n", + "\n", + "results = np.random.choice(8, size=10000, p=probabilities) + 1\n", + "\n", + "plt.hist(results, bins=np.arange(1, 10), density=False, edgecolor='black')\n", + "plt.xticks(np.arange(1, 9))\n", + "plt.xlabel('Sides')\n", + "plt.ylabel('Rolls')\n", + "plt.show()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6\n", + "7\n", + "6\n", + "5\n", + "5\n", + "7\n", + "8\n", + "7\n", + "7\n", + "6\n", + "4\n", + "2\n", + "1\n", + "8\n", + "3\n", + "0.000644125000007989\n" + ] + } + ], "source": [ - "# c)" + "# c)\n", + "\n", + "import numpy as np\n", + "import time\n", + "\n", + "def modded_biased_dice():\n", + " probabilities = np.arange(1, 9)\n", + " probabilities = probabilities / probabilities.sum()\n", + " rolls = set()\n", + " start_time = time.monotonic()\n", + " while len(rolls) < 8:\n", + " roll = np.random.choice(8, p=probabilities) + 1\n", + " rolls.add(roll)\n", + " yield roll\n", + " yield time.monotonic() - start_time\n", + "\n", + "results = modded_biased_dice()\n", + "for result in results:\n", + " print(result)" ] }, { @@ -124,7 +197,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.6" + "version": "3.9.13" } }, "nbformat": 4,