In [1]:
import time

# Introduction to Python 

# [Comprehensions](https://python-3-patterns-idioms-test.readthedocs.io/en/latest/Comprehensions.html) and [Generators](https://www.programiz.com/python-programming/generator)

## [Comprehensions](https://towardsdatascience.com/list-comprehensions-in-python-28d54c9286ca):

Python offers some nice "syntactic sugar" constructions. They allow the creation of sequences in a clear and concise way.
They are: 

- List comprehension
- Set comprehension
- Dict comprehension

## 1) List Comprehensions

List comprehensions return a list filled with elements derived from another sequence or iterable, modified or not. A common use is to build a new list where each element is the result of some expression applied to each element of the original, or to build a sequence containing only the elements that satisfy some condition.

The patterns are: 

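    [expression for item in iterable]
    [expression for item in iterable if condition]
    [expression_a if condition else expression_b for item in iterable]
    [expression_a if condition_1 else expression_b for item in iterable if condition_2]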
 

See the examples below: 

In [1]:
cubes = []
for number in range(10):
 cubes.append(number**3)

In [2]:
print(cubes)

[0, 1, 8, 27, 64, 125, 216, 343, 512, 729]


#### We can rewrite it in a more concise way using list comprehension: 

In [3]:
cubes2 = [number**3 for number in range(10)]

In [4]:
print(cubes2)

[0, 1, 8, 27, 64, 125, 216, 343, 512, 729]


#### Another example of the use of list comprehensions 
Let's sum the squares of the multiples of 3 or 5 up to 10000:

In [1]:
%%time

# Explicit-loop version: collect the square of every multiple of 3 or 5
# in 0..10000, then add them up.
my_list = []
for x in range(10001):
    if x % 5 == 0 or x % 3 == 0:
        my_list.append(x**2)
print(sum(my_list))

155638903886
CPU times: user 2.72 ms, sys: 0 ns, total: 2.72 ms
Wall time: 2.72 ms


In [2]:
%%time

my_new_list = [x**2 for x in range(10001) if x%5 == 0 or x%3 == 0]
print(sum(my_new_list))

155638903886
CPU times: user 1.72 ms, sys: 326 µs, total: 2.04 ms
Wall time: 2.05 ms


In [9]:
print(type(my_new_list))




### More examples:

+ #### Modifying a sequence 

In [10]:
sequence = range(11)
sequence

range(0, 11)

In [11]:
list(sequence)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [11]:
[element**2 for element in sequence if element%2 == 0]

[0, 4, 16, 36, 64, 100]

+ #### More than one variable

In [12]:
[x + y for x,y in [(9,4),(8,6),(2,9)]]

[13, 14, 11]

In [14]:
[(y,x) for x,y in [(9,4),(8,6),(2,9)]]

[(4, 9), (6, 8), (9, 2)]

In [25]:
[x if x%2==0 else y for x,y in [(1,2),(2,3),(4,5)] if isinstance(x,int) or isinstance(y,float)]

[2, 2, 4]

In [1]:
def swapcase(string):
    """Return a copy of ``string`` with the case of each character flipped.

    Characters for which ``str.islower()`` is true are upper-cased; every
    other character is passed through ``str.lower()``.
    """
    flipped_chars = [
        char.upper() if char.islower() else char.lower()
        for char in string
    ]
    return "".join(flipped_chars)

swapcase("SomE OdD strIng")

'sOMe oDd STRiNG'

+ #### No variables

In [15]:
%%time

print([3+2 for numero in range(4)])

[5, 5, 5, 5]
CPU times: user 82 µs, sys: 22 µs, total: 104 µs
Wall time: 125 µs


In [16]:
%%time

print(4 * [5])

[5, 5, 5, 5]
CPU times: user 67 µs, sys: 0 ns, total: 67 µs
Wall time: 71 µs


+ #### Filtering by type

In [18]:
my_list = [1, "4", 9, "a", 0, 4]
test_type = [elem for elem in my_list if isinstance(elem, int)]
print(test_type)

[1, 9, 0, 4]


+ #### Filtering by type, two conditions: 

In [20]:
my_list = [1, "4", 9, "a", 0, 4]
# Keep the ints as they are and convert the strings that int() can parse.
# str.isdecimal() is used rather than str.isnumeric(): isnumeric() is also
# true for characters such as "²" or "½", which int() rejects with ValueError.
test_type = [
    int(i)
    for i in my_list
    if isinstance(i, int) or (isinstance(i, str) and i.isdecimal())
]
print(test_type)

[1, 4, 9, 0, 4]


+ #### Multiple loops 

In [21]:
%%time

# Explicit nested loops: every (x, y) pair with x != y.
points = []
for x in [1, 2, 3]:
    for y in [3, 4, 5]:
        if x != y:
            points.append((x, y))

print(points)

[(1, 3), (1, 4), (1, 5), (2, 3), (2, 4), (2, 5), (3, 4), (3, 5)]
CPU times: user 106 µs, sys: 0 ns, total: 106 µs
Wall time: 111 µs


In [22]:
%%time

points = [(x,y) for x in [1,2,3] for y in [3,4,5] if x != y]
print(points)

[(1, 3), (1, 4), (1, 5), (2, 3), (2, 4), (2, 5), (3, 4), (3, 5)]
CPU times: user 115 µs, sys: 0 ns, total: 115 µs
Wall time: 120 µs


In [31]:
[y for x in [(9,4),(8,6),(2,9)] for y in x]

[9, 4, 8, 6, 2, 9]

+ #### Using for filtering strings 

In [19]:
import string

In [20]:
print(string.punctuation)
print(string.digits)
print(string.ascii_letters)
print(string.ascii_lowercase)
print(string.ascii_uppercase)
print(string.hexdigits)

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
0123456789
abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
abcdefghijklmnopqrstuvwxyz
ABCDEFGHIJKLMNOPQRSTUVWXYZ
0123456789abcdefABCDEF


Now that we know the string package, let's use it for cleaning strings with list comprehensions:

In [21]:
my_string = 'This is a string. It is full of surprises! Would you believe it? I can clean it...'

In [22]:
tokens = [token for token in my_string.split()]
print(tokens)

['This', 'is', 'a', 'string.', 'It', 'is', 'full', 'of', 'surprises!', 'Would', 'you', 'believe', 'it?', 'I', 'can', 'clean', 'it...']


In [23]:
lowercase_tokens = [token.lower() for token in my_string.split()]
print(lowercase_tokens)

['this', 'is', 'a', 'string.', 'it', 'is', 'full', 'of', 'surprises!', 'would', 'you', 'believe', 'it?', 'i', 'can', 'clean', 'it...']


In [24]:
lower_strip_tokens = [token.strip(string.punctuation).lower() for token in my_string.split()]
print(lower_strip_tokens)

['this', 'is', 'a', 'string', 'it', 'is', 'full', 'of', 'surprises', 'would', 'you', 'believe', 'it', 'i', 'can', 'clean', 'it']


## 2) Set Comprehensions:


Set comprehensions return a set filled with elements derived from a sequence or iterable, modified or not. The only difference from list comprehensions is that the result is a set instead of a list, that is, it has no repeated elements and no guaranteed order.

The patterns are: 

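    {expression for item in iterable}
    {expression for item in iterable if condition}
    {expression_a if condition else expression_b for item in iterable}
    {expression_a if condition_1 else expression_b for item in iterable if condition_2}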

See the examples below: 

In [28]:
vowels = {letter for letter in "python lovers" if letter in "aeiou"}
print(vowels)

{'e', 'o'}


In [29]:
type(vowels)

set

In [30]:
consonants = {letter for letter in 'summer camp' if letter not in "aeiou" and letter != ' '}
print(consonants)

{'s', 'c', 'p', 'r', 'm'}


## 3) Dict Comprehensions

Dict comprehensions return a dictionary filled with elements derived from sequences or iterables, modified or not. The pattern varies a bit from list and set comprehensions, because both a key and a value have to be produced for each element: `{key: value for item in iterable}`.

See the examples below: 

In [11]:
my_dict = {k:v for k,v in [('first',1),('second',2)]}
print(my_dict)
print(type(my_dict))

{'first': 1, 'second': 2}



In [12]:
squares = {x:x**2 for x in range(4)}
print(squares)

{0: 0, 1: 1, 2: 4, 3: 9}


+ #### Using _zip_ to pair up the elements of two sequences (it stops at the end of the shorter one):

In [14]:
{x:y for x,y in zip(range(5), range(5,9))}

{0: 5, 1: 6, 2: 7, 3: 8}

+ #### Three ways to do the same thing

In [19]:
%%time

new_dict = dict([('one',1),('two',2),('three',3)])
print(new_dict)

{'one': 1, 'two': 2, 'three': 3}
CPU times: user 247 µs, sys: 64 µs, total: 311 µs
Wall time: 202 µs


In [20]:
%%time

new_dict = {x:y for x,y in [('one',1),('two',2),('three',3)]}
print(new_dict)

{'one': 1, 'two': 2, 'three': 3}
CPU times: user 87 µs, sys: 24 µs, total: 111 µs
Wall time: 114 µs


In [21]:
%%time

new_dict = {x:y for x,y in zip(['one', 'two', 'three'],[1 ,2 ,3])}
print(new_dict)

{'one': 1, 'two': 2, 'three': 3}
CPU times: user 75 µs, sys: 21 µs, total: 96 µs
Wall time: 99.4 µs


+ #### Inverting key and value

In [22]:
new_dict

{'one': 1, 'two': 2, 'three': 3}

In [23]:
dict_new = {value:key for key, value in new_dict.items()}
dict_new

{1: 'one', 2: 'two', 3: 'three'}

+ #### Increasing a value

In [24]:
plus_one = {key:value + 1 for key, value in new_dict.items()}
plus_one

{'one': 2, 'two': 3, 'three': 4}

+ #### Two ways for converting lists of tuples: 

In [25]:
list_of_tuples = [(2,3),(4,5),(1,7)]

In [26]:
%%time

dict(list_of_tuples)

CPU times: user 5 µs, sys: 1 µs, total: 6 µs
Wall time: 9.78 µs


{2: 3, 4: 5, 1: 7}

In [27]:
%%time

{key:value for key, value in list_of_tuples}

CPU times: user 4 µs, sys: 1 µs, total: 5 µs
Wall time: 7.87 µs


{2: 3, 4: 5, 1: 7}

In [76]:
lista = [[x for x in range(12)],[x**2 for x in range(5)]]

In [77]:
lista

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [0, 1, 4, 9, 16]]

### [Generator Expressions](https://www.programiz.com/python-programming/generator) 

Generator expressions are similar to generator functions, but they are written like list comprehensions, using parentheses instead of square brackets.

In [74]:
generator1 = (x**(0.5) for x in range(10) if x%5 == 0 or x%3 == 0)
type(generator1)

generator

In [51]:
print(generator1)

 at 0x7f5eac23ebd0>


+ #### One way to materialise the generator is to convert it to a list:

In [52]:
list(generator1)

[0.0, 1.7320508075688772, 2.23606797749979, 2.449489742783178, 3.0]

In [75]:
# list(generator1) above already consumed the generator, so build a fresh one
# before materialising it as a tuple (otherwise the result would be ()).
generator1 = (x**(0.5) for x in range(10) if x%5 == 0 or x%3 == 0)
tuple(generator1)

(0.0, 1.7320508075688772, 2.23606797749979, 2.449489742783178, 3.0)

+ #### In order to access each generator element, we can use the command _next_

In [72]:
# A really big range: as a list it would not fit in memory, but the generator
# only computes one value at a time, on demand.
generator2 = (base**base for base in range(2, 99999999999999))

In [73]:
next(generator2)

4

In [44]:
next(generator2)

27

+ #### When _list_ materialises a generator, it consumes all of its remaining elements and leaves the generator exhausted
+ #### If we then call _next_ on it again, it raises the _StopIteration_ error 

In [79]:
generator3 = (x**2 for x in range(10))

In [80]:
next(generator3)

0

In [81]:
next(generator3)

1

In [82]:
list(generator3)

[4, 9, 16, 25, 36, 49, 64, 81]

In [83]:
next(generator3) #error

StopIteration: 

In [84]:
generator4 = (letter for letter in "abcd")

In [85]:
next(generator4)

'a'

In [86]:
next(generator4)

'b'

In [87]:
next(generator4)

'c'

In [88]:
next(generator4)

'd'

In [89]:
next(generator4) #error

StopIteration: 

In [90]:
generator4 = (letter for letter in "abcd")

In [91]:
while True:
 print(next(generator4)) #error

a
b
c
d


StopIteration: 

In [95]:
generator4 = (letter for letter in "abcd")

In [96]:
for element in generator4: # for iterations can handle stop iteration errors
 print(element)

a
b
c
d


+ #### Each generator keeps its own state, regardless of which loop is iterating over it:

In [102]:
generator4 = (letter for letter in "abcd")

In [104]:
# Both loops pull from the same generator object, so they share its position:
# the inner loop consumes everything left after the first element, and the
# outer loop then finds the generator exhausted and stops after one pass.
for ele1 in generator4:
    print("first for", ele1)
    for ele2 in generator4:
        print("second for", ele2)

## Special generators: _range*_, _zip_, _filter_, _map_ 

Obs (*):
`range` is a class of immutable sequence objects. Its iteration behavior can be compared to that of tuples: you can't call _next_ directly on a range; you first have to get an iterator from it by using _iter_. So, strictly speaking, `range` is not a generator.
+ Ranges are immutable, so they can be used as dictionary keys.
+ Ranges have the `start`, `stop` and `step` attributes (since Python 3.3), the `count` and `index` methods, and they support the `in`, `len` and `__getitem__` operations.
* You can iterate over the same range multiple times.


In [125]:
r = range(10)
type(r)

range

In [126]:
dir(r)

['__bool__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'count',
 'index',
 'start',
 'step',
 'stop']

In [124]:
next(r) #error

TypeError: 'range' object is not an iterator

In [115]:
r2 = iter(r)

0

In [127]:
dir(r2)

['__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__length_hint__',
 '__lt__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__']

In [121]:
next(r2)

6

In [62]:
z = zip([1,2,3],[4,5,6],[6,7,8])
type(z)

zip

In [63]:
f = filter(str.isupper,['a','A','E','r'])
type(f)

filter

In [64]:
m = map(int,['1', '2', '3'])
type(m)

map