Jure Sorn - Comprehensive Python Cheatsheet (2023)
Jure Sorn - Comprehensive Python Cheatsheet (2023)
# Contents
ToC = {
'1. Collections': [List, Dictionary, Set, Tuple, Range, Enumerate, Iterator, Generator],
'2. Types': [Type, String, Regular_Exp, Format, Numbers, Combinatorics, Datetime],
'3. Syntax': [Args, Inline, Import, Decorator, Class, Duck_Types, Enum, Exception],
'4. System': [Exit, Print, Input, Command_Line_Arguments, Open, Path, OS_Commands],
'5. Data': [JSON, Pickle, CSV, SQLite, Bytes, Struct, Array, Memory_View, Deque],
'6. Advanced': [Threading, Operator, Introspection, Metaprogramming, Eval, Coroutine],
'7. Libraries': [Progress_Bar, Plot, Tables, Curses, Logging, Scraping, Web, Profile],
'8. Multimedia': [NumPy, Image, Animation, Audio, Pygame, Pandas, Plotly, PySimpleGUI]
}
# Main
# List
sum_of_elements = sum(<collection>)
elementwise_sum = [sum(pair) for pair in zip(list_a, list_b)]
sorted_by_second = sorted(<collection>, key=lambda el: el[1])
sorted_by_both = sorted(<collection>, key=lambda el: (el[1], el[0]))
flatter_list = list(itertools.chain.from_iterable(<list>))
product_of_elems = functools.reduce(lambda out, el: out * el, <collection>)
list_of_chars = list(<str>)
# Dictionary
Counter
# Set
Frozen Set
<frozenset> = frozenset(<collection>)
# Tuple
Tuple is an immutable and hashable list.
Named Tuple
# Range
Immutable and hashable sequence of integers.
# Enumerate
# Iterator
Itertools
import itertools as it
# Type
Everything is an object.
Every object has a type.
Type and class are synonymous.
Each abstract base class specifies a set of virtual subclasses. These classes are then recognized
by isinstance() and issubclass() as subclasses of the ABC, although they are really not. ABC
can also manually decide whether or not a specific class is its virtual subclass, usually based
on which methods the class has implemented. For instance, Iterable ABC looks for method
iter(), while Collection ABC looks for iter(), contains() and len().
┏━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┓
┃ │ Iterable │ Collection │ Sequence ┃
┠──────────────────┼────────────┼────────────┼────────────┨
┃ list, range, str │ ✓ │ ✓ │ ✓ ┃
┃ dict, set │ ✓ │ ✓ │ ┃
┃ iter │ ✓ │ │ ┃
┗━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┛
┏━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━━┓
┃ │ Number │ Complex │ Real │ Rational │ Integral ┃
┠────────────────────┼──────────┼──────────┼──────────┼──────────┼──────────┨
┃ int │ ✓ │ ✓ │ ✓ │ ✓ │ ✓ ┃
┃ fractions.Fraction │ ✓ │ ✓ │ ✓ │ ✓ │ ┃
┃ float │ ✓ │ ✓ │ ✓ │ │ ┃
┃ complex │ ✓ │ ✓ │ │ │ ┃
┃ decimal.Decimal │ ✓ │ │ │ │ ┃
┗━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━┛
# String
Immutable sequence of characters.
Property Methods
# Regex
Functions for regular expression matching.
import re
<str> = re.sub(<regex>, new, text, count=0) # Substitutes all occurrences with 'new'.
<list> = re.findall(<regex>, text) # Returns all occurrences as strings.
<list> = re.split(<regex>, text, maxsplit=0) # Add brackets around regex to include matches.
<Match> = re.search(<regex>, text) # First occurrence of the pattern or None.
<Match> = re.match(<regex>, text) # Searches only at the beginning of the text.
<iter> = re.finditer(<regex>, text) # Returns all occurrences as Match objects.
Argument 'new' can be a function that accepts a Match object and returns a string.
Argument 'flags=re.IGNORECASE' can be used with all functions.
Argument 'flags=re.MULTILINE' makes '^' and '$' match the start/end of each line.
Argument 'flags=re.DOTALL' makes '.' also accept the '\n'.
Use r'\1' or '\\1' for backreference ('\1' returns a character with octal code 1).
Add '?' after '*' and '+' to make them non-greedy.
Match Object
By default, decimal characters, alphanumerics and whitespaces from all alphabets are
matched unless 'flags=re.ASCII' argument is used.
As shown above, it restricts all special sequence matches to the first 128 characters and
prevents '\s' from accepting '[\x1c-\x1f]' (the so-called separator characters).
Use a capital letter for negation (all non-ASCII characters will be matched when used in
combination with ASCII flag).
# Format
Example
General Options
Strings
Numbers
Floats
┏━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┓
┃ │ {<float>} │ {<float>:f} │ {<float>:e} │ {<float>:%} ┃
┠──────────────┼────────────────┼────────────────┼────────────────┼────────────────┨
┃ 0.000056789 │ '5.6789e-05' │ '0.000057' │ '5.678900e-05' │ '0.005679%' ┃
┃ 0.00056789 │ '0.00056789' │ '0.000568' │ '5.678900e-04' │ '0.056789%' ┃
┃ 0.0056789 │ '0.0056789' │ '0.005679' │ '5.678900e-03' │ '0.567890%' ┃
┃ 0.056789 │ '0.056789' │ '0.056789' │ '5.678900e-02' │ '5.678900%' ┃
┃ 0.56789 │ '0.56789' │ '0.567890' │ '5.678900e-01' │ '56.789000%' ┃
┃ 5.6789 │ '5.6789' │ '5.678900' │ '5.678900e+00' │ '567.890000%' ┃
┃ 56.789 │ '56.789' │ '56.789000' │ '5.678900e+01' │ '5678.900000%' ┃
┗━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┛
┏━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┓
┃ │ {<float>:.2} │ {<float>:.2f} │ {<float>:.2e} │ {<float>:.2%} ┃
┠──────────────┼────────────────┼────────────────┼────────────────┼────────────────┨
┃ 0.000056789 │ '5.7e-05' │ '0.00' │ '5.68e-05' │ '0.01%' ┃
┃ 0.00056789 │ '0.00057' │ '0.00' │ '5.68e-04' │ '0.06%' ┃
┃ 0.0056789 │ '0.0057' │ '0.01' │ '5.68e-03' │ '0.57%' ┃
┃ 0.056789 │ '0.057' │ '0.06' │ '5.68e-02' │ '5.68%' ┃
┃ 0.56789 │ '0.57' │ '0.57' │ '5.68e-01' │ '56.79%' ┃
┃ 5.6789 │ '5.7' │ '5.68' │ '5.68e+00' │ '567.89%' ┃
┃ 56.789 │ '5.7e+01' │ '56.79' │ '5.68e+01' │ '5678.90%' ┃
┗━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┛
Ints
{90:c} # 'Z'
{90:b} # '1011010'
{90:X} # '5A'
# Numbers
Basic Functions
Math
from math import e, pi, inf, nan, isinf, isnan # `<el> == nan` is always False.
from math import sin, cos, tan, asin, acos, atan # Also: degrees, radians.
from math import log, log10, log2 # Log can accept base as second arg.
Statistics
from statistics import mean, median, variance # Also: stdev, quantiles, mode.
Random
from random import random, randint, choice # Also: shuffle, gauss, triangular, seed.
<float> = random() # A float inside [0, 1).
<int> = randint(from_inc, to_inc) # An int inside [from_inc, to_inc].
<el> = choice(<sequence>) # Keeps the sequence intact.
Bin, Hex
Bitwise Operators
# Combinatorics
import itertools as it
# Datetime
Provides 'date', 'time', 'datetime' and 'timedelta' classes. All are immutable and hashable.
<D> = date(year, month, day) # Only accepts valid dates from 1 to 9999 AD.
<T> = time(hour=0, minute=0, second=0) # Also: `microsecond=0, tzinfo=None, fold=0`.
<DT> = datetime(year, month, day, hour=0) # Also: `minute=0, second=0, microsecond=0, …`.
<TD> = timedelta(weeks=0, days=0, hours=0) # Also: `minutes=0, seconds=0, microseconds=0`.
Aware <a> time and datetime objects have defined timezone, while naive <n> don't. If
object is naive, it is presumed to be in the system's timezone!
'fold=1' means the second pass in case of time jumping back for one hour.
Timedelta normalizes arguments to ±days, seconds (< 86 400) and microseconds (< 1M).
Use '<D/DT>.weekday()' to get the day of the week as an int, with Monday being 0.
Now
Timezone
Timezones returned by gettz(), tzlocal(), and implicit local timezone of naive objects have
offsets that vary through time due to DST and historical changes of the zone's base offset.
Standard library's zoneinfo.ZoneInfo() can be used instead of gettz() on Python 3.9 and
later. It requires 'tzdata' package on Windows. It doesn't return local tz if arg. is omitted.
Encode
Decode
Format
'%z' accepts '±HH[:]MM' and returns '±HHMM' or empty string if datetime is naive.
'%Z' accepts 'UTC/GMT' and local timezone's code and returns timezone's name,
'UTC[±HH:MM]' if timezone is nameless, or an empty string if datetime is naive.
Arithmetics
<bool> = <D/T/DTn> > <D/T/DTn> # Ignores time jumps (fold attribute). Also ==.
<bool> = <DTa> > <DTa> # Ignores time jumps if they share tzinfo object.
<TD> = <D/DTn> - <D/DTn> # Ignores jumps. Convert to UTC for actual delta.
<TD> = <DTa> - <DTa> # Ignores time jumps if they share tzinfo object.
<D/DT> = <D/DT> ± <TD> # Returned datetime can fall into missing hour.
<TD> = <TD> * <float> # Also: <TD> = abs(<TD>) and <TD> = <TD> ±% <TD>.
<float> = <TD> / <TD> # How many weeks/years there are in TD. Also //.
# Arguments
Inside Function Call
func(<positional_args>) # func(0, 0)
func(<keyword_args>) # func(x=0, y=0)
func(<positional_args>, <keyword_args>) # func(0, y=0)
Default values are evaluated when function is first encountered in the scope.
Any mutation of a mutable default value will persist between invocations!
# Splat Operator
Inside Function Call
Splat expands a collection into positional arguments, while splatty-splat expands a dictionary
into keyword arguments.
args = (1, 2)
kwargs = {'x': 3, 'y': 4, 'z': 5}
func(*args, **kwargs)
Splat combines zero or more positional arguments into a tuple, while splatty-splat combines
zero or more keyword arguments into a dictionary.
def add(*a):
    """Return the sum of all positional arguments.

    `*a` collects zero or more positional arguments into a tuple, so
    `add()` returns 0 (the sum of an empty tuple).
    """
    return sum(a)
>>> add(1, 2, 3)
6
def f(*args, **kwargs): ... # f(x=1, y=2, z=3) | f(1, y=2, z=3) | f(1, 2, z=3) | f(1, 2, 3)
def f(x, *args, **kwargs): ... # f(x=1, y=2, z=3) | f(1, y=2, z=3) | f(1, 2, z=3) | f(1, 2, 3)
def f(*args, y, **kwargs): ... # f(x=1, y=2, z=3) | f(1, y=2, z=3)
# Inline
Lambda
Comprehensions
Any, All
Conditional Expression
<obj> = <exp> if <condition> else <exp> # Only one expression gets evaluated.
>>> [a if a else 'zero' for a in (0, 1, 2, 3)] # `any([0, '', [], None]) == False`
['zero', 1, 2, 3]
Package is a collection of modules, but it can also define its own objects.
On a filesystem this corresponds to a directory of Python files with an optional init script.
Running 'import <package>' does not automatically provide access to the package's
modules unless they are explicitly imported in its init script.
# Closure
We have/get a closure in Python when:
def get_multiplier(a):
    """Return a function that multiplies its argument by `a`.

    The returned function is a closure: it keeps a reference to `a`
    after get_multiplier() has returned.
    """
    def out(b):
        return a * b
    return out
If multiple nested functions within enclosing function reference the same value, that
value gets shared.
To dynamically access function's first free variable use
'<function>.__closure__[0].cell_contents'.
Partial
Partial is also useful in cases when function needs to be passed as an argument because it
enables us to set its arguments beforehand.
A few examples being: 'defaultdict(<func>)', 'iter(<func>, to_exc)' and
dataclass's 'field(default_factory=<func>)'.
Non-Local
If variable is being assigned to anywhere in the scope, it is regarded as a local variable, unless
it is declared as a 'global' or a 'nonlocal'.
def get_counter():
    """Return a function that returns 1, 2, 3, ... on successive calls.

    Every call to get_counter() creates a new, independent counter.
    """
    i = 0
    def out():
        # 'nonlocal' is required here: without it the augmented assignment
        # would make 'i' a local variable of out().
        nonlocal i
        i += 1
        return i
    return out
@decorator_name
def function_that_gets_passed_to_decorator():
...
Debugger Example
Decorator that prints function's name every time the function is called.
from functools import wraps

def debug(func):
    """Decorator that prints the function's name every time it is called.

    The wrapped function's return value is passed through unchanged.
    """
    @wraps(func)  # Copies func's metadata (e.g. __name__) onto out().
    def out(*args, **kwargs):
        print(func.__name__)
        return func(*args, **kwargs)
    return out

@debug
def add(x, y):
    """Return x + y; every call prints 'add' because of the decorator."""
    return x + y
Wraps is a helper decorator that copies the metadata of the passed function (func) to the
function it is wrapping (out).
Without it 'add.__name__' would return 'out'.
LRU Cache
Decorator that caches function's return values. All function's arguments must be hashable.
from functools import lru_cache

@lru_cache(maxsize=None)  # Unbounded cache; each fib(n) is computed only once.
def fib(n):
    """Return the n-th Fibonacci number (fib(0) == 0, fib(1) == 1).

    `n` must be hashable because lru_cache uses the arguments as cache keys.
    """
    return n if n < 2 else fib(n-2) + fib(n-1)
Default size of the cache is 128 values. Passing 'maxsize=None' makes it unbounded.
CPython interpreter limits recursion depth to 1000 by default. To increase it use
'sys.setrecursionlimit(<depth>)'.
Parametrized Decorator
A decorator that accepts arguments and returns a normal decorator that accepts a function.
from functools import wraps

def debug(print_result=False):
    """Return a decorator that prints the function's name after each call.

    When `print_result` is true, the call's return value is printed after
    the name; otherwise an empty string is printed in its place. This
    function must be called to obtain the decorator, i.e. used as
    '@debug()' or '@debug(print_result=True)'.
    """
    def decorator(func):
        @wraps(func)  # Keeps func's metadata (e.g. __name__) on out().
        def out(*args, **kwargs):
            result = func(*args, **kwargs)
            print(func.__name__, result if print_result else '')
            return result
        return out
    return decorator

@debug(print_result=True)
def add(x, y):
    """Return x + y; every call prints 'add' followed by the result."""
    return x + y
Using only '@debug' to decorate the add() function would not work here, because debug
would then receive the add() function as a 'print_result' argument. Decorators can
however manually check if the argument they received is a function and act accordingly.
# Class
class <name>:
def __init__(self, a):
self.a = a
def __repr__(self):
class_name = self.__class__.__name__
return f'{class_name}({self.a!r})'
def __str__(self):
return str(self.a)
@classmethod
def get_class_name(cls):
return cls.__name__
print(<el>)
f'{<el>}'
logging.warning(<el>)
csv.writer(<file>).writerow([<el>])
raise Exception(<el>)
print/str/repr([<el>])
print/str/repr({<el>: <el>})
f'{<el>!r}'
Z = dataclasses.make_dataclass('Z', ['a']); print/str/repr(Z(<el>))
>>> <el>
Constructor Overloading
class <name>:
def __init__(self, a=None):
self.a = a
Inheritance
class Person:
    """Base class that stores a person's name and age."""

    def __init__(self, name, age):
        self.name = name
        self.age = age

class Employee(Person):
    """Person that additionally has a staff number."""

    def __init__(self, name, age, staff_num):
        # Let the parent class initialize the attributes it owns.
        super().__init__(name, age)
        self.staff_num = staff_num
Multiple Inheritance
class A:
    """First parent of C."""

class B:
    """Second parent of C."""

class C(A, B):
    """Child of A and B (in that order)."""
MRO determines the order in which parent classes are traversed when searching for a method
or an attribute:
>>> C.mro()
[<class 'C'>, <class 'A'>, <class 'B'>, <class 'object'>]
Property
class Person:
    """Person whose name is stored as a list of words in '_name'.

    The 'name' property must be assigned before it is read, because this
    class defines no __init__() that would create '_name'.
    """

    @property
    def name(self):
        """Return the stored words joined by single spaces."""
        return ' '.join(self._name)

    @name.setter
    def name(self, value):
        # split() without arguments also discards repeated whitespace.
        self._name = value.split()
Dataclass
Decorator that automatically generates init(), repr() and eq() special methods.
@dataclass(order=False, frozen=False)
class <class_name>:
<attr_name>: <type>
<attr_name>: <type> = <default_value>
<attr_name>: list/dict/set = field(default_factory=list/dict/set)
Objects can be made sortable with 'order=True' and immutable with 'frozen=True'.
For object to be hashable, all attributes must be hashable and 'frozen' must be True.
Function field() is needed because '<attr_name>: list = []' would make a list that is
shared among all instances. Its 'default_factory' argument can be any callable.
For attributes of arbitrary type use 'typing.Any'.
Inline:
Slots
Mechanism that restricts objects to attributes listed in 'slots' and significantly reduces their
memory footprint.
class MyClassWithSlots:
    """Class whose instances can only have the attribute 'a'."""

    # Listing the attributes in __slots__ means instances get no __dict__.
    __slots__ = ['a']

    def __init__(self):
        self.a = 1
Copy
Comparable
class MyComparable:
    """Object that compares equal to objects of its own type with equal 'a'."""

    def __init__(self, a):
        self.a = a

    def __eq__(self, other):
        if isinstance(other, type(self)):
            return self.a == other.a
        # Lets Python try the reflected comparison of the other operand.
        return NotImplemented
Hashable
Hashable object needs both hash() and eq() methods and its hash value should never
change.
Hashable objects that compare equal must have the same hash value, meaning default
hash() that returns 'id(self)' will not do.
That is why Python automatically makes classes unhashable if you only implement eq().
class MyHashable:
    """Hashable object whose equality and hash are both based on 'a'."""

    def __init__(self, a):
        self._a = a

    @property
    def a(self):
        """Read-only access to the value (no setter is defined)."""
        return self._a

    def __eq__(self, other):
        if isinstance(other, type(self)):
            return self.a == other.a
        return NotImplemented

    def __hash__(self):
        # Objects that compare equal must have the same hash value.
        return hash(self.a)
Sortable
With 'total_ordering' decorator, you only need to provide eq() and one of lt(), gt(), le() or
ge() special methods and the rest will be automatically generated.
Functions sorted() and min() only require lt() method, while max() only requires gt().
However, it is best to define them all so that confusion doesn't arise in other contexts.
When two lists, strings or dataclasses are compared, their values get compared in order
until a pair of unequal values is found. The comparison of these two values is then
returned. The shorter sequence is considered smaller in case of all values being equal.
For proper alphabetical order pass 'key=locale.strxfrm' to sorted() after running
'locale.setlocale(locale.LC_COLLATE, "en_US.UTF-8")'.
from functools import total_ordering

@total_ordering  # Generates le(), gt() and ge() from eq() and lt().
class MySortable:
    """Object that is ordered by its attribute 'a'."""

    def __init__(self, a):
        self.a = a

    def __eq__(self, other):
        if isinstance(other, type(self)):
            return self.a == other.a
        return NotImplemented

    def __lt__(self, other):
        if isinstance(other, type(self)):
            return self.a < other.a
        return NotImplemented
Iterator
class Counter:
    """Endless iterator that yields 1, 2, 3, ..."""

    def __init__(self):
        self.i = 0

    def __next__(self):
        self.i += 1
        return self.i

    def __iter__(self):
        # An iterator returns itself from iter().
        return self
Sequence iterators returned by the iter() function, such as list_iterator and set_iterator.
Objects returned by the itertools module, such as count, repeat and cycle.
Generators returned by the generator functions and generator expressions.
File objects returned by the open() function, etc.
Callable
All functions and classes have a call() method, hence are callable.
When this cheatsheet uses '<function>' as an argument, it actually means
'<callable>'.
class Counter:
    """Callable object that returns 1, 2, 3, ... on successive calls."""

    def __init__(self):
        self.i = 0

    def __call__(self):
        self.i += 1
        return self.i
Context Manager
With statements only work with objects that have enter() and exit() special methods.
Enter() should lock the resources and optionally return an object.
Exit() should release the resources.
Any exception that happens inside the with block is passed to the exit() method.
The exit() method can suppress the exception by returning a true value.
class MyOpen:
    """Context manager that opens a file on entry and closes it on exit."""

    def __init__(self, filename):
        self.filename = filename

    def __enter__(self):
        # The file is opened with open()'s defaults (text mode, reading).
        self.file = open(self.filename)
        return self.file

    def __exit__(self, exc_type, exception, traceback):
        # Returns None, so an exception raised in the with block propagates.
        self.file.close()
class MyIterable:
    """Iterable wrapper around the collection 'a'."""

    def __init__(self, a):
        self.a = a

    def __iter__(self):
        # Every call returns a fresh iterator over the wrapped collection.
        return iter(self.a)

    def __contains__(self, el):
        return el in self.a
Collection
Only required methods are iter() and len(). Len() should return the number of items.
This cheatsheet actually means '<iterable>' when it uses '<collection>'.
I chose not to use the name 'iterable' because it sounds scarier and more vague than
'collection'. The only drawback of this decision is that the reader could think a certain
function doesn't accept iterators when it does, since iterators are the only built-in objects
that are iterable but are not collections.
class MyCollection:
    """Wrapper around 'a' that supports iteration, 'in' and len()."""

    def __init__(self, a):
        self.a = a

    def __iter__(self):
        return iter(self.a)

    def __contains__(self, el):
        return el in self.a

    def __len__(self):
        return len(self.a)
Sequence
class MySequence:
    """Wrapper around the sequence 'a' that delegates every operation to it."""

    def __init__(self, a):
        self.a = a

    def __iter__(self):
        return iter(self.a)

    def __contains__(self, el):
        return el in self.a

    def __len__(self):
        return len(self.a)

    def __getitem__(self, i):
        return self.a[i]

    def __reversed__(self):
        return reversed(self.a)
Glossary defines iterable as any object with iter() or getitem() and sequence as any object
with getitem() and len(). It does not define collection.
Passing ABC Iterable to isinstance() or issubclass() checks whether object/class has
method iter(), while ABC Collection checks for iter(), contains() and len().
ABC Sequence
from collections import abc

class MyAbcSequence(abc.Sequence):
    """Sequence that only defines len() and getitem().

    The remaining sequence methods (iter(), contains(), reversed(),
    index() and count()) are inherited from the abc.Sequence base class.
    """

    def __init__(self, a):
        self.a = a

    def __len__(self):
        return len(self.a)

    def __getitem__(self, i):
        return self.a[i]
┏━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━┓
┃ │ Iterable │ Collection │ Sequence │ abc.Sequence ┃
┠────────────┼────────────┼────────────┼────────────┼──────────────┨
┃ iter() │ ! │ ! │ ✓ │ ✓ ┃
┃ contains() │ ✓ │ ✓ │ ✓ │ ✓ ┃
┃ len() │ │ ! │ ! │ ! ┃
┃ getitem() │ │ │ ! │ ! ┃
┃ reversed() │ │ │ ✓ │ ✓ ┃
┃ index() │ │ │ │ ✓ ┃
┃ count() │ │ │ │ ✓ ┃
┗━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━┛
Other ABCs that generate missing methods are: MutableSequence, Set, MutableSet,
Mapping and MutableMapping.
Names of their required methods are stored in '<abc>.__abstractmethods__'.
# Enum
class <enum_name>(Enum):
<member_name> = auto()
<member_name> = <value>
<member_name> = <value>, <value>
def get_next_member(member):
    """Return the member that follows `member` in its enum.

    The members are taken in iteration order of type(member), and the
    last member is followed by the first one.
    """
    members = list(type(member))
    index = members.index(member) + 1
    # The modulo wraps the index around after the last member.
    return members[index % len(members)]
Inline
# Exceptions
try:
<code>
except <exception>:
<code>
Complex Example
try:
<code_1>
except <exception_a>:
<code_2_a>
except <exception_b>:
<code_2_b>
else:
<code_2_c>
finally:
<code_3>
Code inside the 'else' block will only be executed if 'try' block had no exceptions.
Code inside the 'finally' block will always be executed (unless a signal is received).
All variables that are initialized in executed blocks are also visible in all subsequent
blocks, as well as outside the try/except clause (only function block delimits scope).
To catch signals use 'signal.signal(signal_number, <func>)'.
Catching Exceptions
Raising Exceptions
raise <exception>
raise <exception>()
raise <exception>(<el> [, ...])
arguments = <name>.args
exc_type = <name>.__class__
filename = <name>.__traceback__.tb_frame.f_code.co_filename
func_name = <name>.__traceback__.tb_frame.f_code.co_name
line = linecache.getline(filename, <name>.__traceback__.tb_lineno)
trace_str = ''.join(traceback.format_tb(<name>.__traceback__))
error_msg = ''.join(traceback.format_exception(type(<name>), <name>, <name>.__traceback__))
Built-in Exceptions
BaseException
├── SystemExit # Raised by the sys.exit() function.
├── KeyboardInterrupt # Raised when the user hits the interrupt key (ctrl-c).
└── Exception # User-defined exceptions should be derived from this class.
├── ArithmeticError # Base class for arithmetic errors such as ZeroDivisionError.
├── AssertionError # Raised by `assert <exp>` if expression returns false value.
├── AttributeError # Raised when object doesn't have requested attribute/method.
├── EOFError # Raised by input() when it hits an end-of-file condition.
├── LookupError # Base class for errors when a collection can't find an item.
│ ├── IndexError # Raised when a sequence index is out of range.
│ └── KeyError # Raised when a dictionary key or set element is missing.
├── MemoryError # Out of memory. Could be too late to start deleting vars.
├── NameError # Raised when nonexistent name (variable/func/class) is used.
│ └── UnboundLocalError # Raised when local name is used before it is defined.
├── OSError # Errors such as FileExistsError/PermissionError (see #Open).
│ └── ConnectionError # Errors such as BrokenPipeError/ConnectionAbortedError.
├── RuntimeError # Raised by errors that don't fall into other categories.
│ ├── NotImplementedErr # Can be raised by abstract methods or by unfinished code.
│ └── RecursionError # Raised when the maximum recursion depth is exceeded.
├── StopIteration # Raised by next() when run on an empty iterator.
├── TypeError # Raised when an argument is of the wrong type.
└── ValueError # When argument has the right type but inappropriate value.
┏━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┓
┃ │ List │ Set │ Dict ┃
┠───────────┼────────────┼────────────┼────────────┨
┃ getitem() │ IndexError │ │ KeyError ┃
┃ pop() │ IndexError │ KeyError │ KeyError ┃
┃ remove() │ ValueError │ KeyError │ ┃
┃ index() │ ValueError │ │ ┃
┗━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┛
User-defined Exceptions
# Exit
Exits the interpreter by raising SystemExit exception.
import sys
sys.exit() # Exits with exit code 0 (success).
sys.exit(<el>) # Prints to stderr and exits with 1.
sys.exit(<int>) # Exits with the passed exit code.
# Print
Pretty Print
# Input
Reads a line from the user input or pipe if present.
<str> = input(prompt=None)
import sys
scripts_path = sys.argv[0]
arguments = sys.argv[1:]
Argument Parser
Use 'help=<str>' to set argument description that will be displayed in help message.
Use 'default=<el>' to set argument's default value.
Use 'type=FileType(<mode>)' for files. Accepts 'encoding', but 'newline' is None.
# Open
Opens the file and returns a corresponding file object.
'encoding=None' means that the default encoding is used, which is platform dependent.
Best practice is to use 'encoding="utf-8"' whenever possible.
'newline=None' means all different end of line combinations are converted to '\n' on
read, while on write all '\n' characters are converted to system's default line separator.
'newline=""' means no conversions take place, but input is still broken into chunks by
readline() and readlines() on every '\n', '\r' and '\r\n'.
Modes
Exceptions
File Object
def read_file(filename):
    """Return the lines of a UTF-8 encoded text file as a list of strings.

    Each returned line keeps its trailing newline character.
    """
    with open(filename, encoding='utf-8') as file:
        return file.readlines()
# Paths
DirEntry
Unlike listdir(), scandir() returns DirEntry objects that cache isfile, isdir and on Windows also
stat information, thus significantly increasing the performance of code that requires it.
Path Object
# OS Commands
Sends '1 + 1' to the basic calculator and captures its output:
Sends test.in to the basic calculator running in standard mode and saves its output to test.out:
# JSON
Text file format for storing collections of strings and numbers.
import json
<str> = json.dumps(<object>) # Converts object to JSON string.
<object> = json.loads(<str>) # Converts JSON string to object.
def read_json_file(filename):
    """Return the object stored in a UTF-8 encoded JSON file."""
    with open(filename, encoding='utf-8') as file:
        return json.load(file)
# Pickle
Binary file format for storing Python objects.
import pickle
<bytes> = pickle.dumps(<object>) # Converts object to bytes object.
<object> = pickle.loads(<bytes>) # Converts bytes object to object.
def read_pickle_file(filename):
    """Return the object stored in a pickle file."""
    # WARNING: unpickling can execute arbitrary code, so only use this
    # function on files that come from a trusted source.
    with open(filename, 'rb') as file:
        return pickle.load(file)
import csv
Read
Write
File must be opened with a 'newline=""' argument, or '\r' will be added in front of every
'\n' on platforms that use '\r\n' line endings!
Open existing file with 'mode="w"' to overwrite it or 'mode="a"' to append to it.
Parameters
'dialect' - Master parameter that sets the default values. String or a 'csv.Dialect' object.
'delimiter' - A one-character string used to separate fields.
'quotechar' - Character for quoting fields that contain special characters.
'doublequote' - Whether quotechars inside fields are/get doubled or escaped.
'skipinitialspace' - Is space character at the start of the field stripped by the reader.
'lineterminator' - How writer terminates rows. Reader is hardcoded to '\n', '\r', '\r\n'.
'quoting' - 0: As necessary, 1: All, 2: All but numbers which are read as floats, 3: None.
'escapechar' - Character for escaping quotechars if 'doublequote' is False.
Dialects
┏━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━┓
┃ │ excel │ excel-tab │ unix ┃
┠──────────────────┼──────────────┼──────────────┼──────────────┨
┃ delimiter │ ',' │ '\t' │ ',' ┃
┃ quotechar │ '"' │ '"' │ '"' ┃
┃ doublequote │ True │ True │ True ┃
┃ skipinitialspace │ False │ False │ False ┃
┃ lineterminator │ '\r\n' │ '\r\n' │ '\n' ┃
┃ quoting │ 0 │ 0 │ 1 ┃
┃ escapechar │ None │ None │ None ┃
┗━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━┛
import sqlite3
<conn> = sqlite3.connect(<path>) # Opens existing or new file. Also ':memory:'.
<conn>.close() # Closes the connection.
Read
Write
Or:
Placeholders
Passed values can be of type str, int, float, bytes, None, bool, datetime.date or
datetime.datetime.
Bools will be stored and returned as ints and dates as ISO formatted strings.
Example
Values are not actually saved in this example because 'conn.commit()' is omitted!
SqlAlchemy
┏━━━━━━━━━━━━┯━━━━━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Dialect │ pip3 install │ import │ Dependencies ┃
┠────────────┼──────────────┼──────────┼──────────────────────────────────┨
┃ mysql │ mysqlclient │ MySQLdb │ www.pypi.org/project/mysqlclient ┃
┃ postgresql │ psycopg2 │ psycopg2 │ www.pypi.org/project/psycopg2 ┃
┃ mssql │ pyodbc │ pyodbc │ www.pypi.org/project/pyodbc ┃
┃ oracle │ oracledb │ oracledb │ www.pypi.org/project/oracledb ┃
┗━━━━━━━━━━━━┷━━━━━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
# Bytes
Bytes object is an immutable sequence of single bytes. Mutable version is called bytearray.
Encode
Decode
def read_bytes(filename):
    """Return the whole content of a file as a bytes object."""
    with open(filename, 'rb') as file:
        return file.read()
# Struct
Module that performs conversions between a sequence of numbers and a bytes object.
System’s type sizes, byte order, and alignment rules are used by default.
>>> pack('>hhl', 1, 2, 3)
b'\x00\x01\x00\x02\x00\x00\x00\x03'
>>> unpack('>hhl', b'\x00\x01\x00\x02\x00\x00\x00\x03')
(1, 2, 3)
Format
For standard type sizes and manual alignment (padding) start format string with:
Besides numbers, pack() and unpack() also support bytes objects as part of the sequence:
'c' - A bytes object with a single element. For pad byte use 'x'.
'<n>s' - A bytes object with n elements.
Integer types. Use a capital letter for unsigned type. Minimum and standard sizes are in brackets:
# Array
List that can only hold numbers of a predefined type. Available types and their minimum
sizes in bytes are listed above. Type sizes and byte order are always determined by the system,
however bytes of each element can be swapped with byteswap() method.
# Memory View
A sequence object that points to the memory of another bytes-like object.
Each element can reference a single or multiple consecutive bytes, depending on format.
Order and number of elements can be changed with slicing.
Casting only works between char and other types and uses system's sizes.
Byte order is always determined by the system.
# Deque
A thread-safe list with efficient appends and pops from either side. Pronounced "deck".
Thread
Lock
Or:
Queue
Map() and as_completed() also accept 'timeout' argument that causes TimeoutError if
result isn't available in 'timeout' seconds after next() is called.
Exceptions that happen inside threads are raised when next() is called on map's iterator
or when result() is called on a Future. Its exception() method returns exception or None.
ProcessPoolExecutor provides true parallelism, but everything sent to/from workers must
be picklable. Queues must be sent using executor's 'initargs' and 'initializer' parameters.
# Operator
Module of functions that provide the functionality of operators. Functions are ordered by
operator precedence, starting with least binding.
import operator as op
<bool> = op.not_(<obj>) # or, and, not (or/and missing)
<bool> = op.eq/ne/lt/le/gt/ge/contains/is_(<obj>, <obj>) # ==, !=, <, <=, >, >=, in, is
<obj> = op.or_/xor/and_(<int/set>, <int/set>) # |, ^, &
<int> = op.lshift/rshift(<int>, <int>) # <<, >>
<obj> = op.add/sub/mul/truediv/floordiv/mod(<obj>, <obj>) # +, -, *, /, //, %
<num> = op.neg/invert(<num>) # -, ~
<num> = op.pow(<num>, <num>) # **
<func> = op.itemgetter/attrgetter/methodcaller(<obj> [, ...]) # [index/key], .name, .name()
Bitwise operators require objects to have or(), xor(), and(), lshift(), rshift() and invert()
special methods, unlike logical operators that work on all types of objects.
Also: '<bool> = <bool> &|^ <bool>' and '<int> = <bool> &|^ <int>'.
# Introspection
Attributes
Parameters
# Metaprogramming
Code that generates code.
Type
Type is the root class. If only passed an object it returns its type (class). Otherwise it creates a
new class.
Or:
class MyMetaClass(type):
    """Metaclass that adds the class attribute a = 'abcde' to its classes."""

    def __new__(cls, name, parents, attrs):
        # The attribute is injected before the class object gets created.
        attrs['a'] = 'abcde'
        return type.__new__(cls, name, parents, attrs)
New() is a class method that gets called before init(). If it returns an instance of its class,
then that instance gets passed to init() as a 'self' argument.
It receives the same arguments as init(), except for the first one that specifies the desired
type of the returned instance (MyMetaClass in our case).
Like in our case, new() can also be called directly, usually from a new() method of a child
class (def __new__(cls): return super().__new__(cls)).
The only difference between the examples above is that my_meta_class() returns a class of
type type, while MyMetaClass() returns a class of type MyMetaClass.
Metaclass Attribute
Right before a class is created it checks if it has the 'metaclass' attribute defined. If not, it
recursively checks if any of its parents has it defined and eventually comes to type().
class MyClass(metaclass=MyMetaClass):
    # The class object is built by MyMetaClass instead of by type.
    b = 12345
Type Diagram
┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┓
┃ Classes │ Metaclasses ┃
┠─────────────┼─────────────┨
┃ MyClass ←──╴MyMetaClass ┃
┃ │ ↑ ┃
┃ object ←─────╴type ←╮ ┃
┃ │ │ ╰──╯ ┃
┃ str ←─────────╯ ┃
┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┛
Inheritance Diagram
┏━━━━━━━━━━━━━┯━━━━━━━━━━━━━┓
┃ Classes │ Metaclasses ┃
┠─────────────┼─────────────┨
┃ MyClass │ MyMetaClass ┃
┃ ↑ │ ↑ ┃
┃ object╶─────→ type ┃
┃ ↓ │ ┃
┃ str │ ┃
┗━━━━━━━━━━━━━┷━━━━━━━━━━━━━┛
# Eval
# Coroutines
Coroutines have a lot in common with threads, but unlike threads, they only give up
control when they call another coroutine and they don’t use as much memory.
Coroutine definition starts with 'async' and its call with 'await'.
'asyncio.run(<coroutine>)' is the main entry point for asynchronous programs.
Functions wait(), gather() and as_completed() start multiple coroutines at the same time.
Asyncio module also provides its own Queue, Event, Lock and Semaphore classes.
Runs a terminal game where you control an asterisk that must avoid numbers:
def main(screen):
    """Configure the curses screen and run the game's main coroutine on it."""
    curses.curs_set(0)                         # Makes cursor invisible.
    screen.nodelay(True)                       # Makes getch() non-blocking.
    asyncio.run(main_coroutine(screen))        # Starts running asyncio code.


if __name__ == '__main__':
    curses.wrapper(main)
Libraries
# Progress Bar
# Plot
# Table
Prints a CSV file as an ASCII table:
# Curses
Runs a basic file explorer in the console:
def main(screen):
    """Run a minimal file explorer on the passed curses screen until 'q' is pressed."""
    ch, first, selected, paths = 0, 0, 0, os.listdir()
    while ch != ord('q'):
        height, width = screen.getmaxyx()
        screen.erase()
        # Draws the visible window of filenames and highlights the selected one.
        for y, filename in enumerate(paths[first : first+height]):
            color = A_REVERSE if filename == paths[selected] else 0
            screen.addnstr(y, 0, filename, width-1, color)
        ch = screen.getch()
        # Moves the selection, clamps it to the list and scrolls the window by one row.
        selected += (ch == KEY_DOWN) - (ch == KEY_UP)
        selected = max(0, min(len(paths)-1, selected))
        first += (selected >= first + height) - (selected < first)
        if ch in [KEY_LEFT, KEY_RIGHT, KEY_ENTER, ord('\n'), ord('\r')]:
            if ch == KEY_LEFT:
                new_dir = '..'
            elif paths:
                new_dir = paths[selected]
            else:
                continue                       # Empty directory has nothing to open.
            if os.path.isdir(new_dir):
                os.chdir(new_dir)
                first, selected, paths = 0, 0, os.listdir()


if __name__ == '__main__':
    curses.wrapper(main)
# Logging
import logging
Setup
logging.basicConfig(
    filename=None,                                # Logs to console (stderr) by default.
    format='%(levelname)s:%(name)s:%(message)s',  # Add '%(asctime)s' for local datetime.
    level=logging.WARNING,                        # Drops messages with lower priority.
    # Argument 'handlers' can't be passed together with 'filename' (raises ValueError).
    # When it is omitted, StreamHandler(sys.stderr) is used, or FileHandler if filename is set:
    # handlers=[logging.StreamHandler(sys.stderr)]
)
Creates a logger that writes all messages to file and sends them to the root's handler that prints warnings or higher:
# Scraping
Scrapes Python's URL and logo from its Wikipedia page:
try:
    response = requests.get('https://en.wikipedia.org/wiki/Python_(programming_language)')
    document = bs4.BeautifulSoup(response.text, 'html.parser')
    table = document.find('table', class_='infobox vevent')
    # The link sits in the cell that follows the 'Website' header cell.
    python_url = table.find('th', text='Website').next_sibling.a['href']
    logo_url = table.find('img')['src']
    # The scheme is prepended to the image source before the logo is downloaded.
    logo = requests.get(f'https:{logo_url}').content
    filename = os.path.basename(logo_url)
    with open(filename, 'wb') as file:
        file.write(logo)
    print(f'{python_url}, file://{os.path.abspath(filename)}')
except requests.exceptions.ConnectionError:
    print("You've got problems with connection.", file=sys.stderr)
# Web
Flask is a micro web framework/server. If you just want to open an HTML file in a web browser,
use 'webbrowser.open(<path>)' instead.
# Creates the application object that the route decorators are registered on.
app = flask.Flask(__name__)
# Starts the server. NOTE(review): None presumably selects Flask's defaults, confirm in its docs.
app.run(host=None, port=None, debug=None)
Static Request
@app.route('/img/<path:filename>')
def serve_file(filename):
    """Serve the requested file from the 'dirname/' directory."""
    return flask.send_from_directory('dirname/', filename)
Dynamic Request
@app.route('/<sport>')
def serve_html(sport):
    """Render a heading whose title is the sport taken from the URL."""
    return flask.render_template_string('<h1>{{title}}</h1>', title=sport)
REST Request
@app.post('/<sport>/odds')
def serve_json(sport):
    """Return the team read from the submitted form together with a list of odds."""
    team = flask.request.form['team']
    return {'team': team, 'odds': [2.09, 3.74, 3.68]}
Starts the app in its own thread and queries its REST API:
# Profiling
Timing a Snippet
┏━━━━━━━━━━━━━━┯━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━┓
┃ pip3 install │ Type │ Target │ How to run │ Live ┃
┠──────────────┼──────────┼────────────┼───────────────────────────────┼──────┨
┃ pyinstrument │ Sampling │ CPU │ pyinstrument test.py │ × ┃
┃ py-spy │ Sampling │ CPU │ py-spy top -- python3 test.py │ ✓ ┃
┃ scalene │ Sampling │ CPU+Memory │ scalene test.py │ × ┃
┃ memray │ Tracing │ Memory │ memray run --live test.py │ ✓ ┃
┗━━━━━━━━━━━━━━┷━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━┛
# NumPy
Array manipulation mini-language. It can run up to one hundred times faster than the
equivalent Python code. An even faster alternative that runs on a GPU is called CuPy.
Shape is a tuple of dimension sizes. A 100x50 RGB image has shape (50, 100, 3).
Axis is an index of the dimension that gets aggregated. Leftmost dimension has index 0.
Summing the RGB image along axis 2 will return a greyscale image with shape (50, 100).
Indexing
Indexes should not be tuples because Python converts 'obj[i, j]' to 'obj[(i, j)]'!
':' returns a slice of all of the dimension's indexes. Omitted dimensions default to ':'.
Any value that is broadcastable to the indexed shape can be assigned to the selection.
Broadcasting
Set of rules by which NumPy functions operate on arrays of different sizes and/or dimensions.
1. If array shapes differ in length, left-pad the shorter shape with ones:
2. If any dimensions differ in size, expand the ones that have size 1 by duplicating their elements:
Example
For each point returns index of its nearest point ([0.1, 0.6, 0.8] => [1, 2, 1]):
Modes
Examples
Image Draw
# Audio
import wave
Bytes object contains a sequence of frames, each consisting of one or more samples.
In a stereo signal, the first sample of a frame belongs to the left channel.
Each sample consists of one or more bytes that, when converted to an integer, indicate
the displacement of a speaker membrane at a given moment.
If sample width is one byte, then the integer should be encoded unsigned.
For all other sizes, the integer should be encoded signed with little-endian byte order.
Sample Values
┏━━━━━━━━━━━┯━━━━━━━━━━━┯━━━━━━┯━━━━━━━━━━━┓
┃ sampwidth │ min │ zero │ max ┃
┠───────────┼───────────┼──────┼───────────┨
┃ 1 │ 0 │ 128 │ 255 ┃
┃ 2 │ -32768 │ 0 │ 32767 ┃
┃ 3 │ -8388608 │ 0 │ 8388607 ┃
┗━━━━━━━━━━━┷━━━━━━━━━━━┷━━━━━━┷━━━━━━━━━━━┛
def read_wav_file(filename):
    """Read a WAV file and return all of its samples as a flat list of floats.

    One-byte samples are decoded as unsigned integers and shifted by 128, all other
    widths as signed little-endian integers. Every integer is then divided by
    2 ** (bits_per_sample - 1).
    """
    def get_int(bytes_obj):
        an_int = int.from_bytes(bytes_obj, 'little', signed=(sampwidth != 1))
        return an_int - 128 * (sampwidth == 1)   # Centers unsigned one-byte samples on zero.
    with wave.open(filename, 'rb') as file:
        sampwidth = file.getsampwidth()
        frames = file.readframes(-1)
    bytes_samples = (frames[i : i+sampwidth] for i in range(0, len(frames), sampwidth))
    return [get_int(b) / pow(2, sampwidth * 8 - 1) for b in bytes_samples]
Write Float Samples to WAV File
Examples
Text to Speech
# Synthesizer
Plays Popcorn by Gershon Kingsley:
F = 44100
P1 = '71♩,69♪,,71♩,66♪,,62♩,66♪,,59♩,,'
P2 = '71♩,73♪,,74♩,73♪,,74♪,,71♪,,73♩,71♪,,73♪,,69♪,,71♩,69♪,,71♪,,67♪,,71♩,,'


def get_pause(seconds):
    """Return an iterator of zeros that spans the passed number of seconds."""
    return it.repeat(0, int(seconds * F))


def sin_f(i, hz):
    """Return the value of a sine wave with frequency hz at sample index i."""
    return math.sin(i * 2 * math.pi * hz / F)


def get_wave(hz, seconds):
    """Return a generator of sine samples that spans the passed number of seconds."""
    return (sin_f(i, hz) for i in range(int(seconds * F)))


def get_hz(key):
    """Convert a key number to a frequency."""
    return 8.176 * 2 ** (int(key) / 12)


def parse_note(note):
    """Return a (frequency, duration) pair. Notes marked with '♩' last 1/4, others 1/8."""
    duration = 1/4 if '♩' in note else 1/8
    return get_hz(note[:2]), duration


def get_samples(note):
    """Return samples of the note, or a 1/8 pause if the note is an empty string."""
    if note:
        return get_wave(*parse_note(note))
    return get_pause(1/8)


samples_f = it.chain.from_iterable(get_samples(n) for n in f'{P1},{P1},{P2}'.split(','))
samples_i = array.array('h', (int(f * 30000) for f in samples_f))
simpleaudio.play_buffer(samples_i, 1, 2, F).wait_done()
# Pygame
pg.init()
screen = pg.display.set_mode((500, 500))
rect = pg.Rect(240, 240, 20, 20)
# Maps arrow keys to (dx, dy) steps. Built once because it never changes.
deltas = {pg.K_UP: (0, -20), pg.K_RIGHT: (20, 0), pg.K_DOWN: (0, 20), pg.K_LEFT: (-20, 0)}
while not pg.event.get(pg.QUIT):
    for event in pg.event.get(pg.KEYDOWN):
        dx, dy = deltas.get(event.key, (0, 0))   # Other keys leave the rectangle in place.
        rect = rect.move((dx, dy))
    screen.fill((0, 0, 0))
    pg.draw.rect(screen, (255, 255, 255), rect)
    pg.display.flip()
Rectangle
Surface
<Surf> = pg.display.set_mode((width, height)) # Opens new window and returns its surface.
<Surf> = pg.Surface((width, height)) # New RGB surface. RGBA if `flags=pg.SRCALPHA`.
<Surf> = pg.image.load(<path/file>) # Loads the image. Format depends on source.
<Surf> = pg.surfarray.make_surface(<np_array>) # Also `<np_arr> = surfarray.pixels3d(<Surf>)`.
<Surf> = <Surf>.subsurface(<Rect>) # Creates a new surface from the cutout.
Font
<Font> = pg.font.Font(<path/file>, size) # Loads TTF file. Pass None for default font.
<Surf> = <Font>.render(text, antialias, color) # Background color can be specified at the end.
Sound
import collections, dataclasses, enum, io, itertools as it, pygame as pg, urllib.request
from random import randint
def main():
    """Create the screen, images, Mario and tiles and pass them to run().

    NOTE(review): W, H, P and run() are not defined in this part of the file. They are
    presumably declared elsewhere, confirm before running.
    """
    def get_screen():
        pg.init()
        return pg.display.set_mode((W*16, H*16))
    def get_images():
        url = 'https://gto76.github.io/python-cheatsheet/web/mario_bros.png'
        img = pg.image.load(io.BytesIO(urllib.request.urlopen(url).read()))
        # Cuts the top row of the downloaded image into 16 pixel wide subsurfaces.
        return [img.subsurface(get_rect(x, 0)) for x in range(img.get_width() // 16)]
    def get_mario():
        Mario = dataclasses.make_dataclass('Mario', 'rect spd facing_left frame_cycle'.split())
        return Mario(get_rect(1, 1), P(0, 0), False, it.cycle(range(3)))
    def get_tiles():
        border = [(x, y) for x in range(W) for y in range(H) if x in [0, W-1] or y in [0, H-1]]
        platforms = [(randint(1, W-2), randint(2, H-2)) for _ in range(W*H // 10)]
        return [get_rect(x, y) for x, y in border + platforms]
    def get_rect(x, y):
        # Converts grid coordinates to a 16x16 pixel rectangle.
        return pg.Rect(x*16, y*16, 16, 16)
    run(get_screen(), get_images(), get_mario(), get_tiles())


if __name__ == '__main__':
    main()
# Pandas
Series
┏━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
┃ │ 'sum' │ ['sum'] │ {'s': 'sum'} ┃
┠───────────────┼─────────────┼─────────────┼───────────────┨
┃ sr.apply(…) │ 5 │ sum 5 │ s 5 ┃
┃ sr.agg(…) │ │ │ ┃
┗━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
┏━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
┃ │ 'rank' │ ['rank'] │ {'r': 'rank'} ┃
┠───────────────┼─────────────┼─────────────┼───────────────┨
┃ sr.apply(…) │ │ rank │ ┃
┃ sr.agg(…) │ x 1 │ x 1 │ r x 1 ┃
┃ │ y 2 │ y 2 │ y 2 ┃
┗━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
<DF> = <DF>.set_index(column_key) # Replaces row keys with values from the column.
<DF> = <DF>.reset_index(drop=False) # Drops or moves row keys to column named index.
<DF> = <DF>.sort_index(ascending=True) # Sorts rows by row keys. Use `axis=1` for cols.
<DF> = <DF>.sort_values(column_key/s) # Sorts rows by passed column/s. Also `axis=1`.
┏━━━━━━━━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ │ 'outer' │ 'inner' │ 'left' │ Description ┃
┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
┃ l.merge(r, on='y', │ x y z │ x y z │ x y z │ Merges on column if 'on' ┃
┃ how=…) │ 0 1 2 . │ 3 4 5 │ 1 2 . │ or 'left/right_on' are ┃
┃ │ 1 3 4 5 │ │ 3 4 5 │ set, else on shared cols.┃
┃ │ 2 . 6 7 │ │ │ Uses 'inner' by default. ┃
┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
┃ l.join(r, lsuffix='l', │ x yl yr z │ │ x yl yr z │ Merges on row keys. ┃
┃ rsuffix='r', │ a 1 2 . . │ x yl yr z │ 1 2 . . │ Uses 'left' by default. ┃
┃ how=…) │ b 3 4 4 5 │ 3 4 4 5 │ 3 4 4 5 │ If r is a Series, it is ┃
┃ │ c . . 6 7 │ │ │ treated as a column. ┃
┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
┃ pd.concat([l, r], │ x y z │ y │ │ Adds rows at the bottom. ┃
┃ axis=0, │ a 1 2 . │ 2 │ │ Uses 'outer' by default. ┃
┃ join=…) │ b 3 4 . │ 4 │ │ A Series is treated as a ┃
┃ │ b . 4 5 │ 4 │ │ column. To add a row use ┃
┃ │ c . 6 7 │ 6 │ │ pd.concat([l, DF([sr])]).┃
┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
┃ pd.concat([l, r], │ x y y z │ │ │ Adds columns at the ┃
┃ axis=1, │ a 1 2 . . │ x y y z │ │ right end. Uses 'outer' ┃
┃ join=…) │ b 3 4 4 5 │ 3 4 4 5 │ │ by default. A Series is ┃
┃ │ c . . 6 7 │ │ │ treated as a column. ┃
┠────────────────────────┼───────────────┼────────────┼────────────┼──────────────────────────┨
┃ l.combine_first(r) │ x y z │ │ │ Adds missing rows and ┃
┃ │ a 1 2 . │ │ │ columns. Also updates ┃
┃ │ b 3 4 5 │ │ │ items that contain NaN. ┃
┃ │ c . 6 7 │ │ │ Argument r must be a DF. ┃
┗━━━━━━━━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━━━━━━━━━━━┛
DataFrame — Aggregate, Transform, Map:
All operations operate on columns by default. Pass 'axis=1' to process the rows instead.
┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
┃ │ 'sum' │ ['sum'] │ {'x': 'sum'} ┃
┠─────────────────┼─────────────┼─────────────┼───────────────┨
┃ df.apply(…) │ x 4 │ x y │ x 4 ┃
┃ df.agg(…) │ y 6 │ sum 4 6 │ ┃
┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
┃ │ 'rank' │ ['rank'] │ {'x': 'rank'} ┃
┠─────────────────┼─────────────┼─────────────┼───────────────┨
┃ df.apply(…) │ │ x y │ ┃
┃ df.agg(…) │ x y │ rank rank │ x ┃
┃ df.transform(…) │ a 1 1 │ a 1 1 │ a 1 ┃
┃ │ b 2 2 │ b 2 2 │ b 2 ┃
┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
GroupBy
Object that groups together rows of a dataframe based on the value of the passed column.
┏━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┓
┃ │ 'sum' │ 'rank' │ ['rank'] │ {'x': 'rank'} ┃
┠─────────────────┼─────────────┼─────────────┼─────────────┼───────────────┨
┃ gb.agg(…) │ x y │ │ x y │ ┃
┃ │ z │ x y │ rank rank │ x ┃
┃ │ 3 1 2 │ a 1 1 │ a 1 1 │ a 1 ┃
┃ │ 6 11 13 │ b 1 1 │ b 1 1 │ b 1 ┃
┃ │ │ c 2 2 │ c 2 2 │ c 2 ┃
┠─────────────────┼─────────────┼─────────────┼─────────────┼───────────────┨
┃ gb.transform(…) │ x y │ x y │ │ ┃
┃ │ a 1 2 │ a 1 1 │ │ ┃
┃ │ b 11 13 │ b 1 1 │ │ ┃
┃ │ c 11 13 │ c 2 2 │ │ ┃
┗━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┛
Rolling
# Plotly
Displays a line chart of total coronavirus deaths per million grouped by continent:
Continent
2500 South America
Total Deaths per Million
North America
2000 Europe
Asia
1500 Africa
Oceania
1000
500
0
Apr 2020 Jul 2020 Oct 2020 Jan 2021 Apr 2021 Jul 2021 Oct 2021
Date
covid = pd.read_csv('https://covid.ourworldindata.org/data/owid-covid-data.csv',
                    usecols=['iso_code', 'date', 'total_deaths', 'population'])
continents = pd.read_csv('https://gist.githubusercontent.com/stevewithington/20a69c0b6d2ff'
                         '846ea5d35e5fc47f26c/raw/country-and-continent-codes-list-csv.csv',
                         usecols=['Three_Letter_Country_Code', 'Continent_Name'])
# Attaches a continent name to every row by matching the country codes.
df = pd.merge(covid, continents, left_on='iso_code', right_on='Three_Letter_Country_Code')
df = df.groupby(['Continent_Name', 'date']).sum().reset_index()
df['Total Deaths per Million'] = df.total_deaths * 1e6 / df.population
df = df[df.date > '2020-03-14']
df = df.rename({'date': 'Date', 'Continent_Name': 'Continent'}, axis='columns')
ex.line(df, x='Date', y='Total Deaths per Million', color='Continent').show()
Displays a multi-axis line chart of total coronavirus cases and changes in prices of Bitcoin, Dow Jones and gold:
Total Cases
250M Bitcoin
600
Dow Jones
200M Gold
Total Cases
400
150M
%
100M
200
50M
0 0
Apr 2020 Jul 2020 Oct 2020 Jan 2021 Apr 2021 Jul 2021 Oct 2021
def main():
    """Scrape the four series, wrangle them into one object and display it."""
    covid, bitcoin, gold, dow = scrape_data()
    display_data(wrangle_data(covid, bitcoin, gold, dow))
def scrape_data():
    """Download covid cases and three price series and return them as renamed Series.

    Returns an iterator of four Series named 'Total Cases', 'Bitcoin', 'Gold' and
    'Dow Jones', in that order.
    """
    def get_covid_cases():
        url = 'https://covid.ourworldindata.org/data/owid-covid-data.csv'
        df = pd.read_csv(url, usecols=['location', 'date', 'total_cases'])
        return df[df.location == 'World'].set_index('date').total_cases
    def get_ticker(symbol):
        url = (f'https://query1.finance.yahoo.com/v7/finance/download/{symbol}?'
                'period1=1579651200&period2=9999999999&interval=1d&events=history')
        df = pd.read_csv(url, usecols=['Date', 'Close'])
        return df.set_index('Date').Close
    out = get_covid_cases(), get_ticker('BTC-USD'), get_ticker('GC=F'), get_ticker('^DJI')
    return map(pd.Series.rename, out, ['Total Cases', 'Bitcoin', 'Gold', 'Dow Jones'])
def display_data(df):
    """Show every column of df as a line, 'Total Cases' on axis 'y1' and the rest on 'y2'."""
    figure = go.Figure()
    for col_name in reversed(df.columns):
        yaxis = 'y1' if col_name == 'Total Cases' else 'y2'
        trace = go.Scatter(x=df.index, y=df[col_name], name=col_name, yaxis=yaxis)
        figure.add_trace(trace)
    figure.update_layout(
        yaxis1=dict(title='Total Cases', rangemode='tozero'),
        yaxis2=dict(title='%', rangemode='tozero', overlaying='y', side='right'),
        legend=dict(x=1.08),
        width=944,
        height=423
    )
    figure.show()
if __name__ == '__main__':
    main()
# PySimpleGUI
Definitions:
All 'cdef' definitions are optional, but they contribute to the speed-up.
Script needs to be saved with a 'pyx' extension.
Virtual Environments
#!/usr/bin/env python3
#
# Usage: .py
#
def main():
    """Entry point of the script. Does nothing yet."""
    pass
###
## UTIL
#
def read_file(filename):
    """Return the lines of a UTF-8 encoded text file as a list, line endings included."""
    with open(filename, encoding='utf-8') as file:
        return file.readlines()
if __name__ == '__main__':
    main()