0% found this document useful (0 votes)
33 views9 pages

Pandas

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
33 views9 pages

Pandas

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 9

INTRODUCTION TO NUMPY doubling_array = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

1. Your first NumPy array # Create an array of integers from one to ten
one_to_ten = np.arange(1, 11)
# edited/added
# Create your scatterplot
import numpy as np
plt.scatter(one_to_ten, doubling_array)
sudoku_list = np.load('sudoku_game.npy')
plt.show()
# Import NumPy
import numpy as np 4. Array dimensionality
# Convert sudoku_list into an array
# edited/added
sudoku_array = np.array(sudoku_list)
sudoku_solution = np.load('sudoku_solution.npy')
# Print the type of sudoku_array
sudoku_list = np.load('sudoku_game.npy')
print(type(sudoku_array))
sudoku_game = np.array(sudoku_list)
2. Creating arrays from scratch # Create the game_and_solution 3D array
game_and_solution = np.array([sudoku_game, sudoku_solution])
# Create an array of zeros which has four columns and two rows
# Print game_and_solution
zero_array = np.zeros((2, 4))
print(game_and_solution)
print(zero_array)
# Create an array of random floats which has six columns and three rows 5. The fourth dimension
random_array = np.random.random((3, 6))
# edited/added
print(random_array)
new_sudoku_game = np.load('new_sudoku_game.npy')
3. A range array new_sudoku_solution = np.load('new_sudoku_solution.npy')
game_and_solution = np.load('game_and_solution.npy')
doubling_array = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
# Create a second 3D array of another game and its solution
plt.scatter(x_values, y_values)
new_game_and_solution = np.array([new_sudoku_game,
plt.show()
new_sudoku_solution])
# edited/added
from matplotlib import pyplot as plt
# Create a 4D array of both game and solution 3D arrays zero_array = np.zeros((3, 2))
games_and_solutions = np.array([game_and_solution, # Print the data type of zero_array
new_game_and_solution]) print(zero_array.dtype)
# Print the shape of your 4D array # Create an array of zeros with three rows and two columns
print(games_and_solutions.shape) zero_array = np.zeros((3, 2))

6. Flattening and reshaping # Print the data type of zero_array


print(zero_array.dtype)
# edited/added
# Create a new array of int32 zeros with three rows and two columns
sudoku_game = np.load('sudoku_game_new.npy')
zero_int_array = np.zeros((3, 2), dtype=np.int32)
# Flatten sudoku_game
# Print the data type of zero_int_array
flattened_game = sudoku_game.flatten()
print(zero_int_array.dtype)
# Print the shape of flattened_game
print(flattened_game.shape) 8. Anticipating data types
# Flatten sudoku_game A string data type
flattened_game = sudoku_game.flatten()
 • np.array([78.988, "NumPy", True])
# Print the shape of flattened_game • np.array([9, 1.12, True]).astype("<U5")
print(flattened_game.shape)
An integer data type
# Reshape flattened_game back to a nine by nine array
reshaped_game = flattened_game.reshape((9, 9))  np.array([34.62, 70.13, 9]).astype(np.int64)
 np.array([45.67, True], dtype=np.int8)
# Print sudoku_game and reshaped_game
print(sudoku_game) A float data type
print(reshaped_game)
 • np.array([[6, 15.7], [True, False]])
 • np.random.random((4, 5))
7. NumPy data types
9. A smaller sudoku game
The dtype argument
# Print the data type of sudoku_game
# Create an array of zeros with three rows and two columns
print(sudoku_game.dtype) 11. Stepping into 2D

# Print the data type of sudoku_game # Create an array of the first 100 trunk diameters from tree_census
print(sudoku_game.dtype) hundred_diameters = tree_census[:100, 2]
# Change the data type of sudoku_game to int8 print(hundred_diameters)
small_sudoku_game = sudoku_game.astype(np.int8) # Create an array of trunk diameters with even row indices from 50 to 100
inclusive
# Print the data type of small_sudoku_game
every_other_diameter = tree_census[50:101:2, 2]
print(small_sudoku_game.dtype)
print(every_other_diameter)
10. Slicing and indexing trees
12. Sorting trees
# edited/added
# Extract trunk diameters information and sort from smallest to largest
tree_census = np.load('tree_census.npy')
sorted_trunk_diameters = np.sort(tree_census[:, 2])
# Select all rows of block ID data from the second column
print(sorted_trunk_diameters)
block_ids = tree_census[:, 1]
# Print the first five block_ids 13. Filtering with masks
print(block_ids[:5])
# Create an array which contains row data on the largest tree in tree_census
# Select all rows of block ID data from the second column
largest_tree_data = tree_census[tree_census[:, 2] == 51]
block_ids = tree_census[:, 1]
print(largest_tree_data)
# Select the tenth block ID from block_ids
# Create an array which contains row data on the largest tree in tree_census
tenth_block_id = block_ids[9]
largest_tree_data = tree_census[tree_census[:, 2] == 51]
print(tenth_block_id)
print(largest_tree_data)
# Select all rows of block ID data from the second column
# Slice largest_tree_data to get only the block id
block_ids = tree_census[:, 1]
largest_tree_block_id = largest_tree_data[:, 1]
# Select five block IDs from block_ids starting with the tenth ID
print(largest_tree_block_id)
block_id_slice = block_ids[9:14]
# Create an array which contains row data on the largest tree in tree_census
print(block_id_slice)
largest_tree_data = tree_census[tree_census[:, 2] == 51]
print(largest_tree_data) Compatible along the first axis

# Slice largest_tree_data to get only the block ID  (4, 2) and (6, 2)


largest_tree_block_id = largest_tree_data[:, 1]  (15, 5) and (100, 5)
print(largest_tree_block_id) Compatible along the second axis
# Create an array which contains row data on all trees with
largest_tree_block_id  (4, 2) and (4, 3)
trees_on_largest_tree_block = tree_census[tree_census[:, 1] ==
Not compatible
largest_tree_block_id]
print(trees_on_largest_tree_block)  (5, 2) and (7, 4)
 (4, 2) and (4,)
14. Fancy indexing vs. np.where()  (4, 2) and (2,)

# Create the block_313879 array containing trees on block 313879 17. Adding rows
block_313879 = tree_census[tree_census[:, 1] == 313879] # edited/added
print(block_313879) new_trees = np.array([[1211, 227386, 20, 0], [1212, 227386, 8, 0]])
# Create an array of row_indices for trees on block 313879 # Print the shapes of tree_census and new_trees
row_indices = np.where(tree_census[:, 1] == 313879) print(tree_census.shape, new_trees.shape)
# Create an array which only contains data for trees on block 313879 # Print the shapes of tree_census and new_trees
block_313879 = tree_census[row_indices] print(tree_census.shape, new_trees.shape)
print(block_313879) # Add rows to tree_census which contain data for the new trees
updated_tree_census = np.concatenate((tree_census, new_trees))
15. Creating arrays from conditions
print(updated_tree_census)
# Create and print a 1D array of tree and stump diameters
trunk_stump_diameters = np.where(tree_census[:, 2] == 0, tree_census[:, 3], 18. Adding columns
tree_census[:, 2])
# Print the shapes of tree_census and trunk_stump_diameters
print(trunk_stump_diameters)
print(trunk_stump_diameters.shape, tree_census.shape)
16. Compatible or not? # Print the shapes of tree_census and trunk_stump_diameters
print(trunk_stump_diameters.shape, tree_census.shape) 20. Sales totals

# Reshape trunk_stump_diameters # edited/added


reshaped_diameters = trunk_stump_diameters.reshape((1000, 1)) monthly_sales = np.load('monthly_sales.npy')
# Print the shapes of tree_census and trunk_stump_diameters # Create a 2D array of total monthly sales across industries
print(trunk_stump_diameters.shape, tree_census.shape) monthly_industry_sales = monthly_sales.sum(axis=1, keepdims=True)
# Reshape trunk_stump_diameters print(monthly_industry_sales)
reshaped_diameters = trunk_stump_diameters.reshape((1000, 1)) # Create a 2D array of total monthly sales across industries
# Concatenate reshaped_diameters to tree_census as the last column monthly_industry_sales = monthly_sales.sum(axis=1, keepdims=True)
concatenated_tree_census = np.concatenate((tree_census, print(monthly_industry_sales)
reshaped_diameters), axis=1) # Add this column as the last column in monthly_sales
print(concatenated_tree_census) monthly_sales_with_total = np.concatenate((monthly_sales,
monthly_industry_sales), axis=1)
19. Deleting with np.delete()
print(monthly_sales_with_total)
# Delete the stump diameter column from tree_census
tree_census_no_stumps = np.delete(tree_census, 3, axis=1) 21. Plotting averages

# Save the indices of the trees on block 313879 # Create the 1D array avg_monthly_sales
private_block_indices = np.where(tree_census[:, 1] == 313879) avg_monthly_sales = monthly_sales.mean(axis=1)
# Delete the stump diameter column from tree_census print(avg_monthly_sales)
tree_census_no_stumps = np.delete(tree_census, 3, axis=1) # Create the 1D array avg_monthly_sales
# Save the indices of the trees on block 313879 avg_monthly_sales = monthly_sales.mean(axis=1)
private_block_indices = np.where(tree_census[:,1] == 313879) print(avg_monthly_sales)
# Delete the rows for trees on block 313879 from tree_census_no_stumps # Plot avg_monthly_sales by month
tree_census_clean = np.delete(tree_census_no_stumps, plt.plot(np.arange(1, 13), avg_monthly_sales, label="Average sales across
private_block_indices, axis=0) industries")
# Print the shape of tree_census_clean # Plot department store sales by month
print(tree_census_clean.shape)
plt.plot(np.arange(1, 13), monthly_sales[:, 2], label="Department store tax_collected = monthly_sales * 0.05
sales") print(tax_collected)
plt.legend() # Create an array of sales revenue plus tax collected by industry and month
plt.show() total_tax_and_revenue = tax_collected + monthly_sales

22. Cumulative sales print(total_tax_and_revenue)

# Find cumulative monthly sales for each industry 24. Projecting sales
cumulative_monthly_industry_sales = monthly_sales.cumsum(axis=0) # edited/added
print(cumulative_monthly_industry_sales) monthly_industry_multipliers = np.load('monthly_industry_multipliers.npy')
# Find cumulative monthly sales for each industry # Create an array of monthly projected sales for all industries
cumulative_monthly_industry_sales = monthly_sales.cumsum(axis=0) projected_monthly_sales = monthly_sales * monthly_industry_multipliers
print(cumulative_monthly_industry_sales) print(projected_monthly_sales)
# Plot each industry's cumulative sales by month as separate lines # Create an array of monthly projected sales for all industries
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 0], projected_monthly_sales = monthly_sales * monthly_industry_multipliers
label="Liquor Stores")
print(projected_monthly_sales)
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 1],
label="Restaurants") # Graph current liquor store sales and projected liquor store sales by month
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 2], plt.plot(np.arange(1, 13), monthly_sales[:, 0], label="Current liquor store
label="Department stores") sales")
plt.legend() plt.plot(np.arange(1, 13), projected_monthly_sales[:, 0], label="Projected
liquor store sales")
plt.show()
plt.legend()
23. Tax calculations plt.show()

# Create an array of tax collected by industry and month


25. Vectorizing .upper()
tax_collected = monthly_sales * 0.05
# edited/added
print(tax_collected)
names = np.array([["Izzy", "Monica", "Marvin"],
# Create an array of tax collected by industry and month
["Weber", "Patel", "Hernandez"]]) print(monthly_growth_2D * monthly_sales)
# Vectorize the .upper() string method
28. Broadcasting across rows
vectorized_upper = np.vectorize(str.upper)
# Apply vectorized_upper to the names array # Find the mean sales projection multiplier for each industry
uppercase_names = vectorized_upper(names) mean_multipliers = monthly_industry_multipliers.mean(axis=0)
print(uppercase_names) print(mean_multipliers)
# Find the mean sales projection multiplier for each industry
26. Broadcastable or not? mean_multipliers = monthly_industry_multipliers.mean(axis=0)
Broadcastable print(mean_multipliers)
 • (3, 4) and (1, 4) # Print the shapes of mean_multipliers and monthly_sales
 • (3, 4) and (4, ) print(mean_multipliers.shape, monthly_sales.shape)
 • (3, 4) and (3, 1)
# Find the mean sales projection multiplier for each industry
Not Broadcastable mean_multipliers = monthly_industry_multipliers.mean(axis=0)
print(mean_multipliers)
 • (3, 4) and (1, 2)
 • (3, 4) and (4, 1) # Print the shapes of mean_multipliers and monthly_sales
 • (3, 4) and (3, ) print(mean_multipliers.shape, monthly_sales.shape)
27. Broadcasting across columns # Multiply each value by the multiplier for that industry
projected_sales = monthly_sales * mean_multipliers
# edited/added
print(projected_sales)
monthly_growth_rate = [1.01, 1.03, 1.03, 1.02, 1.05, 1.03, 1.06, 1.04, 1.03,
1.04, 1.02, 1.01]
29. Loading .npy files
# Convert monthly_growth_rate into a NumPy array
monthly_growth_1D = np.array(monthly_growth_rate) # Load the mystery_image.npy file
with open("mystery_image.npy", "rb") as f:
# Reshape monthly_growth_1D
rgb_array = np.load(f)
monthly_growth_2D = monthly_growth_1D.reshape((12, 1))
# Multiply each column in monthly_sales by monthly_growth_2D
plt.imshow(rgb_array)
plt.show() 32. Augmenting Monet

# Flip rgb_array so that it is the mirror image of the original


30. Getting help
mirrored_monet = np.flip(rgb_array, axis=1)
# Display the documentation for .astype() plt.imshow(mirrored_monet)
help(np.ndarray.astype) plt.show()
# Flip rgb_array so that it is upside down
31. Update and save
upside_down_monet = np.flip(rgb_array, axis=(0, 1))
# edited/added plt.imshow(upside_down_monet)
rgb_array = np.load('rgb_array.npy') plt.show()
# Reduce every value in rgb_array by 50 percent
darker_rgb_array = rgb_array * 0.5 33. Transposing your masterpiece
# Reduce every value in rgb_array by 50 percent # Transpose rgb_array
darker_rgb_array = rgb_array * 0.5 transposed_rgb = np.transpose(rgb_array, axes=(1, 0, 2))
# Convert darker_rgb_array into an array of integers plt.imshow(transposed_rgb)
darker_rgb_int_array = darker_rgb_array.astype(np.int8) plt.show()
plt.imshow(darker_rgb_int_array)
plt.show() 34. 2D split and stack

# Reduce every value in rgb_array by 50 percent # Split monthly_sales into quarterly data
darker_rgb_array = rgb_array * 0.5 q1_sales, q2_sales, q3_sales, q4_sales = np.split(monthly_sales, 4)
# Convert darker_rgb_array into an array of integers print(q1_sales)
darker_rgb_int_array = darker_rgb_array.astype(np.int8) # Split monthly_sales into quarterly data
plt.imshow(darker_rgb_int_array) q1_sales, q2_sales, q3_sales, q4_sales = np.split(monthly_sales, 4)
plt.show() print(q1_sales)
# Save darker_rgb_int_array to an .npy file called darker_monet.npy # Stack the four quarterly sales arrays
with open("darker_monet.npy", "wb") as f: quarterly_sales = np.stack([q1_sales, q2_sales, q3_sales, q4_sales])
np.save(f, darker_rgb_int_array)
print(quarterly_sales) # Print the shapes of blue_array and emphasized_blue_array_2D
print(blue_array.shape, emphasized_blue_array_2D.shape)
35. Splitting RGB data
# Reshape red_array and green_array
# Split rgb_array into red, green, and blue arrays red_array_2D = red_array.reshape((675, 843)) # edited/added
red_array, green_array, blue_array = np.split(rgb_array, 3, axis=2) green_array_2D = green_array.reshape((675, 843)) # edited/added
# Split rgb_array into red, green, and blue arrays # Print the shapes of blue_array and emphasized_blue_array_2D
red_array, green_array, blue_array = np.split(rgb_array, 3, axis=2) print(blue_array.shape, emphasized_blue_array_2D.shape)
# Create emphasized_blue_array # Reshape red_array and green_array
emphasized_blue_array = np.where(blue_array > blue_array.mean(), 255, red_array_2D = red_array.reshape((675, 843)) # edited/added
blue_array)
green_array_2D = green_array.reshape((675, 843)) # edited/added
# Print the shape of emphasized_blue_array
# Stack red_array_2D, green_array_2D, and emphasized_blue_array_2D
print(emphasized_blue_array.shape)
emphasized_blue_monet = np.stack([red_array_2D, green_array_2D,
# Split rgb_array into red, green, and blue arrays emphasized_blue_array_2D], axis=2)
red_array, green_array, blue_array = np.split(rgb_array, 3, axis=2) plt.imshow(emphasized_blue_monet)
# Create emphasized_blue_array plt.show()
emphasized_blue_array = np.where(blue_array > blue_array.mean(), 255,
blue_array)
# Print the shape of emphasized_blue_array
print(emphasized_blue_array.shape)
# Remove the trailing dimension from emphasized_blue_array
emphasized_blue_array_2D = emphasized_blue_array.reshape((675, 843)) # edited/added

36. Stacking RGB data

# Print the shapes of blue_array and emphasized_blue_array_2D


print(blue_array.shape, emphasized_blue_array_2D.shape)

You might also like