Pandas
Pandas
1. Your first NumPy array
# Create an array of integers from one to ten
one_to_ten = np.arange(1, 11)
# edited/added
# Create your scatterplot
import numpy as np
plt.scatter(one_to_ten, doubling_array)
sudoku_list = np.load('sudoku_game.npy')
plt.show()
# Import NumPy
import numpy as np
4. Array dimensionality
# Convert sudoku_list into an array
# edited/added
sudoku_array = np.array(sudoku_list)
sudoku_solution = np.load('sudoku_solution.npy')
# Print the type of sudoku_array
sudoku_list = np.load('sudoku_game.npy')
print(type(sudoku_array))
sudoku_game = np.array(sudoku_list)
2. Creating arrays from scratch
# Create the game_and_solution 3D array
game_and_solution = np.array([sudoku_game, sudoku_solution])
# Create an array of zeros which has four columns and two rows
# Print game_and_solution
zero_array = np.zeros((2, 4))
print(game_and_solution)
print(zero_array)
# Create an array of random floats which has six columns and three rows
5. The fourth dimension
random_array = np.random.random((3, 6))
# edited/added
print(random_array)
new_sudoku_game = np.load('new_sudoku_game.npy')
3. A range array
new_sudoku_solution = np.load('new_sudoku_solution.npy')
game_and_solution = np.load('game_and_solution.npy')
doubling_array = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
# Create a second 3D array of another game and its solution
plt.scatter(x_values, y_values)
plt.show()
new_game_and_solution = np.array([new_sudoku_game,
                                  new_sudoku_solution])
# edited/added
from matplotlib import pyplot as plt
# Create a 4D array of both game and solution 3D arrays
games_and_solutions = np.array([game_and_solution,
                                new_game_and_solution])
zero_array = np.zeros((3, 2))
# Print the data type of zero_array
print(zero_array.dtype)
# Print the shape of your 4D array # Create an array of zeros with three rows and two columns
print(games_and_solutions.shape) zero_array = np.zeros((3, 2))
# Print the data type of sudoku_game # Create an array of the first 100 trunk diameters from tree_census
print(sudoku_game.dtype) hundred_diameters = tree_census[:100, 2]
# Change the data type of sudoku_game to int8 print(hundred_diameters)
small_sudoku_game = sudoku_game.astype(np.int8)
# Create an array of trunk diameters with even row indices from 50 to 100 inclusive
# Print the data type of small_sudoku_game
every_other_diameter = tree_census[50:101:2, 2]
print(small_sudoku_game.dtype)
print(every_other_diameter)
10. Slicing and indexing trees
12. Sorting trees
# edited/added
# Extract trunk diameters information and sort from smallest to largest
tree_census = np.load('tree_census.npy')
sorted_trunk_diameters = np.sort(tree_census[:, 2])
# Select all rows of block ID data from the second column
print(sorted_trunk_diameters)
block_ids = tree_census[:, 1]
# Print the first five block_ids
13. Filtering with masks
print(block_ids[:5])
# Create an array which contains row data on the largest tree in tree_census
# Select all rows of block ID data from the second column
largest_tree_data = tree_census[tree_census[:, 2] == 51]
block_ids = tree_census[:, 1]
print(largest_tree_data)
# Select the tenth block ID from block_ids
# Create an array which contains row data on the largest tree in tree_census
tenth_block_id = block_ids[9]
largest_tree_data = tree_census[tree_census[:, 2] == 51]
print(tenth_block_id)
print(largest_tree_data)
# Select all rows of block ID data from the second column
# Slice largest_tree_data to get only the block id
block_ids = tree_census[:, 1]
largest_tree_block_id = largest_tree_data[:, 1]
# Select five block IDs from block_ids starting with the tenth ID
print(largest_tree_block_id)
block_id_slice = block_ids[9:14]
# Create an array which contains row data on the largest tree in tree_census
print(block_id_slice)
largest_tree_data = tree_census[tree_census[:, 2] == 51]
print(largest_tree_data) Compatible along the first axis
# Create the block_313879 array containing trees on block 313879 17. Adding rows
block_313879 = tree_census[tree_census[:, 1] == 313879] # edited/added
print(block_313879) new_trees = np.array([[1211, 227386, 20, 0], [1212, 227386, 8, 0]])
# Create an array of row_indices for trees on block 313879 # Print the shapes of tree_census and new_trees
row_indices = np.where(tree_census[:, 1] == 313879) print(tree_census.shape, new_trees.shape)
# Create an array which only contains data for trees on block 313879 # Print the shapes of tree_census and new_trees
block_313879 = tree_census[row_indices] print(tree_census.shape, new_trees.shape)
print(block_313879) # Add rows to tree_census which contain data for the new trees
updated_tree_census = np.concatenate((tree_census, new_trees))
15. Creating arrays from conditions
print(updated_tree_census)
# Create and print a 1D array of tree and stump diameters
trunk_stump_diameters = np.where(tree_census[:, 2] == 0, tree_census[:, 3],
                                 tree_census[:, 2])
18. Adding columns
# Print the shapes of tree_census and trunk_stump_diameters
print(trunk_stump_diameters)
print(trunk_stump_diameters.shape, tree_census.shape)
16. Compatible or not? # Print the shapes of tree_census and trunk_stump_diameters
print(trunk_stump_diameters.shape, tree_census.shape) 20. Sales totals
# Save the indices of the trees on block 313879 # Create the 1D array avg_monthly_sales
private_block_indices = np.where(tree_census[:, 1] == 313879) avg_monthly_sales = monthly_sales.mean(axis=1)
# Delete the stump diameter column from tree_census print(avg_monthly_sales)
tree_census_no_stumps = np.delete(tree_census, 3, axis=1) # Create the 1D array avg_monthly_sales
# Save the indices of the trees on block 313879 avg_monthly_sales = monthly_sales.mean(axis=1)
private_block_indices = np.where(tree_census[:,1] == 313879) print(avg_monthly_sales)
# Delete the rows for trees on block 313879 from tree_census_no_stumps
tree_census_clean = np.delete(tree_census_no_stumps,
                              private_block_indices, axis=0)
# Plot avg_monthly_sales by month
plt.plot(np.arange(1, 13), avg_monthly_sales, label="Average sales across industries")
# Print the shape of tree_census_clean # Plot department store sales by month
print(tree_census_clean.shape)
plt.plot(np.arange(1, 13), monthly_sales[:, 2], label="Department store sales")
tax_collected = monthly_sales * 0.05
print(tax_collected)
plt.legend() # Create an array of sales revenue plus tax collected by industry and month
plt.show() total_tax_and_revenue = tax_collected + monthly_sales
# Find cumulative monthly sales for each industry 24. Projecting sales
cumulative_monthly_industry_sales = monthly_sales.cumsum(axis=0) # edited/added
print(cumulative_monthly_industry_sales) monthly_industry_multipliers = np.load('monthly_industry_multipliers.npy')
# Find cumulative monthly sales for each industry # Create an array of monthly projected sales for all industries
cumulative_monthly_industry_sales = monthly_sales.cumsum(axis=0) projected_monthly_sales = monthly_sales * monthly_industry_multipliers
print(cumulative_monthly_industry_sales) print(projected_monthly_sales)
# Plot each industry's cumulative sales by month as separate lines
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 0],
         label="Liquor Stores")
# Create an array of monthly projected sales for all industries
projected_monthly_sales = monthly_sales * monthly_industry_multipliers
print(projected_monthly_sales)
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 1],
         label="Restaurants")
plt.plot(np.arange(1, 13), cumulative_monthly_industry_sales[:, 2],
         label="Department stores")
plt.legend()
plt.show()
# Graph current liquor store sales and projected liquor store sales by month
plt.plot(np.arange(1, 13), monthly_sales[:, 0], label="Current liquor store sales")
plt.plot(np.arange(1, 13), projected_monthly_sales[:, 0], label="Projected liquor store sales")
plt.legend()
plt.show()
23. Tax calculations
# Reduce every value in rgb_array by 50 percent # Split monthly_sales into quarterly data
darker_rgb_array = rgb_array * 0.5 q1_sales, q2_sales, q3_sales, q4_sales = np.split(monthly_sales, 4)
# Convert darker_rgb_array into an array of integers print(q1_sales)
darker_rgb_int_array = darker_rgb_array.astype(np.int8) # Split monthly_sales into quarterly data
plt.imshow(darker_rgb_int_array) q1_sales, q2_sales, q3_sales, q4_sales = np.split(monthly_sales, 4)
plt.show() print(q1_sales)
# Save darker_rgb_int_array to an .npy file called darker_monet.npy
with open("darker_monet.npy", "wb") as f:
    np.save(f, darker_rgb_int_array)
# Stack the four quarterly sales arrays
quarterly_sales = np.stack([q1_sales, q2_sales, q3_sales, q4_sales])
print(quarterly_sales) # Print the shapes of blue_array and emphasized_blue_array_2D
print(blue_array.shape, emphasized_blue_array_2D.shape)
35. Splitting RGB data
# Reshape red_array and green_array
# Split rgb_array into red, green, and blue arrays red_array_2D = red_array.reshape((675, 843)) # edited/added
red_array, green_array, blue_array = np.split(rgb_array, 3, axis=2) green_array_2D = green_array.reshape((675, 843)) # edited/added
# Split rgb_array into red, green, and blue arrays # Print the shapes of blue_array and emphasized_blue_array_2D
red_array, green_array, blue_array = np.split(rgb_array, 3, axis=2) print(blue_array.shape, emphasized_blue_array_2D.shape)
# Create emphasized_blue_array
emphasized_blue_array = np.where(blue_array > blue_array.mean(), 255,
                                 blue_array)
# Reshape red_array and green_array
red_array_2D = red_array.reshape((675, 843)) # edited/added
green_array_2D = green_array.reshape((675, 843)) # edited/added
# Print the shape of emphasized_blue_array
# Stack red_array_2D, green_array_2D, and emphasized_blue_array_2D
print(emphasized_blue_array.shape)
emphasized_blue_monet = np.stack([red_array_2D, green_array_2D,
                                  emphasized_blue_array_2D], axis=2)
plt.imshow(emphasized_blue_monet)
plt.show()
# Split rgb_array into red, green, and blue arrays
red_array, green_array, blue_array = np.split(rgb_array, 3, axis=2)
# Create emphasized_blue_array
emphasized_blue_array = np.where(blue_array > blue_array.mean(), 255,
blue_array)
# Print the shape of emphasized_blue_array
print(emphasized_blue_array.shape)
# Remove the trailing dimension from emphasized_blue_array
emphasized_blue_array_2D = emphasized_blue_array.reshape((675, 843)) # edited/added