The Error Code
The Error Code
hist(merged_data['two_rightmost_digits']
plt.ylabel("Frequency")
plt.tight_layout()
plt.show()
# Create subplots for each store, showing the histogram of rightmost digits
stores = merged_data['STORE'].unique()
plt.hist(store_data['rightmost_digit'],
plt.title(f"Store {store}")
plt.xlabel("Rightmost Digit")
plt.ylabel("Frequency")
plt.tight_layout()
plt.show()
)
avg_price_not_ends_in_9 = merged_data[merged_data['ends_in_9'] == 0]['PRICE'].mean(
plt.figure(figsize=(8, 6))
plt.tight_layout()
plt.show()
avg_price_ends_in_9, avg_price_not_ends_in_9
plt.figure(figsize=(8, 6))
plt.tight_layout()
plt.show()
avg_price_ends_in_99, avg_price_not_ends_in_99
# Calculate average prices for products ending in 9 vs not ending in 9 for each store
store_averages = []
avg_not_ends_in_9 = store_data[store_data['ends_in_9'] == 0]['PRICE'].mean()
store_averages.append({
'Store': store,
})
store_averages_df = pd.DataFrame(store_averages)
axes = axes.flatten()
ax = axes[idx]
ax.set_title(f"Store {int(row['Store'])}")
ax.axis('off')
plt.tight_layout()
plt.show()
merged_data['log_PRICE'] = np.log(merged_data['PRICE'])
print(model.summary())
coefficient = model.params['ends_in_9']
std_error = model.bse['ends_in_9']
p_value = model.pvalues['ends_in_9']
print(f"p-value: {p_value}")
else:
print("The price difference between products ending in 9 and those not ending in 9 is NOT statistically significant.")
merged_data['log_PRICE'] = np.log(merged_data['PRICE'])
model_99_summary = model_99.summary()
model_99_summary
store_results_log_price = []
# Loop through each store to calculate the regression for log(PRICE) ~ ends_in_9
X_store = sm.add_constant(store_data['ends_in_9'])
y_store = np.log(store_data['PRICE'])
"STORE": store,
"p-value": model_store.pvalues['ends_in_9']
})
store_results_df = pd.DataFrame(store_results_log_price)
store_results_df
merged_data['log_price'] = np.log(merged_data['PRICE'])
merged_data['log_move'] = np.log(merged_data['MOVE'])
store_results_log_price_ends_in_99 = []
# Loop through each store to calculate the regression for log(PRICE) ~ ends_in_99
X_store = sm.add_constant(store_data['ends_in_99'])
y_store = np.log(store_data['PRICE'])
store_results_log_price_ends_in_99.append({
"STORE": store,
})
store_results_ends_in_99_df = pd.DataFrame(store_results_log_price_ends_in_99)
store_results_ends_in_99_df
merged_data['log_PRICE'] = pd.to_numeric(
merged_data['log_PRICE'], errors='coerce')
merged_data['log_MOVE'] = pd.to_numeric(
merged_data['log_MOVE'], errors='coerce')
merged_data['ends_in_9'] = pd.to_numeric(
merged_data['ends_in_9'], errors='coerce')
store_dummies = pd.get_dummies(
y = merged_data['log_MOVE']
X = X.dropna()
y = y.loc[X.index]
X = sm.add_constant(X)
log_price_coeff = model.params['log_PRICE']
log_price_pvalue = model.pvalues['log_PRICE']
ends_in_9_coeff = model.params['ends_in_9']
ends_in_9_pvalue = model.pvalues['ends_in_9']
print(model.summary())
# Interpret results
merged_data['log_PRICE'] = pd.to_numeric(
merged_data['log_PRICE'], errors='coerce')
merged_data['log_MOVE'] = pd.to_numeric(
merged_data['log_MOVE'], errors='coerce')
merged_data['ends_in_99'] = pd.to_numeric(
merged_data['ends_in_99'], errors='coerce')
store_dummies = pd.get_dummies(
X = pd.concat(
y = merged_data['log_MOVE']
# Drop rows with NaN values (resulting from non-numeric conversions)
X = X.dropna()
y = y.loc[X.index]
log_price_coeff = model.params['log_PRICE']
log_price_pvalue = model.pvalues['log_PRICE']
ends_in_99_coeff = model.params['ends_in_99']
ends_in_99_pvalue = model.pvalues['ends_in_99']
print(model.summary())
# Interpret results
# Interpretation
else:
store_results = []
y_store = store_data['log_MOVE']
X_store = sm.add_constant(X_store)
price_coeff = model_store.params['log_PRICE']
price_se = model_store.bse['log_PRICE']
price_pvalue = model_store.pvalues['log_PRICE']
ends_in_9_coeff = model_store.params['ends_in_9']
ends_in_9_se = model_store.bse['ends_in_9']
ends_in_9_pvalue = model_store.pvalues['ends_in_9']
'Store': store,
})
store_results_df = pd.DataFrame(store_results)
store_results_df
store_results_ends_in_99 = []
y_store = store_data['log_MOVE']
X_store = sm.add_constant(X_store)
# Fit the regression model
price_coeff = model_store.params['log_PRICE']
price_se = model_store.bse['log_PRICE']
price_pvalue = model_store.pvalues['log_PRICE']
ends_in_99_coeff = model_store.params['ends_in_99']
ends_in_99_se = model_store.bse['ends_in_99']
ends_in_99_pvalue = model_store.pvalues['ends_in_99']
store_results_ends_in_99.append({
'Store': store,
})
store_results_ends_in_99_df = pd.DataFrame(store_results_ends_in_99_df)
store_results_ends_in_99_df
product_data_path = r"C:\Users\Hila\Python\Final_task\upcsdr.xlsx"
product_data = pd.read_excel(product_data_path)
# Merge datasets
merged_with_products = pd.merge(
print("The 'DESCRIP' column is missing. Please verify column names in the product_data file.")
else:
product_counts = merged_with_products['DESCRIP'].value_counts()
top_products = product_counts.head(4).reset_index()
print(top_products)
product_counts = merged_data.groupby(
['DESCRIP', 'UPC', 'SIZE']).size().reset_index(name='Frequency')
'PRICE'].mean().reset_index(name='Average Price')
print(top_frequent_products)
Loading datasets...
Important
Figures are displayed in the Plots pane by default. To make them also appear inline in the
console, you need to uncheck "Mute inline plotting" under the options menu of Plots.
C:\Users\הילה\AppData\Local\Temp\ipykernel_24960\2575356717.py:45: SettingWithCopyWarning:
C:\Users\הילה\AppData\Local\Temp\ipykernel_24960\2575356717.py:67: SettingWithCopyWarning:
C:\Users\הילה\AppData\Local\Temp\ipykernel_24960\2575356717.py:89: SettingWithCopyWarning:
=============================================
=================================
Df Model: 1
=============================================
=================================
------------------------------------------------------------------------------
=============================================
=================================
Omnibus: 16.285 Durbin-Watson: 1.999
=============================================
=================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
p-value: 1.4216469146422434e-21
The price difference between products ending in 9 and those not ending in 9 is statistically significant.
return self._engine.get_loc(casted_key)
File pandas\\_libs\\hashtable_class_helper.pxi:7081 in
pandas._libs.hashtable.PyObjectHashTable.get_item
File pandas\\_libs\\hashtable_class_helper.pxi:7089 in
pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'log_MOVE'
The above exception was the direct cause of the following exception:
merged_data['log_MOVE'], errors='coerce')
indexer = self.columns.get_loc(key)
KeyError: 'log_MOVE'