My Own Cheatsheet
SP_hist.to_csv("/Users/maria/Desktop/csv_files/SP_hist.csv", index=True, sep=',')
# import the data from the Systembolaget API documentation (just downloaded the JSON)
import json

with open("/Users/maria/OneDrive/Documents_old/CodeOpDocs/Milestones/Group_Project/Drinks_data/assortment.json", 'r', encoding='utf-8') as our_file:
    our_file_as_dictionary = json.load(our_file)
# Print the loaded data
print(our_file_as_dictionary)
LISTS
# collect the tickers that also appear in the S&P 500 symbol list
# (the loop header was missing in the source; 'stocks' is assumed to be the iterable of tickers)
combined = []
for stock in stocks:
    if stock in SNP['Symbol'].values:
        combined.append(stock)
print(combined)
# collapse owner names into one comma-separated string per filing
reporting_owner.groupby('ACCESSION_NUMBER').agg({'RPTOWNERNAME': ','.join})
# drop rows whose Headline matches any wine flagged for removal
for i in removal_wines:
    full_wine = full_wine[full_wine['Headline'] != i]
TUPLES
# pull out the elements of the (name, score, reference) tuples into their own columns
vivino_data['match_name'] = vivino_data['best_match'].str[0]
vivino_data['match_percentage'] = vivino_data['best_match'].str[1]
vivino_data['match_reference'] = vivino_data['best_match'].str[2]
DICTIONARIES
Using a dictionary to rename a DataFrame
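A minimal sketch of the pattern (the DataFrame and column names here are made up for illustration; the real use is under Renaming Columns below):

import pandas as pd

# keys are the current column names, values are the new names
df = pd.DataFrame({'variable': ['t0'], 'value': [1.0]})
df = df.rename(columns={'variable': 'ref_time', 'value': 'price'})
print(list(df.columns))  # ['ref_time', 'price']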
# make a copy of the dataframe so edits don't touch the original
piv_table_control = piv_table.copy()
piv_table_control['transacted_avg'] = np.nan  # placeholder column, filled in later
DF singular calculations
# audit that the number of BAC shares really is that large
result = combo.groupby('ISSUERTRADINGSYMBOL')['TRANS_SHARES'].sum()
bac_count = result['BAC']  # assumed lookup; the source only showed the bare name
bac_count
combo['TRANS_CODE'].value_counts(normalize=True)  # proportion of each transaction code
piv_table=piv_table.dropna(how='any',subset=['STOCK_t0'])
combo['RPTOWNERNAME'] = combo['RPTOWNERNAME'].str.replace('BERKSHIRE HATHAWAY INC,BUFFETT WARREN E', 'BUFFETT WARREN E,BERKSHIRE HATHAWAY INC')
combo
DF Column calculations
# flag whether each issuer in the submission data is in the S&P 500
combo['is_sp500'] = combo['ISSUERTRADINGSYMBOL'].isin(combined)
# cost of transacted shares per line item (used later for the average cost of transacted stock)
combo['value_transacted'] = combo['TRANS_SHARES'] * combo['TRANS_PRICEPERSHARE']
Renaming Columns
stock_prices = stock_prices.rename(columns={'variable': 'ref_time', 'value': 'price'})
piv_table['max'] = np.amax(piv_table[['STOCK_-1','STOCK_t0','STOCK_t1','STOCK_t2','STOCK_t3','STOCK_t4','STOCK_t5']], axis=1)
piv_table['min'] = np.amin(piv_table[['STOCK_-1','STOCK_t0','STOCK_t1','STOCK_t2','STOCK_t3','STOCK_t4','STOCK_t5']], axis=1)
Groupby
full_wine.groupby('Product_Group_Details')['Units_sold'].sum()
Sort_values
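A minimal sketch, assuming the full_wine DataFrame and Units_sold column from the Groupby example above:

# sort wines by units sold, largest first
top_sellers = full_wine.sort_values(by='Units_sold', ascending=False)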
DF FULL calculations
Merge dataframes
# merge the DataFrames on their shared accession number
combo = submission.merge(non_deriv_trans, on='ACCESSION_NUMBER', how='inner')
combo = combo.merge(owner_agg, on='ACCESSION_NUMBER', how='inner')
Pivot Table
piv_table_buys = pd.pivot_table(combo_buys, index=['TRANS_DATE','ISSUERTRADINGSYMBOL'], aggfunc={'TRANS_SHARES': 'sum', 'value_transacted': 'sum'})
join df
piv_table=pd.concat([piv_table_sells,piv_table_buys], axis=0)
Iterrows
# look up the prior-day price for each transaction row
for idx, row in piv_table.iterrows():
    trans_date = row['TRANS_DATE']
    symbol = row['ISSUERTRADINGSYMBOL']
    try:
        value = data_neg_1.loc[trans_date, symbol]
        piv_table.loc[idx, "STOCK_-1"] = value
    except KeyError:
        # no price for that date/symbol combination
        piv_table.loc[idx, "STOCK_-1"] = np.nan
stock_prices_returns = pd.melt(piv_combined_normalized_prices, id_vars=['TRANS_DATE', 'ISSUERTRADINGSYMBOL', 'TRANS_SHARES'])  # the id_vars list was cut off in the source and is closed minimally here
Index calculations
# make a new index that we can later reference
data_control = data.copy()
ref = -1
for idx, row in data_control.iterrows():  # loop header assumed; the source only showed the body
    ref = ref + 1
    data_control.loc[idx, "ref_num"] = ref
# pull the dates (the current index) out into a column and replace the index with ref_num
data_control['Date'] = data_control.index
data_control = data_control.set_index('ref_num')
Using Apply:
def bucket(x):
    if x > 500:
        return 'Over 500'
    elif x > 249:
        return '250 to 499'
    elif x > 199:
        return '200 to 249'
    elif x > 149:
        return '150 to 199'
    elif x > 99:
        return '100 to 149'
    elif x > 74:
        return '75 to 99'
    else:
        return 'less than 75'

bucket_wine['price_bucket'] = bucket_wine['Actual_Price'].apply(bucket)
# use the match reference (as received from the fuzzy match) to pull out the correct dictionary and put it into best_match_details
vivino_data['best_match_details'] = vivino_data.apply(lambda row: row['wine_matches'][row['match_reference']], axis=1)
# then search the dictionary for each field and turn it into its own column
vivino_data['vivino_name'] = vivino_data['best_match_details'].apply(lambda x: x['name'])
vivino_data['vivino_link'] = vivino_data['best_match_details'].apply(lambda x: x['link'])
vivino_data['vivino_country'] = vivino_data['best_match_details'].apply(lambda x: x['country'])
vivino_data['vivino_region'] = vivino_data['best_match_details'].apply(lambda x: x['region'])
vivino_data['vivino_average_rating'] = vivino_data['best_match_details'].apply(lambda x: x['average_rating'])
vivino_data['vivino_price'] = vivino_data['best_match_details'].apply(lambda x: x['price'])
DateTime
# put the date column into datetime format
combo['TRANS_DATE'] = pd.to_datetime(combo['TRANS_DATE'], format="%d-%b-%Y")
data = data.tz_convert(None)  # drop the timezone so dates compare cleanly
Shifting
# get stock prices the day before (and similarly the 5 days after, using negative periods)
data_neg_1 = data.shift(periods=1)
YFinance
# pull in stock data from what was Yahoo Finance, via the yfinance package
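A minimal sketch with yfinance (the ticker and date range are illustrative):

import yfinance as yf

# daily prices for one ticker over an illustrative window
data = yf.download('BAC', start='2020-01-01', end='2020-12-31')
print(data['Close'].head())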
Statistics
from scipy import stats

# one-sided Mann-Whitney U test: is the experiment distribution shifted below the control?
stats.mannwhitneyu(Experiment_returns, Control_returns, alternative='less')
Random numbers
# go back to piv_table_control to give some random dates to our transactions:
# assign a random time index within the reference range of dates (150-216),
# corresponding to ref_num and the period sampled
import random

random.seed(42)
for idx, row in piv_table_control.iterrows():  # loop assumed; the source only showed the assignment
    a = random.randint(150, 216)  # assumed draw; the source did not show how 'a' was generated
    piv_table_control.loc[idx, 'random_date_index'] = int(a)
# as running it took forever, I will instead have to transform the data, unfortunately using a different package...
import ast

vivino_data['wine_matches'] = vivino_data['wine_matches'].apply(lambda x: ast.literal_eval(x))
import subprocess

def fetch_vivino_matches(wine_name):  # function wrapper assumed; the source showed a bare 'return'
    command = [
        "node",
        "C:/Users/maria/OneDrive/Documents_old/CodeOpDocs/Milestones/Group_Project/Vivino_api/vivino-api/vivino.js",
        f"--name='{wine_name}'"
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        print("Node.js script output:", result.stdout)
        data = result.stdout  # assumed; the source did not show how stdout became 'data'
    except subprocess.CalledProcessError:
        data = np.nan
    return data
FUZZ Data
# assumes fuzzywuzzy (or the API-compatible thefuzz) provides fuzz and process
from fuzzywuzzy import fuzz, process

def find_best_match(row):
    dataf_choices = pd.DataFrame(row['wine_matches'])
    the_name = row['full_name']
    if 'name' in dataf_choices.columns:
        match = process.extractOne(the_name, dataf_choices['name'], scorer=fuzz.token_set_ratio)
        return match
    else:
        return np.nan
API
API styles: REST, SOAP, RPC
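As a minimal REST sketch with the requests package (the endpoint URL and parameter are placeholders):

import requests

# GET a JSON resource from a placeholder REST endpoint
response = requests.get('https://api.example.com/drinks', params={'page': 1})
response.raise_for_status()  # raise if the server answered with an error status
payload = response.json()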
print(unique_values_df)
column_to_translate = list(unique_values_df["Bottle Type"].unique())
import deepl

# Your DeepL API key (replace with your own)
DEEPL_API_KEY = '***'
dl_translator = deepl.Translator(DEEPL_API_KEY)

# put the unique values in a dictionary as keys
translation_dict = {}
for type_name in column_to_translate:
    if type_name.strip() != "":  # skip if the value is empty or just whitespace
        try:
            # translate the text; .text pulls the translated string out of the TextResult object
            translated_result = dl_translator.translate_text(type_name, target_lang="EN-US").text
            translation_dict[type_name] = translated_result
        except deepl.DeepLException as e:
            print(f"Error translating value '{type_name}': {e}")
            translation_dict[type_name] = type_name
    else:
        # if the type_name is empty, keep it unchanged in the dictionary
        translation_dict[type_name] = type_name

# display the translation dictionary
print("Translation dictionary:")
print(translation_dict)
Unit Tests (coverage: 98%)
Unit tests can also be written with pytest (pip install pytest).
In the repository:
src/
tests/
data/
examples/
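A minimal pytest sketch against the bucket function from the Using Apply section (the file path and import location are illustrative):

# tests/test_bucket.py
from src.buckets import bucket  # assumed module location

def test_bucket_over_500():
    assert bucket(600) == 'Over 500'

def test_bucket_low():
    assert bucket(10) == 'less than 75'

Run with pytest; coverage figures like the 98% above come from the pytest-cov plugin (pytest --cov=src).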