# python_cheatsheet
# Inspect the DataFrame: first rows, then the dtype of each column
# NOTE(review): the original heading read "Create a DataFrame", but no
# constructor call is visible here -- `df` is assumed to already exist.
df.head()
df.dtypes
# Filter data: keep rows where col1 is greater than 1
df[df['col1'] > 1]
# Group data: mean of every numeric column per distinct col1 value.
# numeric_only=True skips non-numeric columns, which would otherwise raise
# a TypeError on pandas >= 2.0.
df.groupby('col1').mean(numeric_only=True)
# Summarize data
df.describe()
# Handle missing values: replace them with 0, or drop the rows containing them
# (both return a new DataFrame; `df` itself is left unchanged)
df.fillna(value=0)
df.dropna()
# Remove duplicates
df.drop_duplicates()
#### Plotting:
# Distribution of col1 as a histogram
df['col1'].hist()
# Box-and-whisker plot of col1
sns.boxplot(data=df, x='col1')
# col1 (x-axis) against col2 (y-axis) as a scatter plot
plt.scatter(x=df['col1'], y=df['col2'])
# Median
df['col1'].median()
# Mode (returned as a Series, since several values can tie)
df['col1'].mode()
# Standard Deviation
df['col1'].std()
# Correlation between numeric columns.
# numeric_only=True (pandas >= 1.5) skips non-numeric columns, which would
# otherwise raise on pandas >= 2.0.
df.corr(numeric_only=True)
# One-sample t-test of col1 against a population mean of 0
# NOTE(review): assumes `stats` is scipy.stats -- confirm against the imports.
stats.ttest_1samp(df['col1'], 0)
# Convert to lowercase
df['col1'].str.lower()
# Check which values contain a substring
df['col1'].str.contains('substring')
# Replace a substring
# Series.replace only swaps values that equal 'old' in full; the .str
# accessor is needed for substring replacement. regex=False treats 'old'
# as literal text rather than a regular expression.
df['col1'].str.replace('old', 'new', regex=False)