ip project
ip project
SILIGURI
Informatics Practices
SESSION: 2024-2025
TEXT
Name,Class,Marks,Age
Alice,10,85,14
Bob,10,92,15
Charlie,10,76,14
David,11,89,16
Eva,11,95,15
Part 1: Reading from an Existing CSV File
• Display the first few rows of a CSV file
CODE:
# Load the student records and preview the top of the table.
import pandas as pd
import matplotlib.pyplot as plt

students = pd.read_csv(r'C:\Users\ankit\Downloads\data.csv')
preview = students.head()  # head() returns the first five rows by default
print(preview)
OUTPUTS:
• Filter the rows where Marks is greater than 80
CODE:
import pandas as pd

records = pd.read_csv(r'C:\Users\ankit\Downloads\data.csv')

# Build a boolean mask first, then keep only the rows where it is True,
# i.e. the students who scored more than 80 marks.
above_80 = records['Marks'] > 80
high_scorers = records.loc[above_80]
print(high_scorers)
OUTPUTS:
• Calculate the sum of Marks for each Class
CODE:
import pandas as pd

# Read the same data file used by the other examples (folder is 'Downloads').
df = pd.read_csv(r'C:\Users\ankit\Downloads\data.csv')

# Group the rows by Class and add up the Marks inside each group.
# The result is a Series indexed by Class.
class_sum = df.groupby('Class')['Marks'].sum()
print(class_sum)
OUTPUTS:
• Bar chart of the total Marks for each Class
CODE:
import pandas as pd
import matplotlib.pyplot as plt

marks_table = pd.read_csv(r'C:\Users\ankit\Downloads\data.csv')

# Add up the Marks of all students in each Class, then chart the totals
# as one bar per Class.
totals_by_class = marks_table.groupby('Class')['Marks'].sum()
axes = totals_by_class.plot.bar(title='Class-wise Total Marks')
axes.set_ylabel('Total Marks')
plt.show()
OUTPUT:
• Sort students by Marks in descending order
CODE
import pandas as pd

records = pd.read_csv(r'C:\Users\ankit\Downloads\data.csv')

# Highest marks first; sort_values returns a new DataFrame and leaves
# the original row order in `records` untouched.
ranked = records.sort_values(by='Marks', ascending=False)
print(ranked)
OUTPUT
• Histogram of Marks
CODE
import pandas as pd
import matplotlib.pyplot as plt

records = pd.read_csv(r'C:\Users\ankit\Downloads\data.csv')
marks = records['Marks']

# Split the Marks range into 5 bins and outline each bar in black
# so neighbouring bars are easy to tell apart.
axes = marks.plot.hist(bins=5, title='Marks Distribution', edgecolor='black')
axes.set_xlabel('Marks')
plt.show()
OUTPUT
• Pie chart for the proportion of Marks by Student
CODE
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv(r'C:\Users\ankit\Downloads\data.csv')

# Use Name as the index so every pie slice is labelled with the student's
# name; autopct prints each slice's share as a percentage with one decimal.
# The title is kept on a single line: a quoted string cannot span two
# source lines.
df.set_index('Name')['Marks'].plot(
    kind='pie',
    autopct='%1.1f%%',
    title='Marks Distribution by Student',
)
plt.ylabel('')  # clear the y-axis label so it does not clutter the pie
plt.show()
OUTPUT
• Mean, median and standard deviation of Marks
CODE
import pandas as pd

records = pd.read_csv(r'C:\Users\ankit\Downloads\data.csv')
marks = records['Marks']

# Summary statistics of the Marks column.
mean_marks = marks.mean()
median_marks = marks.median()
std_dev = marks.std()  # pandas computes the sample standard deviation (ddof=1)
print(f"Mean Marks: {mean_marks}, Median Marks: {median_marks}, Std Dev: {std_dev}")
OUTPUT
• Scatter plot of Age vs Marks
CODE
import pandas as pd
import matplotlib.pyplot as plt

records = pd.read_csv(r'C:\Users\ankit\Downloads\data.csv')

# One point per student: Age on the x-axis, Marks on the y-axis.
records.plot.scatter(x='Age', y='Marks', title='Age vs Marks')
plt.show()
OUTPUT
• Square and logarithm of Marks using NumPy
CODE
import pandas as pd
import numpy as np

records = pd.read_csv(r'C:\Users\ankit\Downloads\data.csv')
marks = records['Marks']

# Add two derived columns computed element-wise from Marks.
records['Marks_squared'] = marks ** 2
records['Marks_log'] = np.log(marks)  # natural logarithm (base e)

# Show only the columns relevant to this exercise.
report_columns = ['Name', 'Marks', 'Marks_squared', 'Marks_log']
print(records[report_columns])
OUTPUT
Part 2: Creating and Manipulating CSV Files from DataFrames
• Create a DataFrame, save it as a CSV file, and read it
CODE
import pandas as pd

# One tuple per student, in the column order Name, Marks, Class.
rows = [
    ('John', 78, 10),
    ('Mike', 85, 10),
    ('Anna', 92, 11),
]
students = pd.DataFrame(rows, columns=['Name', 'Marks', 'Class'])

# index=False keeps the row numbers out of the saved file.
students.to_csv(r'C:\Users\ankit\Downloads\created1.csv', index=False)
print(students)
OUTPUT
OUTPUT
# NOTE(review): merged_df is not defined in this snippet; presumably it is
# the DataFrame produced by an earlier merge step (with pandas imported as
# pd) -- confirm and restore the missing code above this line.
# Save the merged table to CSV without the row index, then display it.
merged_df.to_csv(r'C:\Users\ankit\Downloads\merged.csv', index=False)
print(merged_df)
OUTPUT