0% found this document useful (0 votes)
139 views

Statsmodel Python Example

This document contains code examples using the statsmodels library in Python to perform statistical analysis and linear regression on various datasets. It includes importing datasets, fitting linear regression models, predicting values, and calculating ANOVA results.

Uploaded by

Karlo
Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
139 views

Statsmodel Python Example

This document contains code examples using the statsmodels library in Python to perform statistical analysis and linear regression on various datasets. It includes importing datasets, fitting linear regression models, predicting values, and calculating ANOVA results.

Uploaded by

Karlo
Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 2

# Log-log OLS on mtcars: regress log(wt) on log(mpg) and report R-squared.
# The import below was previously split across two lines, which is a syntax
# error; it must be a single statement.
import statsmodels.api as sm
import numpy as np

# get_rdataset returns a dataset wrapper; the DataFrame is its `.data`.
mtcars = sm.datasets.get_rdataset('mtcars')
mtcars_data = mtcars.data

# `np` has to be importable in this scope because the formula calls np.log.
liner_model = sm.formula.ols('np.log(wt) ~ np.log(mpg)', mtcars_data)
liner_result = liner_model.fit()
print(liner_result.rsquared)

import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np
import pandas as pd

# Binary logistic regression on two iris species, then scoring of random
# synthetic petal measurements.
df = sm.datasets.get_rdataset("iris").data
df.info()
df.Species.unique()

# Keep only the two species being contrasted. The original filtered a frame
# called `iris`, which is never defined; the loaded frame is `df`.
is_two_class = df.Species.isin(["versicolor", "virginica"])
iris_subset = df[is_two_class].copy()
print(iris_subset.Species.unique())

df_subset = df[is_two_class].copy()
# Encode the response as 0/1 so it can be used as a logit outcome.
df_subset["Species"] = df_subset["Species"].map({"versicolor": 1, "virginica": 0})

# Dots in column names are not valid identifiers in a formula, so replace
# them with underscores.
df_subset = df_subset.rename(
    columns={
        "Sepal.Length": "Sepal_Length",
        "Sepal.Width": "Sepal_Width",
        "Petal.Length": "Petal_Length",
        "Petal.Width": "Petal_Width",
    }
)

model = smf.logit("Species ~ Petal_Length + Petal_Width", data=df_subset)
result = model.fit()

# 20 random petal measurements centred on 5 (length) and 1.7 (width).
df_new = pd.DataFrame(
    {
        "Petal_Length": np.random.randn(20) * 0.5 + 5,
        "Petal_Width": np.random.randn(20) * 0.5 + 1.7,
    }
)
# Model prediction for each synthetic row; under the mapping above the
# positive class (1) is versicolor.
df_new["P-Species"] = result.predict(df_new)
df_new["P-Species"].head(3)

# Threshold the prediction at 0.5 to obtain a hard 0/1 label.
df_new["Species"] = (df_new["P-Species"] > 0.5).astype(int)
df_new.head()

import statsmodels.api as sa
import numpy as np
import statsmodels.formula.api as sfa
# Logit fit on the MASS "biopsy" data after recoding the class label and
# min-max scaling V1.
biopsy = sa.datasets.get_rdataset("biopsy", "MASS")
biopsy_data = biopsy.data

# `class` is a Python keyword, so give the column a formula-friendly name.
# The rename is in place, so `biopsy.data` sees it too.
biopsy_data.rename(columns={"class": "Class"}, inplace=True)
biopsy_data["Class"] = biopsy_data["Class"].map({"benign": 0, "malignant": 1})

# Rescale V1 to the [0, 1] interval using its observed minimum and maximum.
v1_min = biopsy_data["V1"].min()
v1_max = biopsy_data["V1"].max()
biopsy_data["V1"] = (biopsy_data["V1"] - v1_min) / (v1_max - v1_min)

# NOTE(review): the formula uses the rescaled V1 as the response and Class as
# the predictor - confirm this direction is what the exercise intends.
log_mod1 = sfa.logit(formula="V1~Class", data=biopsy_data)
log_res1 = log_mod1.fit()
print(log_res1.summary())

import statsmodels.formula.api as smf

# Poisson regression of num_awards on math plus prog treated as categorical.
# NOTE(review): `awards_df` is not defined anywhere in the visible snippet;
# it must be loaded before this runs.
poisson_model = smf.poisson(
    formula='num_awards ~ math + C(prog)',
    data=awards_df,
)
poisson_model_result = poisson_model.fit()

import statsmodels.api as sm
import statsmodels.formula.api as smf
import pandas as pd
import numpy as np
# Poisson regression of Claims on log(Holders) for the MASS "Insurance" data,
# followed by the running sum of the fitted model's residuals.
df_insurance = sm.datasets.get_rdataset("Insurance", "MASS")
df_data = df_insurance.data

insurance_model = smf.poisson(
    formula='Claims ~ np.log(Holders)',
    data=df_data,
).fit()

residual_running_total = np.cumsum(insurance_model.resid)
print(residual_running_total)

#Write your code here


import statsmodels.api as sm
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.stats import anova

# One-predictor OLS (mpg on wt) and the F statistic of `wt` from its ANOVA
# table.
mtcars = sm.datasets.get_rdataset("mtcars", "datasets", cache=True).data
df = pd.DataFrame(mtcars)

model = smf.ols(formula='mpg~wt', data=mtcars).fit()

# The ANOVA table is indexed by model term; pick the F column for `wt`.
anova_table = anova.anova_lm(model)
print(anova_table.loc["wt", "F"])

#Write your code here


import statsmodels.api as sm
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.stats import anova

# Log-log OLS (log mpg on log wt) and the F statistic of the log(wt) term
# from its ANOVA table.
mtcars = sm.datasets.get_rdataset("mtcars", "datasets", cache=True).data
df = pd.DataFrame(mtcars)

model = smf.ols(formula='np.log(mpg) ~ np.log(wt)', data=mtcars).fit()

# The row label is the term exactly as written in the formula.
anova_table = anova.anova_lm(model)
print(anova_table.loc["np.log(wt)", "F"])

You might also like