Normalization Dataset
Normalization Dataset
import numpy as np
# `preprocessing` is used below, so import it here instead of relying on a
# later import having already been executed.
from sklearn import preprocessing

# Small hand-made example before working with the housing data.
x_array = np.array([1000, 50, 71, 5006, 80])
# normalize() validates its input as a 2-D array, hence the extra list around
# the 1-D array (one sample with five features).
normalized_arr = preprocessing.normalize([x_array])
print(normalized_arr)
import numpy as np
import pandas as pd
from sklearn import preprocessing
# NOTE(review): `california_housing` is not defined in the visible code; it is
# presumably a DataFrame loaded in an earlier cell — confirm.
# Pull the housing_median_age column out as a NumPy array for inspection.
x_array = np.array(california_housing['housing_median_age'])
print("HouseAge array: ",x_array)
# This call raises ValueError (see the traceback that follows): normalize()
# tries to convert the whole frame to a float array.
# NOTE(review): the likely cause is the non-numeric `ocean_proximity` column
# (dtype object) — confirm against the full error message.
d = preprocessing.normalize(california_housing)
# Not reached when the line above raises; it would wrap the normalized values
# in a DataFrame carrying the original column names.
scaled_df = pd.DataFrame(d, columns=california_housing.columns)
print(scaled_df)
----------------------------------------------------------------------
-----
ValueError Traceback (most recent call
last)
Cell In[8], line 1
----> 1 d = preprocessing.normalize(california_housing)
2 scaled_df = pd.DataFrame(d,
columns=california_housing.columns)
3 print(scaled_df)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\
sklearn\preprocessing\_data.py:1817, in normalize(X, norm, axis, copy,
return_norm)
1814 else:
1815 raise ValueError("'%d' is not a supported axis" % axis)
-> 1817 X = check_array(
1818 X,
1819 accept_sparse=sparse_format,
1820 copy=copy,
1821 estimator="the normalize function",
1822 dtype=FLOAT_DTYPES,
1823 )
1824 if axis == 0:
1825 X = X.T
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\
sklearn\utils\validation.py:877, in check_array(array, accept_sparse,
accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d,
allow_nd, ensure_min_samples, ensure_min_features, estimator,
input_name)
875 array = xp.astype(array, dtype, copy=False)
876 else:
--> 877 array = _asarray_with_order(array, order=order,
dtype=dtype, xp=xp)
878 except ComplexWarning as complex_warning:
879 raise ValueError(
880 "Complex data not supported\n{}\n".format(array)
881 ) from complex_warning
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\
sklearn\utils\_array_api.py:185, in _asarray_with_order(array, dtype,
order, copy, xp)
182 xp, _ = get_namespace(array)
183 if xp.__name__ in {"numpy", "numpy.array_api"}:
184 # Use NumPy API to support order
--> 185 array = numpy.asarray(array, order=order, dtype=dtype)
186 return xp.asarray(array, copy=copy)
187 else:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\
pandas\core\generic.py:2070, in NDFrame.__array__(self, dtype)
2069 def __array__(self, dtype: npt.DTypeLike | None = None) ->
np.ndarray:
-> 2070 return np.asarray(self._values, dtype=dtype)
# Keep every column except the last one (positional slice). Per the info()
# output further down, this leaves the nine float64 columns and drops
# `ocean_proximity`.
data = california_housing.iloc[:,:-1]
# NOTE(review): the result of head() is discarded here; in a notebook it only
# displays when it is the last expression of the cell.
data.head()
# Rescale each column with a MinMaxScaler using its default settings; the
# printed values below fall between 0 and 1.
scaler = preprocessing.MinMaxScaler()
d = scaler.fit_transform(data)
# Wrap the scaled values in a DataFrame carrying the original column names.
scaled_df = pd.DataFrame(d, columns=data.columns)
print(scaled_df)
longitude latitude housing_median_age total_rooms
total_bedrooms \
0 0.211155 0.567481 0.784314 0.022331
0.019863
1 0.212151 0.565356 0.392157 0.180503
0.171477
2 0.210159 0.564293 1.000000 0.037260
0.029330
3 0.209163 0.564293 1.000000 0.032352
0.036313
4 0.209163 0.564293 1.000000 0.041330
0.043296
... ... ... ... ...
...
20635 0.324701 0.737513 0.470588 0.042296
0.057883
20636 0.312749 0.738576 0.333333 0.017676
0.023122
20637 0.311753 0.732200 0.313725 0.057277
0.075109
20638 0.301793 0.732200 0.333333 0.047256
0.063315
20639 0.309761 0.725824 0.294118 0.070782
0.095438
# Print the column dtypes and non-null counts of the feature subset `data`.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 longitude 20640 non-null float64
1 latitude 20640 non-null float64
2 housing_median_age 20640 non-null float64
3 total_rooms 20640 non-null float64
4 total_bedrooms 20433 non-null float64
5 population 20640 non-null float64
6 households 20640 non-null float64
7 median_income 20640 non-null float64
8 median_house_value 20640 non-null float64
dtypes: float64(9)
memory usage: 1.4 MB
# Print the column dtypes and non-null counts of the full frame, including the
# object-typed `ocean_proximity` column.
california_housing.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 longitude 20640 non-null float64
1 latitude 20640 non-null float64
2 housing_median_age 20640 non-null float64
3 total_rooms 20640 non-null float64
4 total_bedrooms 20433 non-null float64
5 population 20640 non-null float64
6 households 20640 non-null float64
7 median_income 20640 non-null float64
8 median_house_value 20640 non-null float64
9 ocean_proximity 20640 non-null object
dtypes: float64(9), object(1)
memory usage: 1.6+ MB
# Summary statistics (count, mean, std, min, quartiles, max) per column.
# NOTE(review): the recorded output below shows only `median_house_value`;
# the other columns appear to have been lost in the export — confirm.
california_housing.describe()
median_house_value
count 20640.000000
mean 206855.816909
std 115395.615874
min 14999.000000
25% 119600.000000
50% 179700.000000
75% 264725.000000
max 500001.000000
# Count the missing values in each column: isna() marks missing cells and
# sum() adds the marks up column by column. `sum` must be called with
# parentheses to get the counts. Per the info() output above, total_bedrooms
# (20433 non-null of 20640 rows) is the only column expected to be non-zero.
california_housing.isna().sum()
ocean_proximity
0 False
1 False
2 False
3 False
4 False
... ...
20635 False
20636 False
20637 False
20638 False
20639 False