0% found this document useful (0 votes)
37 views7 pages

Normalization Dataset

Normalization Dataset in Deep Learning

Uploaded by

mohitdubey42551
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
37 views7 pages

Normalization Dataset

Normalization Dataset in Deep Learning

Uploaded by

mohitdubey42551
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 7

from sklearn import preprocessing

import numpy as np

# normalize() works on 2-D input (one sample per row), so the 1-D vector is
# reshaped into a single row and scaled to unit norm (L2 is the default).
x_array = np.array([1000, 50, 71, 5006, 80])
normalized_arr = preprocessing.normalize(x_array.reshape(1, -1))
print(normalized_arr)

[[0.19583773 0.00979189 0.01390448 0.98036366 0.01566702]]

import numpy as np
import pandas as pd
from sklearn import preprocessing

# Load the California housing data from the local file "housing.csv" into a
# pandas DataFrame.


california_housing = pd.read_csv("housing.csv")

# Preview the first rows of the dataset.
# NOTE(review): the `.DESCR` line below is disabled; presumably it belongs to
# a scikit-learn dataset object rather than a DataFrame read from CSV — confirm.


#print(california_housing.DESCR)
california_housing.head()

longitude latitude housing_median_age total_rooms


total_bedrooms \
0 -122.23 37.88 41.0 880.0
129.0
1 -122.22 37.86 21.0 7099.0
1106.0
2 -122.24 37.85 52.0 1467.0
190.0
3 -122.25 37.85 52.0 1274.0
235.0
4 -122.25 37.85 52.0 1627.0
280.0

population households median_income median_house_value


ocean_proximity
0 322.0 126.0 8.3252 452600.0
NEAR BAY
1 2401.0 1138.0 8.3014 358500.0
NEAR BAY
2 496.0 177.0 7.2574 352100.0
NEAR BAY
3 558.0 219.0 5.6431 341300.0
NEAR BAY
4 565.0 259.0 3.8462 342200.0
NEAR BAY

# Pull the housing_median_age column out of the DataFrame as a 1-D NumPy
# array and show it.
x_array = np.array(california_housing['housing_median_age'])
print("HouseAge array: ",x_array)

HouseAge array: [41. 21. 52. ... 17. 18. 16.]


# Treat the whole column as one sample (a single row) so that normalize()
# rescales it to unit norm as one vector (L2 is the default norm).
house_age_row = x_array.reshape(1, -1)
normalized_arr = preprocessing.normalize(house_age_row)
print("Normalized HouseAge array: ", normalized_arr)

Normalized HouseAge array: [[0.00912272 0.00467261 0.01157028 ...


0.00378259 0.0040051 0.00356009]]

# Attempt to normalize the complete DataFrame in one call.
# NOTE: this raises ValueError ("could not convert string to float:
# 'NEAR BAY'") because the ocean_proximity column holds strings; only numeric
# columns can be passed to normalize(), so the text column has to be removed
# first.
d = preprocessing.normalize(california_housing)
scaled_df = pd.DataFrame(d, columns=california_housing.columns)
print(scaled_df)

----------------------------------------------------------------------
-----
ValueError Traceback (most recent call
last)
Cell In[8], line 1
----> 1 d = preprocessing.normalize(california_housing)
2 scaled_df = pd.DataFrame(d,
columns=california_housing.columns)
3 print(scaled_df)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\
sklearn\preprocessing\_data.py:1817, in normalize(X, norm, axis, copy,
return_norm)
1814 else:
1815 raise ValueError("'%d' is not a supported axis" % axis)
-> 1817 X = check_array(
1818 X,
1819 accept_sparse=sparse_format,
1820 copy=copy,
1821 estimator="the normalize function",
1822 dtype=FLOAT_DTYPES,
1823 )
1824 if axis == 0:
1825 X = X.T

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\
sklearn\utils\validation.py:877, in check_array(array, accept_sparse,
accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d,
allow_nd, ensure_min_samples, ensure_min_features, estimator,
input_name)
875 array = xp.astype(array, dtype, copy=False)
876 else:
--> 877 array = _asarray_with_order(array, order=order,
dtype=dtype, xp=xp)
878 except ComplexWarning as complex_warning:
879 raise ValueError(
880 "Complex data not supported\n{}\n".format(array)
881 ) from complex_warning

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\
sklearn\utils\_array_api.py:185, in _asarray_with_order(array, dtype,
order, copy, xp)
182 xp, _ = get_namespace(array)
183 if xp.__name__ in {"numpy", "numpy.array_api"}:
184 # Use NumPy API to support order
--> 185 array = numpy.asarray(array, order=order, dtype=dtype)
186 return xp.asarray(array, copy=copy)
187 else:

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\
pandas\core\generic.py:2070, in NDFrame.__array__(self, dtype)
2069 def __array__(self, dtype: npt.DTypeLike | None = None) ->
np.ndarray:
-> 2070 return np.asarray(self._values, dtype=dtype)

ValueError: could not convert string to float: 'NEAR BAY'

from sklearn import preprocessing


import pandas as pd

# Keep every column except the last one by position. The last column is
# ocean_proximity, the only non-numeric (object) column, so `data` is left
# with the nine float64 columns.
data = california_housing.iloc[:,:-1]

data.head()

longitude latitude housing_median_age total_rooms


total_bedrooms \
0 -122.23 37.88 41.0 880.0
129.0
1 -122.22 37.86 21.0 7099.0
1106.0
2 -122.24 37.85 52.0 1467.0
190.0
3 -122.25 37.85 52.0 1274.0
235.0
4 -122.25 37.85 52.0 1627.0
280.0

population households median_income median_house_value


0 322.0 126.0 8.3252 452600.0
1 2401.0 1138.0 8.3014 358500.0
2 496.0 177.0 7.2574 352100.0
3 558.0 219.0 5.6431 341300.0
4 565.0 259.0 3.8462 342200.0

# Min-max scaling: learn each column's minimum and maximum, then map the
# values of every column onto the default [0, 1] range.
scaler = preprocessing.MinMaxScaler()
scaler.fit(data)
d = scaler.transform(data)
# The scaler returns a plain array; wrap it back into a DataFrame that keeps
# the original column names.
scaled_df = pd.DataFrame(data=d, columns=data.columns)
print(scaled_df)
longitude latitude housing_median_age total_rooms
total_bedrooms \
0 0.211155 0.567481 0.784314 0.022331
0.019863
1 0.212151 0.565356 0.392157 0.180503
0.171477
2 0.210159 0.564293 1.000000 0.037260
0.029330
3 0.209163 0.564293 1.000000 0.032352
0.036313
4 0.209163 0.564293 1.000000 0.041330
0.043296
... ... ... ... ...
...
20635 0.324701 0.737513 0.470588 0.042296
0.057883
20636 0.312749 0.738576 0.333333 0.017676
0.023122
20637 0.311753 0.732200 0.313725 0.057277
0.075109
20638 0.301793 0.732200 0.333333 0.047256
0.063315
20639 0.309761 0.725824 0.294118 0.070782
0.095438

population households median_income median_house_value


0 0.008941 0.020556 0.539668 0.902266
1 0.067210 0.186976 0.538027 0.708247
2 0.013818 0.028943 0.466028 0.695051
3 0.015555 0.035849 0.354699 0.672783
4 0.015752 0.042427 0.230776 0.674638
... ... ... ... ...
20635 0.023599 0.054103 0.073130 0.130105
20636 0.009894 0.018582 0.141853 0.128043
20637 0.028140 0.071041 0.082764 0.159383
20638 0.020684 0.057227 0.094295 0.143713
20639 0.038790 0.086992 0.130253 0.153403

[20640 rows x 9 columns]

# Show dtypes and non-null counts for the numeric-only frame. total_bedrooms
# reports 20433 non-null values out of 20640 rows, i.e. it has missing entries.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 longitude 20640 non-null float64
1 latitude 20640 non-null float64
2 housing_median_age 20640 non-null float64
3 total_rooms 20640 non-null float64
4 total_bedrooms 20433 non-null float64
5 population 20640 non-null float64
6 households 20640 non-null float64
7 median_income 20640 non-null float64
8 median_house_value 20640 non-null float64
dtypes: float64(9)
memory usage: 1.4 MB

# Same summary for the full frame: nine float64 columns plus the object
# (string) column ocean_proximity.
california_housing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 longitude 20640 non-null float64
1 latitude 20640 non-null float64
2 housing_median_age 20640 non-null float64
3 total_rooms 20640 non-null float64
4 total_bedrooms 20433 non-null float64
5 population 20640 non-null float64
6 households 20640 non-null float64
7 median_income 20640 non-null float64
8 median_house_value 20640 non-null float64
9 ocean_proximity 20640 non-null object
dtypes: float64(9), object(1)
memory usage: 1.6+ MB

# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns.
california_housing.describe()

longitude latitude housing_median_age total_rooms \


count 20640.000000 20640.000000 20640.000000 20640.000000
mean -119.569704 35.631861 28.639486 2635.763081
std 2.003532 2.135952 12.585558 2181.615252
min -124.350000 32.540000 1.000000 2.000000
25% -121.800000 33.930000 18.000000 1447.750000
50% -118.490000 34.260000 29.000000 2127.000000
75% -118.010000 37.710000 37.000000 3148.000000
max -114.310000 41.950000 52.000000 39320.000000

total_bedrooms population households median_income \


count 20433.000000 20640.000000 20640.000000 20640.000000
mean 537.870553 1425.476744 499.539680 3.870671
std 421.385070 1132.462122 382.329753 1.899822
min 1.000000 3.000000 1.000000 0.499900
25% 296.000000 787.000000 280.000000 2.563400
50% 435.000000 1166.000000 409.000000 3.534800
75% 647.000000 1725.000000 605.000000 4.743250
max 6445.000000 35682.000000 6082.000000 15.000100

median_house_value
count 20640.000000
mean 206855.816909
std 115395.615874
min 14999.000000
25% 119600.000000
50% 179700.000000
75% 264725.000000
max 500001.000000

# Count the missing values in each column. `sum1` is not a DataFrame
# attribute (it raises AttributeError); the aggregation has to be called as
# `.sum()`, which adds up the True entries of the isna() mask per column.
# NOTE: the listing recorded after this cell is the un-aggregated,
# element-wise isna() mask; with `.sum()` the result is one count per column
# (per the info() output, only total_bedrooms has missing entries:
# 20640 - 20433 = 207).
california_housing.isna().sum()

longitude latitude housing_median_age total_rooms


total_bedrooms \
0 False False False False
False
1 False False False False
False
2 False False False False
False
3 False False False False
False
4 False False False False
False
... ... ... ... ...
...
20635 False False False False
False
20636 False False False False
False
20637 False False False False
False
20638 False False False False
False
20639 False False False False
False

population households median_income median_house_value \


0 False False False False
1 False False False False
2 False False False False
3 False False False False
4 False False False False
... ... ... ... ...
20635 False False False False
20636 False False False False
20637 False False False False
20638 False False False False
20639 False False False False

ocean_proximity
0 False
1 False
2 False
3 False
4 False
... ...
20635 False
20636 False
20637 False
20638 False
20639 False

[20640 rows x 10 columns]

You might also like