project code
project code
import os
# Walk the Kaggle input directory tree and print the full path of every file
# found, so the available data files are visible in the run log.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)
# You can write up to 20GB to the current directory (/kaggle/working/) that gets
# preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved
# outside of the current session
# Files printed by the directory listing above (notebook cell output):
# /kaggle/input/computer-virus-predictor/sample_submission.csv
# /kaggle/input/computer-virus-predictor/train.csv
# /kaggle/input/computer-virus-predictor/test.csv
# Columns removed from both the training and the test frame. Keeping the list
# in one place guarantees the two frames are reduced to the same feature set.
_DROPPED_COLUMNS = [
    'MachineID', 'ProductName', 'EngineVersion', 'AppVersion',
    'SignatureVersion', 'IsBetaUser', 'PlatformType', 'Processor',
    'OSVersion', 'OsPlatformSubRelease', 'OSBuildLab', 'SKUEditionName',
    'MDC2FormFactor', 'DeviceFamily', 'PrimaryDiskType', 'ChassisType',
    'PowerPlatformRole', 'NumericOSVersion', 'OSArchitecture', 'OSBranch',
    'OSEdition', 'OSSkuFriendlyName', 'OSInstallType',
    'AutoUpdateOptionsName', 'OSGenuineState', 'LicenseActivationChannel',
    'FlightRing', 'DateAS', 'DateOS',
]

# Columns that must be non-null; a row missing any of them is discarded.
_REQUIRED_COLUMNS = [
    'RealTimeProtectionState', 'AntivirusConfigID', 'CityID', 'SMode',
    'IEVersionID', 'ProcessorCoreCount', 'PrimaryDisplayDiagonalInches',
    'FirewallEnabled', 'TotalPhysicalRAMMB', 'EnableLUA', 'OEMModelID',
    'InternalBatteryNumberOfCharges', 'IsGamer', 'OSInstallLanguageID',
    'IsFlightsDisabled', 'FirmwareManufacturerID', 'IsVirtualDevice',
]


def _select_features(frame: "pd.DataFrame") -> "pd.DataFrame":
    """Return a new frame without the dropped columns and with an ``id`` column.

    The ``id`` column holds the frame's index at the time of the call, i.e.
    before any rows are filtered out.

    Raises:
        KeyError: if any name in ``_DROPPED_COLUMNS`` is absent from *frame*.
    """
    selected = frame.drop(columns=_DROPPED_COLUMNS)
    selected["id"] = selected.index
    return selected


def _drop_incomplete_rows(frame: "pd.DataFrame") -> "pd.DataFrame":
    """Return the rows of *frame* that have a value in every required column.

    Equivalent to filtering on ``frame[col].notna()`` for each column of
    ``_REQUIRED_COLUMNS`` in turn, done in a single pass.

    Raises:
        KeyError: if any name in ``_REQUIRED_COLUMNS`` is absent from *frame*.
    """
    return frame.dropna(subset=_REQUIRED_COLUMNS)


# --- Training data ---------------------------------------------------------
train_df = pd.read_csv('/kaggle/input/computer-virus-predictor/train.csv')
train_1_df = _select_features(train_df)

# Notebook-style inspection of the reduced frame before rows are filtered.
train_1_df.head()
train_1_df.tail()
train_1_df.dtypes

train_1_df = _drop_incomplete_rows(train_1_df)

# --- Test data (same preparation as the training data) ----------------------
test_df = pd.read_csv('/kaggle/input/computer-virus-predictor/test.csv')
x_test = _drop_incomplete_rows(_select_features(test_df))

# NOTE(review): rows of test.csv with a missing required value were removed
# above and x_test still carries the "id" column; presumably `prediction` has
# one entry per remaining row -- confirm this matches what `model` was fitted
# on and what the submission file expects.
prediction = model.predict(x_test)