MLR PDF

This document summarizes machine learning workflows in R with the mlr package: 1) preprocessing data by creating dummy features, normalizing features, and merging small factor levels; 2) setting up training and testing by making classification, regression, and other task types from data, and creating resampling instances such as cross-validation folds; and 3) refining model performance by tuning hyperparameters, training models, predicting on test data, and calculating evaluation measures.


Introduction

mlr gives R a unified interface for machine learning: data and the prediction target are wrapped in a task, an algorithm is wrapped in a learner, and generic functions such as train(), predict(), resample(), and tuneParams() work on any task/learner combination. Signatures below are written as function(required_parameters=,optional_parameters=).

Preprocessing

createDummyFeatures(obj=,target=,method=,cols=)
Converts factor features into dummy (one-hot) features; target= is excluded from conversion and cols= restricts it to selected columns.

normalizeFeatures(obj=,target=,method=,cols=,range=,on.constant=)
Normalizes numeric features. method= is one of:
• "center" – subtract the mean
• "scale" – divide by the standard deviation
• "standardize" – center, then scale
• "range" – rescale into a given interval, e.g. range=c(0,1)

mergeSmallFactorLevels(task=,cols=,min.perc=)
Merges factor levels that occur in less than min.perc of the observations into one collapsed level.

summarizeColumns(obj=)
Summarizes the columns of obj (a data.frame or a task).

See also: capLargeValues, dropFeatures, removeConstantFeatures, summarizeLevels.
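A minimal sketch of these calls, reusing the Soybean data from the Quickstart below (assumes the mlbench package is installed):

library(mlr)
library(mlbench)
data(Soybean)

# one-hot encode all factor features except the target
soy = createDummyFeatures(Soybean, target = "Class")
# center and scale the resulting numeric features
soy = normalizeFeatures(soy, target = "Class", method = "standardize")
summarizeColumns(soy)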
Setup: Tasks

A task wraps a data set and its prediction target. All task constructors also accept:
• weights= – observation weights
• blocking= – a factor; observations with the same level are kept together during resampling

makeClassifTask(data=,target=)
Binary or multiclass classification; positive= names the positive class of a binary task.

makeRegrTask(data=,target=)
Regression on a numeric target.

makeMultilabelTask(data=,target=)
Multilabel classification; target= names several logical columns (e.g. A, B, C), one per label.

makeClusterTask(data=)
Clustering; there is no target.

makeSurvTask(data=,target=c("time","event"))
Survival analysis on a time column and an event indicator.

makeCostSensTask(data=,costs=)
Cost-sensitive classification; costs= is a matrix with one column per class (e.g. A, B, C) holding observation-specific misclassification costs (e.g. 0, 63, 100).
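A minimal sketch of task creation on data sets that ship with R:

library(mlr)

tsk.cls = makeClassifTask(data = iris, target = "Species")   # classification
tsk.reg = makeRegrTask(data = mtcars, target = "mpg")        # regression
tsk.clu = makeClusterTask(data = iris[, 1:4])                # clustering, no target
print(tsk.cls)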

Setup: Learners

makeLearner(cl=,predict.type=,...,par.vals=)
Creates a learner.
• cl= names the method, e.g. "classif.xgboost", "regr.randomForest", "cluster.kmeans".
• predict.type="response" (the default) predicts classes, numbers, or cluster ids; "prob" additionally predicts class probabilities; "se" additionally predicts standard errors. "prob" and "se" must be supported by the underlying method.
• par.vals= is a named list of hyperparameter values; they can also be passed directly via ... .

makeLearners() creates several learners in one call.

Finding a learner:
• View(listLearners()) – all learners
• View(listLearners(task)) – all learners applicable to task
• View(listLearners("classif",properties=c("prob","factors"))) – all "classif" learners that can predict probabilities ("prob") and handle factor features ("factors")
• getLearnerProperties() – the properties of a learner
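A minimal sketch (assumes the randomForest package is installed):

library(mlr)

# a probability-emitting random forest with 200 trees
lrn = makeLearner("classif.randomForest", predict.type = "prob",
                  par.vals = list(ntree = 200))
getLearnerProperties(lrn)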

Training & Testing

setHyperPars(learner=,...)
Sets the hyperparameters of a learner.

getParamSet(learner=)
Lists a learner's hyperparameters and their constraints; also works on a learner name, e.g. getParamSet("classif.qda").

train(learner=,task=)
Fits the learner on the task and returns a WrappedModel; getLearnerModel() extracts the underlying fitted model from it.

predict(object=,task=,newdata=)
Predicts with a WrappedModel on a task or on new data and returns a prediction object pred; inspect it with View(pred) or as.data.frame(pred).

performance(pred=,measures=)
Evaluates a prediction. listMeasures() lists all available measures:
• classification: acc auc bac ber brier[.scaled] f1 fdr fn fnr fp fpr gmean multiclass[.au1u .aunp .aunu .brier] npv ppv qsr ssr tn tnr tp tpr wkappa
• regression: arsq expvar kendalltau mae mape medae medse mse msle rae rmse rmsle rrse rsq sae spearmanrho sse
• clustering: db dunn G1 G2 silhouette
• multilabel: multilabel[.f1 .subset01 .tpr .ppv .acc .hamloss]
• cost-sensitive: mcp meancosts
• survival: cindex
• any task: featperc timeboth timepredict timetrain

• calculateConfusionMatrix(pred=) – confusion matrix of a classification prediction
• calculateROCMeasures(pred=) – ROC measures of a binary classification prediction
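A minimal sketch of the train/predict/evaluate cycle ("classif.rpart" assumes the rpart package, which ships with R):

library(mlr)

tsk = makeClassifTask(data = iris, target = "Species")
mdl = train(makeLearner("classif.rpart"), tsk)   # a WrappedModel
getLearnerModel(mdl)                             # the underlying rpart fit

prd = predict(mdl, task = tsk)
head(as.data.frame(prd))
performance(prd, measures = list(acc, ber))
calculateConfusionMatrix(prd)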

Resampling

makeResampleDesc(method=,...,stratify=)
Describes how to split a task. method= is one of:
• "CV" – cross-validation (iters=)
• "LOO" – leave-one-out cross-validation
• "RepCV" – repeated cross-validation (reps=, folds=)
• "Subsample" – repeated holdout (iters=, split=)
• "Bootstrap" – out-of-bag bootstrap (iters=)
• "Holdout" – a single split (split=)
stratify=TRUE preserves the class proportions in each set. Predefined descriptions: cv2, cv3, cv5, cv10, hout.

makeResampleInstance(desc=,task=)
Fixes the concrete train/test indices of a description on a task, so different learners see identical splits.

resample(learner=,task=,resampling=,measures=)
Fits the learner on every training set and evaluates the given measures on every test set. Shortcuts: crossval(), repcv(), holdout(), subsample(), bootstrapOOB(), bootstrapB632(), bootstrapB632plus().
Refining Performance

makeParamSet(make<type>Param())
Defines the search space for tuning from parameter constructors:
• makeNumericParam(id=,lower=,upper=,trafo=)
• makeIntegerParam(id=,lower=,upper=,trafo=)
• makeIntegerVectorParam(id=,len=,lower=,upper=,trafo=)
• makeDiscreteParam(id=,values=c(...))
plus Logical, LogicalVector, CharacterVector, and DiscreteVector variants.
trafo= transforms a sampled value before it reaches the learner; e.g. lower=-2,upper=2,trafo=function(x) 10^x searches from 0.01 to 100 on a log scale.

makeTuneControl<type>()
Chooses the search strategy:
• Grid(resolution=10L) – grid search
• Random(maxit=100) – random search
• MBO(budget=) – model-based (Bayesian) optimization
• Irace(n.instances=) – iterated racing
plus CMAES, Design, and GenSA variants.

tuneParams(learner=,task=,resampling=,measures=,par.set=,control=)
Runs the search and returns a tune result tr; apply the best setting with setHyperPars(learner=,par.vals=tr$x).
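A minimal sketch: random search over two rpart hyperparameters, with cp explored on a log scale via trafo:

library(mlr)

tsk = makeClassifTask(data = iris, target = "Species")
lrn = makeLearner("classif.rpart")
ps = makeParamSet(
  makeNumericParam("cp", lower = -4, upper = -1, trafo = function(x) 10^x),
  makeIntegerParam("minsplit", lower = 5, upper = 50))
tr = tuneParams(lrn, tsk, cv5, acc, par.set = ps,
                control = makeTuneControlRandom(maxit = 20))
lrn = setHyperPars(lrn, par.vals = tr$x)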

Quickstart

library(mlr)
library(mlbench)
data(Soybean)

# preprocess and wrap the data in a task
soy = createDummyFeatures(Soybean,target="Class")
tsk = makeClassifTask(data=soy,target="Class")

# fix a holdout split and build train/test tasks
ho = makeResampleInstance("Holdout",tsk)
tsk.train = subsetTask(tsk,ho$train.inds[[1]])
tsk.test = subsetTask(tsk,ho$test.inds[[1]])

# learner and cross-validated baseline accuracy
lrn = makeLearner("classif.xgboost",nrounds=10)
cv = makeResampleDesc("CV",iters=5)
res = resample(lrn,tsk.train,cv,acc)

# tune eta, lambda, and max_depth with model-based optimization
ps = makeParamSet(makeNumericParam("eta",0,1),
                  makeNumericParam("lambda",0,200),
                  makeIntegerParam("max_depth",1,20))
tc = makeTuneControlMBO(budget=100)
tr = tuneParams(lrn,tsk.train,cv5,acc,ps,tc)
lrn = setHyperPars(lrn,par.vals=tr$x)

# evaluate on the held-out task, then refit on all data
mdl = train(lrn,tsk.train)
prd = predict(mdl,tsk.test)
calculateConfusionMatrix(prd)
mdl = train(lrn,tsk)
Configuration

configureMlr()
Sets global mlr options:
• show.info – verbose output (default TRUE)
• on.learner.error – "stop", "warn", or "quiet" (default "stop")
• on.learner.warning – "warn" or "quiet" (default "warn")
• on.par.without.desc – reaction to a parameter without a description: "stop", "warn", or "quiet" (default "stop")
• on.par.out.of.bounds – reaction to an out-of-bounds parameter: "stop", "warn", or "quiet" (default "stop")
• on.measure.not.applicable – "stop", "warn", or "quiet" (default "stop")
• show.learner.output – (default TRUE)
• on.error.dump – store error dumps with the objects (default TRUE unless on.learner.error is "stop")

getMlrOptions() returns the current settings.
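For example, to keep a long experiment running when an individual learner fails (a sketch):

library(mlr)

# a failing learner yields a FailureModel instead of aborting the run
configureMlr(on.learner.error = "warn", show.info = FALSE)
getMlrOptions()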
Parallelization

mlr is parallelized via the parallelMap package.

parallelStart(mode=,cpus=,level=)
• mode=:
  • "local" – sequential execution via mapply
  • "multicore" – parallel::mclapply (forking; not available on Windows)
  • "socket", "mpi" – parallel::makeCluster and parallel::clusterMap
  • "BatchJobs" – BatchJobs::batchMap on a batch computing cluster
• cpus= – number of workers
• level= – parallelize only one level, e.g. "mlr.benchmark", "mlr.resample", "mlr.selectFeatures", "mlr.tuneParams", "mlr.ensemble"

parallelStop() stops parallelization.
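A minimal sketch, parallelizing only the resampling loop (the "multicore" mode assumes a Unix-like OS):

library(mlr)
library(parallelMap)

parallelStart(mode = "multicore", cpus = 2, level = "mlr.resample")
res = resample(makeLearner("classif.rpart"), iris.task, cv5, acc)
parallelStop()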

Imputation

impute(obj=,target=,cols=,dummy.cols=,dummy.type=)
Imputes missing values and returns both the imputed data and a reusable imputation description:
• obj= – a data.frame or task
• target= – the target column(s), which are never imputed
• cols= – a named list mapping columns to imputation methods; e.g. cols=list(V1=imputeMean()) imputes column V1 with its mean
• dummy.cols= – columns that get an extra indicator column marking which values were imputed
• dummy.type= – type of the indicator columns; "numeric" codes them 0/1
classes= and dummy.classes= work like cols= and dummy.cols= but select columns by class (e.g. all numerics) instead of by name.

Imputation methods: imputeConst(const=), imputeMean(), imputeMedian(), imputeMode(), imputeMin(multiplier=), imputeMax(multiplier=), imputeNormal(mean=,sd=), imputeHist(breaks=,use.mids=), imputeLearner(learner=,features=)

reimpute(obj=,desc=)
Applies an imputation description desc returned by impute() to new data obj, so test data is treated exactly like training data.
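A minimal sketch on a copy of iris with a few values removed:

library(mlr)

df = iris
df[c(1, 5, 9), "Sepal.Length"] = NA

imp = impute(df, target = "Species",
             cols = list(Sepal.Length = imputeMean()),
             dummy.cols = "Sepal.Length")
head(imp$data)                   # imputed data plus indicator column
df.new = reimpute(df, imp$desc)  # reuse the same imputation on new data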
Benchmarking

benchmark(learners=,tasks=,resamplings=,measures=)
Crosses lists of learners, tasks, and resampling strategies in one experiment and returns a BenchmarkResult bmr.

getBMR<object> accessors extract its parts: AggrPerformances, FeatSelResults, FilteredFeatures, LearnerIds, LearnerShortNames, Learners, MeasureIds, Measures, Models, Performances, Predictions, TaskDescs, TaskIds, TuneResults.

mlr ships example tasks to experiment with: agri.task, bc.task, bh.task, costiris.task, iris.task, lung.task, mtcars.task, pid.task, sonar.task, wpbc.task, yeast.task.
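A minimal sketch comparing two learners on two of the bundled tasks ("classif.naiveBayes" assumes the e1071 package):

library(mlr)

lrns = list(makeLearner("classif.rpart"), makeLearner("classif.naiveBayes"))
bmr = benchmark(lrns, tasks = list(iris.task, sonar.task),
                resamplings = cv5, measures = acc)
getBMRAggrPerformances(bmr, as.df = TRUE)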


Feature Extraction

filterFeatures(task=,method=,perc=,abs=,threshold=)
Ranks features with a filter and keeps the top perc= (fraction), abs= (count), or threshold= (score cut-off). method= defaults to "randomForestSRC.rfsrc"; alternatives include "anova.test", "carscore", "cforest.importance", "chi.squared", "gain.ratio", "information.gain", "kruskal.test", "linear.correlation", "mrmr", "oneR", "permutation.importance", "randomForest.importance", "randomForestSRC.var.select", "rank.correlation", "relief", "symmetrical.uncertainty", "univariate.model.score", "variance".

selectFeatures(learner=,task=,resampling=,measures=,control=)
Wrapper-based selection: searches over feature subsets, scoring each by resampling the learner. control= picks the search strategy:
• makeFeatSelControlExhaustive(max.features=) – all subsets up to max.features
• makeFeatSelControlRandom(maxit=,prob=,max.features=) – maxit random subsets, each feature included with probability prob
• makeFeatSelControlSequential(method=,maxit=,max.features=,alpha=,beta=) – method= is "sfs" (forward), "sbs" (backward), "sffs", or "sfbs"; alpha and beta are the minimum improvements required to add or remove a feature
• makeFeatSelControlGA(maxit=,max.features=,mu=,lambda=,crossover.rate=,mutation.rate=) – genetic algorithm with population size mu and lambda offspring

selectFeatures returns a FeatSelResult fsr; apply it with tsk = subsetTask(tsk,features=fsr$x).
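A minimal sketch (the "information.gain" filter assumes the FSelector package):

library(mlr)

tsk = makeClassifTask(data = iris, target = "Species")
tsk.filt = filterFeatures(tsk, method = "information.gain", abs = 2)

ctrl = makeFeatSelControlSequential(method = "sfs", alpha = 0.001)
fsr = selectFeatures(makeLearner("classif.rpart"), tsk, cv3, acc, control = ctrl)
tsk.sel = subsetTask(tsk, features = fsr$x)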
Visualization

• generateThreshVsPerfData(obj=,measures=) – performance across classification thresholds; plot the ThreshVsPerfData with plotThreshVsPerf(obj). Generated with measures=list(fpr,tpr), the same object plots as a ROC curve via plotROCCurves(obj).
• plotResiduals(obj=) – residuals of a Prediction or BenchmarkResult
• generateLearningCurveData(learners=,task=,resampling=,percs=,measures=) – performance versus training-set size; plot the LearningCurveData with plotLearningCurve(obj=)
• generateFilterValuesData(task=,method=) – filter scores per feature; plot the FilterValuesData with plotFilterValues(obj=)
• generateHyperParsEffectData(tune.result=) – effect of hyperparameters on performance; plot the HyperParsEffectData with plotHyperParsEffect(hyperpars.effect.data=,x=,y=,z=)
• plotOptPath(op=) – plots an optimization path <obj>$opt.path, where <obj> is a tuneResult or featSelResult
• plotTuneMultiCritResult(res=) – result of multi-criteria tuning
• generatePartialDependenceData(obj=,input=) – partial dependence of the predictions of a fitted model obj on features of input; plot the PartialDependenceData with plotPartialDependence(obj=)
• plotBMRBoxplots(bmr=), plotBMRSummary(bmr=), plotBMRRanksAsBarChart(bmr=) – visualize a BenchmarkResult
• generateCritDifferencesData(bmr=,measure=,p.value=,test=) – critical differences between learners in a BenchmarkResult, with test= either "bd" (Bonferroni-Dunn) or "Nemenyi"; plot with plotCritDifferences(obj=)
• generateCalibrationData(obj=) – calibration of predicted class probabilities; plot with plotCalibration(obj=)
Wrappers

A wrapper fuses a learner with another step (preprocessing, tuning, feature selection, ...). The wrapped object is itself a learner, so wrappers nest (Learner inside Wrapper 1 inside Wrapper 2 inside Wrapper 3, etc.) and train() runs every layer. See the sketch after this list.

Preprocessing:
makeDummyFeaturesWrapper(learner=)
makeImputeWrapper(learner=,classes=,cols=)
makePreprocWrapper(learner=,train=,predict=)
makePreprocWrapperCaret(learner=,...)
makeRemoveConstantFeaturesWrapper(learner=)

Class imbalance:
makeOverBaggingWrapper(learner=)
makeSMOTEWrapper(learner=)
makeUndersampleWrapper(learner=)
makeWeightedClassesWrapper(learner=)

Cost-sensitive classification:
makeCostSensClassifWrapper(learner=)
makeCostSensRegrWrapper(learner=)
makeCostSensWeightedPairsWrapper(learner=)

Multilabel classification:
makeMultilabelBinaryRelevanceWrapper(learner=)
makeMultilabelClassifierChainsWrapper(learner=)
makeMultilabelDBRWrapper(learner=)
makeMultilabelNestedStackingWrapper(learner=)
makeMultilabelStackingWrapper(learner=)

Other:
makeBaggingWrapper(learner=)
makeConstantClassWrapper(learner=)
makeDownsampleWrapper(learner=,dw.perc=)
makeFeatSelWrapper(learner=,resampling=,control=)
makeFilterWrapper(learner=,fw.perc=,fw.abs=,fw.threshold=)
makeMultiClassWrapper(learner=)
makeTuneWrapper(learner=,resampling=,par.set=,control=)
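A minimal sketch: makeTuneWrapper fuses tuning into a learner; resampling that wrapped learner performs exactly the nested resampling described below:

library(mlr)

ps = makeParamSet(makeNumericParam("cp", lower = 0.001, upper = 0.1))
lrn = makeTuneWrapper(makeLearner("classif.rpart"),
                      resampling = cv3,          # inner loop: tuning
                      par.set = ps,
                      control = makeTuneControlRandom(maxit = 10))
res = resample(lrn, iris.task, cv5, acc)         # outer loop: evaluation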
Nested Resampling

An unbiased performance estimate for a tuned or feature-selected model requires the tuning or selection to run inside the resampling loop. With mlr this falls out of the wrapper design: wrap the learner with makeTuneWrapper or makeFeatSelWrapper (the inner resampling), then pass the wrapped learner to resample or benchmark (the outer resampling), as in the sketch above.

Ensembles

makeStackedLearner(base.learners=,super.learner=,method=)
Builds an ensemble of learners.
• base.learners= – list of learners whose predictions are combined
• super.learner= – learner that combines the base predictions
• method= – how to combine:
  • "average" – average the base predictions
  • "stack.nocv", "stack.cv" – fit the super learner on the base learners' predictions, without or with cross-validation
  • "hill.climb" – select base predictions greedily by hill climbing
  • "compress" – compress the ensemble into a smaller model
