-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathplot_pipeline.py
45 lines (38 loc) · 1.26 KB
/
plot_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# -*- coding: utf-8 -*-
"""
=====================
Building Pipelines
=====================
HiClass can be adopted in scikit-learn pipelines, and fully supports sparse matrices as input.
This example demonstrates the use of both of these features.
"""
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from hiclass import LocalClassifierPerParentNode
# Toy dataset: raw text documents, each labelled with a two-element
# label path (one label per level).
X_train = ["Struggling to repay loan", "Unable to get annual report"]
Y_train = [
    ["Loan", "Student loan"],
    ["Credit reporting", "Reports"],
]

# The test documents are the same texts, listed in the opposite order.
X_test = ["Unable to get annual report", "Struggling to repay loan"]

# Logistic regression is the local classifier used for every parent node.
lr = LogisticRegression()

# Assemble the pipeline: CountVectorizer followed by TfidfTransformer
# extracts features as sparse matrices, which are then handed to the
# local classifier per parent node.
pipeline = Pipeline(
    steps=[
        ("count", CountVectorizer()),
        ("tfidf", TfidfTransformer()),
        ("lcppn", LocalClassifierPerParentNode(local_classifier=lr)),
    ]
)

# Train the whole pipeline on the raw training texts and their labels.
pipeline.fit(X_train, Y_train)

# Run the fitted pipeline on the test texts and show the predictions.
predictions = pipeline.predict(X_test)
print(predictions)