Skip to content

Commit a344014

Browse files
authored
Add zero_division parameter #minor (#136)
1 parent c029aa6 commit a344014

File tree

3 files changed

+190
-12
lines changed

3 files changed

+190
-12
lines changed

docs/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ sphinx_code_tabs==0.5.3
66
sphinx-gallery==0.10.1
77
matplotlib==3.5.2
88
pandas==1.4.2
9-
ray==1.13.0
9+
ray
1010
numpy
1111
git+https://github.com/charles9n/bert-sklearn.git@master
1212
shap==0.44.1

hiclass/metrics.py

+90-11
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
"""Helper functions to compute hierarchical evaluation metrics."""
22

3-
from typing import Union, List
3+
import warnings
4+
from typing import List, Union
5+
46
import numpy as np
5-
from sklearn.utils import check_array
7+
from sklearn.exceptions import UndefinedMetricWarning
68
from sklearn.metrics import log_loss as sk_log_loss
79
from sklearn.preprocessing import LabelEncoder
10+
from sklearn.utils import check_array
811

9-
from hiclass.HierarchicalClassifier import make_leveled
1012
from hiclass import HierarchicalClassifier
13+
from hiclass.HierarchicalClassifier import make_leveled
1114

1215

1316
def _validate_input(y_true, y_pred):
@@ -208,7 +211,12 @@ def _recall_macro(y_true: np.ndarray, y_pred: np.ndarray):
208211
return _compute_macro(y_true, y_pred, _recall_micro)
209212

210213

211-
def f1(y_true: np.ndarray, y_pred: np.ndarray, average: str = "micro"):
214+
def f1(
215+
y_true: np.ndarray,
216+
y_pred: np.ndarray,
217+
average: str = "micro",
218+
zero_division: str = "warn",
219+
):
212220
r"""
213221
Compute hierarchical f-score.
214222
@@ -223,33 +231,104 @@ def f1(y_true: np.ndarray, y_pred: np.ndarray, average: str = "micro"):
223231
224232
- `micro`: The f-score is computed by summing over all individual instances, :math:`\displaystyle{hF = \frac{2 \times hP \times hR}{hP + hR}}`, where :math:`hP` is the hierarchical precision and :math:`hR` is the hierarchical recall.
225233
- `macro`: The f-score is computed for each instance and then averaged, :math:`\displaystyle{hF = \frac{\sum_{i=1}^{n}hF_{i}}{n}}`, where :math:`\alpha_i` is the set consisting of the most specific classes predicted for test example :math:`i` and all their ancestor classes, while :math:`\beta_i` is the set containing the true most specific classes of test example :math:`i` and all their ancestors.
234+
zero_division: {"warn", 0.0, 1.0, np.nan}, default="warn"
235+
Sets the value to return when there is a zero division, i.e., when all
236+
predictions and labels are negative.
237+
238+
Notes:
239+
- If set to "warn", this acts like 0, but a warning is also raised.
240+
- If set to `np.nan`, such values will be excluded from the average.
241+
226242
Returns
227243
-------
228244
f1 : float
229245
Weighted average of the precision and recall
246+
247+
Notes
248+
-----
249+
When ``precision + recall == 0`` (i.e. classes
250+
are completely different from both ``y_true`` and ``y_pred``), f-score is
251+
undefined. In such cases, by default f-score will be set to 0.0, and
252+
``UndefinedMetricWarning`` will be raised. This behavior can be modified by
253+
setting the ``zero_division`` parameter.
254+
255+
References
256+
----------
257+
.. [1] `A survey of hierarchical classification across different application domains
258+
<https://link.springer.com/article/10.1007/S10618-010-0175-9>`_.
259+
260+
Examples
261+
--------
262+
>>> import numpy as np
263+
>>> from hiclass.metrics import f1
264+
>>> y_true = [[0, 1, 2], [3, 4, 5]]
265+
>>> y_pred = [[0, 1, 2], [6, 7, 8]]
266+
>>> f1(y_true, y_pred, average='micro')
267+
0.5
268+
>>> f1(y_true, y_pred, average='macro')
269+
0.5
270+
271+
>>> # zero division
272+
>>> y_true = [[0, 1], [2, 3]]
273+
>>> y_pred = [[4, 5], [6, 7]]
274+
>>> f1(y_true, y_pred)
275+
F-score is ill-defined and being set to 0.0. Use `zero_division` parameter to control this behavior.
276+
0.0
277+
>>> f1(y_true, y_pred, zero_division=1.0)
278+
1.0
279+
>>> f1(y_true, y_pred, zero_division=np.nan)
280+
nan
281+
282+
>>> # multilabel hierarchical classification
283+
>>> y_true = [[["a", "b", "c"]], [["d", "e", "f"]], [["g", "h", "i"]]]
284+
>>> y_pred = [[["a", "b", "c"]], [["d", "e", "f"]], [["g", "h", "i"]]]
285+
>>> f1(y_true, y_pred)
286+
1.0
230287
"""
231288
y_true, y_pred = _validate_input(y_true, y_pred)
232289
functions = {
233290
"micro": _f_score_micro,
234291
"macro": _f_score_macro,
235292
}
236-
return functions[average](y_true, y_pred)
293+
return functions[average](y_true, y_pred, zero_division)
237294

238295

239-
def _f_score_micro(y_true: np.ndarray, y_pred: np.ndarray, zero_division):
    """Compute the micro-averaged hierarchical f-score.

    The score is the harmonic mean of hierarchical precision and recall.
    When both are zero the score is undefined and ``zero_division``
    decides the result: "warn" returns 0.0 and raises
    ``UndefinedMetricWarning``; 0 or 1 return that value; anything else
    (i.e. ``np.nan``) returns NaN.
    """
    hp = precision(y_true, y_pred)
    hr = recall(y_true, y_pred)
    denominator = hp + hr
    if denominator != 0:
        return np.float64(2 * hp * hr / denominator)
    # Zero denominator: f-score is ill-defined; defer to zero_division.
    if zero_division == "warn":
        warnings.warn(
            "F-score is ill-defined and being set to 0.0. "
            "Use `zero_division` parameter to control this behavior.",
            UndefinedMetricWarning,
            stacklevel=2,
        )
        return np.float64(0.0)
    if zero_division in [0, 1]:
        return np.float64(zero_division)
    return np.nan
243313

244314

245-
def _f_score_macro(y_true: np.ndarray, y_pred: np.ndarray, zero_division):
    """Compute the macro-averaged hierarchical f-score.

    Delegates to ``_compute_macro``, which evaluates ``_f_score_micro``
    on each sample individually and averages the results;
    ``zero_division`` is forwarded to each per-sample call.
    """
    return _compute_macro(y_true, y_pred, _f_score_micro, zero_division)
247317

248318

249-
def _compute_macro(y_true: np.ndarray, y_pred: np.ndarray, _micro_function):
319+
def _compute_macro(
320+
y_true: np.ndarray, y_pred: np.ndarray, _micro_function, zero_division=None
321+
):
250322
overall_sum = 0
251323
for ground_truth, prediction in zip(y_true, y_pred):
252-
sample_score = _micro_function(np.array([ground_truth]), np.array([prediction]))
324+
if zero_division:
325+
sample_score = _micro_function(
326+
np.array([ground_truth]), np.array([prediction]), zero_division
327+
)
328+
else:
329+
sample_score = _micro_function(
330+
np.array([ground_truth]), np.array([prediction])
331+
)
253332
overall_sum = overall_sum + sample_score
254333
return overall_sum / len(y_true)
255334

tests/test_metrics.py

+99
Original file line numberDiff line numberDiff line change
@@ -264,24 +264,55 @@ def test_f1_micro_1d_list():
264264
assert 0.5 == f1(y_true, y_pred, "micro")
265265

266266

267+
def test_f1_micro_1d_list_zero_division():
    """Disjoint flat lists: default micro f1 is 0.0; zero_division overrides."""
    truth = [1, 2, 3, 4]
    prediction = [5, 6, 7, 8]
    assert f1(truth, prediction, "micro") == 0.0
    assert f1(truth, prediction, "micro", 1.0) == 1.0
    assert np.isnan(f1(truth, prediction, "micro", np.nan))
273+
274+
267275
def test_f1_micro_2d_list():
    """Half-overlapping 2d lists give micro f1 of 0.5."""
    truth = [[1, 2, 3, 4], [1, 2, 5, 6]]
    prediction = [[1, 2, 5, 6], [1, 2, 3, 4]]
    assert f1(truth, prediction, "micro") == 0.5
271279

272280

281+
def test_f1_micro_2d_list_zero_division():
    """Disjoint 2d lists: default micro f1 is 0.0; zero_division=1.0 yields 1.0."""
    truth = [[1, 2, 3, 4], [5, 6, 7, 8]]
    prediction = [[5, 6, 7, 8], [1, 2, 3, 4]]
    assert f1(truth, prediction, "micro") == 0.0
    assert f1(truth, prediction, "micro", 1.0) == 1.0
286+
287+
273288
def test_f1_micro_1d_np_array():
    """Half-overlapping 1d arrays give micro f1 of 0.5."""
    truth = np.array([1, 2, 3, 4])
    prediction = np.array([1, 2, 5, 6])
    assert f1(truth, prediction, "micro") == 0.5
277292

278293

294+
def test_f1_micro_1d_np_array_zero_division():
    """Disjoint 1d arrays: default micro f1 is 0.0; zero_division overrides."""
    truth = np.array([1, 2, 3, 4])
    prediction = np.array([5, 6, 7, 8])
    assert f1(truth, prediction, "micro") == 0.0
    assert f1(truth, prediction, "micro", 1.0) == 1.0
    assert np.isnan(f1(truth, prediction, "micro", np.nan))
300+
301+
279302
def test_f1_micro_2d_np_array():
    """Half-overlapping 2d arrays give micro f1 of 0.5."""
    truth = np.array([[1, 2, 3, 4], [1, 2, 5, 6]])
    prediction = np.array([[1, 2, 5, 6], [1, 2, 3, 4]])
    assert f1(truth, prediction, "micro") == 0.5
283306

284307

308+
def test_f1_micro_2d_np_array_zero_division():
    """Disjoint 2d arrays: default micro f1 is 0.0; zero_division overrides."""
    truth = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
    prediction = np.array([[5, 6, 7, 8], [1, 2, 3, 4]])
    assert f1(truth, prediction, "micro") == 0.0
    assert f1(truth, prediction, "micro", 1.0) == 1.0
    assert np.isnan(f1(truth, prediction, "micro", np.nan))
314+
315+
285316
def test_f1_micro_3d_np_array():
286317
y_true = np.array(
287318
[
@@ -299,30 +330,80 @@ def test_f1_micro_3d_np_array():
299330
assert 1 == f1(y_true, y_true, "micro")
300331

301332

333+
def test_f1_micro_3d_np_array_zero_division():
    """Disjoint multilabel (3d) arrays: default micro f1 0.0; zero_division overrides."""
    truth = np.array(
        [
            [["a", "b"], ["c", "d"]],
            [["e", "f"], ["g", "h"]],
        ]
    )
    prediction = np.array(
        [
            [["i", "j"], ["k", "l"]],
            [["m", "n"], ["o", "p"]],
        ]
    )
    assert f1(truth, prediction, "micro") == 0.0
    assert f1(truth, prediction, "micro", 1.0) == 1.0
    assert np.isnan(f1(truth, prediction, "micro", np.nan))
349+
350+
302351
def test_f1_macro_1d_list():
    """Identical flat lists give macro f1 of 1."""
    truth = [1, 2, 3, 4]
    prediction = [1, 2, 3, 4]
    assert f1(truth, prediction, "macro") == 1
306355

307356

357+
def test_f1_macro_1d_list_zero_division():
    """Disjoint flat lists: default macro f1 is 0.0; zero_division overrides."""
    truth = [1, 2, 3, 4]
    prediction = [5, 6, 7, 8]
    assert f1(truth, prediction, "macro") == 0.0
    assert f1(truth, prediction, "macro", 1.0) == 1.0
    assert np.isnan(f1(truth, prediction, "macro", np.nan))
363+
364+
308365
def test_f1_macro_2d_list():
    """Ragged, partially overlapping 2d lists: macro f1 is about 0.4285714."""
    truth = [[1, 2, 3, 4], [1, 2, 5, 6]]
    prediction = [[1, 5, 6], [1, 2, 3]]
    assert f1(truth, prediction, "macro") == approx(0.4285714)
312369

313370

371+
def test_f1_macro_2d_list_zero_division():
    """Disjoint 2d lists: default macro f1 is 0.0; zero_division overrides."""
    truth = [[1, 2, 3, 4], [5, 6, 7, 8]]
    prediction = [[5, 6, 7, 8], [1, 2, 3, 4]]
    assert f1(truth, prediction, "macro") == 0.0
    assert f1(truth, prediction, "macro", 1.0) == 1.0
    assert np.isnan(f1(truth, prediction, "macro", np.nan))
377+
378+
314379
def test_f1_macro_1d_np_array():
    """Identical 1d arrays give macro f1 of 1."""
    truth = np.array([1, 2, 3, 4])
    prediction = np.array([1, 2, 3, 4])
    assert f1(truth, prediction, "macro") == 1
318383

319384

385+
def test_f1_macro_1d_np_array_zero_division():
    """Disjoint 1d arrays: default macro f1 is 0.0; zero_division overrides."""
    truth = np.array([1, 2, 3, 4])
    prediction = np.array([5, 6, 7, 8])
    assert f1(truth, prediction, "macro") == 0.0
    assert f1(truth, prediction, "macro", 1.0) == 1.0
    assert np.isnan(f1(truth, prediction, "macro", np.nan))
391+
392+
320393
def test_f1_macro_2d_np_array():
    """Ragged, partially overlapping 2d arrays: macro f1 is about 0.4285714."""
    truth = np.array([[1, 2, 3, 4], [1, 2, 5, 6]])
    prediction = np.array([[1, 5, 6], [1, 2, 3]])
    assert f1(truth, prediction, "macro") == approx(0.4285714)
324397

325398

399+
def test_f1_macro_2d_np_array_zero_division():
    """Disjoint 2d arrays: default macro f1 is 0.0; zero_division overrides."""
    truth = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
    prediction = np.array([[5, 6, 7, 8], [1, 2, 3, 4]])
    assert f1(truth, prediction, "macro") == 0.0
    assert f1(truth, prediction, "macro", 1.0) == 1.0
    assert np.isnan(f1(truth, prediction, "macro", np.nan))
405+
406+
326407
def test_f1_macro_3d_np_array():
327408
y_true = np.array(
328409
[
@@ -340,6 +421,24 @@ def test_f1_macro_3d_np_array():
340421
assert 1 == f1(y_true, y_true, "macro")
341422

342423

424+
def test_f1_macro_3d_np_array_zero_division():
    """Disjoint multilabel (3d) arrays: default macro f1 0.0; zero_division overrides."""
    truth = np.array(
        [
            [["a", "b"], ["c", "d"]],
            [["e", "f"], ["g", "h"]],
        ]
    )
    prediction = np.array(
        [
            [["i", "j"], ["k", "l"]],
            [["m", "n"], ["o", "p"]],
        ]
    )
    assert f1(truth, prediction, "macro") == 0.0
    assert f1(truth, prediction, "macro", 1.0) == 1.0
    assert np.isnan(f1(truth, prediction, "macro", np.nan))
440+
441+
343442
def test_empty_levels_2d_list_1():
344443
y_true = [["2", "3"], ["1"], ["4", "5", "6"]]
345444
y_pred = [["1"], ["2", "3"], ["4", "5", "6"]]

0 commit comments

Comments
 (0)