@inproceedings{khosla-gangadharaiah-2022-benchmarking,
title = "Benchmarking the Covariate Shift Robustness of Open-world Intent Classification Approaches",
author = "Khosla, Sopan and
Gangadharaiah, Rashmi",
editor = "He, Yulan and
Ji, Heng and
Li, Sujian and
Liu, Yang and
Chang, Chua-Hui",
booktitle = "Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)",
month = nov,
year = "2022",
address = "Online only",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.aacl-short.3/",
doi = "10.18653/v1/2022.aacl-short.3",
pages = "14--23",
abstract = "Task-oriented dialog systems deployed in real-world applications are often challenged by out-of-distribution queries. These systems should not only reliably detect utterances with unsupported intents (\textit{semantic shift}), but also generalize to \textit{covariate shift} (supported intents from unseen distributions). However, none of the existing benchmarks for open-world intent classification focus on the second aspect, thus only performing a partial evaluation of intent detection techniques. In this work, we propose two new datasets ( and ) that include utterances useful for evaluating the robustness of open-world models to covariate shift. Along with the i.i.d. test set, both datasets contain a new cov-test set that, along with out-of-scope utterances, contains in-scope utterances sampled from different distributions not seen during training. This setting better mimics the challenges faced in real-world applications. Evaluating several open-world classifiers on the new datasets reveals that models that perform well on the test set struggle to generalize to the cov-test. Our datasets fill an important gap in the field, offering a more realistic evaluation scenario for intent classification in task-oriented dialog systems."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="khosla-gangadharaiah-2022-benchmarking">
<titleInfo>
<title>Benchmarking the Covariate Shift Robustness of Open-world Intent Classification Approaches</title>
</titleInfo>
<name type="personal">
<namePart type="given">Sopan</namePart>
<namePart type="family">Khosla</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rashmi</namePart>
<namePart type="family">Gangadharaiah</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-11</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yulan</namePart>
<namePart type="family">He</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Heng</namePart>
<namePart type="family">Ji</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sujian</namePart>
<namePart type="family">Li</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Liu</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Chua-Hui</namePart>
<namePart type="family">Chang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Online only</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Task-oriented dialog systems deployed in real-world applications are often challenged by out-of-distribution queries. These systems should not only reliably detect utterances with unsupported intents (semantic shift), but also generalize to covariate shift (supported intents from unseen distributions). However, none of the existing benchmarks for open-world intent classification focus on the second aspect, thus only performing a partial evaluation of intent detection techniques. In this work, we propose two new datasets ( and ) that include utterances useful for evaluating the robustness of open-world models to covariate shift. Along with the i.i.d. test set, both datasets contain a new cov-test set that, along with out-of-scope utterances, contains in-scope utterances sampled from different distributions not seen during training. This setting better mimics the challenges faced in real-world applications. Evaluating several open-world classifiers on the new datasets reveals that models that perform well on the test set struggle to generalize to the cov-test. Our datasets fill an important gap in the field, offering a more realistic evaluation scenario for intent classification in task-oriented dialog systems.</abstract>
<identifier type="citekey">khosla-gangadharaiah-2022-benchmarking</identifier>
<identifier type="doi">10.18653/v1/2022.aacl-short.3</identifier>
<location>
<url>https://aclanthology.org/2022.aacl-short.3/</url>
</location>
<part>
<date>2022-11</date>
<extent unit="page">
<start>14</start>
<end>23</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Benchmarking the Covariate Shift Robustness of Open-world Intent Classification Approaches
%A Khosla, Sopan
%A Gangadharaiah, Rashmi
%Y He, Yulan
%Y Ji, Heng
%Y Li, Sujian
%Y Liu, Yang
%Y Chang, Chua-Hui
%S Proceedings of the 2nd Conference of the Asia-Pacific Chapter of the Association for Computational Linguistics and the 12th International Joint Conference on Natural Language Processing (Volume 2: Short Papers)
%D 2022
%8 November
%I Association for Computational Linguistics
%C Online only
%F khosla-gangadharaiah-2022-benchmarking
%X Task-oriented dialog systems deployed in real-world applications are often challenged by out-of-distribution queries. These systems should not only reliably detect utterances with unsupported intents (semantic shift), but also generalize to covariate shift (supported intents from unseen distributions). However, none of the existing benchmarks for open-world intent classification focus on the second aspect, thus only performing a partial evaluation of intent detection techniques. In this work, we propose two new datasets ( and ) that include utterances useful for evaluating the robustness of open-world models to covariate shift. Along with the i.i.d. test set, both datasets contain a new cov-test set that, along with out-of-scope utterances, contains in-scope utterances sampled from different distributions not seen during training. This setting better mimics the challenges faced in real-world applications. Evaluating several open-world classifiers on the new datasets reveals that models that perform well on the test set struggle to generalize to the cov-test. Our datasets fill an important gap in the field, offering a more realistic evaluation scenario for intent classification in task-oriented dialog systems.
%R 10.18653/v1/2022.aacl-short.3
%U https://aclanthology.org/2022.aacl-short.3/
%U https://doi.org/10.18653/v1/2022.aacl-short.3
%P 14-23
Markdown (Informal)
[Benchmarking the Covariate Shift Robustness of Open-world Intent Classification Approaches](https://aclanthology.org/2022.aacl-short.3/) (Khosla & Gangadharaiah, AACL-IJCNLP 2022)
ACL