@inproceedings{rogers-rumshisky-2020-guide,
title = "A guide to the dataset explosion in {QA}, {NLI}, and commonsense reasoning",
author = "Rogers, Anna and
Rumshisky, Anna",
editor = "Specia, Lucia and
Beck, Daniel",
booktitle = "Proceedings of the 28th International Conference on Computational Linguistics: Tutorial Abstracts",
month = dec,
year = "2020",
address = "Barcelona, Spain (Online)",
    publisher = "International Committee on Computational Linguistics",
url = "https://aclanthology.org/2020.coling-tutorials.5/",
doi = "10.18653/v1/2020.coling-tutorials.5",
pages = "27--32",
    abstract = "Question answering, natural language inference, and commonsense reasoning are increasingly popular as general NLP system benchmarks, driving both modeling and dataset work. For question answering alone, there are already over 100 datasets, with over 40 published after 2018. However, most new datasets get {\textquotedblleft}solved{\textquotedblright} soon after publication, and this is largely due not to the verbal reasoning capabilities of our models but to annotation artifacts and shallow cues in the data that they can exploit. This tutorial aims to (1) provide an up-to-date guide to the recent datasets, (2) survey the old and new methodological issues with dataset construction, and (3) outline the existing proposals for overcoming them. The target audience is NLP practitioners who are lost in the dozens of recent datasets and would like to know what these datasets actually measure. Our overview of the problems with current datasets, and of the latest tips and tricks for overcoming them, will also be useful to researchers working on future benchmarks."
}
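For programmatic reuse, the record above can be loaded like any other BibTeX entry. A minimal sketch, assuming the bibtexparser 1.x API (`pip install "bibtexparser<2"`); the `bibtex_str` below abbreviates the full entry shown above:

```python
import bibtexparser

# Abbreviated copy of the BibTeX record above; the full record parses
# the same way.
bibtex_str = """
@inproceedings{rogers-rumshisky-2020-guide,
  title = "A guide to the dataset explosion in {QA}, {NLI}, and commonsense reasoning",
  author = "Rogers, Anna and Rumshisky, Anna",
  year = "2020",
  pages = "27--32",
}
"""

db = bibtexparser.loads(bibtex_str)   # parse string -> BibDatabase
entry = db.entries[0]                 # each entry is a plain dict
print(entry["ID"])                    # rogers-rumshisky-2020-guide
print(entry["title"])                 # the title, braces preserved
print(entry["pages"])                 # 27--32
```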
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="rogers-rumshisky-2020-guide">
<titleInfo>
<title>A guide to the dataset explosion in QA, NLI, and commonsense reasoning</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rumshisky</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2020-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 28th International Conference on Computational Linguistics: Tutorial Abstracts</title>
</titleInfo>
<name type="personal">
<namePart type="given">Lucia</namePart>
<namePart type="family">Specia</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Daniel</namePart>
<namePart type="family">Beck</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>International Committee on Computational Linguistics</publisher>
<place>
<placeTerm type="text">Barcelona, Spain (Online)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Question answering, natural language inference, and commonsense reasoning are increasingly popular as general NLP system benchmarks, driving both modeling and dataset work. For question answering alone, there are already over 100 datasets, with over 40 published after 2018. However, most new datasets get “solved” soon after publication, and this is largely due not to the verbal reasoning capabilities of our models but to annotation artifacts and shallow cues in the data that they can exploit. This tutorial aims to (1) provide an up-to-date guide to the recent datasets, (2) survey the old and new methodological issues with dataset construction, and (3) outline the existing proposals for overcoming them. The target audience is NLP practitioners who are lost in the dozens of recent datasets and would like to know what these datasets actually measure. Our overview of the problems with current datasets, and of the latest tips and tricks for overcoming them, will also be useful to researchers working on future benchmarks.</abstract>
<identifier type="citekey">rogers-rumshisky-2020-guide</identifier>
<identifier type="doi">10.18653/v1/2020.coling-tutorials.5</identifier>
<location>
<url>https://aclanthology.org/2020.coling-tutorials.5/</url>
</location>
<part>
<date>2020-12</date>
<extent unit="page">
<start>27</start>
<end>32</end>
</extent>
</part>
</mods>
</modsCollection>
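The MODS record is plain namespaced XML, so the Python standard library is enough to extract fields. A minimal sketch; the embedded `mods_xml` abbreviates the full record above:

```python
import xml.etree.ElementTree as ET

# Abbreviated copy of the MODS record above; the full record parses
# the same way.
mods_xml = """\
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="rogers-rumshisky-2020-guide">
    <titleInfo><title>A guide to the dataset explosion in QA, NLI, and commonsense reasoning</title></titleInfo>
    <name type="personal">
      <namePart type="given">Anna</namePart><namePart type="family">Rogers</namePart>
      <role><roleTerm authority="marcrelator" type="text">author</roleTerm></role>
    </name>
    <identifier type="doi">10.18653/v1/2020.coling-tutorials.5</identifier>
  </mods>
</modsCollection>
"""

NS = {"m": "http://www.loc.gov/mods/v3"}  # every element lives in this namespace
mods = ET.fromstring(mods_xml).find("m:mods", NS)

title = mods.findtext("m:titleInfo/m:title", namespaces=NS)
doi = mods.findtext('m:identifier[@type="doi"]', namespaces=NS)
# Join given + family name parts for every child <name> with role "author"
authors = [
    " ".join(p.text for p in name.findall("m:namePart", NS))
    for name in mods.findall("m:name", NS)
    if name.findtext("m:role/m:roleTerm", namespaces=NS) == "author"
]

print(title)    # A guide to the dataset explosion in QA, NLI, ...
print(doi)      # 10.18653/v1/2020.coling-tutorials.5
print(authors)  # ['Anna Rogers']
```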
%0 Conference Proceedings
%T A guide to the dataset explosion in QA, NLI, and commonsense reasoning
%A Rogers, Anna
%A Rumshisky, Anna
%Y Specia, Lucia
%Y Beck, Daniel
%S Proceedings of the 28th International Conference on Computational Linguistics: Tutorial Abstracts
%D 2020
%8 December
%I International Committee on Computational Linguistics
%C Barcelona, Spain (Online)
%F rogers-rumshisky-2020-guide
%X Question answering, natural language inference, and commonsense reasoning are increasingly popular as general NLP system benchmarks, driving both modeling and dataset work. For question answering alone, there are already over 100 datasets, with over 40 published after 2018. However, most new datasets get “solved” soon after publication, and this is largely due not to the verbal reasoning capabilities of our models but to annotation artifacts and shallow cues in the data that they can exploit. This tutorial aims to (1) provide an up-to-date guide to the recent datasets, (2) survey the old and new methodological issues with dataset construction, and (3) outline the existing proposals for overcoming them. The target audience is NLP practitioners who are lost in the dozens of recent datasets and would like to know what these datasets actually measure. Our overview of the problems with current datasets, and of the latest tips and tricks for overcoming them, will also be useful to researchers working on future benchmarks.
%R 10.18653/v1/2020.coling-tutorials.5
%U https://aclanthology.org/2020.coling-tutorials.5/
%U https://doi.org/10.18653/v1/2020.coling-tutorials.5
%P 27-32
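The Endnote-style record above is a line-oriented tagged format: each line starts with a two-character %-code, and codes such as %A (author), %Y (editor), and %U (URL) may repeat. A minimal parsing sketch; the `parse_endnote` helper is a hypothetical name, not part of any library, and `endnote_str` abbreviates the record above:

```python
from collections import defaultdict

def parse_endnote(text: str) -> dict:
    # Map each %-tag to the list of its values, so repeated tags
    # (%A, %Y, %U) accumulate instead of overwriting each other.
    record = defaultdict(list)
    for line in text.splitlines():
        if line.startswith("%") and len(line) > 2:
            tag, value = line[:2], line[3:]
            record[tag].append(value)
    return dict(record)

# Abbreviated copy of the record above.
endnote_str = """\
%0 Conference Proceedings
%T A guide to the dataset explosion in QA, NLI, and commonsense reasoning
%A Rogers, Anna
%A Rumshisky, Anna
%D 2020
%P 27-32
"""

rec = parse_endnote(endnote_str)
print(rec["%T"][0])   # the title
print(rec["%A"])      # ['Rogers, Anna', 'Rumshisky, Anna']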
Markdown (Informal)
[A guide to the dataset explosion in QA, NLI, and commonsense reasoning](https://aclanthology.org/2020.coling-tutorials.5/) (Rogers & Rumshisky, COLING 2020)
ACL
Anna Rogers and Anna Rumshisky. 2020. [A guide to the dataset explosion in QA, NLI, and commonsense reasoning](https://aclanthology.org/2020.coling-tutorials.5/). In *Proceedings of the 28th International Conference on Computational Linguistics: Tutorial Abstracts*, pages 27–32, Barcelona, Spain (Online). International Committee on Computational Linguistics.