@inproceedings{tan-etal-2023-multi2claim,
title = "{M}ulti2{C}laim: Generating Scientific Claims from Multi-Choice Questions for Scientific Fact-Checking",
author = "Tan, Neset and
Nguyen, Trung and
Bensemann, Josh and
Peng, Alex and
Bao, Qiming and
Chen, Yang and
Gahegan, Mark and
Witbrock, Michael",
editor = "Vlachos, Andreas and
Augenstein, Isabelle",
booktitle = "Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics",
month = may,
year = "2023",
address = "Dubrovnik, Croatia",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.eacl-main.194/",
doi = "10.18653/v1/2023.eacl-main.194",
pages = "2652--2664",
abstract = "Training machine learning models to successfully perform scientific fact-checking tasks is challenging due to the expertise bottleneck that limits the availability of appropriate training datasets. In this task, models use textual evidence to confirm scientific claims, which requires data that contains extensive domain-expert annotation. Consequently, the number of existing scientific-fact-checking datasets and the sizes of those datasets are limited. However, these limitations do not apply to multiple-choice question datasets because of the necessity of domain exams in the modern education system. As one of the first steps towards addressing the fact-checking dataset scarcity problem in scientific domains, we propose a pipeline for automatically converting multiple-choice questions into fact-checking data, which we call Multi2Claim. By applying the proposed pipeline, we generated two large-scale datasets for scientific-fact-checking tasks: Med-Fact and Gsci-Fact for the medical and general science domains, respectively. These two datasets are among the first examples of large-scale scientific-fact-checking datasets. We developed baseline models for the verdict prediction task using each dataset. Additionally, we demonstrated that the datasets could be used to improve performance with respect to the F 1 weighted metric on existing fact-checking datasets such as SciFact, HEALTHVER, COVID-Fact, and CLIMATE-FEVER. In some cases, the improvement in performance was up to a 26{\%} increase."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="tan-etal-2023-multi2claim">
<titleInfo>
<title>Multi2Claim: Generating Scientific Claims from Multi-Choice Questions for Scientific Fact-Checking</title>
</titleInfo>
<name type="personal">
<namePart type="given">Neset</namePart>
<namePart type="family">Tan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Trung</namePart>
<namePart type="family">Nguyen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Josh</namePart>
<namePart type="family">Bensemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Alex</namePart>
<namePart type="family">Peng</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Qiming</namePart>
<namePart type="family">Bao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yang</namePart>
<namePart type="family">Chen</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mark</namePart>
<namePart type="family">Gahegan</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Michael</namePart>
<namePart type="family">Witbrock</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-05</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics</title>
</titleInfo>
<name type="personal">
<namePart type="given">Andreas</namePart>
<namePart type="family">Vlachos</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Isabelle</namePart>
<namePart type="family">Augenstein</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Dubrovnik, Croatia</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Training machine learning models to successfully perform scientific fact-checking tasks is challenging due to the expertise bottleneck that limits the availability of appropriate training datasets. In this task, models use textual evidence to confirm scientific claims, which requires data that contains extensive domain-expert annotation. Consequently, the number of existing scientific-fact-checking datasets and the sizes of those datasets are limited. However, these limitations do not apply to multiple-choice question datasets because of the necessity of domain exams in the modern education system. As one of the first steps towards addressing the fact-checking dataset scarcity problem in scientific domains, we propose a pipeline for automatically converting multiple-choice questions into fact-checking data, which we call Multi2Claim. By applying the proposed pipeline, we generated two large-scale datasets for scientific-fact-checking tasks: Med-Fact and Gsci-Fact for the medical and general science domains, respectively. These two datasets are among the first examples of large-scale scientific-fact-checking datasets. We developed baseline models for the verdict prediction task using each dataset. Additionally, we demonstrated that the datasets could be used to improve performance with respect to the F 1 weighted metric on existing fact-checking datasets such as SciFact, HEALTHVER, COVID-Fact, and CLIMATE-FEVER. In some cases, the improvement in performance was up to a 26% increase.</abstract>
<identifier type="citekey">tan-etal-2023-multi2claim</identifier>
<identifier type="doi">10.18653/v1/2023.eacl-main.194</identifier>
<location>
<url>https://aclanthology.org/2023.eacl-main.194/</url>
</location>
<part>
<date>2023-05</date>
<extent unit="page">
<start>2652</start>
<end>2664</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Multi2Claim: Generating Scientific Claims from Multi-Choice Questions for Scientific Fact-Checking
%A Tan, Neset
%A Nguyen, Trung
%A Bensemann, Josh
%A Peng, Alex
%A Bao, Qiming
%A Chen, Yang
%A Gahegan, Mark
%A Witbrock, Michael
%Y Vlachos, Andreas
%Y Augenstein, Isabelle
%S Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics
%D 2023
%8 May
%I Association for Computational Linguistics
%C Dubrovnik, Croatia
%F tan-etal-2023-multi2claim
%X Training machine learning models to successfully perform scientific fact-checking tasks is challenging due to the expertise bottleneck that limits the availability of appropriate training datasets. In this task, models use textual evidence to confirm scientific claims, which requires data that contains extensive domain-expert annotation. Consequently, the number of existing scientific-fact-checking datasets and the sizes of those datasets are limited. However, these limitations do not apply to multiple-choice question datasets because of the necessity of domain exams in the modern education system. As one of the first steps towards addressing the fact-checking dataset scarcity problem in scientific domains, we propose a pipeline for automatically converting multiple-choice questions into fact-checking data, which we call Multi2Claim. By applying the proposed pipeline, we generated two large-scale datasets for scientific-fact-checking tasks: Med-Fact and Gsci-Fact for the medical and general science domains, respectively. These two datasets are among the first examples of large-scale scientific-fact-checking datasets. We developed baseline models for the verdict prediction task using each dataset. Additionally, we demonstrated that the datasets could be used to improve performance with respect to the F 1 weighted metric on existing fact-checking datasets such as SciFact, HEALTHVER, COVID-Fact, and CLIMATE-FEVER. In some cases, the improvement in performance was up to a 26% increase.
%R 10.18653/v1/2023.eacl-main.194
%U https://aclanthology.org/2023.eacl-main.194/
%U https://doi.org/10.18653/v1/2023.eacl-main.194
%P 2652-2664
Markdown (Informal)
[Multi2Claim: Generating Scientific Claims from Multi-Choice Questions for Scientific Fact-Checking](https://aclanthology.org/2023.eacl-main.194/) (Tan et al., EACL 2023)
ACL
- Neset Tan, Trung Nguyen, Josh Bensemann, Alex Peng, Qiming Bao, Yang Chen, Mark Gahegan, and Michael Witbrock. 2023. Multi2Claim: Generating Scientific Claims from Multi-Choice Questions for Scientific Fact-Checking. In Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics, pages 2652–2664, Dubrovnik, Croatia. Association for Computational Linguistics.