@inproceedings{vahtola-etal-2022-easy,
title = "It Is Not Easy To Detect Paraphrases: Analysing Semantic Similarity With Antonyms and Negation Using the New {S}em{A}nto{N}eg Benchmark",
author = {Vahtola, Teemu and
Creutz, Mathias and
Tiedemann, J{\"o}rg},
editor = "Bastings, Jasmijn and
Belinkov, Yonatan and
Elazar, Yanai and
Hupkes, Dieuwke and
Saphra, Naomi and
Wiegreffe, Sarah",
booktitle = "Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates (Hybrid)",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.blackboxnlp-1.20",
doi = "10.18653/v1/2022.blackboxnlp-1.20",
pages = "249--262",
abstract = "We investigate to what extent a hundred publicly available, popular neural language models capture meaning systematically. Sentence embeddings obtained from pretrained or fine-tuned language models can be used to perform particular tasks, such as paraphrase detection, semantic textual similarity assessment or natural language inference. Common to all of these tasks is that paraphrastic sentences, that is, sentences that carry (nearly) the same meaning, should have (nearly) the same embeddings regardless of surface form. We demonstrate that performance varies greatly across different language models when a specific type of meaning-preserving transformation is applied: two sentences should be identified as paraphrastic if one of them contains a negated antonym in relation to the other one, such as {``}I am not guilty{''} versus {``}I am innocent{''}.We introduce and release SemAntoNeg, a new test suite containing 3152 entries for probing paraphrasticity in sentences incorporating negation and antonyms. Among other things, we show that language models fine-tuned for natural language inference outperform other types of models, especially the ones fine-tuned to produce general-purpose sentence embeddings, on the test suite. Furthermore, we show that most models designed explicitly for paraphrasing are rather mediocre in our task.",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="vahtola-etal-2022-easy">
<titleInfo>
<title>It Is Not Easy To Detect Paraphrases: Analysing Semantic Similarity With Antonyms and Negation Using the New SemAntoNeg Benchmark</title>
</titleInfo>
<name type="personal">
<namePart type="given">Teemu</namePart>
<namePart type="family">Vahtola</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mathias</namePart>
<namePart type="family">Creutz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jörg</namePart>
<namePart type="family">Tiedemann</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP</title>
</titleInfo>
<name type="personal">
<namePart type="given">Jasmijn</namePart>
<namePart type="family">Bastings</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yonatan</namePart>
<namePart type="family">Belinkov</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yanai</namePart>
<namePart type="family">Elazar</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dieuwke</namePart>
<namePart type="family">Hupkes</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naomi</namePart>
<namePart type="family">Saphra</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Sarah</namePart>
<namePart type="family">Wiegreffe</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates (Hybrid)</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>We investigate to what extent a hundred publicly available, popular neural language models capture meaning systematically. Sentence embeddings obtained from pretrained or fine-tuned language models can be used to perform particular tasks, such as paraphrase detection, semantic textual similarity assessment or natural language inference. Common to all of these tasks is that paraphrastic sentences, that is, sentences that carry (nearly) the same meaning, should have (nearly) the same embeddings regardless of surface form. We demonstrate that performance varies greatly across different language models when a specific type of meaning-preserving transformation is applied: two sentences should be identified as paraphrastic if one of them contains a negated antonym in relation to the other one, such as “I am not guilty” versus “I am innocent”.We introduce and release SemAntoNeg, a new test suite containing 3152 entries for probing paraphrasticity in sentences incorporating negation and antonyms. Among other things, we show that language models fine-tuned for natural language inference outperform other types of models, especially the ones fine-tuned to produce general-purpose sentence embeddings, on the test suite. Furthermore, we show that most models designed explicitly for paraphrasing are rather mediocre in our task.</abstract>
<identifier type="citekey">vahtola-etal-2022-easy</identifier>
<identifier type="doi">10.18653/v1/2022.blackboxnlp-1.20</identifier>
<location>
<url>https://aclanthology.org/2022.blackboxnlp-1.20</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>249</start>
<end>262</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T It Is Not Easy To Detect Paraphrases: Analysing Semantic Similarity With Antonyms and Negation Using the New SemAntoNeg Benchmark
%A Vahtola, Teemu
%A Creutz, Mathias
%A Tiedemann, Jörg
%Y Bastings, Jasmijn
%Y Belinkov, Yonatan
%Y Elazar, Yanai
%Y Hupkes, Dieuwke
%Y Saphra, Naomi
%Y Wiegreffe, Sarah
%S Proceedings of the Fifth BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates (Hybrid)
%F vahtola-etal-2022-easy
%X We investigate to what extent a hundred publicly available, popular neural language models capture meaning systematically. Sentence embeddings obtained from pretrained or fine-tuned language models can be used to perform particular tasks, such as paraphrase detection, semantic textual similarity assessment or natural language inference. Common to all of these tasks is that paraphrastic sentences, that is, sentences that carry (nearly) the same meaning, should have (nearly) the same embeddings regardless of surface form. We demonstrate that performance varies greatly across different language models when a specific type of meaning-preserving transformation is applied: two sentences should be identified as paraphrastic if one of them contains a negated antonym in relation to the other one, such as “I am not guilty” versus “I am innocent”.We introduce and release SemAntoNeg, a new test suite containing 3152 entries for probing paraphrasticity in sentences incorporating negation and antonyms. Among other things, we show that language models fine-tuned for natural language inference outperform other types of models, especially the ones fine-tuned to produce general-purpose sentence embeddings, on the test suite. Furthermore, we show that most models designed explicitly for paraphrasing are rather mediocre in our task.
%R 10.18653/v1/2022.blackboxnlp-1.20
%U https://aclanthology.org/2022.blackboxnlp-1.20
%U https://doi.org/10.18653/v1/2022.blackboxnlp-1.20
%P 249-262
Markdown (Informal)
[It Is Not Easy To Detect Paraphrases: Analysing Semantic Similarity With Antonyms and Negation Using the New SemAntoNeg Benchmark](https://aclanthology.org/2022.blackboxnlp-1.20) (Vahtola et al., BlackboxNLP 2022)
ACL