% Conference paper in the EMNLP 2023 proceedings; `url` is the ACL Anthology
% landing page and `doi` is stored bare (no resolver prefix).
@inproceedings{comsa-narayanan-2023-benchmark,
    title     = {A Benchmark for Reasoning with Spatial Prepositions},
    author    = {Comsa, Iulia and
                 Narayanan, Srini},
    editor    = {Bouamor, Houda and
                 Pino, Juan and
                 Bali, Kalika},
    booktitle = {Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing},
    month     = dec,
    year      = {2023},
    address   = {Singapore},
    publisher = {Association for Computational Linguistics},
    url       = {https://aclanthology.org/2023.emnlp-main.1015/},
    doi       = {10.18653/v1/2023.emnlp-main.1015},
    pages     = {16328--16335},
    abstract  = {Spatial reasoning is a fundamental building block of human cognition, used in representing, grounding, and reasoning about physical and abstract concepts. We propose a novel benchmark focused on assessing inferential properties of statements with spatial prepositions. The benchmark includes original datasets in English and Romanian and aims to probe the limits of reasoning about spatial relations in large language models. We use prompt engineering to study the performance of two families of large language models, PaLM and GPT-3, on our benchmark. Our results show considerable variability in the performance of smaller and larger models, as well as across prompts and languages. However, none of the models reaches human performance.},
}
<?xml version="1.0" encoding="UTF-8"?>
<!-- MODS v3 record for the paper. The xmlns value is a namespace identifier,
     not a link: it must match the MODS v3 namespace URI exactly for
     namespace-aware parsers to recognise the elements below. -->
<modsCollection xmlns="http://www.loc.gov/mods/v3">
  <mods ID="comsa-narayanan-2023-benchmark">
    <titleInfo>
      <title>A Benchmark for Reasoning with Spatial Prepositions</title>
    </titleInfo>
    <!-- Authors, in publication order. -->
    <name type="personal">
      <namePart type="given">Iulia</namePart>
      <namePart type="family">Comsa</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <name type="personal">
      <namePart type="given">Srini</namePart>
      <namePart type="family">Narayanan</namePart>
      <role>
        <roleTerm authority="marcrelator" type="text">author</roleTerm>
      </role>
    </name>
    <originInfo>
      <dateIssued>2023-12</dateIssued>
    </originInfo>
    <typeOfResource>text</typeOfResource>
    <!-- Host item: the proceedings volume, with its editors and publisher. -->
    <relatedItem type="host">
      <titleInfo>
        <title>Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing</title>
      </titleInfo>
      <name type="personal">
        <namePart type="given">Houda</namePart>
        <namePart type="family">Bouamor</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Juan</namePart>
        <namePart type="family">Pino</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <name type="personal">
        <namePart type="given">Kalika</namePart>
        <namePart type="family">Bali</namePart>
        <role>
          <roleTerm authority="marcrelator" type="text">editor</roleTerm>
        </role>
      </name>
      <originInfo>
        <publisher>Association for Computational Linguistics</publisher>
        <place>
          <placeTerm type="text">Singapore</placeTerm>
        </place>
      </originInfo>
      <genre authority="marcgt">conference publication</genre>
    </relatedItem>
    <abstract>Spatial reasoning is a fundamental building block of human cognition, used in representing, grounding, and reasoning about physical and abstract concepts. We propose a novel benchmark focused on assessing inferential properties of statements with spatial prepositions. The benchmark includes original datasets in English and Romanian and aims to probe the limits of reasoning about spatial relations in large language models. We use prompt engineering to study the performance of two families of large language models, PaLM and GPT-3, on our benchmark. Our results show considerable variability in the performance of smaller and larger models, as well as across prompts and languages. However, none of the models reaches human performance.</abstract>
    <identifier type="citekey">comsa-narayanan-2023-benchmark</identifier>
    <identifier type="doi">10.18653/v1/2023.emnlp-main.1015</identifier>
    <location>
      <url>https://aclanthology.org/2023.emnlp-main.1015/</url>
    </location>
    <!-- Page range of the paper within the host proceedings. -->
    <part>
      <date>2023-12</date>
      <extent unit="page">
        <start>16328</start>
        <end>16335</end>
      </extent>
    </part>
  </mods>
</modsCollection>
%0 Conference Proceedings
%T A Benchmark for Reasoning with Spatial Prepositions
%A Comsa, Iulia
%A Narayanan, Srini
%Y Bouamor, Houda
%Y Pino, Juan
%Y Bali, Kalika
%S Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing
%D 2023
%8 December
%I Association for Computational Linguistics
%C Singapore
%F comsa-narayanan-2023-benchmark
%X Spatial reasoning is a fundamental building block of human cognition, used in representing, grounding, and reasoning about physical and abstract concepts. We propose a novel benchmark focused on assessing inferential properties of statements with spatial prepositions. The benchmark includes original datasets in English and Romanian and aims to probe the limits of reasoning about spatial relations in large language models. We use prompt engineering to study the performance of two families of large language models, PaLM and GPT-3, on our benchmark. Our results show considerable variability in the performance of smaller and larger models, as well as across prompts and languages. However, none of the models reaches human performance.
%R 10.18653/v1/2023.emnlp-main.1015
%U https://aclanthology.org/2023.emnlp-main.1015/
%U https://doi.org/10.18653/v1/2023.emnlp-main.1015
%P 16328-16335
Markdown (Informal)
[A Benchmark for Reasoning with Spatial Prepositions](https://aclanthology.org/2023.emnlp-main.1015/) (Comsa & Narayanan, EMNLP 2023)
ACL
- Iulia Comsa and Srini Narayanan. 2023. A Benchmark for Reasoning with Spatial Prepositions. In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing, pages 16328–16335, Singapore. Association for Computational Linguistics.