@inproceedings{palm-etal-2017-end,
title = "End-to-End Information Extraction without Token-Level Supervision",
author = "Palm, Rasmus Berg and
Hovy, Dirk and
Laws, Florian and
Winther, Ole",
editor = "Ruiz, Nicholas and
Bangalore, Srinivas",
booktitle = "Proceedings of the Workshop on Speech-Centric Natural Language Processing",
month = sep,
year = "2017",
address = "Copenhagen, Denmark",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/W17-4606/",
doi = "10.18653/v1/W17-4606",
pages = "48--52",
abstract = "Most state-of-the-art information extraction approaches rely on token-level labels to find the areas of interest in text. Unfortunately, these labels are time-consuming and costly to create, and consequently, not available for many real-life IE tasks. To make matters worse, token-level labels are usually not the desired output, but just an intermediary step. End-to-end (E2E) models, which take raw text as input and produce the desired output directly, need not depend on token-level labels. We propose an E2E model based on pointer networks, which can be trained directly on pairs of raw input and output text. We evaluate our model on the ATIS data set, MIT restaurant corpus and the MIT movie corpus and compare to neural baselines that do use token-level labels. We achieve competitive results, within a few percentage points of the baselines, showing the feasibility of E2E information extraction without the need for token-level labels. This opens up new possibilities, as for many tasks currently addressed by human extractors, raw input and output data are available, but not token-level labels."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="palm-etal-2017-end">
<titleInfo>
<title>End-to-End Information Extraction without Token-Level Supervision</title>
</titleInfo>
<name type="personal">
<namePart type="given">Rasmus</namePart>
<namePart type="given">Berg</namePart>
<namePart type="family">Palm</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Dirk</namePart>
<namePart type="family">Hovy</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Florian</namePart>
<namePart type="family">Laws</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ole</namePart>
<namePart type="family">Winther</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2017-09</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the Workshop on Speech-Centric Natural Language Processing</title>
</titleInfo>
<name type="personal">
<namePart type="given">Nicholas</namePart>
<namePart type="family">Ruiz</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Srinivas</namePart>
<namePart type="family">Bangalore</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Copenhagen, Denmark</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Most state-of-the-art information extraction approaches rely on token-level labels to find the areas of interest in text. Unfortunately, these labels are time-consuming and costly to create, and consequently, not available for many real-life IE tasks. To make matters worse, token-level labels are usually not the desired output, but just an intermediary step. End-to-end (E2E) models, which take raw text as input and produce the desired output directly, need not depend on token-level labels. We propose an E2E model based on pointer networks, which can be trained directly on pairs of raw input and output text. We evaluate our model on the ATIS data set, MIT restaurant corpus and the MIT movie corpus and compare to neural baselines that do use token-level labels. We achieve competitive results, within a few percentage points of the baselines, showing the feasibility of E2E information extraction without the need for token-level labels. This opens up new possibilities, as for many tasks currently addressed by human extractors, raw input and output data are available, but not token-level labels.</abstract>
<identifier type="citekey">palm-etal-2017-end</identifier>
<identifier type="doi">10.18653/v1/W17-4606</identifier>
<location>
<url>https://aclanthology.org/W17-4606/</url>
</location>
<part>
<date>2017-09</date>
<extent unit="page">
<start>48</start>
<end>52</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T End-to-End Information Extraction without Token-Level Supervision
%A Palm, Rasmus Berg
%A Hovy, Dirk
%A Laws, Florian
%A Winther, Ole
%Y Ruiz, Nicholas
%Y Bangalore, Srinivas
%S Proceedings of the Workshop on Speech-Centric Natural Language Processing
%D 2017
%8 September
%I Association for Computational Linguistics
%C Copenhagen, Denmark
%F palm-etal-2017-end
%X Most state-of-the-art information extraction approaches rely on token-level labels to find the areas of interest in text. Unfortunately, these labels are time-consuming and costly to create, and consequently, not available for many real-life IE tasks. To make matters worse, token-level labels are usually not the desired output, but just an intermediary step. End-to-end (E2E) models, which take raw text as input and produce the desired output directly, need not depend on token-level labels. We propose an E2E model based on pointer networks, which can be trained directly on pairs of raw input and output text. We evaluate our model on the ATIS data set, MIT restaurant corpus and the MIT movie corpus and compare to neural baselines that do use token-level labels. We achieve competitive results, within a few percentage points of the baselines, showing the feasibility of E2E information extraction without the need for token-level labels. This opens up new possibilities, as for many tasks currently addressed by human extractors, raw input and output data are available, but not token-level labels.
%R 10.18653/v1/W17-4606
%U https://aclanthology.org/W17-4606/
%U https://doi.org/10.18653/v1/W17-4606
%P 48-52
Markdown (Informal)
[End-to-End Information Extraction without Token-Level Supervision](https://aclanthology.org/W17-4606/) (Palm et al., 2017)
ACL