@inproceedings{ohta-etal-2022-joeys2t,
title = "{J}oey{S}2{T}: Minimalistic Speech-to-Text Modeling with {J}oey{NMT}",
author = "Ohta, Mayumi and
Kreutzer, Julia and
Riezler, Stefan",
editor = "Che, Wanxiang and
Shutova, Ekaterina",
booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations",
month = dec,
year = "2022",
address = "Abu Dhabi, UAE",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.emnlp-demos.6/",
doi = "10.18653/v1/2022.emnlp-demos.6",
pages = "50--59",
    abstract = "JoeyS2T is a JoeyNMT extension for speech-to-text tasks such as automatic speech recognition and end-to-end speech translation. It inherits the core philosophy of JoeyNMT, a minimalist NMT toolkit built on PyTorch, seeking simplicity and accessibility. JoeyS2T's workflow is self-contained, starting from data pre-processing, over model training and prediction to evaluation, and is seamlessly integrated into JoeyNMT's compact and simple code base. On top of JoeyNMT's state-of-the-art Transformer-based Encoder-Decoder architecture, JoeyS2T provides speech-oriented components such as convolutional layers, SpecAugment, CTC-loss, and WER evaluation. Despite its simplicity compared to prior implementations, JoeyS2T performs competitively on English speech recognition and English-to-German speech translation benchmarks. The implementation is accompanied by a walk-through tutorial and available on \url{https://github.com/may-/joeys2t}."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="ohta-etal-2022-joeys2t">
<titleInfo>
<title>JoeyS2T: Minimalistic Speech-to-Text Modeling with JoeyNMT</title>
</titleInfo>
<name type="personal">
<namePart type="given">Mayumi</namePart>
<namePart type="family">Ohta</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Julia</namePart>
<namePart type="family">Kreutzer</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Stefan</namePart>
<namePart type="family">Riezler</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations</title>
</titleInfo>
<name type="personal">
<namePart type="given">Wanxiang</namePart>
<namePart type="family">Che</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Ekaterina</namePart>
<namePart type="family">Shutova</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, UAE</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
    <abstract>JoeyS2T is a JoeyNMT extension for speech-to-text tasks such as automatic speech recognition and end-to-end speech translation. It inherits the core philosophy of JoeyNMT, a minimalist NMT toolkit built on PyTorch, seeking simplicity and accessibility. JoeyS2T’s workflow is self-contained, starting from data pre-processing, over model training and prediction to evaluation, and is seamlessly integrated into JoeyNMT’s compact and simple code base. On top of JoeyNMT’s state-of-the-art Transformer-based Encoder-Decoder architecture, JoeyS2T provides speech-oriented components such as convolutional layers, SpecAugment, CTC-loss, and WER evaluation. Despite its simplicity compared to prior implementations, JoeyS2T performs competitively on English speech recognition and English-to-German speech translation benchmarks. The implementation is accompanied by a walk-through tutorial and available on https://github.com/may-/joeys2t.</abstract>
<identifier type="citekey">ohta-etal-2022-joeys2t</identifier>
<identifier type="doi">10.18653/v1/2022.emnlp-demos.6</identifier>
<location>
<url>https://aclanthology.org/2022.emnlp-demos.6/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>50</start>
<end>59</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T JoeyS2T: Minimalistic Speech-to-Text Modeling with JoeyNMT
%A Ohta, Mayumi
%A Kreutzer, Julia
%A Riezler, Stefan
%Y Che, Wanxiang
%Y Shutova, Ekaterina
%S Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, UAE
%F ohta-etal-2022-joeys2t
%X JoeyS2T is a JoeyNMT extension for speech-to-text tasks such as automatic speech recognition and end-to-end speech translation. It inherits the core philosophy of JoeyNMT, a minimalist NMT toolkit built on PyTorch, seeking simplicity and accessibility. JoeyS2T’s workflow is self-contained, starting from data pre-processing, over model training and prediction to evaluation, and is seamlessly integrated into JoeyNMT’s compact and simple code base. On top of JoeyNMT’s state-of-the-art Transformer-based Encoder-Decoder architecture, JoeyS2T provides speech-oriented components such as convolutional layers, SpecAugment, CTC-loss, and WER evaluation. Despite its simplicity compared to prior implementations, JoeyS2T performs competitively on English speech recognition and English-to-German speech translation benchmarks. The implementation is accompanied by a walk-through tutorial and available on https://github.com/may-/joeys2t.
%R 10.18653/v1/2022.emnlp-demos.6
%U https://aclanthology.org/2022.emnlp-demos.6/
%U https://doi.org/10.18653/v1/2022.emnlp-demos.6
%P 50-59
Markdown (Informal)
[JoeyS2T: Minimalistic Speech-to-Text Modeling with JoeyNMT](https://aclanthology.org/2022.emnlp-demos.6/) (Ohta et al., EMNLP 2022)
ACL
- Mayumi Ohta, Julia Kreutzer, and Stefan Riezler. 2022. JoeyS2T: Minimalistic Speech-to-Text Modeling with JoeyNMT. In Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing: System Demonstrations, pages 50–59, Abu Dhabi, UAE. Association for Computational Linguistics.