@inproceedings{razdaibiedina-etal-2023-residual,
title = "Residual Prompt Tuning: improving prompt tuning with residual reparameterization",
author = "Razdaibiedina, Anastasiia and
Mao, Yuning and
Khabsa, Madian and
Lewis, Mike and
Hou, Rui and
Ba, Jimmy and
Almahairi, Amjad",
editor = "Rogers, Anna and
Boyd-Graber, Jordan and
Okazaki, Naoaki",
booktitle = "Findings of the Association for Computational Linguistics: ACL 2023",
month = jul,
year = "2023",
address = "Toronto, Canada",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2023.findings-acl.421/",
doi = "10.18653/v1/2023.findings-acl.421",
pages = "6740--6757",
abstract = "Prompt tuning is one of the successful approaches for parameter-efficient tuning of pre-trained language models. Despite being arguably the most parameter-efficient (tuned soft prompts constitute {\ensuremath{<}}0.1{\%} of total parameters), it typically performs worse than other efficient tuning methods and is quite sensitive to hyper-parameters. In this work, we introduce Residual Prompt Tuning - a simple and efficient method that significantly improves the performance and stability of prompt tuning. We propose to reparameterize soft prompt embeddings using a shallow network with a residual connection. Our experiments show that Residual Prompt Tuning significantly outperforms prompt tuning across T5-Large, T5-Base and BERT-Base models. Notably, our method reaches +7 points improvement over prompt tuning on SuperGLUE benchmark with T5-Base model and allows to reduce the prompt length by 10 times without hurting performance. In addition, we show that our approach is robust to the choice of learning rate and prompt initialization, and is effective in few-shot settings."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="razdaibiedina-etal-2023-residual">
<titleInfo>
<title>Residual Prompt Tuning: improving prompt tuning with residual reparameterization</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anastasiia</namePart>
<namePart type="family">Razdaibiedina</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yuning</namePart>
<namePart type="family">Mao</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Madian</namePart>
<namePart type="family">Khabsa</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Mike</namePart>
<namePart type="family">Lewis</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Rui</namePart>
<namePart type="family">Hou</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jimmy</namePart>
<namePart type="family">Ba</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Amjad</namePart>
<namePart type="family">Almahairi</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2023-07</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: ACL 2023</title>
</titleInfo>
<name type="personal">
<namePart type="given">Anna</namePart>
<namePart type="family">Rogers</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jordan</namePart>
<namePart type="family">Boyd-Graber</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Naoaki</namePart>
<namePart type="family">Okazaki</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Toronto, Canada</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Prompt tuning is one of the successful approaches for parameter-efficient tuning of pre-trained language models. Despite being arguably the most parameter-efficient (tuned soft prompts constitute \ensuremath<0.1% of total parameters), it typically performs worse than other efficient tuning methods and is quite sensitive to hyper-parameters. In this work, we introduce Residual Prompt Tuning - a simple and efficient method that significantly improves the performance and stability of prompt tuning. We propose to reparameterize soft prompt embeddings using a shallow network with a residual connection. Our experiments show that Residual Prompt Tuning significantly outperforms prompt tuning across T5-Large, T5-Base and BERT-Base models. Notably, our method reaches +7 points improvement over prompt tuning on SuperGLUE benchmark with T5-Base model and allows to reduce the prompt length by 10 times without hurting performance. In addition, we show that our approach is robust to the choice of learning rate and prompt initialization, and is effective in few-shot settings.</abstract>
<identifier type="citekey">razdaibiedina-etal-2023-residual</identifier>
<identifier type="doi">10.18653/v1/2023.findings-acl.421</identifier>
<location>
<url>https://aclanthology.org/2023.findings-acl.421/</url>
</location>
<part>
<date>2023-07</date>
<extent unit="page">
<start>6740</start>
<end>6757</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Residual Prompt Tuning: improving prompt tuning with residual reparameterization
%A Razdaibiedina, Anastasiia
%A Mao, Yuning
%A Khabsa, Madian
%A Lewis, Mike
%A Hou, Rui
%A Ba, Jimmy
%A Almahairi, Amjad
%Y Rogers, Anna
%Y Boyd-Graber, Jordan
%Y Okazaki, Naoaki
%S Findings of the Association for Computational Linguistics: ACL 2023
%D 2023
%8 July
%I Association for Computational Linguistics
%C Toronto, Canada
%F razdaibiedina-etal-2023-residual
%X Prompt tuning is one of the successful approaches for parameter-efficient tuning of pre-trained language models. Despite being arguably the most parameter-efficient (tuned soft prompts constitute \ensuremath<0.1% of total parameters), it typically performs worse than other efficient tuning methods and is quite sensitive to hyper-parameters. In this work, we introduce Residual Prompt Tuning - a simple and efficient method that significantly improves the performance and stability of prompt tuning. We propose to reparameterize soft prompt embeddings using a shallow network with a residual connection. Our experiments show that Residual Prompt Tuning significantly outperforms prompt tuning across T5-Large, T5-Base and BERT-Base models. Notably, our method reaches +7 points improvement over prompt tuning on SuperGLUE benchmark with T5-Base model and allows to reduce the prompt length by 10 times without hurting performance. In addition, we show that our approach is robust to the choice of learning rate and prompt initialization, and is effective in few-shot settings.
%R 10.18653/v1/2023.findings-acl.421
%U https://aclanthology.org/2023.findings-acl.421/
%U https://doi.org/10.18653/v1/2023.findings-acl.421
%P 6740-6757
Markdown (Informal)
[Residual Prompt Tuning: improving prompt tuning with residual reparameterization](https://aclanthology.org/2023.findings-acl.421/) (Razdaibiedina et al., Findings 2023)
ACL