


default search action
BibTeX records: Aengus Lynch
% arXiv preprint indexed under CoRR (record exported from dblp).
% url, biburl and bibsource point directly at doi.org / dblp.org.
@article{DBLP:journals/corr/abs-2502-17578,
  author     = {Rylan Schaeffer and
                Joshua Kazdan and
                John Hughes and
                Jordan Juravsky and
                Sara Price and
                Aengus Lynch and
                Erik Jones and
                Robert Kirk and
                Azalia Mirhoseini and
                Sanmi Koyejo},
  title      = {How Do Large Language Monkeys Get Their Power (Laws)?},
  journal    = {CoRR},
  volume     = {abs/2502.17578},
  year       = {2025},
  url        = {https://doi.org/10.48550/arXiv.2502.17578},
  doi        = {10.48550/ARXIV.2502.17578},
  eprinttype = {arXiv},
  eprint     = {2502.17578},
  timestamp  = {Fri, 21 Mar 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2502-17578.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% NeurIPS 2024 conference paper (record exported from dblp).
% NOTE(review): looks like the published version of the preprint
% DBLP:journals/corr/abs-2407-12404 -- cite only one of the two.
% NOTE(review): editor spelling "Globerson" and proceedings volume 37 were
% normalised against the NeurIPS 2023 record (volume 36) -- confirm against
% the official proceedings page.
@inproceedings{DBLP:conf/nips/TanCLPKGK24,
  author    = {Daniel Tan and
               David Chanin and
               Aengus Lynch and
               Brooks Paige and
               Dimitrios Kanoulas and
               Adri{\`{a}} Garriga{-}Alonso and
               Robert Kirk},
  editor    = {Amir Globerson and
               Lester Mackey and
               Danielle Belgrave and
               Angela Fan and
               Ulrich Paquet and
               Jakub M. Tomczak and
               Cheng Zhang},
  title     = {Analysing the Generalisation and Reliability of Steering Vectors},
  booktitle = {Advances in Neural Information Processing Systems 37: Annual Conference on Neural Information Processing Systems 2024, NeurIPS 2024, Vancouver, BC, Canada, December 10 - 15, 2024},
  year      = {2024},
  url       = {http://papers.nips.cc/paper\_files/paper/2024/hash/fb3ad59a84799bfb8d700e56d19c231b-Abstract-Conference.html},
  timestamp = {Thu, 13 Feb 2025 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/TanCLPKGK24.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint indexed under CoRR (record exported from dblp).
% The acronym in the title is brace-protected so sentence-casing styles keep it.
@article{DBLP:journals/corr/abs-2402-16835,
  author     = {Aengus Lynch and
                Phillip Guo and
                Aidan Ewart and
                Stephen Casper and
                Dylan Hadfield{-}Menell},
  title      = {Eight Methods to Evaluate Robust Unlearning in {LLMs}},
  journal    = {CoRR},
  volume     = {abs/2402.16835},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.16835},
  doi        = {10.48550/ARXIV.2402.16835},
  eprinttype = {arXiv},
  eprint     = {2402.16835},
  timestamp  = {Mon, 25 Mar 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-16835.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint indexed under CoRR (record exported from dblp).
% NOTE(review): looks like the preprint of DBLP:conf/nips/TanCLPKGK24
% (same author set, title differs only in spelling) -- cite only one.
@article{DBLP:journals/corr/abs-2407-12404,
  author     = {Daniel Tan and
                David Chanin and
                Aengus Lynch and
                Dimitrios Kanoulas and
                Brooks Paige and
                Adri{\`{a}} Garriga{-}Alonso and
                Robert Kirk},
  title      = {Analyzing the Generalization and Reliability of Steering Vectors},
  journal    = {CoRR},
  volume     = {abs/2407.12404},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2407.12404},
  doi        = {10.48550/ARXIV.2407.12404},
  eprinttype = {arXiv},
  eprint     = {2407.12404},
  timestamp  = {Mon, 03 Mar 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2407-12404.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint indexed under CoRR (record exported from dblp).
% The acronym in the title is brace-protected so sentence-casing styles keep it.
@article{DBLP:journals/corr/abs-2407-15549,
  author     = {Abhay Sheshadri and
                Aidan Ewart and
                Phillip Guo and
                Aengus Lynch and
                Cindy Wu and
                Vivek Hebbar and
                Henry Sleight and
                Asa Cooper Stickland and
                Ethan Perez and
                Dylan Hadfield{-}Menell and
                Stephen Casper},
  title      = {Targeted Latent Adversarial Training Improves Robustness to Persistent Harmful Behaviors in {LLMs}},
  journal    = {CoRR},
  volume     = {abs/2407.15549},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2407.15549},
  doi        = {10.48550/ARXIV.2407.15549},
  eprinttype = {arXiv},
  eprint     = {2407.15549},
  timestamp  = {Mon, 19 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2407-15549.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint indexed under CoRR (record exported from dblp).
% "Best-of-N" is brace-protected so the capital N survives sentence casing.
@article{DBLP:journals/corr/abs-2412-03556,
  author     = {John Hughes and
                Sara Price and
                Aengus Lynch and
                Rylan Schaeffer and
                Fazl Barez and
                Sanmi Koyejo and
                Henry Sleight and
                Erik Jones and
                Ethan Perez and
                Mrinank Sharma},
  title      = {{Best-of-N} Jailbreaking},
  journal    = {CoRR},
  volume     = {abs/2412.03556},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2412.03556},
  doi        = {10.48550/ARXIV.2412.03556},
  eprinttype = {arXiv},
  eprint     = {2412.03556},
  timestamp  = {Mon, 13 Jan 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2412-03556.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% NeurIPS 2023 conference paper (record exported from dblp).
% NOTE(review): same title and authors as the preprint
% DBLP:journals/corr/abs-2304-14997 -- cite only one of the two.
@inproceedings{DBLP:conf/nips/ConmyMLHG23,
  author    = {Arthur Conmy and
               Augustine N. Mavor{-}Parker and
               Aengus Lynch and
               Stefan Heimersheim and
               Adri{\`{a}} Garriga{-}Alonso},
  editor    = {Alice Oh and
               Tristan Naumann and
               Amir Globerson and
               Kate Saenko and
               Moritz Hardt and
               Sergey Levine},
  title     = {Towards Automated Circuit Discovery for Mechanistic Interpretability},
  booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023},
  year      = {2023},
  url       = {http://papers.nips.cc/paper\_files/paper/2023/hash/34e1dbe95d34d7ebaf99b9bcaeb5b2be-Abstract-Conference.html},
  timestamp = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/nips/ConmyMLHG23.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint indexed under CoRR (record exported from dblp).
% {A} after the colon is brace-protected so it stays capitalised.
@article{DBLP:journals/corr/abs-2303-05470,
  author     = {Aengus Lynch and
                Gb{\`{e}}tondji J.{-}S. Dovonon and
                Jean Kaddour and
                Ricardo Silva},
  title      = {Spawrious: {A} Benchmark for Fine Control of Spurious Correlation Biases},
  journal    = {CoRR},
  volume     = {abs/2303.05470},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2303.05470},
  doi        = {10.48550/ARXIV.2303.05470},
  eprinttype = {arXiv},
  eprint     = {2303.05470},
  timestamp  = {Wed, 05 Feb 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-05470.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint indexed under CoRR (record exported from dblp).
% NOTE(review): same title and authors as the NeurIPS 2023 record
% DBLP:conf/nips/ConmyMLHG23 -- cite only one of the two.
@article{DBLP:journals/corr/abs-2304-14997,
  author     = {Arthur Conmy and
                Augustine N. Mavor{-}Parker and
                Aengus Lynch and
                Stefan Heimersheim and
                Adri{\`{a}} Garriga{-}Alonso},
  title      = {Towards Automated Circuit Discovery for Mechanistic Interpretability},
  journal    = {CoRR},
  volume     = {abs/2304.14997},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2304.14997},
  doi        = {10.48550/ARXIV.2304.14997},
  eprinttype = {arXiv},
  eprint     = {2304.14997},
  timestamp  = {Thu, 04 May 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2304-14997.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint indexed under CoRR (record exported from dblp).
% {A} after the colon is brace-protected so it stays capitalised.
@article{DBLP:journals/corr/abs-2206-15475,
  author     = {Jean Kaddour and
                Aengus Lynch and
                Qi Liu and
                Matt J. Kusner and
                Ricardo Silva},
  title      = {Causal Machine Learning: {A} Survey and Open Problems},
  journal    = {CoRR},
  volume     = {abs/2206.15475},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2206.15475},
  doi        = {10.48550/ARXIV.2206.15475},
  eprinttype = {arXiv},
  eprint     = {2206.15475},
  timestamp  = {Wed, 05 Feb 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2206-15475.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}

manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.