2.7/distributed.fsdp.fully_shard.html



<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  
  <title>torch.distributed.fsdp.fully_shard &mdash; PyTorch 2.7 documentation</title>
  

    <link rel="canonical" href="https://pytorch.org/docs/stable/distributed.fsdp.fully_shard.html"/>
  

  <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="_static/copybutton.css" type="text/css" />
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.10.0-beta/dist/katex.min.css" type="text/css" />
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.13.11/dist/katex.min.css" type="text/css" />
  <link rel="stylesheet" href="_static/katex-math.css" type="text/css" />
  <link rel="stylesheet" href="_static/sphinx-dropdown.css" type="text/css" />
  <link rel="stylesheet" href="_static/panels-bootstrap.min.css" type="text/css" />
  <link rel="stylesheet" href="_static/css/jit.css" type="text/css" />
  <link rel="stylesheet" href="_static/css/custom.css" type="text/css" />
    <link rel="index" title="Index" href="genindex.html" />
    <link rel="search" title="Search" href="search.html" />
    <link rel="next" title="Tensor Parallelism - torch.distributed.tensor.parallel" href="distributed.tensor.parallel.html" />
    <link rel="prev" title="FullyShardedDataParallel" href="fsdp.html" />


  <!-- Google Tag Manager -->
  <!--
    Inline loader. It makes sure window.dataLayer exists, pushes a 'gtm.start'
    event stamped with the current time, then creates an async script element
    for gtm.js (container id GTM-T8XT4PS) and inserts it before the first
    script element found in the document. When the data layer name is not
    'dataLayer', it is appended to the URL as an extra "l" query parameter.
  -->
    <script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
    new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
    j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
    'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
    })(window,document,'script','dataLayer','GTM-T8XT4PS');</script>
    <!-- End Google Tag Manager -->
  

  <script src="_static/js/modernizr.min.js"></script>

  <!-- Preload the theme fonts -->

<link rel="preload" href="_static/fonts/FreightSans/freight-sans-book.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="_static/fonts/FreightSans/freight-sans-medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="_static/fonts/FreightSans/freight-sans-bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="_static/fonts/FreightSans/freight-sans-medium-italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff2" as="font" type="font/woff2" crossorigin="anonymous">

<!-- Preload the katex fonts -->

<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Math-Italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size1-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size4-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size2-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size3-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Caligraphic-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
  <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.2/css/all.css" integrity="sha384-vSIIfh2YWi9wW0r9iZe7RJPrKwp6bG+s9QZMoITbCckVJqGCCRhc+ccxNcdpHuYu" crossorigin="anonymous">
</head>

<div class="container-fluid header-holder tutorials-header" id="header-holder">
  <div class="container">
    <div class="header-container">
      <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>

      <div class="main-menu">
        <ul>

          <li class="main-menu-item">
          <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Learn
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/get-started">
                  <span class="dropdown-title">Get Started</span>
                  <p>Run PyTorch locally or get started quickly with one of the supported cloud platforms</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tutorials">
                  <span class="dropdown-title">Tutorials</span>
                  <p>What's new in PyTorch tutorials</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tutorials/beginner/basics/intro.html">
                  <span class="dropdown-title">Learn the Basics</span>
                  <p>Familiarize yourself with PyTorch concepts and modules</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tutorials/recipes/recipes_index.html">
                  <span class="dropdown-title">PyTorch Recipes</span>
                  <p>Bite-size, ready-to-deploy PyTorch code examples</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tutorials/beginner/introyt.html">
                  <span class="dropdown-title">Intro to PyTorch - YouTube Series</span>
                  <p>Master PyTorch basics with our engaging YouTube tutorial series</p>
                </a>
              </div>
            </div>
          </li>

          <li>
          <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Ecosystem
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/ecosystem">
                  <span class="dropdown-title">Tools</span>
                  <p>Learn about the tools and frameworks in the PyTorch Ecosystem</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/#community-module">
                  <span class="dropdown-title">Community</span>
                  <p>Join the PyTorch developer community to contribute, learn, and get your questions answered</p>
                </a>
                <a class="nav-dropdown-item" href="https://discuss.pytorch.org/" target="_blank" rel="noopener">
                  <span class="dropdown-title">Forums</span>
                  <p>A place to discuss PyTorch code, issues, install, research</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/resources">
                  <span class="dropdown-title">Developer Resources</span>
                  <p>Find resources and get questions answered</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/ecosystem/contributor-awards-2024">
                  <span class="dropdown-title">Contributor Awards - 2024</span>
                  <p>Award winners announced at this year's PyTorch Conference</p>
                </a>
              </div>
            </div>
          </li>

          <li>
          <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Edge
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/edge">
                  <span class="dropdown-title">About PyTorch Edge</span>
                  <p>Build innovative and privacy-aware AI experiences for edge devices</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/executorch-overview">
                  <span class="dropdown-title">ExecuTorch</span>
                  <p>End-to-end solution for enabling on-device inference capabilities across mobile and edge devices</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/executorch/stable/index.html">
                  <span class="dropdown-title">ExecuTorch Docs</span>
                </a>
              </div>
            </div>  
          </li>

          <li class="main-menu-item">
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Docs
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/docs/stable/index.html">
                  <span class="dropdown-title">PyTorch</span>
                  <p>Explore the documentation for comprehensive guidance on how to use PyTorch</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/pytorch-domains">
                  <span class="dropdown-title">PyTorch Domains</span>
                  <p>Read the PyTorch Domains documentation to learn more about domain-specific libraries</p>
                </a>
              </div>
            </div>
          </li>

          <li>
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                Blogs &amp; News
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/blog/">
                  <span class="dropdown-title">PyTorch Blog</span>
                  <p>Catch up on the latest technical news and happenings</p>
                </a>
                 <a class="nav-dropdown-item" href="https://pytorch.org/community-blog">
                  <span class="dropdown-title">Community Blog</span>
                  <p>Stories from the PyTorch ecosystem</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/videos">
                  <span class="dropdown-title">Videos</span>
                  <p>Learn about the latest PyTorch tutorials, news, and more</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/community-stories">
                  <span class="dropdown-title">Community Stories</span>
                  <p>Learn how our community solves real, everyday machine learning problems with PyTorch</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/events">
                  <span class="dropdown-title">Events</span>
                  <p>Find events, webinars, and podcasts</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/newsletter">
                  <span class="dropdown-title">Newsletter</span>
                  <p>Stay up-to-date with the latest updates</p>
                </a>
              </div>
            </div>
          </li>

          <li>
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="with-down-arrow">
                About
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/foundation">
                  <span class="dropdown-title">PyTorch Foundation</span>
                  <p>Learn more about the PyTorch Foundation</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/governing-board">
                  <span class="dropdown-title">Governing Board</span>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/credits">
                  <span class="dropdown-title">Cloud Credit Program</span>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/tac">
                  <span class="dropdown-title">Technical Advisory Council</span>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/staff">
                  <span class="dropdown-title">Staff</span>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/contact-us">
                  <span class="dropdown-title">Contact Us</span>
                </a>
              </div>
            </div>
          </li>

          <li class="main-menu-item">
            <div class="no-dropdown">
              <a href="https://pytorch.org/join" data-cta="join">
                Become a Member
              </a>
            </div>
          </li>
          <li>
           <div class="main-menu-item">
             <a href="https://github.com/pytorch/pytorch" class="github-icon" aria-label="PyTorch on GitHub">
             </a>
           </div>
          </li>
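          <!-- TODO: This block adds the search icon to the nav bar. We will enable it later.
          NOTE(review): the href below still points at the GitHub repository; confirm the intended search target before enabling.
          <li>
            <div class="main-menu-item">
             <a href="https://github.com/pytorch/pytorch" class="search-icon">
             </a>
            </div>
          </li>
          -->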
        </ul>
      </div>

      <a class="main-menu-open-button" href="#" data-behavior="open-mobile-menu" aria-label="Open menu"></a>
    </div>
  </div>
</div>

<body class="pytorch-body">

   
    <div class="table-of-contents-link-wrapper">
      <span>Table of Contents</span>
      <a href="#" class="toggle-table-of-contents" data-behavior="toggle-table-of-contents"></a>
    </div>

    <nav data-toggle="wy-nav-shift" class="pytorch-left-menu" id="pytorch-left-menu">
      <div class="pytorch-side-scroll">
        <div class="pytorch-menu pytorch-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          <div class="pytorch-left-menu-search">
            
    <div class="version">
      <a href="https://pytorch.org/docs/versions.html">2.7 &#x25BC;</a>
    </div>
    <div id="searchBox">
    <div class="searchbox" id="googleSearchBox">
      <script async src="https://cse.google.com/cse.js?cx=e65585f8c3ea1440e"></script>
      <div class="gcse-search"></div>
    </div>
    <div id="sphinxSearchBox" style="display: none;">
      <div role="search">
        <form id="rtd-search-form" class="wy-form" action="search.html" method="get">
          <input type="text" name="q" placeholder="Search Docs" aria-label="Search Docs" />
          <input type="hidden" name="check_keywords" value="yes" />
          <input type="hidden" name="area" value="default" />
        </form>
      </div>
    </div>
  </div>
  <form id="searchForm">
    <label style="margin-bottom: 1rem">
      <input type="radio" name="searchType" value="google" checked>
      Google Search
    </label>
    <label style="margin-bottom: 1rem">
      <input type="radio" name="searchType" value="sphinx">
      Classic Search
    </label>
  </form>

  <script>
    // Search-box switcher.
    // Shows either the Google search box (#googleSearchBox) or the Sphinx
    // search box (#sphinxSearchBox), driven by the "searchType" radio buttons
    // in #searchForm. The user's choice is remembered in localStorage under
    // the key 'searchType'.
    document.addEventListener('DOMContentLoaded', function() {
      const searchForm = document.getElementById('searchForm');
      const googleSearchBox = document.getElementById('googleSearchBox');
      const sphinxSearchBox = document.getElementById('sphinxSearchBox');

      // Show the box that matches searchType and hide the other one.
      function toggleSearchBox(searchType) {
        googleSearchBox.style.display = searchType === 'google' ? 'block' : 'none';
        sphinxSearchBox.style.display = searchType === 'sphinx' ? 'block' : 'none';
      }

      // Return the saved preference, or null when nothing usable is stored.
      // The value is validated because it is later interpolated into a
      // selector: an unrecognised value would match no radio button and the
      // subsequent ".checked = true" would throw on null.
      function readStoredSearchType() {
        try {
          const stored = localStorage.getItem('searchType');
          return (stored === 'google' || stored === 'sphinx') ? stored : null;
        } catch (err) {
          // Accessing localStorage can throw when storage is blocked.
          return null;
        }
      }

      // Determine the default search type
      let defaultSearchType;
      const currentUrl = window.location.href;
      if (currentUrl.startsWith('https://pytorch.org/docs/stable')) {
        // For the stable documentation, default to Google unless a valid
        // preference was saved earlier.
        defaultSearchType = readStoredSearchType() || 'google';
      } else {
        // For any other version, including docs-preview, default to Sphinx
        defaultSearchType = 'sphinx';
      }

      // Reflect the default in the radio group and in the visible box.
      const defaultRadio = document.querySelector(`input[name="searchType"][value="${defaultSearchType}"]`);
      if (defaultRadio) {
        defaultRadio.checked = true;
      }
      toggleSearchBox(defaultSearchType);

      // Event listener for changes in search type
      searchForm.addEventListener('change', function(event) {
        const selectedSearchType = event.target.value;
        // Update the UI first so that a storage failure cannot leave the
        // wrong box on screen.
        toggleSearchBox(selectedSearchType);
        try {
          localStorage.setItem('searchType', selectedSearchType);
        } catch (err) {
          // Persisting the preference is best-effort; the toggle above has
          // already taken effect for this page view.
        }
      });

      // Set placeholder text for Google search box once the page has fully
      // loaded. addEventListener is used instead of assigning window.onload
      // so that other load handlers on the page are neither replaced by this
      // one nor able to replace it.
      // NOTE(review): '#gsc-i-id1' is presumably the input injected by the
      // Google search script; confirm the id is still current.
      window.addEventListener('load', function() {
        var placeholderText = "Search Docs";
        var googleSearchboxText = document.querySelector("#gsc-i-id1");
        if (googleSearchboxText) {
          googleSearchboxText.placeholder = placeholderText;
          googleSearchboxText.style.fontFamily = 'FreightSans';
          googleSearchboxText.style.fontSize = "1.2rem";
          googleSearchboxText.style.color = '#262626';
        }
      });
    });
  </script>

          </div>

          
              <p class="caption" role="heading"><span class="caption-text">Community</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="community/build_ci_governance.html">PyTorch Governance | Build + CI</a></li>
<li class="toctree-l1"><a class="reference internal" href="community/contribution_guide.html">PyTorch Contribution Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="community/design.html">PyTorch Design Philosophy</a></li>
<li class="toctree-l1"><a class="reference internal" href="community/governance.html">PyTorch Governance | Mechanics</a></li>
<li class="toctree-l1"><a class="reference internal" href="community/persons_of_interest.html">PyTorch Governance | Maintainers</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Developer Notes</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="notes/amp_examples.html">Automatic Mixed Precision examples</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/autograd.html">Autograd mechanics</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/broadcasting.html">Broadcasting semantics</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/cpu_threading_torchscript_inference.html">CPU threading and TorchScript inference</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/cuda.html">CUDA semantics</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/custom_operators.html">PyTorch Custom Operators Landing Page</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/ddp.html">Distributed Data Parallel</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/extending.html">Extending PyTorch</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/extending.func.html">Extending torch.func with autograd.Function</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/faq.html">Frequently Asked Questions</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/fsdp.html">FSDP Notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/get_start_xpu.html">Getting Started on Intel GPU</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/gradcheck.html">Gradcheck mechanics</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/hip.html">HIP (ROCm) semantics</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/large_scale_deployments.html">Features for large-scale deployments</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/libtorch_stable_abi.html">LibTorch Stable ABI</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/modules.html">Modules</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/mps.html">MPS backend</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/multiprocessing.html">Multiprocessing best practices</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/numerical_accuracy.html">Numerical accuracy</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/randomness.html">Reproducibility</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/serialization.html">Serialization semantics</a></li>
<li class="toctree-l1"><a class="reference internal" href="notes/windows.html">Windows FAQ</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Language Bindings</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="cpp_index.html">C++</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/javadoc/">Javadoc</a></li>
<li class="toctree-l1"><a class="reference internal" href="deploy.html">torch::deploy</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Python API</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="torch.html">torch</a></li>
<li class="toctree-l1"><a class="reference internal" href="nn.html">torch.nn</a></li>
<li class="toctree-l1"><a class="reference internal" href="nn.functional.html">torch.nn.functional</a></li>
<li class="toctree-l1"><a class="reference internal" href="tensors.html">torch.Tensor</a></li>
<li class="toctree-l1"><a class="reference internal" href="tensor_attributes.html">Tensor Attributes</a></li>
<li class="toctree-l1"><a class="reference internal" href="tensor_view.html">Tensor Views</a></li>
<li class="toctree-l1"><a class="reference internal" href="amp.html">torch.amp</a></li>
<li class="toctree-l1"><a class="reference internal" href="autograd.html">torch.autograd</a></li>
<li class="toctree-l1"><a class="reference internal" href="library.html">torch.library</a></li>
<li class="toctree-l1"><a class="reference internal" href="accelerator.html">torch.accelerator</a></li>
<li class="toctree-l1"><a class="reference internal" href="cpu.html">torch.cpu</a></li>
<li class="toctree-l1"><a class="reference internal" href="cuda.html">torch.cuda</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_cuda_memory.html">Understanding CUDA Memory Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_cuda_memory.html#generating-a-snapshot">Generating a Snapshot</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_cuda_memory.html#using-the-visualizer">Using the visualizer</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_cuda_memory.html#snapshot-api-reference">Snapshot API Reference</a></li>
<li class="toctree-l1"><a class="reference internal" href="mps.html">torch.mps</a></li>
<li class="toctree-l1"><a class="reference internal" href="xpu.html">torch.xpu</a></li>
<li class="toctree-l1"><a class="reference internal" href="mtia.html">torch.mtia</a></li>
<li class="toctree-l1"><a class="reference internal" href="mtia.memory.html">torch.mtia.memory</a></li>
<li class="toctree-l1"><a class="reference internal" href="meta.html">Meta device</a></li>
<li class="toctree-l1"><a class="reference internal" href="backends.html">torch.backends</a></li>
<li class="toctree-l1"><a class="reference internal" href="export.html">torch.export</a></li>
<li class="toctree-l1"><a class="reference internal" href="distributed.html">torch.distributed</a></li>
<li class="toctree-l1"><a class="reference internal" href="distributed.tensor.html">torch.distributed.tensor</a></li>
<li class="toctree-l1"><a class="reference internal" href="distributed.algorithms.join.html">torch.distributed.algorithms.join</a></li>
<li class="toctree-l1"><a class="reference internal" href="distributed.elastic.html">torch.distributed.elastic</a></li>
<li class="toctree-l1"><a class="reference internal" href="fsdp.html">torch.distributed.fsdp</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">torch.distributed.fsdp.fully_shard</a></li>
<li class="toctree-l1"><a class="reference internal" href="distributed.tensor.parallel.html">torch.distributed.tensor.parallel</a></li>
<li class="toctree-l1"><a class="reference internal" href="distributed.optim.html">torch.distributed.optim</a></li>
<li class="toctree-l1"><a class="reference internal" href="distributed.pipelining.html">torch.distributed.pipelining</a></li>
<li class="toctree-l1"><a class="reference internal" href="distributed.checkpoint.html">torch.distributed.checkpoint</a></li>
<li class="toctree-l1"><a class="reference internal" href="distributions.html">torch.distributions</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch.compiler.html">torch.compiler</a></li>
<li class="toctree-l1"><a class="reference internal" href="fft.html">torch.fft</a></li>
<li class="toctree-l1"><a class="reference internal" href="func.html">torch.func</a></li>
<li class="toctree-l1"><a class="reference internal" href="futures.html">torch.futures</a></li>
<li class="toctree-l1"><a class="reference internal" href="fx.html">torch.fx</a></li>
<li class="toctree-l1"><a class="reference internal" href="fx.experimental.html">torch.fx.experimental</a></li>
<li class="toctree-l1"><a class="reference internal" href="hub.html">torch.hub</a></li>
<li class="toctree-l1"><a class="reference internal" href="jit.html">torch.jit</a></li>
<li class="toctree-l1"><a class="reference internal" href="linalg.html">torch.linalg</a></li>
<li class="toctree-l1"><a class="reference internal" href="monitor.html">torch.monitor</a></li>
<li class="toctree-l1"><a class="reference internal" href="signal.html">torch.signal</a></li>
<li class="toctree-l1"><a class="reference internal" href="special.html">torch.special</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch.overrides.html">torch.overrides</a></li>
<li class="toctree-l1"><a class="reference internal" href="package.html">torch.package</a></li>
<li class="toctree-l1"><a class="reference internal" href="profiler.html">torch.profiler</a></li>
<li class="toctree-l1"><a class="reference internal" href="nn.init.html">torch.nn.init</a></li>
<li class="toctree-l1"><a class="reference internal" href="nn.attention.html">torch.nn.attention</a></li>
<li class="toctree-l1"><a class="reference internal" href="onnx.html">torch.onnx</a></li>
<li class="toctree-l1"><a class="reference internal" href="optim.html">torch.optim</a></li>
<li class="toctree-l1"><a class="reference internal" href="complex_numbers.html">Complex Numbers</a></li>
<li class="toctree-l1"><a class="reference internal" href="ddp_comm_hooks.html">DDP Communication Hooks</a></li>
<li class="toctree-l1"><a class="reference internal" href="quantization.html">Quantization</a></li>
<li class="toctree-l1"><a class="reference internal" href="rpc.html">Distributed RPC Framework</a></li>
<li class="toctree-l1"><a class="reference internal" href="random.html">torch.random</a></li>
<li class="toctree-l1"><a class="reference internal" href="masked.html">torch.masked</a></li>
<li class="toctree-l1"><a class="reference internal" href="nested.html">torch.nested</a></li>
<li class="toctree-l1"><a class="reference internal" href="size.html">torch.Size</a></li>
<li class="toctree-l1"><a class="reference internal" href="sparse.html">torch.sparse</a></li>
<li class="toctree-l1"><a class="reference internal" href="storage.html">torch.Storage</a></li>
<li class="toctree-l1"><a class="reference internal" href="testing.html">torch.testing</a></li>
<li class="toctree-l1"><a class="reference internal" href="utils.html">torch.utils</a></li>
<li class="toctree-l1"><a class="reference internal" href="benchmark_utils.html">torch.utils.benchmark</a></li>
<li class="toctree-l1"><a class="reference internal" href="bottleneck.html">torch.utils.bottleneck</a></li>
<li class="toctree-l1"><a class="reference internal" href="checkpoint.html">torch.utils.checkpoint</a></li>
<li class="toctree-l1"><a class="reference internal" href="cpp_extension.html">torch.utils.cpp_extension</a></li>
<li class="toctree-l1"><a class="reference internal" href="data.html">torch.utils.data</a></li>
<li class="toctree-l1"><a class="reference internal" href="deterministic.html">torch.utils.deterministic</a></li>
<li class="toctree-l1"><a class="reference internal" href="jit_utils.html">torch.utils.jit</a></li>
<li class="toctree-l1"><a class="reference internal" href="dlpack.html">torch.utils.dlpack</a></li>
<li class="toctree-l1"><a class="reference internal" href="mobile_optimizer.html">torch.utils.mobile_optimizer</a></li>
<li class="toctree-l1"><a class="reference internal" href="model_zoo.html">torch.utils.model_zoo</a></li>
<li class="toctree-l1"><a class="reference internal" href="tensorboard.html">torch.utils.tensorboard</a></li>
<li class="toctree-l1"><a class="reference internal" href="module_tracker.html">torch.utils.module_tracker</a></li>
<li class="toctree-l1"><a class="reference internal" href="type_info.html">Type Info</a></li>
<li class="toctree-l1"><a class="reference internal" href="named_tensor.html">Named Tensors</a></li>
<li class="toctree-l1"><a class="reference internal" href="name_inference.html">Named Tensors operator coverage</a></li>
<li class="toctree-l1"><a class="reference internal" href="config_mod.html">torch.__config__</a></li>
<li class="toctree-l1"><a class="reference internal" href="future_mod.html">torch.__future__</a></li>
<li class="toctree-l1"><a class="reference internal" href="logging.html">torch._logging</a></li>
<li class="toctree-l1"><a class="reference internal" href="torch_environment_variables.html">Torch Environment Variables</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Libraries</span></p>
<ul>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/audio/stable">torchaudio</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/data">TorchData</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/torchrec">TorchRec</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/serve">TorchServe</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/text/stable">torchtext</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/vision/stable">torchvision</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/xla/">PyTorch on XLA Devices</a></li>
<li class="toctree-l1"><a class="reference external" href="https://pytorch.org/ao">torchao</a></li>
</ul>

            
        </div>
      </div>
    </nav>

    <div class="pytorch-container">
      <div class="pytorch-page-level-bar" id="pytorch-page-level-bar">
        <div class="pytorch-breadcrumbs-wrapper">
          

<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="pytorch-breadcrumbs">
    
      <li>
        <a href="index.html">
          
            Docs
          
        </a> &gt;
      </li>

        
      <li>torch.distributed.fsdp.fully_shard</li>
    
    
      <li class="pytorch-breadcrumbs-aside">
        
            
            <a href="_sources/distributed.fsdp.fully_shard.rst.txt" rel="nofollow"><img src="_static/images/view-page-source-icon.svg" alt="View page source"></a>
          
        
      </li>
    
  </ul>

  
</div>
        </div>

        <div class="pytorch-shortcuts-wrapper" id="pytorch-shortcuts-wrapper">
          Shortcuts
        </div>
      </div>

      <section data-toggle="wy-nav-shift" id="pytorch-content-wrap" class="pytorch-content-wrap">
        <div class="pytorch-content-left">

        
          <!-- Google Tag Manager (noscript) -->
          <noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-T8XT4PS"
          height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
          <!-- End Google Tag Manager (noscript) -->
          
          <div class="rst-content">
          
            <div role="main" class="main-content" itemscope="itemscope" itemtype="http://schema.org/Article">
             <article itemprop="articleBody" id="pytorch-article" class="pytorch-article">
              
  <div class="section" id="torch-distributed-fsdp-fully-shard">
<h1>torch.distributed.fsdp.fully_shard<a class="headerlink" href="#torch-distributed-fsdp-fully-shard" title="Permalink to this heading">¶</a></h1>
<div class="section" id="pytorch-fsdp2-fully-shard">
<h2>PyTorch FSDP2 (<code class="docutils literal notranslate"><span class="pre">fully_shard</span></code>)<a class="headerlink" href="#pytorch-fsdp2-fully-shard" title="Permalink to this heading">¶</a></h2>
<p>PyTorch FSDP2 provides a fully sharded data parallelism (FSDP) implementation
targeting performant eager-mode while using per-parameter sharding for improved
usability.</p>
<ul class="simple">
<li><p>If you are new to FSDP, we recommend that you start with FSDP2 due to improved
usability.</p></li>
<li><p>If you are currently using FSDP1, consider evaluating the following
differences to see if you should switch to FSDP2:</p></li>
</ul>
<p>Compared to PyTorch FSDP1 (<code class="docutils literal notranslate"><span class="pre">FullyShardedDataParallel</span></code>):</p>
<ul class="simple">
<li><p>FSDP2 uses <code class="docutils literal notranslate"><span class="pre">DTensor</span></code>-based dim-0 per-parameter sharding for a simpler
sharding representation compared to FSDP1’s flat-parameter sharding, while
preserving similar throughput performance. More specifically, FSDP2 chunks
each parameter on dim-0 across the data parallel workers (using
<code class="docutils literal notranslate"><span class="pre">torch.chunk(dim=0)</span></code>), whereas FSDP1 flattens, concatenates, and chunks a
group of tensors together, making reasoning about what data is present on
each worker and resharding to different parallelisms complex. Per-parameter
sharding provides a more intuitive user experience, relaxes constraints
around frozen parameters, and allows for communication-free (sharded) state
dicts, which otherwise require all-gathers in FSDP1.</p></li>
<li><p>FSDP2 implements a different memory management approach to handle the
multi-stream usages that avoids <code class="docutils literal notranslate"><span class="pre">torch.Tensor.record_stream</span></code>. This ensures
deterministic and expected memory usage and does not require blocking the CPU
like in FSDP1’s <code class="docutils literal notranslate"><span class="pre">limit_all_gathers=True</span></code>.</p></li>
<li><p>FSDP2 exposes APIs for manual control over prefetching and collective
scheduling, allowing power users more customization. See the methods on
<code class="docutils literal notranslate"><span class="pre">FSDPModule</span></code> below for details.</p></li>
<li><p>FSDP2 simplifies some of the API surface: e.g. FSDP2 does not directly
support full state dicts. Instead, users can reshard the sharded state dicts
containing <code class="docutils literal notranslate"><span class="pre">DTensor</span></code> s to full state dicts themselves using <code class="docutils literal notranslate"><span class="pre">DTensor</span></code>
APIs like <code class="docutils literal notranslate"><span class="pre">DTensor.full_tensor()</span></code> or by using higher-level APIs like
<a class="reference external" href="https://pytorch.org/docs/stable/distributed.checkpoint.html">PyTorch Distributed Checkpoint</a>’s
distributed state dict APIs. Also, some other args have been removed; see
<a class="reference external" href="https://github.com/pytorch/torchtitan/blob/main/docs/fsdp.md">here</a> for
details.</p></li>
</ul>
<p>If you are onboarding FSDP for the first time or if any of the above appeals to
your use case, we recommend that you consider using FSDP2.</p>
<p>See <a class="reference external" href="https://github.com/pytorch/pytorch/issues/114299">this RFC</a> for details
on system design and implementation.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p><code class="docutils literal notranslate"><span class="pre">torch.distributed.fsdp.fully_shard</span></code> is currently in prototype state and
under development. The core API will likely not change, but we may make some
API changes if necessary.</p>
</div>
<p>The frontend API is <code class="docutils literal notranslate"><span class="pre">fully_shard</span></code> that can be called on a <code class="docutils literal notranslate"><span class="pre">module</span></code>:</p>
<dl class="py function">
<dt class="sig sig-object py" id="torch.distributed.fsdp.fully_shard">
<span class="sig-prename descclassname"><span class="pre">torch.distributed.fsdp.</span></span><span class="sig-name descname"><span class="pre">fully_shard</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">module</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mesh</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reshard_after_forward</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">shard_placement_fn</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mp_policy</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">MixedPrecisionPolicy(param_dtype=None,</span> <span class="pre">reduce_dtype=None,</span> <span class="pre">output_dtype=None,</span> <span class="pre">cast_forward_inputs=True)</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">offload_policy</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">OffloadPolicy()</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">ignored_params</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L84"><span class="viewcode-link"><span 
class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.fully_shard" title="Permalink to this definition">¶</a></dt>
<dd><p>Apply fully sharded data parallelism (FSDP) to <code class="docutils literal notranslate"><span class="pre">module</span></code>, where FSDP
shards module parameters, gradients, and optimizer states across data
parallel workers to save memory at the cost of communication.</p>
<p>At initialization, FSDP shards the module’s parameters across the data
parallel workers given by <code class="docutils literal notranslate"><span class="pre">mesh</span></code>. Before forward, FSDP all-gathers the
sharded parameters across the data-parallel workers to get the unsharded
parameters for forward computation. If <code class="docutils literal notranslate"><span class="pre">reshard_after_forward</span></code> is
<code class="docutils literal notranslate"><span class="pre">True</span></code>, then FSDP frees the unsharded parameters after forward and
re-all-gathers them in backward before gradient computation. After gradient
computation, FSDP frees the unsharded parameters and reduce-scatters the
unsharded gradients across data-parallel workers.</p>
<p>This implementation represents the sharded parameters as <code class="xref py py-class docutils literal notranslate"><span class="pre">DTensor</span></code> s
sharded on dim-0, while the unsharded parameters will be like the original
parameters on <code class="docutils literal notranslate"><span class="pre">module</span></code> (e.g. <a class="reference internal" href="tensors.html#torch.Tensor" title="torch.Tensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">torch.Tensor</span></code></a> if originally
<a class="reference internal" href="tensors.html#torch.Tensor" title="torch.Tensor"><code class="xref py py-class docutils literal notranslate"><span class="pre">torch.Tensor</span></code></a>). A module
<a class="reference external" href="https://pytorch.org/docs/main/generated/torch.nn.Module.html#torch.nn.Module.register_forward_pre_hook">forward pre-hook</a>
on <code class="docutils literal notranslate"><span class="pre">module</span></code> all-gathers the parameters, and a module
<a class="reference external" href="https://pytorch.org/docs/main/generated/torch.nn.Module.html#torch.nn.Module.register_forward_hook">forward hook</a>
on <code class="docutils literal notranslate"><span class="pre">module</span></code> frees them (if needed). Similar backward hooks all-gather
parameters and later free parameters and reduce-scatter gradients.</p>
<p>Since grouping multiple tensors together for one collective is critical for
communication efficiency, this implementation makes this grouping first
class. Calling <a class="reference internal" href="#torch.distributed.fsdp.fully_shard" title="torch.distributed.fsdp.fully_shard"><code class="xref py py-meth docutils literal notranslate"><span class="pre">fully_shard()</span></code></a> on <code class="docutils literal notranslate"><span class="pre">module</span></code> constructs one group that
includes the parameters in <code class="docutils literal notranslate"><span class="pre">module.parameters()</span></code> except those already
assigned to a group from an earlier call on a submodule. This means that
<a class="reference internal" href="#torch.distributed.fsdp.fully_shard" title="torch.distributed.fsdp.fully_shard"><code class="xref py py-meth docutils literal notranslate"><span class="pre">fully_shard()</span></code></a> should be called bottom-up on your model. Each group’s
parameters are all-gathered in one collective, and its gradients are
reduce-scattered in one collective. Partitioning the model into multiple
groups (“layer by layer”) allows for peak memory savings and communication/computation
overlap. Users generally should <em>not</em> call <a class="reference internal" href="#torch.distributed.fsdp.fully_shard" title="torch.distributed.fsdp.fully_shard"><code class="xref py py-meth docutils literal notranslate"><span class="pre">fully_shard()</span></code></a> only on the
topmost root module.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>module</strong> (<em>Union</em><em>[</em><a class="reference internal" href="generated/torch.nn.Module.html#torch.nn.Module" title="torch.nn.Module"><em>nn.Module</em></a><em>, </em><em>List</em><em>[</em><a class="reference internal" href="generated/torch.nn.Module.html#torch.nn.Module" title="torch.nn.Module"><em>nn.Module</em></a><em>]</em>) – The module or modules to
shard with FSDP and group together for communication.</p></li>
<li><p><strong>mesh</strong> (<em>Optional</em><em>[</em><a class="reference internal" href="distributed.html#torch.distributed.device_mesh.DeviceMesh" title="torch.distributed.device_mesh.DeviceMesh"><em>DeviceMesh</em></a><em>]</em>) – This data parallel mesh defines the
sharding and device. If 1D, then parameters are fully sharded
across the 1D mesh (FSDP) with <code class="docutils literal notranslate"><span class="pre">(Shard(0),)</span></code> placement. If 2D,
then parameters are sharded across the 1st dim and replicated
across the 0th dim (HSDP) with <code class="docutils literal notranslate"><span class="pre">(Replicate(),</span> <span class="pre">Shard(0))</span></code>
placement. The mesh’s device type gives the device type used for
communication; if a CUDA or CUDA-like device type, then we use the
current device.</p></li>
<li><p><strong>reshard_after_forward</strong> (<em>Union</em><em>[</em><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a><em>, </em><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a><em>]</em>) – <p>This controls the parameter
behavior after forward and can trade off memory and communication:</p>
<ul>
<li><p>If <code class="docutils literal notranslate"><span class="pre">True</span></code>, then this reshards parameters after forward and
re-all-gathers in backward.</p></li>
<li><p>If <code class="docutils literal notranslate"><span class="pre">False</span></code>, then this keeps the unsharded parameters in memory
after forward and avoids the all-gather in backward.</p></li>
<li><p>If an <code class="docutils literal notranslate"><span class="pre">int</span></code>, then this represents the world size to reshard to
after forward. It should be a non-trivial divisor of the <code class="docutils literal notranslate"><span class="pre">mesh</span></code>
shard dim size (i.e. excluding 1 and the dim size itself). A
choice may be the intra-node size (e.g. <code class="docutils literal notranslate"><span class="pre">torch.cuda.device_count()</span></code>).
This allows the all-gather in backward to be over a smaller world
size at the cost of higher memory usage than setting to <code class="docutils literal notranslate"><span class="pre">True</span></code>.</p></li>
<li><p>The root FSDP state has its value specially set to <code class="docutils literal notranslate"><span class="pre">False</span></code> as a
heuristic since its parameters would typically be immediately
all-gathered for backward.</p></li>
<li><p>After forward, the parameters registered to the module depend on
this: The registered parameters are the sharded parameters if
<code class="docutils literal notranslate"><span class="pre">True</span></code>; unsharded parameters if <code class="docutils literal notranslate"><span class="pre">False</span></code>; and the parameters
resharded to the smaller mesh otherwise. To modify the parameters
between forward and backward, the registered parameters must be
the sharded parameters. For <code class="docutils literal notranslate"><span class="pre">False</span></code> or an <code class="docutils literal notranslate"><span class="pre">int</span></code>, this can be
done by manually resharding via <code class="xref py py-meth docutils literal notranslate"><span class="pre">reshard()</span></code>.</p></li>
</ul>
</p></li>
<li><p><strong>shard_placement_fn</strong> (<em>Optional</em><em>[</em><em>Callable</em><em>[</em><em>[</em><em>nn.Parameter</em><em>]</em><em>, </em><em>Optional</em><em>[</em><a class="reference internal" href="distributed.tensor.html#torch.distributed.tensor.placement_types.Shard" title="torch.distributed.tensor.placement_types.Shard"><em>Shard</em></a><em>]</em><em>]</em><em>]</em>) – This callable can be used to override the sharding placement for a
parameter to shard a parameter on a dimension other than dim-0. If
this callable returns a <code class="xref py py-class docutils literal notranslate"><span class="pre">Shard</span></code> placement (not <code class="docutils literal notranslate"><span class="pre">None</span></code>),
then FSDP will shard according to that placement (e.g. <code class="docutils literal notranslate"><span class="pre">Shard(1)</span></code>).
If sharding on a nonzero dim, we currently require even sharding,
i.e. the tensor dim size on that dim must be divisible by the FSDP
shard mesh size.</p></li>
<li><p><strong>mp_policy</strong> (<a class="reference internal" href="#torch.distributed.fsdp.MixedPrecisionPolicy" title="torch.distributed.fsdp.MixedPrecisionPolicy"><em>MixedPrecisionPolicy</em></a>) – This controls the mixed precision
policy, which offers parameter/reduction mixed precision for this
module. See <a class="reference internal" href="#torch.distributed.fsdp.MixedPrecisionPolicy" title="torch.distributed.fsdp.MixedPrecisionPolicy"><code class="xref py py-class docutils literal notranslate"><span class="pre">MixedPrecisionPolicy</span></code></a> for details.</p></li>
<li><p><strong>offload_policy</strong> (<a class="reference internal" href="#torch.distributed.fsdp.OffloadPolicy" title="torch.distributed.fsdp.OffloadPolicy"><em>OffloadPolicy</em></a>) – This controls the offloading policy,
which offers parameter/gradient/optimizer state offloading. See
<a class="reference internal" href="#torch.distributed.fsdp.OffloadPolicy" title="torch.distributed.fsdp.OffloadPolicy"><code class="xref py py-class docutils literal notranslate"><span class="pre">OffloadPolicy</span></code></a> and its subclasses for details.</p></li>
<li><p><strong>ignored_params</strong> (<em>Optional</em><em>[</em><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#set" title="(in Python v3.13)"><em>set</em></a><em>[</em><em>nn.Parameter</em><em>]</em><em>]</em>) – The set of parameters that we
don’t want to shard with FSDP.</p></li>
</ul>
</dd>
<dt class="field-even">Returns</dt>
<dd class="field-even"><p>The module with FSDP applied (in-place).</p>
</dd>
<dt class="field-odd">Return type</dt>
<dd class="field-odd"><p><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule" title="torch.distributed.fsdp.FSDPModule">FSDPModule</a></p>
</dd>
</dl>
</dd></dl>

<p>Calling <code class="docutils literal notranslate"><span class="pre">fully_shard(module)</span></code> dynamically constructs a new class that
subclasses <code class="docutils literal notranslate"><span class="pre">type(module)</span></code> and an FSDP class <code class="docutils literal notranslate"><span class="pre">FSDPModule</span></code>. For example, if
we call <code class="docutils literal notranslate"><span class="pre">fully_shard(linear)</span></code> on a module <code class="docutils literal notranslate"><span class="pre">linear:</span> <span class="pre">nn.Linear</span></code>, then FSDP
constructs a new class <code class="docutils literal notranslate"><span class="pre">FSDPLinear</span></code> and changes <code class="docutils literal notranslate"><span class="pre">linear</span></code>’s type to this.
Otherwise, <code class="docutils literal notranslate"><span class="pre">fully_shard</span></code> does not change the module structure and parameter
fully-qualified names. The class <code class="docutils literal notranslate"><span class="pre">FSDPModule</span></code> allows providing some
FSDP-specific methods on the module.</p>
<dl class="py class">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">torch.distributed.fsdp.</span></span><span class="sig-name descname"><span class="pre">FSDPModule</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="o"><span class="pre">*</span></span><span class="n"><span class="pre">args</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">**</span></span><span class="n"><span class="pre">kwargs</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule" title="Permalink to this definition">¶</a></dt>
<dd><dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule.reshard">
<span class="sig-name descname"><span class="pre">reshard</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#FSDPModule.reshard"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L264"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule.reshard" title="Permalink to this definition">¶</a></dt>
<dd><p>Reshards the module’s parameters, freeing the unsharded parameters if
they are allocated and registering the sharded parameters to the
module. This method is <em>not</em> recursive.</p>
<dl class="field-list simple">
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule.set_all_reduce_hook">
<span class="sig-name descname"><span class="pre">set_all_reduce_hook</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">hook</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">stream</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#FSDPModule.set_all_reduce_hook"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L416"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule.set_all_reduce_hook" title="Permalink to this definition">¶</a></dt>
<dd><dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>hook</strong> (<em>Callable</em><em>[</em><em>[</em><a class="reference internal" href="tensors.html#torch.Tensor" title="torch.Tensor"><em>torch.Tensor</em></a><em>]</em><em>, </em><em>None</em><em>]</em>) – User-defined all-reduce hook
with expected signature <code class="docutils literal notranslate"><span class="pre">hook(reduce_output:</span> <span class="pre">torch.Tensor)</span> <span class="pre">-&gt;</span> <span class="pre">None</span></code>
where <code class="docutils literal notranslate"><span class="pre">reduce_output</span></code> is the reduce-scatter output if only
using FSDP or the all-reduce output if using native HSDP.</p></li>
<li><p><strong>stream</strong> (<em>Optional</em><em>[</em><a class="reference internal" href="generated/torch.cuda.Stream.html#torch.cuda.Stream" title="torch.cuda.Stream"><em>torch.cuda.Stream</em></a><em>]</em>) – Stream to run the all-reduce
hook in. This should only be set if not using native HSDP. If
using native HSDP, the hook will run in the internally defined
all-reduce stream used by the native HSDP all-reduce.</p></li>
</ul>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule.set_is_last_backward">
<span class="sig-name descname"><span class="pre">set_is_last_backward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">is_last_backward</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#FSDPModule.set_is_last_backward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L303"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule.set_is_last_backward" title="Permalink to this definition">¶</a></dt>
<dd><p>Sets whether the next backward is the last one. On the last backward,
FSDP waits on pending gradient reduction and clears internal data
structures for backward prefetching. This can be useful for
microbatching.</p>
<dl class="field-list simple">
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule.set_modules_to_backward_prefetch">
<span class="sig-name descname"><span class="pre">set_modules_to_backward_prefetch</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">modules</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#FSDPModule.set_modules_to_backward_prefetch"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L396"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule.set_modules_to_backward_prefetch" title="Permalink to this definition">¶</a></dt>
<dd><p>Sets the FSDP modules for which this FSDP module should explicitly
prefetch all-gathers in backward. This overrides the default backward
prefetching implementation that prefetches the next FSDP module based on
the reverse post-forward order.</p>
<p>Passing a singleton list containing the previous FSDP module gives the
same all-gather overlap behavior as the default overlap behavior.
Passing a list with at least length two is required for more aggressive
overlap and will use more reserved memory.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>modules</strong> (<em>List</em><em>[</em><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule" title="torch.distributed.fsdp.FSDPModule"><em>FSDPModule</em></a><em>]</em>) – FSDP modules to prefetch.</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule.set_modules_to_forward_prefetch">
<span class="sig-name descname"><span class="pre">set_modules_to_forward_prefetch</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">modules</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#FSDPModule.set_modules_to_forward_prefetch"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L376"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule.set_modules_to_forward_prefetch" title="Permalink to this definition">¶</a></dt>
<dd><p>Sets the FSDP modules for which this FSDP module should explicitly
prefetch all-gathers in forward. The prefetching runs after this
module’s all-gather copy-out.</p>
<p>Passing a singleton list containing the next FSDP module gives the same
all-gather overlap behavior as the default overlap behavior, except the
prefetched all-gather is issued earlier from the CPU. Passing a list
with at least length two is required for more aggressive overlap and
will use more reserved memory.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>modules</strong> (<em>List</em><em>[</em><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule" title="torch.distributed.fsdp.FSDPModule"><em>FSDPModule</em></a><em>]</em>) – FSDP modules to prefetch.</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule.set_post_optim_event">
<span class="sig-name descname"><span class="pre">set_post_optim_event</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">event</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#FSDPModule.set_post_optim_event"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L441"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule.set_post_optim_event" title="Permalink to this definition">¶</a></dt>
<dd><p>Sets a post-optimizer-step event for the root FSDP module to wait the
all-gather streams on.</p>
<p>By default, the root FSDP module waits the all-gather streams on the
current stream to ensure that the optimizer step has finished before
all-gathering. However, this may introduce false dependencies if
there is unrelated computation after the optimizer step. This API
allows the user to provide their own event to wait on. After the root
waits on the event, the event is discarded, so this API should be
called with a new event each iteration.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>event</strong> (<a class="reference internal" href="generated/torch.Event.html#torch.Event" title="torch.Event"><em>torch.Event</em></a>) – Event recorded after the optimizer step
to wait all-gather streams on.</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule.set_reduce_scatter_divide_factor">
<span class="sig-name descname"><span class="pre">set_reduce_scatter_divide_factor</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">factor</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#FSDPModule.set_reduce_scatter_divide_factor"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L460"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule.set_reduce_scatter_divide_factor" title="Permalink to this definition">¶</a></dt>
<dd><p>Sets a custom divide factor for the reduce-scatter. This becomes a
custom reduce op using NCCL’s PreMulSum, which allows multiplying by
the factor before reduction.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>factor</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#float" title="(in Python v3.13)"><em>float</em></a>) – Custom divide factor.</p>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule.set_requires_all_reduce">
<span class="sig-name descname"><span class="pre">set_requires_all_reduce</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">requires_all_reduce</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">recurse</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#FSDPModule.set_requires_all_reduce"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L337"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule.set_requires_all_reduce" title="Permalink to this definition">¶</a></dt>
<dd><p>Sets if the module should all-reduce gradients. This can be used to
implement gradient accumulation with only reduce-scatter but not
all-reduce for HSDP.</p>
<dl class="field-list simple">
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule.set_requires_gradient_sync">
<span class="sig-name descname"><span class="pre">set_requires_gradient_sync</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">requires_gradient_sync</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">recurse</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#FSDPModule.set_requires_gradient_sync"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L313"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule.set_requires_gradient_sync" title="Permalink to this definition">¶</a></dt>
<dd><p>Sets if the module should sync gradients. This can be used to implement
gradient accumulation <em>without communication</em>. For HSDP, this controls
both reduce-scatter and all-reduce together. This is the equivalent of
<cite>no_sync</cite> in FSDP1.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>requires_gradient_sync</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – Whether to reduce gradients for the
module’s parameters.</p></li>
<li><p><strong>recurse</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – Whether to set for all FSDP submodules or just the
passed-in module.</p></li>
</ul>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule.set_reshard_after_backward">
<span class="sig-name descname"><span class="pre">set_reshard_after_backward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">reshard_after_backward</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">recurse</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#FSDPModule.set_reshard_after_backward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L353"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule.set_reshard_after_backward" title="Permalink to this definition">¶</a></dt>
<dd><p>Sets if the module should reshard parameters after backward. This can
be used during gradient accumulation to trade off higher memory for
reduced communication since the unsharded parameters do not need to be
re-all-gathered before the next forward.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>reshard_after_backward</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – Whether to reshard parameters after
backward.</p></li>
<li><p><strong>recurse</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – Whether to set for all FSDP submodules or just the
passed-in module.</p></li>
</ul>
</dd>
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule.set_unshard_in_backward">
<span class="sig-name descname"><span class="pre">set_unshard_in_backward</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">unshard_in_backward</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#FSDPModule.set_unshard_in_backward"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L475"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule.set_unshard_in_backward" title="Permalink to this definition">¶</a></dt>
<dd><p>Sets whether the FSDP module’s parameters need to be unsharded in
backward. This can be used in expert cases when the user knows that all
parameters in this FSDP module’s parameter group are not needed for
backward computation (e.g. embedding).</p>
<dl class="field-list simple">
</dl>
</dd></dl>

<dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.FSDPModule.unshard">
<span class="sig-name descname"><span class="pre">unshard</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">async_op</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#FSDPModule.unshard"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L274"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.FSDPModule.unshard" title="Permalink to this definition">¶</a></dt>
<dd><p>Unshards the module’s parameters by allocating memory and all-gathering
the parameters. This method is <em>not</em> recursive. The unshard follows the
<a class="reference internal" href="#torch.distributed.fsdp.MixedPrecisionPolicy" title="torch.distributed.fsdp.MixedPrecisionPolicy"><code class="xref py py-class docutils literal notranslate"><span class="pre">MixedPrecisionPolicy</span></code></a>, so it will all-gather following
<code class="docutils literal notranslate"><span class="pre">param_dtype</span></code> if set.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><p><strong>async_op</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – If <code class="docutils literal notranslate"><span class="pre">True</span></code>, then returns a <a class="reference internal" href="#torch.distributed.fsdp.UnshardHandle" title="torch.distributed.fsdp.UnshardHandle"><code class="xref py py-class docutils literal notranslate"><span class="pre">UnshardHandle</span></code></a>
that has a <code class="xref py py-meth docutils literal notranslate"><span class="pre">wait()</span></code> method to wait on the unshard op. If
<code class="docutils literal notranslate"><span class="pre">False</span></code>, then returns <code class="docutils literal notranslate"><span class="pre">None</span></code> and waits on the handle inside
this function.</p>
</dd>
<dt class="field-even">Return type</dt>
<dd class="field-even"><p><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Optional" title="(in Python v3.13)"><em>Optional</em></a>[<a class="reference internal" href="#torch.distributed.fsdp.UnshardHandle" title="torch.distributed.fsdp.UnshardHandle"><em>UnshardHandle</em></a>]</p>
</dd>
</dl>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>If <code class="docutils literal notranslate"><span class="pre">async_op=True</span></code>, then FSDP will wait on the pending
unshard in the module’s pre-forward for the user. The user only
needs to call <code class="xref py py-meth docutils literal notranslate"><span class="pre">wait()</span></code> explicitly if the wait should happen
before pre-forward.</p>
</div>
</dd></dl>

</dd></dl>

<dl class="py class">
<dt class="sig sig-object py" id="torch.distributed.fsdp.UnshardHandle">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">torch.distributed.fsdp.</span></span><span class="sig-name descname"><span class="pre">UnshardHandle</span></span><a class="headerlink" href="#torch.distributed.fsdp.UnshardHandle" title="Permalink to this definition">¶</a></dt>
<dd><p>A handle to wait on a <a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.unshard" title="torch.distributed.fsdp.FSDPModule.unshard"><code class="xref py py-meth docutils literal notranslate"><span class="pre">FSDPModule.unshard()</span></code></a> op.</p>
<dl class="py method">
<dt class="sig sig-object py" id="torch.distributed.fsdp.UnshardHandle.wait">
<span class="sig-name descname"><span class="pre">wait</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="reference internal" href="_modules/torch/distributed/fsdp/_fully_shard/_fully_shard.html#UnshardHandle.wait"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L530"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.UnshardHandle.wait" title="Permalink to this definition">¶</a></dt>
<dd><p>Waits on the unshard op. This ensures that the current stream can use
the unsharded parameters, which are now registered to the module.</p>
<dl class="field-list simple">
</dl>
</dd></dl>

</dd></dl>

<dl class="py function">
<dt class="sig sig-object py" id="torch.distributed.fsdp.register_fsdp_forward_method">
<span class="sig-prename descclassname"><span class="pre">torch.distributed.fsdp.</span></span><span class="sig-name descname"><span class="pre">register_fsdp_forward_method</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">module</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">method_name</span></span></em><span class="sig-paren">)</span><a class="reference external" href="https://github.com/pytorch/pytorch/blob/v2.7.0/torch/distributed/fsdp/_fully_shard/_fully_shard.py#L549"><span class="viewcode-link"><span class="pre">[source]</span></span></a><a class="headerlink" href="#torch.distributed.fsdp.register_fsdp_forward_method" title="Permalink to this definition">¶</a></dt>
<dd><p>Registers a method on <code class="docutils literal notranslate"><span class="pre">module</span></code> to be considered a forward method for
FSDP.</p>
<p>FSDP all-gathers parameters pre-forward and optionally frees parameters
post-forward (depending on <code class="docutils literal notranslate"><span class="pre">reshard_after_forward</span></code>). FSDP only knows to
do this for <code class="xref py py-meth docutils literal notranslate"><span class="pre">nn.Module.forward()</span></code> by default. This function patches a
user-specified method to run the pre/post-forward hooks before/after the
method, respectively. If <code class="docutils literal notranslate"><span class="pre">module</span></code> is not an <a class="reference internal" href="#torch.distributed.fsdp.FSDPModule" title="torch.distributed.fsdp.FSDPModule"><code class="xref py py-class docutils literal notranslate"><span class="pre">FSDPModule</span></code></a>, then
this is a no-op.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>module</strong> (<a class="reference internal" href="generated/torch.nn.Module.html#torch.nn.Module" title="torch.nn.Module"><em>nn.Module</em></a>) – Module to register the forward method on.</p></li>
<li><p><strong>method_name</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) – Name of the forward method.</p></li>
</ul>
</dd>
</dl>
</dd></dl>

<dl class="py class">
<dt class="sig sig-object py" id="torch.distributed.fsdp.MixedPrecisionPolicy">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">torch.distributed.fsdp.</span></span><span class="sig-name descname"><span class="pre">MixedPrecisionPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">param_dtype</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">reduce_dtype</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">output_dtype</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">cast_forward_inputs</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#torch.distributed.fsdp.MixedPrecisionPolicy" title="Permalink to this definition">¶</a></dt>
<dd><p>This configures FSDP’s mixed precision. Unlike autocast, this applies mixed
precision at the module level, not op level, which means low-precision
activations are saved for backward and high-to-low-precision casts are
incurred only at module boundaries.</p>
<p>FSDP works well with module-level mixed precision since it keeps the
high-precision sharded parameters in memory anyway. In other words, FSDP
does not require any extra memory to keep a high-precision copy of the
parameters for the optimizer step.</p>
<dl class="field-list simple">
<dt class="field-odd">Variables</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>param_dtype</strong> (<em>Optional</em><em>[</em><a class="reference internal" href="tensor_attributes.html#torch.dtype" title="torch.dtype"><em>torch.dtype</em></a><em>]</em>) – This specifies the dtype for
the unsharded parameter and hence the dtype for forward/backward
computation and the parameter all-gather. If this is <code class="docutils literal notranslate"><span class="pre">None</span></code>, then
the unsharded parameter uses the original dtype. The optimizer step
uses the sharded parameter in the original dtype. (Default:
<code class="docutils literal notranslate"><span class="pre">None</span></code>)</p></li>
<li><p><strong>reduce_dtype</strong> (<em>Optional</em><em>[</em><a class="reference internal" href="tensor_attributes.html#torch.dtype" title="torch.dtype"><em>torch.dtype</em></a><em>]</em>) – This specifies the dtype for
gradient reduction (i.e. reduce-scatter or all-reduce). If this is
<code class="docutils literal notranslate"><span class="pre">None</span></code> but <code class="docutils literal notranslate"><span class="pre">param_dtype</span></code> is not <code class="docutils literal notranslate"><span class="pre">None</span></code>, then the reduction
uses the compute dtype. This can be used to run gradient reduction
in full precision while using low precision for compute. If
gradient reduction is also disabled via <code class="xref py py-meth docutils literal notranslate"><span class="pre">set_requires_gradient_sync()</span></code>,
then FSDP will accumulate gradients using <code class="docutils literal notranslate"><span class="pre">reduce_dtype</span></code>.
(Default: <code class="docutils literal notranslate"><span class="pre">None</span></code>)</p></li>
<li><p><strong>output_dtype</strong> (<em>Optional</em><em>[</em><a class="reference internal" href="tensor_attributes.html#torch.dtype" title="torch.dtype"><em>torch.dtype</em></a><em>]</em>) – This specifies the dtype for
casting floating-point forward outputs. This can be used to
help implement cases where different modules have different mixed
precision policies. (Default: <code class="docutils literal notranslate"><span class="pre">None</span></code>)</p></li>
<li><p><strong>cast_forward_inputs</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – This specifies whether FSDP should cast the
forward’s floating-point input tensors to <code class="docutils literal notranslate"><span class="pre">param_dtype</span></code> or not.</p></li>
</ul>
</dd>
</dl>
</dd></dl>

<dl class="py class">
<dt class="sig sig-object py" id="torch.distributed.fsdp.OffloadPolicy">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">torch.distributed.fsdp.</span></span><span class="sig-name descname"><span class="pre">OffloadPolicy</span></span><a class="headerlink" href="#torch.distributed.fsdp.OffloadPolicy" title="Permalink to this definition">¶</a></dt>
<dd><p>This base class represents the policy of no offloading and is only used as
the default value for the <code class="docutils literal notranslate"><span class="pre">offload_policy</span></code> arg.</p>
<dl class="field-list simple">
</dl>
</dd></dl>

<dl class="py class">
<dt class="sig sig-object py" id="torch.distributed.fsdp.CPUOffloadPolicy">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">torch.distributed.fsdp.</span></span><span class="sig-name descname"><span class="pre">CPUOffloadPolicy</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">pin_memory</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">True</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#torch.distributed.fsdp.CPUOffloadPolicy" title="Permalink to this definition">¶</a></dt>
<dd><p>This offload policy offloads parameters, gradients, and optimizer states to
CPU. Sharded parameters are copied host-to-device before all-gather. The
all-gathered parameters are freed according to <code class="docutils literal notranslate"><span class="pre">reshard_after_forward</span></code>.
Sharded gradients are copied device-to-host in backward, and the optimizer
step runs on CPU with CPU optimizer states.</p>
<dl class="field-list simple">
<dt class="field-odd">Variables</dt>
<dd class="field-odd"><p><strong>pin_memory</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><em>bool</em></a>) – Whether to pin sharded parameter and gradient
memory. Pinning memory allows both more efficient H2D/D2H copies
and for the copies to overlap with compute. However, the pinned
memory cannot be used by other processes. Set this to <code class="docutils literal notranslate"><span class="pre">False</span></code> if
you have insufficient CPU memory. (Default: <code class="docutils literal notranslate"><span class="pre">True</span></code>)</p>
</dd>
</dl>
</dd></dl>

</div>
</div>


             </article>
             
            </div>
            <footer>
  
    <!-- Previous/next page links. The chevron images only decorate the visible
         "Next"/"Previous" text, so they carry an empty alt attribute and are
         skipped by assistive technology. -->
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">

        <a href="distributed.tensor.parallel.html" class="btn btn-neutral float-right" title="Tensor Parallelism - torch.distributed.tensor.parallel" accesskey="n" rel="next">Next <img src="_static/images/chevron-right-orange.svg" class="next-page" alt=""></a>


        <a href="fsdp.html" class="btn btn-neutral" title="FullyShardedDataParallel" accesskey="p" rel="prev"><img src="_static/images/chevron-right-orange.svg" class="previous-page" alt=""> Previous</a>

    </div>
  

    <hr>

  
  <div role="contentinfo">
    <p>
        &copy; Copyright PyTorch Contributors.

    </p>
  </div>
    
      <div>
        Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
      </div>
     

</footer>

          </div>
<script>

// Prepend an "internal API" warning note to the article when the page URL
// matches the pattern below: a path segment starting with "_" followed by
// "html" (e.g. "/_foo.html"), or any occurrence of "_dynamo".
var match = window.location.href.match(/\/_[a-zA-Z0-9_]*.html|_dynamo/gi);
// String.prototype.match returns null when nothing matches, so guard before
// indexing; otherwise every ordinary page throws a TypeError here.
// A falsy 0 keeps the original `if (url)` check meaningful for the no-match case.
var url = match ? window.location.href.lastIndexOf(match[match.length - 1]) : 0;

if (url)
  {
    var div = '<div class="admonition note"><p class="admonition-title">Note</p><p><i class="fa fa-exclamation-circle" aria-hidden="true">&nbsp</i> This page describes an internal API which is not intended to be used outside of the PyTorch codebase and can be modified or removed without notice.</p></div>';
    var articleEl = document.getElementById("pytorch-article");
    // Skip silently if the article container is absent rather than throwing.
    if (articleEl)
      {
        articleEl.insertAdjacentHTML('afterBegin', div);
      }
  }
</script>
        </div>

        <div class="pytorch-content-right" id="pytorch-content-right">
          <div class="pytorch-right-menu" id="pytorch-right-menu">
            <div class="pytorch-side-scroll" id="pytorch-side-scroll-right">
              <ul>
<li><a class="reference internal" href="#">torch.distributed.fsdp.fully_shard</a><ul>
<li><a class="reference internal" href="#pytorch-fsdp2-fully-shard">PyTorch FSDP2 (<code class="docutils literal notranslate"><span class="pre">fully_shard</span></code>)</a><ul>
<li><a class="reference internal" href="#torch.distributed.fsdp.fully_shard"><code class="docutils literal notranslate"><span class="pre">fully_shard()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule"><code class="docutils literal notranslate"><span class="pre">FSDPModule</span></code></a><ul>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.reshard"><code class="docutils literal notranslate"><span class="pre">FSDPModule.reshard()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.set_all_reduce_hook"><code class="docutils literal notranslate"><span class="pre">FSDPModule.set_all_reduce_hook()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.set_is_last_backward"><code class="docutils literal notranslate"><span class="pre">FSDPModule.set_is_last_backward()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.set_modules_to_backward_prefetch"><code class="docutils literal notranslate"><span class="pre">FSDPModule.set_modules_to_backward_prefetch()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.set_modules_to_forward_prefetch"><code class="docutils literal notranslate"><span class="pre">FSDPModule.set_modules_to_forward_prefetch()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.set_post_optim_event"><code class="docutils literal notranslate"><span class="pre">FSDPModule.set_post_optim_event()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.set_reduce_scatter_divide_factor"><code class="docutils literal notranslate"><span class="pre">FSDPModule.set_reduce_scatter_divide_factor()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.set_requires_all_reduce"><code class="docutils literal notranslate"><span class="pre">FSDPModule.set_requires_all_reduce()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.set_requires_gradient_sync"><code class="docutils literal notranslate"><span class="pre">FSDPModule.set_requires_gradient_sync()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.set_reshard_after_backward"><code class="docutils literal notranslate"><span class="pre">FSDPModule.set_reshard_after_backward()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.set_unshard_in_backward"><code class="docutils literal notranslate"><span class="pre">FSDPModule.set_unshard_in_backward()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.FSDPModule.unshard"><code class="docutils literal notranslate"><span class="pre">FSDPModule.unshard()</span></code></a></li>
</ul>
</li>
<li><a class="reference internal" href="#torch.distributed.fsdp.UnshardHandle"><code class="docutils literal notranslate"><span class="pre">UnshardHandle</span></code></a><ul>
<li><a class="reference internal" href="#torch.distributed.fsdp.UnshardHandle.wait"><code class="docutils literal notranslate"><span class="pre">UnshardHandle.wait()</span></code></a></li>
</ul>
</li>
<li><a class="reference internal" href="#torch.distributed.fsdp.register_fsdp_forward_method"><code class="docutils literal notranslate"><span class="pre">register_fsdp_forward_method()</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.MixedPrecisionPolicy"><code class="docutils literal notranslate"><span class="pre">MixedPrecisionPolicy</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.OffloadPolicy"><code class="docutils literal notranslate"><span class="pre">OffloadPolicy</span></code></a></li>
<li><a class="reference internal" href="#torch.distributed.fsdp.CPUOffloadPolicy"><code class="docutils literal notranslate"><span class="pre">CPUOffloadPolicy</span></code></a></li>
</ul>
</li>
</ul>
</li>
</ul>

            </div>
          </div>
        </div>
      </section>
    </div>

  
         <script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
         <script src="_static/jquery.js"></script>
         <script src="_static/underscore.js"></script>
         <script src="_static/_sphinx_javascript_frameworks_compat.js"></script>
         <script src="_static/doctools.js"></script>
         <script src="_static/sphinx_highlight.js"></script>
         <script src="_static/clipboard.min.js"></script>
         <script src="_static/copybutton.js"></script>
     

  <script type="text/javascript" src="_static/js/vendor/popper.min.js"></script>
  <script type="text/javascript" src="_static/js/vendor/bootstrap.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/1.5.0/list.min.js"></script>
  <script type="text/javascript" src="_static/js/theme.js"></script>

  <script type="text/javascript">
      // jQuery runs this callback once the DOM is ready; it switches on the
      // theme's navigation. NOTE(review): the meaning of the `true` argument
      // is defined in theme.js — confirm there.
      jQuery(function enableThemeNavigation() {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>
 
<script type="text/javascript">
  // Sidebar section titles; presumably read by the theme script to decide
  // which sections start collapsed — confirm in theme.js.
  var collapsedSections = ['Developer Notes', 'Language Bindings', 'Libraries', 'Community'];
</script>

<img height="1" width="1" style="border-style:none;" alt="" src="https://www.googleadservices.com/pagead/conversion/795629140/?label=txkmCPmdtosBENSssfsC&amp;guid=ON&amp;script=0"/>


  <!-- Begin Footer -->

  <div class="container-fluid docs-tutorials-resources" id="docs-tutorials-resources">
    <div class="container">
      <div class="row">
        <div class="col-md-4 text-center">
          <h2>Docs</h2>
          <p>Access comprehensive developer documentation for PyTorch</p>
          <a class="with-right-arrow" href="https://pytorch.org/docs/stable/index.html">View Docs</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Tutorials</h2>
          <p>Get in-depth tutorials for beginners and advanced developers</p>
          <a class="with-right-arrow" href="https://pytorch.org/tutorials">View Tutorials</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Resources</h2>
          <p>Find development resources and get your questions answered</p>
          <a class="with-right-arrow" href="https://pytorch.org/resources">View Resources</a>
        </div>
      </div>
    </div>
  </div>

  <footer class="site-footer">
    <div class="container footer-container">
      <div class="footer-logo-wrapper">
        <a href="https://pytorch.org/" class="footer-logo"></a>
      </div>

      <div class="footer-links-wrapper">
        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/">PyTorch</a></li>
            <li><a href="https://pytorch.org/get-started">Get Started</a></li>
            <li><a href="https://pytorch.org/features">Features</a></li>
            <li><a href="https://pytorch.org/ecosystem">Ecosystem</a></li>
            <li><a href="https://pytorch.org/blog/">Blog</a></li>
            <li><a href="https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md">Contributing</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/resources">Resources</a></li>
            <li><a href="https://pytorch.org/tutorials">Tutorials</a></li>
            <li><a href="https://pytorch.org/docs/stable/index.html">Docs</a></li>
            <li><a href="https://discuss.pytorch.org" target="_blank">Discuss</a></li>
            <li><a href="https://github.com/pytorch/pytorch/issues" target="_blank">GitHub Issues</a></li>
            <li><a href="https://pytorch.org/assets/brand-guidelines/PyTorch-Brand-Guidelines.pdf" target="_blank">Brand Guidelines</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title">Stay up to date</li>
            <li><a href="https://www.facebook.com/pytorch" target="_blank">Facebook</a></li>
            <li><a href="https://twitter.com/pytorch" target="_blank">Twitter</a></li>
            <li><a href="https://www.youtube.com/pytorch" target="_blank">YouTube</a></li>
            <li><a href="https://www.linkedin.com/company/pytorch" target="_blank">LinkedIn</a></li>
          </ul>  
          </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title">PyTorch Podcasts</li>
            <li><a href="https://open.spotify.com/show/6UzHKeiy368jKfQMKKvJY5" target="_blank">Spotify</a></li>
            <li><a href="https://podcasts.apple.com/us/podcast/pytorch-developer-podcast/id1566080008" target="_blank">Apple</a></li>
            <li><a href="https://www.google.com/podcasts?feed=aHR0cHM6Ly9mZWVkcy5zaW1wbGVjYXN0LmNvbS9PQjVGa0lsOA%3D%3D" target="_blank">Google</a></li>
            <li><a href="https://music.amazon.com/podcasts/7a4e6f0e-26c2-49e9-a478-41bd244197d0/PyTorch-Developer-Podcast?" target="_blank">Amazon</a></li>
          </ul>
         </div>
        </div>
        
        <div class="privacy-policy">
          <ul>
            <li class="privacy-policy-links"><a href="https://www.linuxfoundation.org/terms/" target="_blank">Terms</a></li>
            <li class="privacy-policy-links">|</li>
            <li class="privacy-policy-links"><a href="https://www.linuxfoundation.org/privacy-policy/" target="_blank">Privacy</a></li>
          </ul>
        </div>
        <div class="copyright">
        <p>© Copyright The Linux Foundation. The PyTorch Foundation is a project of The Linux Foundation.
          For web site terms of use, trademark policy and other policies applicable to The PyTorch Foundation please see
          <a href="https://www.linuxfoundation.org/policies/">www.linuxfoundation.org/policies/</a>. The PyTorch Foundation supports the PyTorch open source
          project, which has been established as PyTorch Project a Series of LF Projects, LLC. For policies applicable to the PyTorch Project a Series of LF Projects, LLC,
          please see <a href="https://www.lfprojects.org/policies/">www.lfprojects.org/policies/</a>.</p>
      </div>
     </div>

  </footer>

  <div class="cookie-banner-wrapper">
  <div class="container">
    <p class="gdpr-notice">To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. As the current maintainers of this site, Facebook’s Cookies Policy applies. Learn more, including about available controls: <a href="https://www.facebook.com/policies/cookies/">Cookies Policy</a>.</p>
    <img class="close-button" src="_static/images/pytorch-x.svg" alt="Close">
  </div>
</div>

  <!-- End Footer -->

  <!-- Begin Mobile Menu -->

  <div class="mobile-main-menu">
    <div class="container-fluid">
      <div class="container">
        <div class="mobile-main-menu-header-container">
          <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>
          <a class="main-menu-close-button" href="#" data-behavior="close-mobile-menu"></a>
        </div>
      </div>
    </div>

    <div class="mobile-main-menu-links-container">
      <div class="main-menu">
        <ul>
           <li class="resources-mobile-menu-title">
             <a>Learn</a>
           </li>
           <ul class="resources-mobile-menu-items">
             <li>
               <a href="https://pytorch.org/get-started">Get Started</a>
             </li>
             <li>
               <a href="https://pytorch.org/tutorials">Tutorials</a>
             </li>
             <li>
               <a href="https://pytorch.org/tutorials/beginner/basics/intro.html">Learn the Basics</a>
             </li>
             <li>
               <a href="https://pytorch.org/tutorials/recipes/recipes_index.html">PyTorch Recipes</a>
             </li>
             <li>
               <a href="https://pytorch.org/tutorials/beginner/introyt.html">Introduction to PyTorch - YouTube Series</a>
             </li>
           </ul>
           <li class="resources-mobile-menu-title">
             <a>Ecosystem</a>
           </li>
           <ul class="resources-mobile-menu-items">
             <li>
               <a href="https://pytorch.org/ecosystem">Tools</a>
             </li>
             <li>
               <a href="https://pytorch.org/#community-module">Community</a>
             </li>
             <li>
               <a href="https://discuss.pytorch.org/">Forums</a>
             </li>
             <li>
               <a href="https://pytorch.org/resources">Developer Resources</a>
             </li>
             <li>
               <a href="https://pytorch.org/ecosystem/contributor-awards-2023">Contributor Awards - 2024</a>
             </li>
           </ul>

           <li class="resources-mobile-menu-title">
             <a>Edge</a>
           </li>

           <ul class="resources-mobile-menu-items">
             <li>
               <a href="https://pytorch.org/edge">About PyTorch Edge</a>
             </li>
             
             <li>
               <a href="https://pytorch.org/executorch-overview">ExecuTorch</a>
             </li>
             <li>
               <a href="https://pytorch.org/executorch/stable/index.html">ExecuTorch Documentation</a>
             </li>
           </ul>

           <li class="resources-mobile-menu-title">
             <a>Docs</a>
           </li>

           <ul class="resources-mobile-menu-items">
            <li>
              <a href="https://pytorch.org/docs/stable/index.html">PyTorch</a>
            </li>

            <li>
              <a href="https://pytorch.org/pytorch-domains">PyTorch Domains</a>
            </li>
          </ul>

          <li class="resources-mobile-menu-title">
            <a>Blog &amp; News</a>
          </li>
            
           <ul class="resources-mobile-menu-items">
            <li>
              <a href="https://pytorch.org/blog/">PyTorch Blog</a>
            </li>
            <li>
              <a href="https://pytorch.org/community-blog">Community Blog</a>
            </li>

            <li>
              <a href="https://pytorch.org/videos">Videos</a>
            </li>

            <li>
              <a href="https://pytorch.org/community-stories">Community Stories</a>
            </li>
            <li>
              <a href="https://pytorch.org/events">Events</a>
            </li>
            <li>
               <a href="https://pytorch.org/newsletter">Newsletter</a>
             </li>
          </ul>
          
          <li class="resources-mobile-menu-title">
            <a>About</a>
          </li>

          <ul class="resources-mobile-menu-items">
            <li>
              <a href="https://pytorch.org/foundation">PyTorch Foundation</a>
            </li>
            <li>
              <a href="https://pytorch.org/governing-board">Governing Board</a>
            </li>
            <li>
               <a href="https://pytorch.org/credits">Cloud Credit Program</a>
            </li>
            <li>
               <a href="https://pytorch.org/tac">Technical Advisory Council</a>
            </li>
            <li>
               <a href="https://pytorch.org/staff">Staff</a>
            </li>
            <li>
               <a href="https://pytorch.org/contact-us">Contact Us</a>
            </li>
          </ul>
        </ul>
      </div>
    </div>
  </div>

  <!-- End Mobile Menu -->

  <script type="text/javascript" src="_static/js/vendor/anchor.min.js"></script>

  <script type="text/javascript">
    // Runs once the DOM is ready: binds each theme component in order, then
    // tags links that wrap inline code.
    $(function () {
      mobileMenu.bind();
      mobileTOC.bind();
      pytorchAnchors.bind();
      sideMenus.bind();
      scrollToAnchor.bind();
      highlightNavigation.bind();
      mainMenuDropdown.bind();
      filterTags.bind();

      // Links cannot be created inside code blocks, so mark every link in the
      // article that contains a code span with the "has-code" class instead.
      $("article.pytorch-article a span.pre").closest("a").addClass("has-code");
    });
  </script>
</body>
</html>