pytorch
diff --git a/‎.ci/docker/build.sh
Lines changed: 2 additions & 1 deletion b/‎.ci/docker/build.sh
Lines changed: 2 additions & 1 deletion
diff --git a/‎.ci/docker/common/common_utils.sh
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/common/common_utils.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/docker/requirements.txt
Lines changed: 3 additions & 3 deletions b/‎.ci/docker/requirements.txt
Lines changed: 3 additions & 3 deletions
diff --git a/‎.jenkins/metadata.json
Lines changed: 3 additions & 0 deletions b/‎.jenkins/metadata.json
Lines changed: 3 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 5 additions & 5 deletions b/‎README.md
Lines changed: 5 additions & 5 deletions
diff --git a/‎_static/css/custom.css
Lines changed: 21 additions & 0 deletions b/‎_static/css/custom.css
Lines changed: 21 additions & 0 deletions
diff --git a/‎_templates/layout.html
Lines changed: 17 additions & 0 deletions b/‎_templates/layout.html
Lines changed: 17 additions & 0 deletions
diff --git a/‎advanced_source/cpp_custom_ops.rst
Lines changed: 2 additions & 0 deletions b/‎advanced_source/cpp_custom_ops.rst
Lines changed: 2 additions & 0 deletions
diff --git a/‎advanced_source/dynamic_quantization_tutorial.py
Lines changed: 2 additions & 1 deletion b/‎advanced_source/dynamic_quantization_tutorial.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎advanced_source/python_custom_ops.py
Lines changed: 10 additions & 5 deletions b/‎advanced_source/python_custom_ops.py
Lines changed: 10 additions & 5 deletions
@@ -11,8 +11,9 @@ IMAGE_NAME="$1"
 shift
 
 export UBUNTU_VERSION="20.04"
+export CUDA_VERSION="12.4.1"
 
-export BASE_IMAGE="ubuntu:${UBUNTU_VERSION}"
+export BASE_IMAGE="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}"
 echo "Building ${IMAGE_NAME} Docker image"
 
 docker build \
 
@@ -22,5 +22,5 @@ conda_run() {
 }
 
 pip_install() {
-  as_ci_user conda run -n py_$ANACONDA_PYTHON_VERSION pip install --progress-bar off $*
+  as_ci_user conda run -n py_$ANACONDA_PYTHON_VERSION pip3 install --progress-bar off $*
 }
@@ -30,8 +30,8 @@ pytorch-lightning
 torchx
 torchrl==0.5.0
 tensordict==0.5.0
-ax-platform>==0.4.0
-nbformat>==5.9.2
+ax-platform>=0.4.0
+nbformat>=5.9.2
 datasets
 transformers
 torchmultimodal-nightly # needs to be updated to stable as soon as it's available
@@ -68,4 +68,4 @@ pygame==2.1.2
 pycocotools
 semilearn==0.3.2
 torchao==0.0.3
-segment_anything==1.0
+segment_anything==1.0
@@ -28,6 +28,9 @@
   "intermediate_source/model_parallel_tutorial.py": {
     "needs": "linux.16xlarge.nvidia.gpu"
   },
+  "recipes_source/torch_export_aoti_python.py": {
+    "needs": "linux.g5.4xlarge.nvidia.gpu"
+  }, 
   "advanced_source/pendulum.py": {
     "needs": "linux.g5.4xlarge.nvidia.gpu",
     "_comment": "need to be here for the compiling_optimizer_lr_scheduler.py to run."
 
@@ -22,6 +22,8 @@ We use sphinx-gallery's [notebook styled examples](https://sphinx-gallery.github
 
 Here is how you can create a new tutorial (for a detailed description, see [CONTRIBUTING.md](./CONTRIBUTING.md)):
 
+NOTE: Before submitting a new tutorial, read [PyTorch Tutorial Submission Policy](./tutorial_submission_policy.md).
+
 1. Create a Python file. If you want it executed while inserted into documentation, save the file with the suffix `tutorial` so that the file name is `your_tutorial.py`.
 2. Put it in one of the `beginner_source`, `intermediate_source`, `advanced_source` directories based on the level of difficulty. If it is a recipe, add it to `recipes_source`. For tutorials demonstrating unstable prototype features, add it to `prototype_source`.
 3. For Tutorials (except if it is a prototype feature), include it in the `toctree` directive and create a `customcarditem` in [index.rst](./index.rst).
@@ -31,7 +33,7 @@ If you are starting off with a Jupyter notebook, you can use [this script](https
 
 ## Building locally
 
-The tutorial build is very large and requires a GPU. If your machine does not have a GPU device, you can preview your HTML build without actually downloading the data and running the tutorial code: 
+The tutorial build is very large and requires a GPU. If your machine does not have a GPU device, you can preview your HTML build without actually downloading the data and running the tutorial code:
 
 1. Install required dependencies by running: `pip install -r requirements.txt`.
 
@@ -40,8 +42,6 @@ The tutorial build is very large and requires a GPU. If your machine does not ha
 - If you have a GPU-powered laptop, you can build using `make docs`. This will download the data, execute the tutorials and build the documentation to `docs/` directory. This might take about 60-120 min for systems with GPUs. If you do not have a GPU installed on your system, then see next step.
 - You can skip the computationally intensive graph generation by running `make html-noplot` to build basic html documentation to `_build/html`. This way, you can quickly preview your tutorial.
 
-> If you get **ModuleNotFoundError: No module named 'pytorch_sphinx_theme' make: *** [html-noplot] Error 2** from /tutorials/src/pytorch-sphinx-theme or /venv/src/pytorch-sphinx-theme (while using virtualenv), run `python setup.py install`.
-
 ## Building a single tutorial
 
 You can build a single tutorial by using the `GALLERY_PATTERN` environment variable. For example to run only `neural_style_transfer_tutorial.py`, run:
@@ -59,8 +59,8 @@ The `GALLERY_PATTERN` variable respects regular expressions.
 
 
 ## About contributing to PyTorch Documentation and Tutorials
-* You can find information about contributing to PyTorch documentation in the 
-PyTorch Repo [README.md](https://github.com/pytorch/pytorch/blob/master/README.md) file. 
+* You can find information about contributing to PyTorch documentation in the
+PyTorch Repo [README.md](https://github.com/pytorch/pytorch/blob/master/README.md) file.
 * Additional information can be found in [PyTorch CONTRIBUTING.md](https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md).
 
 
 
@@ -91,3 +91,24 @@
     transition: none;
     transform-origin: none;
 }
+
+.pytorch-left-menu-search input[type=text] {
+    background-image: none;
+}
+
+.gsc-control-cse {
+   padding-left: 0px !important;
+   padding-bottom: 0px !important;
+}
+
+.gsc-search-button .gsc-search-button-v2:focus {
+   border: transparent !important;
+   outline: none;
+   box-shadow: none;
+}
+.gsc-search-button-v2:active {
+   border: none !important;
+}
+.gsc-search-button-v2 {
+   border: none !important;
+}
@@ -11,6 +11,23 @@
 </script>
 {%- endblock %}
 
+{% block sidebartitle %}
+    {% if theme_display_version %}
+      {%- set nav_version = version %}
+      {% if READTHEDOCS and current_version %}
+        {%- set nav_version = current_version %}
+      {% endif %}
+      {% if nav_version %}
+        <div class="version">
+            {{ nav_version }}
+        </div>
+      {% endif %}
+    {% endif %}
+    <div class="searchbox">
+        <script async src="https://cse.google.com/cse.js?cx=e65585f8c3ea1440e"></script>
+        <div class="gcse-search"></div>
+    </div>
+{% endblock %}
 
 {% block footer %}
 {{ super() }}
 
@@ -174,6 +174,8 @@ To add ``torch.compile`` support for an operator, we must add a FakeTensor kerne
 known as a "meta kernel" or "abstract impl"). FakeTensors are Tensors that have
 metadata (such as shape, dtype, device) but no data: the FakeTensor kernel for an
 operator specifies how to compute the metadata of output tensors given the metadata of input tensors.
+The FakeTensor kernel should return dummy Tensors of your choice with
+the correct Tensor metadata (shape/strides/``dtype``/device).
 
 We recommend that this be done from Python via the `torch.library.register_fake` API,
 though it is possible to do this from C++ as well (see
 
@@ -151,7 +151,8 @@ def tokenize(self, path):
 model.load_state_dict(
     torch.load(
         model_data_filepath + 'word_language_model_quantize.pth',
-        map_location=torch.device('cpu')
+        map_location=torch.device('cpu'),
+        weights_only=True
         )
     )
 
 
@@ -66,7 +66,7 @@ def display(img):
 ######################################################################
 # ``crop`` is not handled effectively out-of-the-box by
 # ``torch.compile``: ``torch.compile`` induces a
-# `"graph break" <https://pytorch.org/docs/stable/torch.compiler_faq.html#graph-breaks>`_ 
+# `"graph break" <https://pytorch.org/docs/stable/torch.compiler_faq.html#graph-breaks>`_
 # on functions it is unable to handle and graph breaks are bad for performance.
 # The following code demonstrates this by raising an error
 # (``torch.compile`` with ``fullgraph=True`` raises an error if a
@@ -85,9 +85,9 @@ def f(img):
 #
 # 1. wrap the function into a PyTorch custom operator.
 # 2. add a "``FakeTensor`` kernel" (aka "meta kernel") to the operator.
-#    Given the metadata (e.g. shapes)
-#    of the input Tensors, this function says how to compute the metadata
-#    of the output Tensor(s).
+#    Given some ``FakeTensors`` inputs (dummy Tensors that don't have storage),
+#    this function should return dummy Tensors of your choice with the correct
+#    Tensor metadata (shape/strides/``dtype``/device).
 
 
 from typing import Sequence
@@ -130,6 +130,11 @@ def f(img):
 # ``autograd.Function`` with PyTorch operator registration APIs can lead to (and
 # has led to) silent incorrectness when composed with ``torch.compile``.
 #
+# If you don't need training support, there is no need to use
+# ``torch.library.register_autograd``.
+# If you end up training with a ``custom_op`` that doesn't have an autograd
+# registration, we'll raise an error message.
+#
 # The gradient formula for ``crop`` is essentially ``PIL.paste`` (we'll leave the
 # derivation as an exercise to the reader). Let's first wrap ``paste`` into a
 # custom operator:
@@ -203,7 +208,7 @@ def setup_context(ctx, inputs, output):
 ######################################################################
 # Mutable Python Custom operators
 # -------------------------------
-# You can also wrap a Python function that mutates its inputs into a custom 
+# You can also wrap a Python function that mutates its inputs into a custom
 # operator.
 # Functions that mutate inputs are common because that is how many low-level
 # kernels are written; for example, a kernel that computes ``sin`` may take in
Original file line number	Diff line number	Diff line change
`@@ -22,5 +22,5 @@ conda_run() {`
`22`	`22`	`}`
`23`	`23`
`24`	`24`	`pip_install() {`
`25`		`- as_ci_user conda run -n py_$ANACONDA_PYTHON_VERSION pip install --progress-bar off $*`
	`25`	`+ as_ci_user conda run -n py_$ANACONDA_PYTHON_VERSION pip3 install --progress-bar off $*`
`26`	`26`	`}`
Original file line number	Diff line number	Diff line change
`@@ -151,7 +151,8 @@ def tokenize(self, path):`
`151`	`151`	`model.load_state_dict(`
`152`	`152`	`torch.load(`
`153`	`153`	`model_data_filepath + 'word_language_model_quantize.pth',`
`154`		`- map_location=torch.device('cpu')`
	`154`	`+ map_location=torch.device('cpu'),`
	`155`	`+ weights_only=True`
`155`	`156`	`)`
`156`	`157`	`)`
`157`	`158`