
Move the triton pin #1585

Merged 3 commits on Oct 12, 2022
Makefile: 1 addition & 1 deletion
```diff
@@ -13,7 +13,7 @@ PIP ?= python -m pip
 # versions used in CI
 # Also update the "Install nightly binaries" section of the README when updating these
 PYTORCH_VERSION ?= dev20221011
-TRITON_VERSION ?= d3c925db8a81ca74f14680876b9311e7d079c5a1
+TRITON_VERSION ?= af76c989eb4799b015f8b288ccd8421558772e56


 default: develop
```
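As the Makefile comment says, the pinned commit has to match the install command in the README below. A minimal sketch of checking that, assuming the Makefile sits at the repo root; this helper script is hypothetical and not part of the PR:

```python
import pathlib
import re

# Read the pinned Triton commit out of the Makefile and print the matching
# pip install command, so the README snippet cannot silently drift from CI.
text = pathlib.Path("Makefile").read_text()
pin = re.search(r"^TRITON_VERSION \?= (\S+)$", text, re.MULTILINE).group(1)
print(f'pip install -U "git+https://github.com/openai/triton@{pin}#subdirectory=python"')
```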
README.md: 1 addition & 1 deletion
```diff
@@ -52,7 +52,7 @@ the command below, you will need CUDA 11.7.
 
 ```shell
 pip install --pre torch==1.14.0.dev20221011+cu117 --extra-index-url https://download.pytorch.org/whl/nightly/cu117
-pip install -U "git+https://github.com/openai/triton@998fd5f9afe166247f441999c605dfe624ca9331#subdirectory=python"
+pip install -U "git+https://github.com/openai/triton@af76c989eb4799b015f8b288ccd8421558772e56#subdirectory=python"
 pip install -U "git+https://github.com/pytorch/torchdynamo"
 ```
```

torchinductor/codegen/triton.py: 8 deletions
```diff
@@ -755,14 +755,6 @@ def load(self, name: str, index: sympy.Expr):
         line = f"tl.load({var} + ({index}), {mask}{ep}{other})"
         if V.graph.get_dtype(name) in (torch.float16, torch.bfloat16):
             line += ".to(tl.float32)"
-        """
-        elif V.graph.get_dtype(name) == torch.bool:
-            # This is a fix for https://github.com/pytorch/torchdynamo/issues/1450
-            # The root cause of the problem is a one-element bool tensor was stored as
-            # tensor([255], device='cuda:0', dtype=torch.uint8) in the forward pass output,
-            # which confuses the backward pass when it calls sum on the bool tensor.
-            line = f"({line} != 0)"
-        """
 
         if (
             self.inside_reduction
```
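For context, the deleted (already commented-out) branch would have wrapped bool loads in a comparison against zero, since torch.bool tensors are stored as bytes and a stored 255 would otherwise leak through. A minimal sketch of that pattern as a standalone Triton kernel; the kernel and its names are illustrative, not taken from the generated code:

```python
import triton
import triton.language as tl

@triton.jit
def load_bool_kernel(in_ptr, out_ptr, xnumel, XBLOCK: tl.constexpr):
    # Illustrative kernel: load the bool tensor's raw values, then normalize
    # with the same "(loaded != 0)" comparison the deleted branch emitted,
    # so a stored byte of 255 reads back as a canonical True.
    xindex = tl.program_id(0) * XBLOCK + tl.arange(0, XBLOCK)
    xmask = xindex < xnumel
    tmp = tl.load(in_ptr + xindex, xmask)
    tmp = tmp != 0
    tl.store(out_ptr + xindex, tmp, xmask)
```

As the lowering.py comment below notes, emitting this from codegen was blocked by a Triton issue with bool comparisons at the time.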
torchinductor/lowering.py: 10 deletions
```diff
@@ -3183,16 +3183,6 @@ def sum_(x, axis=None, keepdims=False, *, dtype=None):
     ) and dtype is None:
         dtype = torch.int64
 
-    # This is a temp fix for https://github.com/pytorch/torchdynamo/issues/1450
-    # The root cause of the problem is a one-element bool tensor was stored as
-    # tensor([255], device='cuda:0', dtype=torch.uint8) in the forward pass output,
-    # which confuses the backward pass when it calls sum on the bool tensor.
-    # A better place to fix is in triton.py (see the comment there), but it is
-    # blocked by a triton issue on bool comparison, causing opinfo tests like
-    # test_comprehensive_gt_cuda_bool to fail.
-    if is_boolean_dtype(x.get_dtype()):
-        x = to_dtype(to_dtype(x, dtype), torch.bool)
-
     fn = make_reduction("sum", override_return_dtype=dtype)
     return fn(x, axis, keepdims, dtype=dtype)
```
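The workaround being deleted here can be described in eager terms: round-tripping a bool tensor through the accumulation dtype collapses any 255-valued "true" bytes back to a canonical 0/1 before the reduction sees them. A sketch under that reading, in plain PyTorch rather than inductor IR:

```python
import torch

def normalized_bool_sum(x: torch.Tensor) -> torch.Tensor:
    # Mirrors the removed `to_dtype(to_dtype(x, dtype), torch.bool)`:
    # converting bool -> int64 -> bool forces non-canonical storage bytes
    # (e.g. 255) down to 0/1 before sum reduces them.
    if x.dtype == torch.bool:
        x = x.to(torch.int64).to(torch.bool)
    return x.sum()
```

The PR deletes this normalization along with moving the pin, presumably because the pinned Triton commit resolves the underlying issue.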
torchinductor/triton_ops/autotune.py: 2 additions & 1 deletion
```diff
@@ -106,7 +106,8 @@ def launcher({', '.join(def_args)}, grid, stream):
             # set_device(current_device())  # TODO(jansel): is this needed?
             grid_0, grid_1, grid_2 = grid(grid_meta)
             bin.c_wrapper(grid_0, grid_1, grid_2, bin.num_warps, bin.shared,
-                          stream, bin.cu_function, {', '.join(call_args)})
+                          stream, bin.cu_function, None, None, None,
+                          {', '.join(call_args)})
         """.lstrip(),
         scope,
     )
```
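The only behavioral change here is the three extra None arguments to bin.c_wrapper, matching the launcher signature at the new Triton pin; they appear to fill newly added launch-hook and kernel-handle slots, though the diff does not say so explicitly. Call sites of the generated launcher are unchanged, as in this hypothetical usage sketch:

```python
import torch

# Hypothetical call site (names assumed): callers still pass only the
# kernel arguments, a grid callable, and a raw CUDA stream handle; the
# three new None slots are filled inside the generated template.
xnumel = 1024
grid = lambda meta: ((xnumel + meta["XBLOCK"] - 1) // meta["XBLOCK"], 1, 1)
stream = torch.cuda.current_stream().cuda_stream  # requires a CUDA device
# launcher(in_buf, out_buf, xnumel, grid=grid, stream=stream)
```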