@@ -108,7 +108,6 @@ def insert_observer(
    observer_name = get_new_observer_name(model)
    setattr(model, observer_name, observer)
    # put observer instance activation_post_process map
-    assert activation_post_process_map is not None
    activation_post_process_map[node.name].append(observer_name)
    # initialize index map for activation_post_process
    if node.name not in activation_post_process_indexes:
@@ -154,7 +153,7 @@ def maybe_insert_observer_for_special_module(
        observed_standalone_module = \
            prepare(standalone_module, sm_qconfig_dict, sm_prepare_config_dict)
        standalone_module_input_idxs = observed_standalone_module.\
-            _standalone_module_input_quantized_idxs.int().tolist()
+            _standalone_module_input_quantized_idxs.int().tolist()  # type: ignore
        observed_standalone_module = ObservedStandaloneGraphModule(
            observed_standalone_module, observed_standalone_module.graph)
        parent_name, name = _parent_name(node.target)
@@ -210,15 +209,14 @@ def insert_observer_for_output_of_the_node(
            inserted_observer = True
        elif (isinstance(quantize_handler,
                         FixedQParamsOpQuantizeHandler) and
-              not model.training) or \
-                isinstance(quantize_handler, CopyNodeQuantizeHandler):
+              not model.training):
            # inserting observers for output of observed module, or
            # mark the output as observed
            assert node.op in [
                'call_module',
                'call_function',
                'call_method'], \
-                'CopyNodeQuantizeHandler of type ' + node.op + ' is not handled'
+                'FixedQParamsQuantizeHandler of type ' + node.op + ' is not handled'

            def is_observed(input_arg):
                if isinstance(input_arg, Node):
@@ -327,6 +325,80 @@ def insert_observer_for_input_arg_of_observed_node(
                activation_post_process_indexes,
                env, observed_graph, load_arg, observed_node_names_set, quants)

+def handle_copy_nodes(
+        observed_graph: Graph, matches: Dict[str, MatchResult],
+        quants: Dict[str, List[Tuple[DefaultQuantizeHandler, Callable]]],
+        qconfig_map: Dict[str, QConfigAny],
+        activation_post_process_map: Dict[str, List[str]],
+        modules: Dict[str, torch.nn.Module]):
+    # map from node name to whether it is observed or not
+    observed_nodes: Set[Node] = set()
+    copy_nodes: Set[Node] = set()
+    non_tensor_input_binary_op_nodes: Set[Node] = set()
+    app_to_remove: Set[Node] = set()
+    env: Dict[Any, Any] = {}
+
+    def load_arg(a: Argument) -> Argument:
+        return map_arg(a, lambda node: env[node.name])
+
+    def in_nodes(a: Argument, nodes: Set[Node]) -> bool:
+        if isinstance(a, Node):
+            return a in nodes
+        elif isinstance(a, list) or isinstance(a, tuple):
+            return all([in_nodes(arg, nodes) for arg in a])
+        return False
+
+    result_graph = Graph()
+    cache_for_no_tensor_check: Dict[Node, bool] = dict()
+    for node in observed_graph.nodes:
+        root_node, matched_nodes, pattern, quantize_handler, qconfig = matches.get(
+            node.name, (None, None, None, None, None))
+
+        if node.op == "call_module" and is_activation_post_process(modules[node.target]):
+            # rule 1: if the input of a copy node is observed, we won't need to
+            # insert observer for the output of copy node
+            if in_nodes(node.args[0], copy_nodes) and in_nodes(node.args[0], observed_nodes):
+                # we'll remove the activation_post_process if the previous node is
+                # an observed copy node
+                app_to_remove.add(node)
+
+            # rule 2: if the previous node is a binary op without tensor input, we can remove the observer
+            if in_nodes(node.args[0], non_tensor_input_binary_op_nodes):
+                app_to_remove.add(node)
+            observed_nodes.add(node)
+
+        if root_node is node and qconfig is not None:
+            if isinstance(quantize_handler, CopyNodeQuantizeHandler):
+                copy_nodes.add(node)
+                # if previous node is observed, the copy node will be observed as well
+                if in_nodes(node.args[0], observed_nodes):
+                    observed_nodes.add(node)
+            if all_node_args_have_no_tensors(node, modules, cache_for_no_tensor_check):
+                non_tensor_input_binary_op_nodes.add(node)
+
+        # rule 3: for special node, we'll just remove observer for its input
+        special_nodes = [
+            ("call_function", operator.getitem),
+        ]
+        if (node.op, node.target) in special_nodes:
+            if in_nodes(node.args[0], observed_nodes):
+                prev_node = node.args[0].args[0]
+                if prev_node.name not in qconfig_map or qconfig_map[prev_node.name] is None:
+                    app_to_remove.add(node.args[0])
+                    # if the previous node is not quantized, remove node from copy nodes
+                    if node in copy_nodes:
+                        copy_nodes.remove(node)
+
+    for node in observed_graph.nodes:
+        if node.op == "output":
+            result_graph.output(map_arg(node.args[0], load_arg))
+        elif node in app_to_remove:
+            env[node.name] = env[node.args[0].name]
+        else:
+            env[node.name] = result_graph.node_copy(node, load_arg)
+
+    return result_graph
+

# A dictionary for querying the weight index for a given op
WEIGHT_INDEX_DICT = {
    torch.nn.functional.conv1d: [1],
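Note on the rules above: a "copy node" here is an op such as flatten or operator.getitem that forwards its input values unchanged, so it inherits the observed/quantized state of its input and a second observer on its output adds nothing. A minimal torch.fx sketch of a graph containing such a node (illustrative only, not part of this patch):

    import torch
    import torch.fx

    class M(torch.nn.Module):
        def forward(self, x):
            x = torch.nn.functional.relu(x)
            # flatten only reshapes: its output holds the same values as its
            # input, so observing the relu output once covers both nodes
            return torch.flatten(x, 1)

    # printing the traced graph shows relu followed by the flatten "copy" node
    print(torch.fx.symbolic_trace(M()).graph)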
@@ -376,16 +448,15 @@ class Quantizer:
    def __init__(self):
        # mapping from matched node to full qualified path of activation_post_process
        # must be filled before convert
-        self.activation_post_process_map: Optional[
-            Dict[str, List[str]]] = None
+        self.activation_post_process_map: Dict[str, List[str]] = {}

        # mapping from matched node to the index of activation_post_process that we are
        # using currently
        self.activation_post_process_indexes: Dict[str, int] = {}

        # mapping from node name to qconfig that should be used for that node
        # filled out for a model during _generate_qconfig_map
-        self.qconfig_map: Optional[Dict[str, QConfigAny]] = None
+        self.qconfig_map: Dict[str, QConfigAny] = {}
        # mapping from fully qualified module name to module instance
        # for example,
        # {
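Typing these attributes as plain dicts initialized to empty containers, rather than Optional[...] = None, is what lets the `assert ... is not None` guards removed elsewhere in this diff be dropped. A small sketch of the difference for the type checker (illustrative, not from the patch):

    from typing import Dict, List, Optional

    class Before:
        def __init__(self) -> None:
            # Optional attribute: every use site needs a None check or an assert
            self.activation_post_process_map: Optional[Dict[str, List[str]]] = None

    class After:
        def __init__(self) -> None:
            # non-Optional attribute: callers may index and append directly
            self.activation_post_process_map: Dict[str, List[str]] = {}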
@@ -504,7 +575,7 @@ def _prepare(

        self.modules = dict(model.named_modules())

-        # map from node name to qconfig, used in _find_matches
+        # fill self.qconfig_map, a map from node name to qconfig, used in _find_matches
        self._generate_qconfig_map(model, model.graph, qconfig_dict, node_name_to_scope)

        # match the patterns that will get quantized
@@ -526,7 +597,7 @@ def _prepare(
        # have to be quantized, which requires measuring stats,
        # initialize an DefaultQuantizeHandler object for each
        quants: Dict[str, List[Tuple[DefaultQuantizeHandler, Callable]]] = \
-            self._find_quants(model.graph, matches)
+            self._find_quants(model.graph, self.modules, matches)

        self.activation_post_process_map = defaultdict(list)
        env: Dict[Any, Any] = {}
@@ -619,6 +690,17 @@ def load_arg(a):
                    env,
                    observed_graph, load_arg)

+        self.modules = dict(model.named_modules())
+
+        # TODO: refactor this to a separate function
+        matches = self._find_matches(
+            observed_graph, self.modules, self.patterns, standalone_module_names,
+            standalone_module_classes, custom_module_classes)
+        quants = self._find_quants(observed_graph, self.modules, matches)
+
+        observed_graph = handle_copy_nodes(
+            observed_graph, matches, quants, self.qconfig_map,
+            self.activation_post_process_map, self.modules)

        self.save_state(model)
        model = ObservedGraphModule(model, observed_graph)
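The matching pass is repeated here because observer insertion has added new call_module nodes, so matches and quants computed on the original graph are stale; handle_copy_nodes then rebuilds the observed graph with the redundant observers dropped. Rough shape of the updated flow (illustrative outline, not the patch's exact code):

    # 1. walk the original graph and insert observers as before
    # 2. refresh self.modules, matches and quants against the observed graph,
    #    which now contains the observer call_module nodes
    # 3. rebuild the graph via handle_copy_nodes, skipping activation_post_process
    #    nodes that follow an observed copy node or a tensor-free binary op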
@@ -726,7 +808,7 @@ def _convert(self, model: GraphModule, is_reference: bool = False,
            custom_module_classes=custom_module_classes)

        quants: Dict[str, List[Tuple[DefaultQuantizeHandler, Callable]]] = \
-            self._find_quants(model.graph, matches)
+            self._find_quants(model.graph, self.modules, matches)

        self.quantized_graph = Graph()
        env: Dict[str, Node] = {}
@@ -845,7 +927,9 @@ def is_output_quantized(node: Node, obj: QuantizeHandler) -> bool:
            quantized = True

            # Need to get correct quantized/non-quantized state forn the output
-            # of CopyNodeQuantizeHandler
+            # of FixedQParamsQuantizeHandler
+            # TODO: we may want to try to remove the special case here
+            # as well
            if type(obj) in [
                    CopyNodeQuantizeHandler,
                    FixedQParamsOpQuantizeHandler
@@ -854,14 +938,14 @@ def is_output_quantized(node: Node, obj: QuantizeHandler) -> bool:
                    'call_module',
                    'call_function',
                    'call_method'], \
-                    'CopyNodeQuantizeHandler of type ' + node.op + ' is not handled'
+                    'FixedQParamsQuantizeHandler of type ' + node.op + ' is not handled'
                # TODO: need to extend this to consider all relevant args instead of just arg[0]
                quantized = node_arg_is_quantized(node.args[0])

            # the output is unquantized if the node is not a CopyNode
            # and activation is fp16 (since we will output fp32 currently for fp16
            # converter
-            if (not isinstance(obj, CopyNodeQuantizeHandler) and not activation_is_int8_quantized(qconfig)) or \
+            if not activation_is_int8_quantized(qconfig) or \
                    not input_output_observed(obj):
                quantized = False
            if node_return_type_is_int(node):
@@ -1155,14 +1239,14 @@ def record_match(pattern, node, matched):
            else:
                matched.append(node)

-        assert self.qconfig_map is not None
+        cache_for_no_tensor_check: Dict[Node, bool] = dict()
        for node in reversed(graph.nodes):
            if node.name not in match_map and node.name not in all_matched:
                for pattern, value in patterns.items():
                    if is_match(modules, node, pattern):
                        skip_this_match = False
                        if value is BinaryOpQuantizeHandler:
-                            use_copy_node = all_node_args_have_no_tensors(node)
+                            use_copy_node = all_node_args_have_no_tensors(node, modules, cache_for_no_tensor_check)
                            if use_copy_node:
                                # TODO(future PR): update the pattern to quantize
                                # handler logic to take this into account.
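all_node_args_have_no_tensors now takes the modules map and a per-call cache so the recursive walk over node args is memoized across the matching loop. A minimal sketch of that caching pattern (hypothetical helper, not the real implementation, which also resolves call_module targets through `modules`):

    from typing import Any, Dict

    def tensor_free(node: Any, cache: Dict[Any, bool]) -> bool:
        # memoize per node: the same arg is reachable from many user nodes
        if node in cache:
            return cache[node]
        args = getattr(node, "args", ())
        # hypothetical leaf check: treat anything without a shape as tensor-free
        result = all(tensor_free(a, cache) for a in args) if args else not hasattr(node, "shape")
        cache[node] = result
        return result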
@@ -1220,14 +1304,16 @@ def is_standalone_module(node_target):

        return match_map

-    def _find_quants(self, graph: Graph, matches: Dict[str, MatchResult],
-                     ) -> Dict[str, List[Tuple[DefaultQuantizeHandler, Callable]]]:
+    def _find_quants(
+            self, graph: Graph, modules: Dict[str, torch.nn.Module],
+            matches: Dict[str, MatchResult]) -> Dict[str, List[Tuple[DefaultQuantizeHandler, Callable]]]:
        """
        Takes the nodes in the input graph and pending matches, and finds and
        returns the input and output nodes which need to be quantized.

        Inputs:
          - graph: an fx.Graph object
+          - modules: a dictionary from module path to module
          - matches: output of self._find_matches function

        Outputs a map of
@@ -1241,13 +1327,14 @@ def _find_quants(self, graph: Graph, matches: Dict[str, MatchResult],
            int8 and then float16
        """
        quants: Dict[str, List[Tuple[DefaultQuantizeHandler, Callable]]] = defaultdict(list)
+        cache_for_no_tensor_check: Dict[Node, bool] = dict()

        def visit(node, matched_pattern, qconfig):
            def visit_arg(arg):
                is_weight = node_arg_is_weight(node, arg)
                is_bias = node_arg_is_bias(node, arg)
                is_activation = not (is_weight or is_bias)
-                no_tensors = all_node_args_have_no_tensors(arg)
+                no_tensors = all_node_args_have_no_tensors(arg, modules, cache_for_no_tensor_check)
                # bias needs to be quantized if activation is fp16 and weight is fp16
                # this is the case for glow
                should_add_handler = qconfig is not None and (