Extract shape information and encode it as constants. This improves the step time of the ptb_word_lm model by 5 to 10%, and the training speed of the inception model by 5%.

benoitsteiner · tensorflower-gardener · commit 8f9a91a2e1e7 · 2016-08-08T09:18:04.000-07:00
We avoid encoding static shapes as constants when control flow operations are involved, since the static shape information may be incorrect in some cases.
Change: 129636322
diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py
@@ -526,7 +526,7 @@ def testSizeOfScalar(self):
     tf_val = tf.size(tf.constant(0.0))
     c_val = tf.contrib.util.constant_value(tf_val)
     self.assertEqual(1, c_val)
-    self.assertEqual(np.int32, type(c_val))
+    self.assertEqual(np.ndarray, type(c_val))
 
   def testRank(self):
     tf_val = tf.rank(tf.constant(0.0, shape=[1, 2, 3]))
diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py
@@ -104,7 +104,6 @@
 # Aliases for some automatically-generated names.
 listdiff = gen_array_ops.list_diff
 
-
 def shape(input, name=None):
   """Returns the shape of a tensor.
 
@@ -121,13 +120,34 @@ def shape(input, name=None):
     input: A `Tensor` or `SparseTensor`.
     name: A name for the operation (optional).
 
+  Returns:
+    A `Tensor` of type `int32`.
+  """
+  return shape_internal(input, name, optimize=True)
+
+
+def shape_internal(input, name=None, optimize=True):
+  """Returns the shape of a tensor.
+
+  Args:
+    input: A `Tensor` or `SparseTensor`.
+    name: A name for the operation (optional).
+    optimize: if true, encode the shape as a constant when possible.
+
   Returns:
     A `Tensor` of type `int32`.
   """
   with ops.op_scope([input], name, "Shape") as name:
     if isinstance(input, ops.SparseTensor):
       return gen_math_ops.cast(input.shape, dtypes.int32)
     else:
+      input_tensor = ops.convert_to_tensor(input)
+      input_shape = input_tensor.get_shape()
+      # Static shape inference can be incorrect when loops are involved: disable
+      # shape optimization in this case to avoid generating invalid constants.
+      optimize &= input_tensor.graph._get_control_flow_context() is None
+      if optimize and input_shape.is_fully_defined():
+        return constant(input_shape.as_list(), dtypes.int32, name=name)
       return gen_array_ops.shape(input, name=name)
 
 
@@ -148,6 +168,20 @@ def size(input, name=None):
     input: A `Tensor` or `SparseTensor`.
     name: A name for the operation (optional).
 
+  Returns:
+    A `Tensor` of type `int32`.
+  """
+  return size_internal(input, name, optimize=True)
+
+
+def size_internal(input, name=None, optimize=True):
+  """Returns the size of a tensor.
+
+  Args:
+    input: A `Tensor` or `SparseTensor`.
+    name: A name for the operation (optional).
+    optimize: if true, encode the size as a constant when possible.
+
   Returns:
     A `Tensor` of type `int32`.
   """
@@ -156,6 +190,13 @@ def size(input, name=None):
       return gen_math_ops._prod(gen_math_ops.cast(input.shape, dtypes.int32), 0,
                                 name=name)
     else:
+      input_tensor = ops.convert_to_tensor(input)
+      input_shape = input_tensor.get_shape()
+      # Static shape inference can be incorrect when loops are involved: disable
+      # shape optimization in this case to avoid generating invalid constants.
+      optimize &= input_tensor.graph._get_control_flow_context() is None
+      if optimize and input_shape.is_fully_defined():
+        return constant(input_shape.num_elements(), dtypes.int32, name=name)
       return gen_array_ops.size(input, name=name)
 
 
@@ -180,13 +221,34 @@ def rank(input, name=None):
     input: A `Tensor` or `SparseTensor`.
     name: A name for the operation (optional).
 
+  Returns:
+    A `Tensor` of type `int32`.
+  """
+  return rank_internal(input, name, optimize=True)
+
+
+def rank_internal(input, name=None, optimize=True):
+  """Returns the rank of a tensor.
+
+  Args:
+    input: A `Tensor` or `SparseTensor`.
+    name: A name for the operation (optional).
+    optimize: if true, encode the rank as a constant when possible.
+
   Returns:
     A `Tensor` of type `int32`.
   """
   with ops.op_scope([input], name, "Rank") as name:
     if isinstance(input, ops.SparseTensor):
       return gen_array_ops.size(input.shape, name=name)
     else:
+      input_tensor = ops.convert_to_tensor(input)
+      input_shape = input_tensor.get_shape()
+      # Static shape inference can be incorrect when loops are involved: disable
+      # shape optimization in this case to avoid generating invalid constants.
+      optimize &= input_tensor.graph._get_control_flow_context() is None
+      if optimize and input_shape.ndims is not None:
+        return constant(input_shape.ndims, dtypes.int32, name=name)
       return gen_array_ops.rank(input, name=name)
 
 
@@ -1074,7 +1136,7 @@ def zeros(shape, dtype=dtypes.float32, name=None):
   return output
 
 
-def zeros_like(tensor, dtype=None, name=None):
+def zeros_like(tensor, dtype=None, name=None, optimize=True):
   """Creates a tensor with all elements set to zero.
 
   Given a single tensor (`tensor`), this operation returns a tensor of the
@@ -1093,21 +1155,23 @@ def zeros_like(tensor, dtype=None, name=None):
     dtype: A type for the returned `Tensor`. Must be `float32`, `float64`,
     `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, or `complex128`.
     name: A name for the operation (optional).
+    optimize: if true, attempt to statically determine the shape of 'tensor'
+    and encode it as a constant.
 
   Returns:
     A `Tensor` with all elements set to zero.
   """
   with ops.op_scope([tensor], name, "zeros_like") as name:
     tensor = ops.convert_to_tensor(tensor, name="tensor")
     if dtype is not None and tensor.dtype != dtype:
-      ret = zeros(shape(tensor), dtype, name=name)
+      ret = zeros(shape_internal(tensor, optimize=optimize), dtype, name=name)
       ret.set_shape(tensor.get_shape())
       return ret
     else:
       return gen_array_ops._zeros_like(tensor, name=name)
 
 
-def ones_like(tensor, dtype=None, name=None):
+def ones_like(tensor, dtype=None, name=None, optimize=True):
   """Creates a tensor with all elements set to 1.
 
   Given a single tensor (`tensor`), this operation returns a tensor of the same
@@ -1126,13 +1190,15 @@ def ones_like(tensor, dtype=None, name=None):
     dtype: A type for the returned `Tensor`. Must be `float32`, `float64`,
     `int8`, `int16`, `int32`, `int64`, `uint8`, `complex64`, or `complex128`.
     name: A name for the operation (optional).
+    optimize: if true, attempt to statically determine the shape of 'tensor'
+    and encode it as a constant.
 
   Returns:
     A `Tensor` with all elements set to 1.
   """
   with ops.op_scope([tensor], name, "ones_like") as name:
     tensor = ops.convert_to_tensor(tensor, name="tensor")
-    ones_shape = shape(tensor)
+    ones_shape = shape_internal(tensor, optimize=optimize)
     if dtype is None:
       dtype = tensor.dtype
     ret = ones(ones_shape, dtype=dtype, name=name)
diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py
@@ -371,7 +371,7 @@ def merge(inputs, name=None):
       return ops.SparseTensor(indices, values, dense_shape), chosen_index
     else:
       # For now convert all the inputs as IndexedSlices.
-      inputs = math_ops._as_indexed_slices_list(inputs)
+      inputs = math_ops._as_indexed_slices_list(inputs, optimize=False)
       values, _ = merge([inp.values for inp in inputs], name=name)
       indices, chosen_index = gen_control_flow_ops._merge(
           [inp.indices for inp in inputs], name="indices")
@@ -452,7 +452,7 @@ def _AddNextAndBackEdge(m, v):
     m.op._update_input(1, v)   # pylint: disable=protected-access
   elif isinstance(m, ops.IndexedSlices):
     # pylint: disable=protected-access
-    v = math_ops._as_indexed_slices(v)
+    v = math_ops._as_indexed_slices(v, optimize=False)
     v = _NextIteration(v)
     m.values.op._update_input(1, v.values)
     m.indices.op._update_input(1, v.indices)
@@ -902,7 +902,7 @@ def ZerosLikeForExit(self, val):
       else:
         # Only the shape of value is needed for backprop.
         forward_ctxt.outer_context.Enter()
-        shape = array_ops.shape(val)
+        shape = array_ops.shape_internal(val, optimize=False)
         forward_ctxt.outer_context.Exit()
         # Save the shape to a stack.
         history_shape = outer_grad_state.AddForwardAccumulator(shape)
@@ -920,7 +920,7 @@ def ZerosLikeForExit(self, val):
         # with the right shape.
         result = array_ops.zeros(val_shape.dims, val.dtype)
       else:
-        result = array_ops.zeros_like(val)
+        result = array_ops.zeros_like(val, optimize=False)
     return result
 
   def ZerosLike(self, op, index):
@@ -963,13 +963,13 @@ def ZerosLike(self, op, index):
         branch = op_ctxt.branch
         op_ctxt.outer_context.Enter()
         val = _SwitchRefOrTensor(op.inputs[0], pred)[1 - branch]
-        zeros_shape = array_ops.shape(val)
+        zeros_shape = array_ops.shape_internal(val, optimize=False)
         op_ctxt.outer_context.Exit()
         val.op._set_control_flow_context(op_ctxt)
         zeros_shape.op._set_control_flow_context(op_ctxt)
       else:
         op_ctxt.Enter()
-        zeros_shape = array_ops.shape(val)
+        zeros_shape = array_ops.shape_internal(val, optimize=False)
         op_ctxt.Exit()
 
       # Add forward accumulator for shape.
@@ -1054,13 +1054,13 @@ def ZerosLikeOutsideLoop(op, index):
   """Create zeros_like for the specified output of an op."""
   val = op.outputs[index]
   if not IsSwitch(op):
-    return array_ops.zeros_like(val)
+    return array_ops.zeros_like(val, optimize=False)
   else:
     op_ctxt = op._get_control_flow_context()
     pred = op_ctxt.pred
     branch = op_ctxt.branch
     switch_val = switch(op.inputs[0], pred)[1 - branch]
-    zeros_shape = array_ops.shape(switch_val)
+    zeros_shape = array_ops.shape_internal(switch_val, optimize=False)
     return array_ops.zeros(zeros_shape, dtype=val.dtype)
 
 
@@ -1664,7 +1664,7 @@ def AddBackPropAccumulator(self, op, grad):
       if self.outer_context:
         forward_ctxt = self.grad_state.forward_ctxt
         forward_ctxt.outer_context.Enter()
-        zeros_shape = array_ops.shape(value)
+        zeros_shape = array_ops.shape_internal(value, optimize=False)
         forward_ctxt.outer_context.Exit()
         history_zeros_shape = grad_state.AddForwardAccumulator(zeros_shape)
         self.outer_context.Enter()
@@ -1673,7 +1673,7 @@ def AddBackPropAccumulator(self, op, grad):
         acc = array_ops.zeros(real_shape, grad.dtype)
         self.outer_context.Exit()
       else:
-        zeros_shape = array_ops.shape(value)
+        zeros_shape = array_ops.shape_internal(value, optimize=False)
         acc = array_ops.zeros(zeros_shape, grad.dtype)
       acc._shape = grad.get_shape()  # pylint: disable=protected-access
 
@@ -1720,7 +1720,7 @@ def AddBackPropIndexedSlicesAccumulator(self, op, grad):
                                         name="b_acc")
       if self.outer_context: self.outer_context.Exit()
     else:
-      values_shape = array_ops.shape(op.inputs[0])[1:]
+      values_shape = array_ops.shape_internal(op.inputs[0], optimize=False)[1:]
       values_shape = array_ops.concat(0, [[1], values_shape])
       values_acc = array_ops.zeros(values_shape)
     indices_acc = constant_op.constant([0], indices.dtype)
@@ -1732,7 +1732,10 @@ def AddBackPropIndexedSlicesAccumulator(self, op, grad):
                                          shape=dense_shape.get_shape())
         if self.outer_context: self.outer_context.Exit()
       else:
-        shape_acc = array_ops.zeros_like(array_ops.shape(op.inputs[0]))
+        shape_acc = array_ops.zeros_like(
+            array_ops.shape_internal(
+                op.inputs[0], optimize=False),
+            optimize=False)
 
     if self.outer_context: self.outer_context.Exit()
 
diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py
@@ -1387,13 +1387,14 @@ def _calc_mat_mul_weight_parameters(graph, node):
                      (int(weights_shape[1]) * int(weights_shape[0])))
 
 
-def _as_indexed_slices(x):
+def _as_indexed_slices(x, optimize=True):
   """Convert 'x' to IndexedSlices.
 
   Convert a dense Tensor to a block-sparse IndexedSlices.
 
   Args:
     x: Either a Tensor object, or an IndexedSlices object.
+    optimize: if true, attempt to optimize the conversion of 'x'.
 
   Returns:
     An IndexedSlices object.
@@ -1406,18 +1407,19 @@ def _as_indexed_slices(x):
     raise TypeError("Not a Tensor or IndexedSlices: %s" % type(x))
   if isinstance(x, ops.IndexedSlices):
     return x
-  x_shape = array_ops.shape(x)
+  x_shape = array_ops.shape_internal(x, optimize=optimize)
   return ops.IndexedSlices(x, range(0, x_shape[0]), x_shape)
 
 
-def _as_indexed_slices_list(inputs):
+def _as_indexed_slices_list(inputs, optimize=True):
   """Convert all elements of 'inputs' to IndexedSlices.
 
   Additionally, homogenize the types of all the indices to
   either int32 or int64.
 
   Args:
     inputs: List containing either Tensor or IndexedSlices objects.
+    optimize: if true, attempt to optimize the conversion of each input.
 
   Returns:
     A list of IndexedSlices objects.
@@ -1427,7 +1429,7 @@ def _as_indexed_slices_list(inputs):
   """
   if not isinstance(inputs, (list, tuple)):
     raise TypeError("Expected a list or tuple, not a %s" % type(inputs))
-  outputs = [_as_indexed_slices(i) for i in inputs]
+  outputs = [_as_indexed_slices(i, optimize=optimize) for i in inputs]
   with_int32_index = [o.indices for o in outputs
                       if o.indices.dtype == dtypes.int32]
   if not with_int32_index or len(with_int32_index) == len(outputs):
diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py
@@ -999,7 +999,7 @@ def assert_variables_initialized(var_list=None):
     ranks = []
     for var in var_list:
       with ops.colocate_with(var.op):
-        ranks.append(array_ops.rank(var))
+        ranks.append(array_ops.rank_internal(var, optimize=False))
     if len(ranks) == 1:
       return ranks[0]
     else: