TensorFlow Cheatsheet

This is a cheatsheet for TensorFlow 2.x.

Install TensorFlow

# Upgrade pip
pip install --upgrade pip

# Install the current stable release of TensorFlow.
pip install tensorflow

TensorFlow APIs

Tensor

# Define a constant tensor.
tf.constant(5)

# Define another constant tensor.
tf.constant([[1.0, 2.0], [3.0, 4.0]])

# Compute a tensor from two constant tensors.
t1 = tf.constant([[1.0, 2.0], [3.0, 4.0]])
t2 = tf.constant([[1.0, 1.0], [0.0, 1.0]])
t3 = tf.matmul(t1, t2)

# Get the tensor device (here t is any tensor, e.g. t3 from above).
t.device

# Get the DType of the tensor.
t.dtype

# Get the tensor name (defined only for symbolic tensors inside a graph,
# e.g. within a tf.function; eager tensors raise an error).
t.name

# Get the graph containing this tensor (symbolic tensors only).
t.graph

# Get the Operation that produces this tensor as an output (symbolic tensors only).
t.op

# Get the tensor shape.
t.shape

# Get the index of this tensor in the outputs of its Operation (symbolic tensors only).
t.value_index
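
# In eager mode only device, dtype, and shape are available. A minimal sketch of
# inspecting the graph-only attributes inside a tf.function (the exact printed
# names depend on the traced graph):
@tf.function
def matmul_fn(a, b):
    t = tf.matmul(a, b)
    # t is a symbolic tensor here, so the graph-only attributes exist.
    print(t.name, t.op.type, t.value_index, t.shape)  # e.g. MatMul:0 MatMul 0 (2, 2)
    return t

matmul_fn(t1, t2)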

RaggedTensor

# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
tf.RaggedTensor.from_row_splits(
    values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]
)

# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
tf.RaggedTensor.from_row_lengths(
    values=[3, 1, 4, 1, 5, 9, 2, 6], row_lengths=[4, 0, 3, 1, 0]
)

# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
tf.RaggedTensor.from_value_rowids(
    values=[3, 1, 4, 1, 5, 9, 2, 6], value_rowids=[0, 0, 0, 0, 2, 2, 2, 3], nrows=5
)

# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
tf.RaggedTensor.from_row_starts(
    values=[3, 1, 4, 1, 5, 9, 2, 6], row_starts=[0, 4, 4, 7, 8]
)

# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
tf.RaggedTensor.from_row_limits(
    values=[3, 1, 4, 1, 5, 9, 2, 6], row_limits=[4, 4, 7, 8, 8]
)

# The tensor value is <tf.RaggedTensor [[3, 1], [4, 1], [5, 9], [2, 6]]>
tf.RaggedTensor.from_uniform_row_length(
    values=[3, 1, 4, 1, 5, 9, 2, 6], uniform_row_length=2
)
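
# The same ragged value can also be built directly with tf.ragged.constant.
# A short extra sketch of a few common RaggedTensor operations:
rt = tf.ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
rt.shape                   # (5, None): the second dimension is ragged.
rt.row_lengths()           # [4, 0, 3, 1, 0]
rt.values                  # [3, 1, 4, 1, 5, 9, 2, 6]
tf.reduce_sum(rt, axis=1)  # [9, 0, 16, 6, 0]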

SparseTensor

# Defines a sparse tensor representing the following dense tensor:
# [[1, 0, 0, 0]
#  [0, 0, 2, 0]
#  [0, 0, 0, 0]]
tf.sparse.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
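
# A sparse tensor can be converted to and from its dense form; a minimal sketch:
st = tf.sparse.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])
tf.sparse.to_dense(st)                  # The dense tensor shown above.
tf.sparse.from_dense([[1, 0], [0, 2]])  # Back from a dense tensor.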

Variable

# Create a variable.
v = tf.Variable(1.)

# Assign 2.0 to the variable.
v.assign(2.)

# Add 0.5 to the variable.
v.assign_add(0.5)

# Subtract 0.5 from the variable.
v.assign_sub(0.5)

# Matmul a variable and a constant tensor.
w = tf.Variable([[1.], [2.]])
x = tf.constant([[3., 4.]])
tf.matmul(w, x) # [[3., 4.], [6., 8.]]

# A tf.Variable can only be created on the first call (trace) of a tf.function.
class M(tf.Module):
    @tf.function
    def __call__(self, x):
        if not hasattr(self, "v"):    # Or set self.v to None in __init__
            self.v = tf.Variable(x)
        return self.v * x
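
# A short usage sketch of the module above: the variable is created on the
# first call and reused afterwards.
m = M()
m(tf.constant(3.))  # Creates v = 3.0 and returns 9.0.
m(tf.constant(4.))  # Reuses v (still 3.0) and returns 12.0.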

tf.data

# Load dataset using range.
tf.data.Dataset.range(5)                               # [0, 1, 2, 3, 4]
tf.data.Dataset.range(2, 5)                            # [2, 3, 4]
tf.data.Dataset.range(1, 5, 2)                         # [1, 3]
tf.data.Dataset.range(1, 5, -2)                        # []
tf.data.Dataset.range(5, 1)                            # []
tf.data.Dataset.range(5, 1, -2)                        # [5, 3]
tf.data.Dataset.range(2, 5, output_type=tf.int32)      # [2, 3, 4]
tf.data.Dataset.range(1, 5, 2, output_type=tf.float32) # [1.0, 3.0]

# Create a dataset from a Python list.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])

# Load dataset from txt files.
dataset = tf.data.TextLineDataset(["file1.txt", "file2.txt"])

# Load data from tfrecords files.
dataset = tf.data.TFRecordDataset(["file1.tfrecords", "file2.tfrecords"])

# Create a dataset using all files matching a pattern.
dataset = tf.data.Dataset.list_files("/path/*.txt")

# Split dataset into batches.
dataset = tf.data.Dataset.range(8)
dataset = dataset.batch(3)  # The dataset value is [[0, 1, 2], [3, 4, 5], [6, 7]]

# Transform a dataset.
dataset = dataset.map(lambda x: x*2)

# Prefetch a dataset (overlap producing and consuming elements).
dataset = tf.data.Dataset.range(3)
dataset = dataset.prefetch(2)

# Repeat a dataset.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
dataset = dataset.repeat(3)  # [1, 2, 3, 1, 2, 3, 1, 2, 3]

# Shuffle a dataset.
dataset = tf.data.Dataset.range(3)
dataset = dataset.shuffle(3, reshuffle_each_iteration=False)
dataset = dataset.repeat(2)  # e.g. [1, 0, 2, 1, 0, 2] (order depends on the shuffle seed)

# Concatenate two datasets.
a = tf.data.Dataset.range(1, 4) # [1, 2, 3]
b = tf.data.Dataset.range(4, 8) # [4, 5, 6, 7]
ds = a.concatenate(b)           # [1, 2, 3, 4, 5, 6, 7]

# Zip two datasets.
a = tf.data.Dataset.range(1, 4)  # [1, 2, 3]
b = tf.data.Dataset.range(4, 7)  # [4, 5, 6]
ds = tf.data.Dataset.zip((a, b)) # [(1, 4), (2, 5), (3, 6)]

# Iterate over the elements of a dataset.
for element in dataset:
    print(element)
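
# Elements can also be pulled out as NumPy values or limited with take();
# a short additional sketch:
dataset = tf.data.Dataset.range(3)
list(dataset.as_numpy_iterator())  # [0, 1, 2]
for element in dataset.take(2):
    print(element.numpy())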

tf.math

# Get absolute values.
x = tf.constant([-2.25, 3.25])
tf.abs(x) # [2.25, 3.25]

# Add a scalar and a list.
tf.add([1, 2, 3, 4, 5], 1) # [2, 3, 4, 5, 6]

# Add two tensors.
x = tf.convert_to_tensor([1, 2, 3, 4, 5])
y = tf.convert_to_tensor(1)
z = x + y # [2, 3, 4, 5, 6]

# Add a list and a tensor.
x = [1, 2, 3, 4, 5]
y = tf.constant([1, 2, 3, 4, 5])
tf.add(x, y)

# Add n tensors.
a = tf.constant([[3, 5], [4, 8]])
b = tf.constant([[1, 6], [2, 9]])
tf.math.add_n([a, b, a]) # [[7, 16], [10, 25]]

# Get the cumulative sum.
x = tf.constant([2, 4, 6, 8])
tf.cumsum(x) # [2, 6, 12, 20]

# Get the cumulative sum along a given axis.
y = tf.constant([[2, 4, 6, 8], [1, 3, 5, 7]])
tf.cumsum(y, axis=0) # [[2, 4, 6, 8], [3, 7, 11, 15]]
tf.cumsum(y, axis=1) # [[2, 6, 12, 20], [1, 4, 9, 16]]

# Get the exclusive cumulative sum.
x = tf.constant([2, 4, 6, 8])
tf.cumsum(x, exclusive=True) # [0, 2, 6, 12]

# Get the reverse cumulative sum.
x = tf.constant([2, 4, 6, 8])
tf.cumsum(x, reverse=True) # [20, 18, 14, 8]

# Divide tensors.
x = tf.constant([16, 12, 11])
y = tf.constant([4, 6, 2])
tf.divide(x, y) # [4.0, 2.0, 5.5]

# Test elementwise equality of a tensor and a scalar (the scalar broadcasts).
x = tf.constant([2, 4])
y = tf.constant(2)
tf.math.equal(x, y) # [True, False]

# Test elementwise equality of two tensors.
x = tf.constant([2, 4])
y = tf.constant([2, 4])
tf.math.equal(x, y) # [True, True]

# Multiply tensors.
x = tf.constant([1, 2, 3, 4])
tf.math.multiply(x, x) # [1, 4, 9, 16]

# Multiply tensors of different shapes with broadcasting.
x = tf.ones([1, 2])
y = tf.ones([2, 1])
x * y  # [[1.0, 1.0], [1.0, 1.0]]

# Compute the power of one value to another.
x = tf.constant([[2, 2], [3, 3]])
y = tf.constant([[8, 16], [2, 3]])
tf.pow(x, y)  # [[256, 65536], [9, 27]]

# Compute sigmoid of a tensor.
x = tf.constant([0.0, 1.0, 50.0, 100.0])
tf.math.sigmoid(x) # [0.5, 0.7310586, 1.0, 1.0]

tf.linalg

# Transpose a matrix.
x = tf.constant([[1, 2, 3], [4, 5, 6]])
tf.linalg.matrix_transpose(x)  # [[1, 4], [2, 5], [3, 6]]

# Matmul two tensors.
a = tf.constant([[1, 2, 3], [4, 5, 6]])
b = tf.constant([[7, 8], [9, 10], [11, 12]])
c = tf.matmul(a, b) # [[58, 64], [139, 154]]
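
# An additional small sketch (not in the original list): tf.linalg.solve
# solves the linear system A x = b for float inputs.
A = tf.constant([[3., 1.], [1., 2.]])
b = tf.constant([[9.], [8.]])
tf.linalg.solve(A, b)  # [[2.], [3.]]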

tf.distribute

################################################################################
# Define a mirrored strategy, and create a variable in it.
# The variable will be mirrored on both GPU:0 and GPU:1.
################################################################################
strategy = tf.distribute.MirroredStrategy(["GPU:0", "GPU:1"])
with strategy.scope():
    x = tf.Variable(1.)


################################################################################
# Variables (e.g., x in this example) created inside a tf.function are still mirrored.
################################################################################
x = []
@tf.function  # Wrap the function with tf.function.
def create_variable():
    if not x:
        x.append(tf.Variable(1.))
    return x[0]
strategy = tf.distribute.MirroredStrategy(["GPU:0", "GPU:1"])
with strategy.scope():
    _ = create_variable()


################################################################################
# Datasets can also be distributed across multiple devices within the
# MirroredStrategy.
################################################################################
my_strategy = tf.distribute.MirroredStrategy()
with my_strategy.scope():
  @tf.function
  def distribute_train_epoch(dataset):
    def replica_fn(inputs):
      # Placeholder per-replica computation; here simply sum the inputs.
      return tf.reduce_sum(inputs)

    total_result = 0
    for x in dataset:
      per_replica_result = my_strategy.run(replica_fn, args=(x,))
      total_result += my_strategy.reduce(tf.distribute.ReduceOp.SUM,
                                         per_replica_result, axis=None)
    return total_result

  # `dataset` (a tf.data.Dataset) and EPOCHS are assumed to be defined elsewhere.
  dist_dataset = my_strategy.experimental_distribute_dataset(dataset)
  for _ in range(EPOCHS):
    train_result = distribute_train_epoch(dist_dataset)


################################################################################
# MultiWorkerMirroredStrategy is used for synchronous multi-worker training.
################################################################################
strategy = tf.distribute.MultiWorkerMirroredStrategy()

# The model, optimizer, and dataset iterator below, as well as NUM_STEP, are
# assumed to be defined elsewhere (the model and optimizer under strategy.scope()).
@tf.function
def train_step(iterator):
    def step_fn(inputs):
        features, labels = inputs
        with tf.GradientTape() as tape:
            logits = model(features, training=True)
            loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits)

        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    strategy.run(step_fn, args=(next(iterator),))

for _ in range(NUM_STEP):
    train_step(iterator)
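

################################################################################
# For real multi-worker runs, the cluster layout is usually provided through
# the TF_CONFIG environment variable before the strategy is created.
# A sketch with placeholder hosts, ports, and task index (all assumptions).
################################################################################
import json
import os

os.environ["TF_CONFIG"] = json.dumps({
    "cluster": {"worker": ["host1.example.com:12345", "host2.example.com:23456"]},
    "task": {"type": "worker", "index": 0},  # This process is worker 0.
})

# TF_CONFIG must be set before creating the strategy.
strategy = tf.distribute.MultiWorkerMirroredStrategy()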


################################################################################
# Use TPUStrategy to train a model on TPUs.
################################################################################
import numpy as np

resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.TPUStrategy(resolver)

with strategy.scope():
    model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(5,))])
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

def dataset_fn(ctx):
    x = np.random.random((2, 5)).astype(np.float32)
    y = np.random.randint(2, size=(2, 1))
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    return dataset.repeat().batch(1, drop_remainder=True)

dist_dataset = strategy.distribute_datasets_from_function(dataset_fn)
iterator = iter(dist_dataset)

@tf.function()
def train_step(iterator):
    def step_fn(inputs):
        features, labels = inputs
        with tf.GradientTape() as tape:
            logits = model(features, training=True)
            loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits)

        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    strategy.run(step_fn, args=(next(iterator),))

train_step(iterator)

tf.saved_model

# Define a tf module.
class Adder(tf.Module):
    @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.float32)])
    def add(self, x):
        return x + x

# Save the tf module as our model.
model = Adder()
tf.saved_model.save(model, '/tmp/adder')

# Load the model.
loaded_model = tf.saved_model.load('/tmp/adder')
loaded_model.add(1.) # Returns a tensor with a value of 2.0.
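
# Serving signatures can also be exported explicitly and called through
# loaded_model.signatures; a small sketch (the key name is just a convention):
tf.saved_model.save(model, '/tmp/adder', signatures={'serving_default': model.add})
loaded_model = tf.saved_model.load('/tmp/adder')
serving_fn = loaded_model.signatures['serving_default']
serving_fn(tf.constant(1.))  # Returns a dict of outputs, e.g. {'output_0': 2.0}.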

tf.keras

Model

# Define a Keras model by subclassing tf.keras.Model.
import tensorflow as tf

class MyModel(tf.keras.Model):

  def __init__(self):
    super().__init__()
    self.dense1 = tf.keras.layers.Dense(4, activation=tf.nn.relu)
    self.dense2 = tf.keras.layers.Dense(5, activation=tf.nn.softmax)

  def call(self, inputs):
    x = self.dense1(inputs)
    return self.dense2(x)

model = MyModel()
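
# A minimal usage sketch for the subclassed model above, with hypothetical
# random features and labels (the feature size 16 is an assumption; the model
# infers its input shape on the first call).
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
x = tf.random.normal((8, 16))                          # hypothetical features
y = tf.random.uniform((8,), maxval=5, dtype=tf.int32)  # hypothetical labels in [0, 5)
model.fit(x, y, epochs=1)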

Sequential

# Optionally, the first layer can receive an `input_shape` argument:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# Afterwards, we do automatic shape inference:
model.add(tf.keras.layers.Dense(4))

# This is identical to the following:
model = tf.keras.Sequential()
model.add(tf.keras.Input(shape=(16,)))
model.add(tf.keras.layers.Dense(8))

# Note that you can also omit the `input_shape` argument.
# In that case the model doesn't have any weights until the first call
# to a training/evaluation method (since it isn't yet built):
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8))
model.add(tf.keras.layers.Dense(4))
# model.weights not created yet

# Whereas if you specify the input shape, the model gets built
# continuously as you are adding layers:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
model.add(tf.keras.layers.Dense(4))
len(model.weights)  # Returns "4"

# When using the delayed-build pattern (no input shape specified), you can
# choose to manually build your model by calling
# `build(batch_input_shape)`:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8))
model.add(tf.keras.layers.Dense(4))
model.build((None, 16))
len(model.weights)  # Returns "4"

# Note that when using the delayed-build pattern (no input shape specified),
# the model gets built the first time you call `fit`, `eval`, or `predict`,
# or the first time you call the model on some input data.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(8))
model.add(tf.keras.layers.Dense(1))
model.compile(optimizer='sgd', loss='mse')
# This builds the model for the first time (x and y are training data assumed
# to be defined elsewhere):
model.fit(x, y, batch_size=32, epochs=10)

Layers

# Define a relu activation layer.
layer = tf.keras.layers.Activation('relu')
output = layer([-3.0, -1.0, 0.0, 2.0])  # [0.0, 0.0, 0.0, 2.0]

# Define an Add layer.
input_shape = (2, 3, 4)
x1 = tf.random.normal(input_shape)
x2 = tf.random.normal(input_shape)
y = tf.keras.layers.Add()([x1, x2]) # the shape of y is (2, 3, 4)

# Define an Average layer (assumes `import numpy as np`).
x1 = np.ones((2, 2))
x2 = np.zeros((2, 2))
y = tf.keras.layers.Average()([x1, x2]) # [[0.5, 0.5], [0.5, 0.5]]

# Define an AveragePooling1D layer.
x = tf.constant([1., 2., 3., 4., 5.])
x = tf.reshape(x, [1, 5, 1])
avg_pool_1d = tf.keras.layers.AveragePooling1D(pool_size=2, strides=1, padding='valid')
avg_pool_1d(x) # [[[1.5], [2.5], [3.5], [4.5]]]

# Define a Concatenate layer.
x = np.arange(20).reshape(2, 2, 5)
y = np.arange(20, 30).reshape(2, 1, 5)
z = tf.keras.layers.Concatenate(axis=1)([x, y]) # the shape of z is (2, 3, 5)

# Define a Conv1D layer.
# The inputs are 128-length vectors with 10 timesteps, and the batch size is 4.
input_shape = (4, 10, 128)
x = tf.random.normal(input_shape)
y = tf.keras.layers.Conv1D(32, 3, activation='relu', input_shape=input_shape[1:])(x) # the shape of y is (4, 8, 32)

# Define a Conv2D layer.
input_shape = (4, 28, 28, 3)
x = tf.random.normal(input_shape)
y = tf.keras.layers.Conv2D(2, 3, activation='relu', input_shape=input_shape[1:])(x) # the shape of y is (4, 26, 26, 2)

# Define a Conv3D layer.
input_shape = (4, 28, 28, 28, 1)
x = tf.random.normal(input_shape)
y = tf.keras.layers.Conv3D(2, 3, activation='relu', input_shape=input_shape[1:])(x) # the shape of y is (4, 26, 26, 26, 2)

# Define a Dense layer with relu as its activation function.
tf.keras.layers.Dense(32, activation='relu')

# Define a Dropout layer with a 20% drop rate.
tf.keras.layers.Dropout(.2, input_shape=(2,))

# Define an Embedding layer with an input dimension of 1000 and output dimension of 64.
tf.keras.layers.Embedding(1000, 64, input_length=10)

# Define a Flatten layer.
tf.keras.layers.Flatten()

# Define a Hashing layer with 32 bins.
tf.keras.layers.Hashing(num_bins=32)

# Define a four-unit LSTM layer.
tf.keras.layers.LSTM(4)

# Define a string look-up layer.
vocab = ["a", "b", "c", "d"]
data = tf.constant([["a", "c", "d"], ["d", "z", "b"]])
layer = tf.keras.layers.StringLookup(vocabulary=vocab)
layer(data) # Returns [[1, 3, 4], [4, 0, 2]]
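
# With invert=True the same vocabulary maps indices back to strings
# (index 0 is the default OOV token "[UNK]").
inverse_layer = tf.keras.layers.StringLookup(vocabulary=vocab, invert=True)
inverse_layer([[1, 3, 4], [4, 0, 2]])  # [["a", "c", "d"], ["d", "[UNK]", "b"]]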

Save & Load

# Define a model.
model = tf.keras.Sequential(
    [tf.keras.layers.Dense(5, input_shape=(3,)),
    tf.keras.layers.Softmax()]
)

# Save the model to /tmp/model.
model.save('/tmp/model')  # Or tf.keras.models.save_model(model, '/tmp/model')

# Load the model back from /tmp/model
loaded_model = tf.keras.models.load_model('/tmp/model')
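
# A quick sanity check (a sketch; x here is just random input): the loaded
# model should produce the same outputs as the original.
x = tf.random.normal((1, 3))
tf.debugging.assert_near(model(x), loaded_model(x))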

Example Model Training Pipelines

A Sequential Model Training Pipeline

import tensorflow as tf
mnist = tf.keras.datasets.mnist

# Load training data.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Construct the model.
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10)
])

# Define the loss function.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Compile the model.
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

# Train the model.
model.fit(x_train, y_train, epochs=5)

# Evaluate the model.
model.evaluate(x_test, y_test, verbose=2)
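
# The model above outputs logits. As a follow-up sketch (not part of the
# original pipeline), wrap it with a Softmax layer to return probabilities.
probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])
probability_model(x_test[:3])  # Per-class probabilities for the first 3 test images.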

A Custom Model Training Pipeline

import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model

# Get dataset.
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Add a channels dimension
x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")

# Process dataset with tf.data.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)

# Define the model.
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)

    def call(self, x):
        x = self.conv1(x)
        x = self.flatten(x)
        x = self.d1(x)
        return self.d2(x)

# Create an instance of the model
model = MyModel()

# Define loss function.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Define optimizer.
optimizer = tf.keras.optimizers.Adam()

# Define train loss and accuracy.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

# Define test loss and accuracy.
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

# Define the train step with tf.function.
@tf.function
def train_step(images, labels):
    with tf.GradientTape() as tape:
        # training=True is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)

# Define the test step with tf.function.
@tf.function
def test_step(images, labels):
    # training=False is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    predictions = model(images, training=False)
    t_loss = loss_object(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)

# Train and evaluate the model.
for epoch in range(4):
    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for images, labels in train_ds:
        train_step(images, labels)

    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)

    print(
        f'Epoch {epoch + 1}, '
        f'Loss: {train_loss.result()}, '
        f'Accuracy: {train_accuracy.result() * 100}, '
        f'Test Loss: {test_loss.result()}, '
        f'Test Accuracy: {test_accuracy.result() * 100}'
    )