
I was trying to reproduce a DNN where a block activation function called BlockRelu is used. It is defined as

BlockRelu(x) = x if at least one element of the block x is positive, and 0 otherwise (the whole block is either passed through unchanged or zeroed out).

I tried to write this function based on some example code for self-defined activation functions, but those examples are all scalar functions, while BlockRelu operates on the block as a whole. Because of the difference between a numpy array and a tensor, a numpy function cannot be used directly here. I wonder if anyone could help. Thanks. Here is my code:

import tensorflow as tf
import numpy as np
from tensorflow.python.framework import ops

def block_relu(x):
    # numpy version: pass the whole block through if any element is positive,
    # otherwise zero the block out
    for i in range(x.shape[0]):
        if x[i] > 0:
            return x
    return x * 0


def grad_block_relu(x):
    # numpy version of the gradient: all ones if the block was passed through,
    # all zeros if it was zeroed out
    for i in range(x.shape[0]):
        if x[i] > 0:
            return np.ones(x.shape[0])
    return x * 0


# transferring a common function into a numpy function, not needed here
'''
block_relu_np = np.vectorize(block_relu)
grad_block_relu_np = np.vectorize(grad_block_relu)
'''
# numpy uses float64 but tensorflow uses float32
block_relu_np32 = lambda x: block_relu(x).astype(np.float32)
grad_block_relu_np32 = lambda x: grad_block_relu(x).astype(np.float32)


def grad_block_relu_tf(x, name=None):
    with ops.name_scope(name, "grad_block_relu_tf", [x]) as name:
        y = tf.py_func(grad_block_relu_np32, [x], [tf.float32], False, name)
    return y[0]


def my_py_func(func, inp, Tout, stateful=False, name=None, my_grad_func=None):
    # a unique name is required to avoid duplicates:
    random_name = "PyFuncGrad" + str(np.random.randint(0, 1E+8))
    tf.RegisterGradient(random_name)(my_grad_func)
    g = tf.get_default_graph()
    with g.gradient_override_map({"PyFunc": random_name, "PyFuncStateless": random_name}):
        return tf.py_func(func, inp, Tout, stateful=stateful, name=name)


# The gradient function passed to my_py_func above takes a special form:
# it receives (the operation, the gradients flowing in from later operations)
# and propagates (i.e., returns) the gradients backward past the operation.
def _block_relu_grad(op, pre_grad):
    x = op.inputs[0]  # note: this is a symbolic tf.Tensor, not a numpy array
    cur_grad = grad_block_relu(x)
    next_grad = pre_grad * cur_grad
    return next_grad


def block_relu_tf(x, name=None):
    with ops.name_scope(name, "block_relu_tf", [x]) as name:
        y = my_py_func(block_relu_np32, [x], [tf.float32], stateful=False, name=name, my_grad_func=_block_relu_grad)
    return y[0]


with tf.Session() as sess:
    x = tf.constant([-0.3, 0.005, 0.08, 0.12])
    y = block_relu_tf(x)
    tf.global_variables_initializer().run()
    print(x.eval())
    print(y.eval())
    print(tf.gradients(y, [x])[0].eval())

It will give an error: TypeError: Using a tf.Tensor as a Python bool is not allowed. Use if t is not None: instead of if t: to test if a tensor is defined, and use TensorFlow ops such as tf.cond to execute subgraphs conditioned on the value of a tensor.
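The same TypeError can be reproduced in isolation: it appears whenever a symbolic Tensor is used in a Python boolean context, which is what happens when _block_relu_grad passes op.inputs[0] (a Tensor, not a numpy array) to the numpy-style grad_block_relu. A minimal standalone sketch (graph mode, TF 1.x; not part of the network above):

import tensorflow as tf

t = tf.constant([-0.3, 0.005])
try:
    # t[0] > 0 is itself a symbolic Tensor; `if` needs a Python bool
    if t[0] > 0:
        pass
except TypeError as e:
    print(e)  # "Using a tf.Tensor as a Python bool is not allowed. ..."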

1 Answer


I'm pretty sure you can implement it with standard TensorFlow functions:

# input: x
# the block is scaled by 1.0 if it contains any positive element, by 0.0 otherwise
y = tf.scalar_mul(tf.sign(tf.reduce_max(tf.nn.relu(x))), x)
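A quick sanity check of this expression, written in the same TF 1.x style as the test snippet in the question (the input is just the question's example values):

import tensorflow as tf

with tf.Session() as sess:
    x = tf.constant([-0.3, 0.005, 0.08, 0.12])
    # sign(max(relu(x))) is 1.0 if any element of the block is positive, else 0.0
    y = tf.scalar_mul(tf.sign(tf.reduce_max(tf.nn.relu(x))), x)
    print(sess.run(y))                        # block passes through unchanged here
    print(sess.run(tf.gradients(y, [x])[0]))  # gradient is well defined, no py_func needed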