There is no out-of-the-box implementation of weighted softmax in MXNet, but people who have contributed a lot to MXNet have developed an example that uses weighted softmax for 2 classes (basically, weighted logistic regression). You can take a look at their implementation here - https://github.com/apache/incubator-mxnet/blob/df558290930f9f3ed6941c306b5cc650505f7481/example/sparse/linear_classification/weighted_softmax_ce.py
The example introduces a new custom operator that takes one value - the weight of the positive class. I have taken their code and adjusted it to your use case - passing an array of weights instead of a single value. Unfortunately, you cannot pass anything other than strings into a custom operator's constructor. That's why class_weights_values is defined as a string and there is an ugly conversion into an nd.array in the code.
This code won't be as fast as the original SoftmaxOutput, which is implemented in highly optimized C++:
import mxnet as mx
class WeightedSoftmaxCrossEntropyLoss(mx.operator.CustomOp):
    """Softmax cross-entropy loss where the gradient of each example is
    scaled by (class_weight / sum_of_all_weights) of its label's class.
    """
    def __init__(self, class_weights):
        # parse the weights from a comma-separated string into an NDArray
        self.class_weights = mx.nd.array([float(x) for x in class_weights.split(',')])
        # normalize the weights so they add up to 1
        self.class_scales = self.class_weights / self.class_weights.sum()

    def forward(self, is_train, req, in_data, out_data, aux):
        """Implements forward computation.

        is_train : bool, whether forwarding for training or testing.
        req : list of {'null', 'write', 'inplace', 'add'}, how to assign to out_data. 'null' means skip assignment, etc.
        in_data : list of NDArray, input data.
        out_data : list of NDArray, pre-allocated output buffers.
        aux : list of NDArray, mutable auxiliary states. Usually not used.
        """
        data = in_data[0]
        label = in_data[1]
        # the forward pass is a plain softmax; the weighting only affects the gradient
        pred = mx.nd.SoftmaxOutput(data, label)
        self.assign(out_data[0], req[0], pred)

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """Implements backward computation.

        req : list of {'null', 'write', 'inplace', 'add'}, how to assign to in_grad.
        out_grad : list of NDArray, gradient w.r.t. output data.
        in_grad : list of NDArray, gradient w.r.t. input data. This is the output buffer.
        """
        label = in_data[1]
        pred = out_data[0]
        # move the scales to the same context (CPU/GPU) as the labels
        class_scales = self.class_scales.as_in_context(label.context)
        # standard softmax cross-entropy gradient: probabilities minus one-hot labels
        dx = pred - mx.nd.one_hot(label, len(class_scales))
        # look up the weight of each example based on its label
        scale_factor = (class_scales[label]).reshape((pred.shape[0], 1))
        # apply the per-example scaling
        rescaled_dx = scale_factor * dx
        self.assign(in_grad[0], req[0], rescaled_dx)


@mx.operator.register("weighted_softmax_ce_loss")
class WeightedSoftmaxCrossEntropyLossProp(mx.operator.CustomOpProp):
    def __init__(self, class_weights):
        super(WeightedSoftmaxCrossEntropyLossProp, self).__init__(need_top_grad=True)
        # keep the weights as a string; custom op parameters can only be strings
        self.class_weights = class_weights

    def list_arguments(self):
        return ['data', 'label']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shapes):
        """Calculate output shapes from input shapes. This can be
        omitted if all inputs and outputs have the same shape.

        in_shapes : list of shape. Shape is described by a tuple of int.
        """
        data_shape = in_shapes[0]
        output_shape = data_shape
        # return 3 lists representing input shapes, output shapes, and aux data shapes
        return in_shapes, (output_shape,), ()

    def create_operator(self, ctx, in_shapes, in_dtypes):
        # create and return an instance of the CustomOp class
        return WeightedSoftmaxCrossEntropyLoss(self.class_weights)
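If you want to quickly sanity check the operator before wiring it into a model, you could run it imperatively with autograd, something like this (I haven't run this exact snippet, so treat it as a sketch; it assumes the file above has been run or imported so the operator is registered):
import mxnet as mx
from mxnet import autograd

# a small batch: 4 examples, 10 classes, labels 0..3
data = mx.nd.random.normal(shape=(4, 10))
label = mx.nd.array([0, 1, 2, 3])
data.attach_grad()

with autograd.record():
    out = mx.nd.Custom(data, label, op_type='weighted_softmax_ce_loss',
                       class_weights='5,1,1,1,1,1,1,1,1,1')
out.backward()

# with the weights above, the gradient row of the label-0 example
# should be roughly 5x larger than the rows of the other examples
print(data.grad)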
To use it, you would need to create a model that uses this custom operator instead of the regular SoftmaxOutput. Here is the model code:
import logging
import mxnet as mx
# it is important to import this file, even though it is not directly used in the code below
import WeightedSoftmaxCrossEntropyLoss
batch_size = 100
# define a string of weights - one weight for each class, starting from the '0' class
class_weights_values = '5,1,1,1,1,1,1,1,1,1'
mnist = mx.test_utils.get_mnist()
train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], batch_size, shuffle=True)
val_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
data = mx.sym.var('data')
label = mx.symbol.Variable("softmax_label")
data = mx.sym.flatten(data=data)
fc1 = mx.sym.FullyConnected(data=data, num_hidden=15)
act1 = mx.sym.Activation(data=fc1, act_type="relu")
fc2 = mx.sym.FullyConnected(data=act1, num_hidden=10) # MNIST has 10 classes
# Weighted Softmax
weighted_softmax = mx.sym.Custom(fc2, label, op_type='weighted_softmax_ce_loss', class_weights=class_weights_values, name='out')
# Making sure it is used as a loss function
mlp = mx.sym.MakeLoss(weighted_softmax)
logging.getLogger().setLevel(logging.DEBUG) # logging to stdout
# create a trainable module on CPU
mlp_model = mx.mod.Module(symbol=mlp, context=mx.cpu(), label_names=['softmax_label'])
mlp_model.fit(train_iter,                               # train data
              eval_data=val_iter,                       # validation data
              optimizer='sgd',                          # use SGD to train
              optimizer_params={'learning_rate': 0.1},  # use a fixed learning rate
              eval_metric='acc',                        # report accuracy during training
              batch_end_callback=mx.callback.Speedometer(batch_size, 100),  # output progress every 100 batches
              num_epoch=1)                              # train for 1 epoch
I hope this works for you. I haven't tried to use the results of the training, so it may not behave exactly as you expect. Please feel free to adjust this code as needed.
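If you do want to look at the predictions afterwards, something along these lines should be close: the custom operator's forward simply returns the softmax probabilities, so predict() gives one probability row per example (again untested, adjust as needed):
# get the probabilities the module produces on the validation set
prob = mlp_model.predict(val_iter)
# pick the most likely class for each example
predicted_labels = prob.argmax(axis=1)
print(predicted_labels[:10].asnumpy())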