Here is an example of sequential filtering in TensorFlow using Holt-Winters (triple exponential smoothing). The same pattern should work for other kinds of sequential modeling, such as Kalman filtering.
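For reference, these are the multiplicative Holt-Winters updates from the NIST handbook linked in the docstring below; in the code, observation_smoothing plays the role of $\alpha$, trend_smoothing of $\gamma$, and seasonal_smoothing of $\beta$, with $L$ the seasonality:

$$S_t = \alpha \frac{y_t}{I_{t-L}} + (1 - \alpha)(S_{t-1} + b_{t-1})$$
$$b_t = \gamma (S_t - S_{t-1}) + (1 - \gamma)\, b_{t-1}$$
$$I_t = \beta \frac{y_t}{S_t} + (1 - \beta)\, I_{t-L}$$
$$F_{t+m} = (S_t + m\, b_t)\, I_{t-L+m}$$

where $S_t$ is the smoothed level, $b_t$ the trend, $I_t$ the seasonal index, and $F_{t+m}$ the $m$-step-ahead forecast.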
from matplotlib import pyplot
import numpy as np
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
seasonality = 10
def model_fn(features, targets):
  """Defines a basic Holt-Winters sequential filtering model in TensorFlow.

  See http://www.itl.nist.gov/div898/handbook/pmc/section4/pmc435.htm"""
  times = features["times"]
  values = features["values"]
  # Initial estimates
  initial_trend = tf.reduce_sum(
      (values[seasonality:2*seasonality] - values[:seasonality])
      / seasonality ** 2)
  initial_smoothed_observation = values[0]
  # Seasonal indices are multiplicative, so having them near 0 leads to
  # instability
  initial_seasonal_indices = 1. + tf.exp(
      tf.get_variable("initial_seasonal_indices", shape=[seasonality]))
  with tf.variable_scope("smoothing_parameters",
                         initializer=tf.zeros_initializer):
    # Trained scalars for smoothing, transformed to be in (0, 1)
    observation_smoothing = tf.sigmoid(
        tf.get_variable(name="observation_smoothing", shape=[]))
    trend_smoothing = tf.sigmoid(
        tf.get_variable(name="trend_smoothing", shape=[]))
    seasonal_smoothing = tf.sigmoid(
        tf.get_variable(name="seasonal_smoothing", shape=[]))
  def filter_function(
      current_index, seasonal_indices, previous_smoothed_observation,
      previous_trend, previous_loss_sum):
    current_time = tf.gather(times, current_index)
    current_observation = tf.gather(values, current_index)
    current_season = current_time % seasonality
    one_step_ahead_prediction = (
        (previous_smoothed_observation + previous_trend)
        * tf.gather(seasonal_indices, current_season))
    new_loss_sum = previous_loss_sum + (
        one_step_ahead_prediction - current_observation) ** 2
    new_smoothed_observation = (
        (observation_smoothing * current_observation
         / tf.gather(seasonal_indices, current_season))
        + ((1. - observation_smoothing)
           * (previous_smoothed_observation + previous_trend)))
    new_trend = (
        (trend_smoothing
         * (new_smoothed_observation - previous_smoothed_observation))
        + (1. - trend_smoothing) * previous_trend)
    updated_seasonal_index = (
        seasonal_smoothing * current_observation / new_smoothed_observation
        + ((1. - seasonal_smoothing)
           * tf.gather(seasonal_indices, current_season)))
    new_seasonal_indices = tf.concat(
        concat_dim=0,
        values=[seasonal_indices[:current_season],
                [updated_seasonal_index],
                seasonal_indices[current_season + 1:]])
    # Preserve shape to keep the while_loop shape invariants happy
    new_seasonal_indices.set_shape(seasonal_indices.get_shape())
    return (current_index + 1, new_seasonal_indices, new_smoothed_observation,
            new_trend, new_loss_sum)
  def while_run_condition(current_index, *unused_args):
    return current_index < tf.shape(times)[0]
  (_, final_seasonal_indices, final_smoothed_observation, final_trend,
   sum_squared_errors) = tf.while_loop(
       cond=while_run_condition,
       body=filter_function,
       loop_vars=[0, initial_seasonal_indices, initial_smoothed_observation,
                  initial_trend, 0.])
  normalized_loss = sum_squared_errors / tf.cast(tf.shape(times)[0],
                                                 dtype=tf.float32)
  train_op = tf.contrib.layers.optimize_loss(
      loss=normalized_loss,
      global_step=tf.contrib.framework.get_global_step(),
      learning_rate=0.1,
      optimizer="Adam")
  prediction_times = tf.range(30)
  prediction_values = (
      (final_smoothed_observation + final_trend * tf.cast(prediction_times,
                                                          dtype=tf.float32))
      * tf.cast(tf.gather(params=final_seasonal_indices,
                          indices=prediction_times % seasonality),
                dtype=tf.float32))
  predictions = {"times": prediction_times,
                 "values": prediction_values}
  return predictions, normalized_loss, train_op
# Create a synthetic time series with seasonality, trend, and a little noise
series_length = 50
times = np.arange(series_length, dtype=np.int32)
values = 5. + (
    0.02 * times + np.sin(times * 2 * np.pi / float(seasonality))
    + np.random.normal(size=[series_length], scale=0.2)).astype(np.float32)
# Define an input function to feed the data into our model
input_fn = lambda: ({"times": tf.convert_to_tensor(times, dtype=tf.int32),
                     "values": tf.convert_to_tensor(values, dtype=tf.float32)},
                    {})
# Wrap the model in a tf.learn Estimator for training and inference
estimator = tf.contrib.learn.Estimator(model_fn=model_fn)
estimator.fit(input_fn=input_fn, steps=500)
predictions = estimator.predict(input_fn=input_fn, as_iterable=False)
# Plot the training data and predictions
pyplot.plot(range(series_length), values)
pyplot.plot(series_length + predictions["times"], predictions["values"])
pyplot.show()
(I was using TensorFlow 0.11.0rc0 when writing this)
Output of Holt-Winters on synthetic data: training data followed by predictions.
However, this code will be quite slow on longer time series. The issue is that TensorFlow (and most other tools for automatic differentiation) does not perform especially well on sequential computation (looping). Usually this is ameliorated by batching the data and operating on large chunks at once, which is somewhat tricky for sequential models, since there is state that needs to be passed from one timestep to the next.
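As a minimal illustration of that statefulness (my sketch, not part of the original code), here is one Holt-Winters step written as a plain Python function; each call consumes the state tuple produced by the previous call, which is exactly the dependency that prevents computing timesteps independently in a batch:

def holt_winters_step(state, observation, season,
                      alpha=0.5, gamma=0.5, beta=0.5):
  # `state` must be the output of the previous step, so no two steps
  # can run in parallel.
  level, trend, seasonal_indices = state
  new_level = (alpha * observation / seasonal_indices[season]
               + (1. - alpha) * (level + trend))
  new_trend = gamma * (new_level - level) + (1. - gamma) * trend
  new_indices = list(seasonal_indices)
  new_indices[season] = (beta * observation / new_level
                         + (1. - beta) * seasonal_indices[season])
  return (new_level, new_trend, new_indices)

In graph form this dependency becomes a tf.while_loop, which runs one iteration at a time.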
A much faster (but perhaps less satisfying) approach is to use an autoregressive model, which predicts each value as a learned function of a fixed window of previous observations. This has the added benefit of being very easy to implement in TensorFlow:
import numpy as np
from matplotlib import pyplot
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
seasonality = 10
# Create a synthetic time series with seasonality, trend, and a little noise
series_length = 50
times = np.arange(series_length, dtype=np.int32)
values = 5. + (0.02 * times + np.sin(times * 2 * np.pi / float(seasonality))
               + np.random.normal(size=[series_length], scale=0.2)).astype(
                   np.float32)
# Parameters for stochastic gradient descent
batch_size = 16
window_size = 10
# Define a column format for the linear regression
input_column = tf.contrib.layers.real_valued_column(column_name="input_window",
                                                    dimension=window_size)
def training_input_fn():
  window_starts = tf.random_uniform(shape=[batch_size], dtype=tf.int32,
                                    maxval=series_length - window_size - 1)
  # Broadcast window starts against per-window offsets to get a
  # [batch_size, window_size] matrix of indices into the series
  element_indices = (tf.expand_dims(window_starts, 1)
                     + tf.expand_dims(tf.range(window_size), 0))
  return ({input_column: tf.gather(values, element_indices)},
          tf.gather(values, window_starts + window_size))
estimator = tf.contrib.learn.LinearRegressor(feature_columns=[input_column])
estimator.fit(input_fn=training_input_fn, steps=500)
# Seed the autoregression with the last 10 (window_size) observed values
predictions = list(values[-10:])
def predict_input_fn():
  return ({input_column: tf.reshape(predictions[-10:], [1, 10])}, {})
predict_length = 30
for i in xrange(predict_length):
  prediction = estimator.predict(input_fn=predict_input_fn, as_iterable=False)
  predictions.append(prediction[0])
predictions = predictions[10:]
pyplot.plot(range(series_length), values)
pyplot.plot(series_length + np.arange(predict_length), predictions)
pyplot.show()
Output of the autoregressive model on the same synthetic dataset.
Notice that since the model has no state to keep, we can do mini-batch stochastic gradient descent very easily.
For clustering, something like k-means could work for time series, for example applied to fixed-length windows of the series.
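Here is a minimal sketch of that idea (my addition, assuming scikit-learn is available and a hypothetical cluster_windows helper name):

import numpy as np
from sklearn.cluster import KMeans

def cluster_windows(series, window_size=10, n_clusters=3):
  # One row per (overlapping) window of the series
  windows = np.array([series[i:i + window_size]
                      for i in range(len(series) - window_size + 1)])
  # Subtracting each window's mean clusters on shape rather than level
  windows -= windows.mean(axis=1, keepdims=True)
  return KMeans(n_clusters=n_clusters).fit_predict(windows)

A distance that aligns series in time (e.g. dynamic time warping) often works better than plain Euclidean distance on windows, but that requires more machinery than this sketch.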