0

I'm implementing an auto-encoder for anomaly detection of IoT sensor data. My data set comes from a simulation, but basically it is accelerometer data - three dimensions, one for each axis.

I'm reading it from a CSV file, column 2-4 contain the data - sorry for the code quality, it is quick and dirty:

private static DataSetIterator getTrainingData(int batchSize, Random rand) {
    double[] ix = new double[nSamples];
    double[] iy = new double[nSamples];
    double[] iz = new double[nSamples];
    double[] ox = new double[nSamples];
    double[] oy = new double[nSamples];
    double[] oz = new double[nSamples];
    Reader in;
    try {
        in = new FileReader("/Users/romeokienzler/Downloads/lorenz_healthy.csv");

        Iterable<CSVRecord> records;

        records = CSVFormat.DEFAULT.parse(in);
        int index = 0;
        for (CSVRecord record : records) {
            String[] recordArray = record.get(0).split(";");
            ix[index] = Double.parseDouble(recordArray[1]);
            iy[index] = Double.parseDouble(recordArray[2]);
            iz[index] = Double.parseDouble(recordArray[3]);
            ox[index] = Double.parseDouble(recordArray[1]);
            oy[index] = Double.parseDouble(recordArray[2]);
            oz[index] = Double.parseDouble(recordArray[3]);
            index++;
        }
        INDArray ixNd = Nd4j.create(ix);
        INDArray iyNd = Nd4j.create(iy);
        INDArray izNd = Nd4j.create(iz);
        INDArray oxNd = Nd4j.create(ox);
        INDArray oyNd = Nd4j.create(oy);
        INDArray ozNd = Nd4j.create(oz);
        INDArray iNd = Nd4j.hstack(ixNd, iyNd, izNd);
        INDArray oNd = Nd4j.hstack(oxNd, oyNd, ozNd);
        DataSet dataSet = new DataSet(iNd, oNd);
        List<DataSet> listDs = dataSet.asList();
        Collections.shuffle(listDs, rng);
        return new ListDataSetIterator(listDs, batchSize);
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}

This is the net:

    public static void main(String[] args) {
        // Generate the training data
        DataSetIterator iterator = getTrainingData(batchSize, rng);

        // Create the network
        int numInput = 3;
        int numOutputs = 3;
        int nHidden = 1;
        int listenerFreq = batchSize / 5;

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(seed)
                .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                .gradientNormalizationThreshold(1.0).iterations(iterations).momentum(0.5)
                .momentumAfter(Collections.singletonMap(3, 0.9))
                .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).list(2)
                .layer(0,
                        new AutoEncoder.Builder().nIn(numInput).nOut(nHidden).weightInit(WeightInit.XAVIER)
                                .lossFunction(LossFunction.RMSE_XENT).corruptionLevel(0.3).build())
                .layer(1, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD).activation("softmax").nIn(nHidden)
                        .nOut(numOutputs).build())
                .pretrain(true).backprop(false).build();

        MultiLayerNetwork model = new MultiLayerNetwork(conf);
        model.init();
        model.setListeners(Collections.singletonList((IterationListener) new ScoreIterationListener(listenerFreq)));

        for (int i = 0; i < nEpochs; i++) {
            iterator.reset();
            model.fit(iterator);
        }

    }

I'm getting the following error: Shapes do not match: x.shape=[1, 9000], y.shape=[1, 3]

Exception in thread "main" java.lang.IllegalArgumentException: Shapes do not match: x.shape=[1, 9000], y.shape=[1, 3]
    at org.nd4j.linalg.api.parallel.tasks.cpu.CPUTaskFactory.getTransformAction(CPUTaskFactory.java:92)
    at org.nd4j.linalg.api.ops.executioner.DefaultOpExecutioner.doTransformOp(DefaultOpExecutioner.java:409)
    at org.nd4j.linalg.api.ops.executioner.DefaultOpExecutioner.exec(DefaultOpExecutioner.java:62)
    at org.nd4j.linalg.api.ndarray.BaseNDArray.subi(BaseNDArray.java:2660)
    at org.nd4j.linalg.api.ndarray.BaseNDArray.subi(BaseNDArray.java:2641)
    at org.nd4j.linalg.api.ndarray.BaseNDArray.sub(BaseNDArray.java:2419)
    at org.deeplearning4j.nn.layers.feedforward.autoencoder.AutoEncoder.computeGradientAndScore(AutoEncoder.java:123)
    at org.deeplearning4j.optimize.solvers.BaseOptimizer.gradientAndScore(BaseOptimizer.java:132)
    at org.deeplearning4j.optimize.solvers.BaseOptimizer.optimize(BaseOptimizer.java:151)
    at org.deeplearning4j.optimize.Solver.optimize(Solver.java:52)
    at org.deeplearning4j.nn.layers.BaseLayer.fit(BaseLayer.java:486)
    at org.deeplearning4j.nn.multilayer.MultiLayerNetwork.pretrain(MultiLayerNetwork.java:170)
    at org.deeplearning4j.nn.multilayer.MultiLayerNetwork.fit(MultiLayerNetwork.java:1134)
    at org.deeplearning4j

.examples.feedforward.autoencoder.AnomalyDetector.main(AnomalyDetector.java:136)

But I'm not defining dimension anywhere and IMHO the dimensions of input and output should be (3,3000) and (3,3000). Where is my mistake?

Thanks a lot in advance...

EDIT: UPDATE to latest release 13.9.16 I'm getting the same error (semantically), here is what I'm doing now:

private static DataSetIterator getTrainingData(int batchSize, Random rand) {
    double[] ix = new double[nSamples];
    double[] iy = new double[nSamples];
    double[] iz = new double[nSamples];
    double[] ox = new double[nSamples];
    double[] oy = new double[nSamples];
    double[] oz = new double[nSamples];
    try {
        RandomAccessFile in = new RandomAccessFile(new File("/Users/romeokienzler/Downloads/lorenz_healthy.csv"),
                "r");
        int index = 0;
        String record;
        while ((record = in.readLine()) != null) {
            String[] recordArray = record.split(";");
            ix[index] = Double.parseDouble(recordArray[1]);
            iy[index] = Double.parseDouble(recordArray[2]);
            iz[index] = Double.parseDouble(recordArray[3]);
            ox[index] = Double.parseDouble(recordArray[1]);
            oy[index] = Double.parseDouble(recordArray[2]);
            oz[index] = Double.parseDouble(recordArray[3]);
            index++;
        }
        INDArray ixNd = Nd4j.create(ix);
        INDArray iyNd = Nd4j.create(iy);
        INDArray izNd = Nd4j.create(iz);
        INDArray oxNd = Nd4j.create(ox);
        INDArray oyNd = Nd4j.create(oy);
        INDArray ozNd = Nd4j.create(oz);
        INDArray iNd = Nd4j.hstack(ixNd, iyNd, izNd);
        INDArray oNd = Nd4j.hstack(oxNd, oyNd, ozNd);
        DataSet dataSet = new DataSet(iNd, oNd);
        List<DataSet> listDs = dataSet.asList();
        Collections.shuffle(listDs, rng);
        return new ListDataSetIterator(listDs, batchSize);
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        System.exit(-1);
        return null;
    }
}

And here the net:

// Set up network. 784 in/out (as MNIST images are 28x28).
    // 784 -> 250 -> 10 -> 250 -> 784
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).iterations(1)
            .weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD).activation("relu")
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).learningRate(learningRate)
            .regularization(true).l2(0.0001).list().layer(0, new DenseLayer.Builder().nIn(3).nOut(1).build())
            .layer(1, new OutputLayer.Builder().nIn(1).nOut(3).lossFunction(LossFunctions.LossFunction.MSE).build())
            .pretrain(false).backprop(true).build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.setListeners(Collections.singletonList((IterationListener) new ScoreIterationListener(1)));

    // Load data and split into training and testing sets. 40000 train,
    // 10000 test
    DataSetIterator iter = getTrainingData(batchSize, rng);

    // Train model:
    int nEpochs = 30;
    while (iter.hasNext()) {
        DataSet ds = iter.next();
        for (int epoch = 0; epoch < nEpochs; epoch++) {
            net.fit(ds.getFeatures(), ds.getLabels());
            System.out.println("Epoch " + epoch + " complete");
        }
    }

My error is:

Exception in thread "main" java.lang.IllegalStateException: Mis matched lengths: [9000] != [3]
    at org.nd4j.linalg.util.LinAlgExceptions.assertSameLength(LinAlgExceptions.java:39)
    at org.nd4j.linalg.api.ndarray.BaseNDArray.subi(BaseNDArray.java:2786)
    at org.nd4j.linalg.api.ndarray.BaseNDArray.subi(BaseNDArray.java:2767)
    at org.nd4j.linalg.api.ndarray.BaseNDArray.sub(BaseNDArray.java:2547)
    at org.deeplearning4j.nn.layers.BaseOutputLayer.getGradientsAndDelta(BaseOutputLayer.java:182)
    at org.deeplearning4j.nn.layers.BaseOutputLayer.backpropGradient(BaseOutputLayer.java:161)
    at org.deeplearning4j.nn.multilayer.MultiLayerNetwork.calcBackpropGradients(MultiLayerNetwork.java:1125)
    at org.deeplearning4j.nn.multilayer.MultiLayerNetwork.backprop(MultiLayerNetwork.java:1077)
    at org.deeplearning4j.nn.multilayer.MultiLayerNetwork.computeGradientAndScore(MultiLayerNetwork.java:1817)
    at org.deeplearning4j.optimize.solvers.BaseOptimizer.gradientAndScore(BaseOptimizer.java:152)
    at org.deeplearning4j.optimize.solvers.StochasticGradientDescent.optimize(StochasticGradientDescent.java:54)
    at org.deeplearning4j.optimize.Solver.optimize(Solver.java:51)
    at org.deeplearning4j.nn.multilayer.MultiLayerNetwork.fit(MultiLayerNetwork.java:1445)
    at org.deeplearning4j.examples.feedforward.anomalydetection.IoTAnomalyExample.main(IoTAnomalyExample.java:110)

I'm pretty sure I'm messing up with the training data - the shape of the training data is 3000 rows, 3 columns - same for the target (the very same data because I want to build an autoencoder) - test data can be found here: https://pmqsimulator-romeokienzler-2310.mybluemix.net/data

Any ideas?

Romeo Kienzler
  • 3,373
  • 3
  • 36
  • 58
  • First of all, not sure why you're running rc3.8. We got rid of the java stuff in january...We run on power now (and have for months now) Try upgrading first. It's hard for us to justify supporting a version that's that old now. – Adam Gibson Sep 10 '16 at 12:22

1 Answers1

0

Thanks to Alex Black of Skymind, this is the solution (got the shape wrong)

        INDArray ixNd = Nd4j.create(ix, new int[]{3000,1});
        INDArray iyNd = Nd4j.create(iy, new int[]{3000,1});
        INDArray izNd = Nd4j.create(iz, new int[]{3000,1});
        INDArray oxNd = Nd4j.create(ox, new int[]{3000,1});
        INDArray oyNd = Nd4j.create(oy, new int[]{3000,1});
        INDArray ozNd = Nd4j.create(oz, new int[]{3000,1});
Romeo Kienzler
  • 3,373
  • 3
  • 36
  • 58