
How can I use GridFS to store data?

I have stored data content in a document as follows:

Schema:

var mongoose = require("mongoose");
var mongoosePaginate = require('mongoose-paginate');

// Declare schema
var streamSchema = new mongoose.Schema({
    user_id: {
        type: String,
        required: true
    },
    title: {
        type: String,
        required: true
    },
    description: {
        type: String,
        required: true
    },
    public_key: {
        type: String
    },
    private_key: {
        type: String
    },
    data: {
        type: Object
    },
    entries_number: {
        type: Number,
        default: 0
    },
    last_entry_at: {
        type: Date
    },
    created_at: {
        type: Date,
        default: Date.now,
        index: 1
    },
});

streamSchema.plugin(mongoosePaginate);

// Export schema
// Model.paginate()
mongoose.model("Stream", streamSchema);

Example stream:

{
    "_id" : ObjectId("57cfeabd8d9cc38d6d25fd60"),
    "user_id" : "579f52bc53d9e8cc14f504da",
    "title" : "Stream 3",
    "description" : "bla bla bla",
    "public_key" : "CxM2jlAaOHvhC3v4GB",
    "private_key" : "cHeELOOnr2x0WCqdo",
    "data" : {
        "particles" : [ 
            "27", 
            "3", 
            "3", 
            "8", 
            "29", 
            "4", 
            "0", 
            "0", 
            "0", 
            "0", 
            "0", 
            "0", 
            "0", 
            "2", 
            "2", 
            "2", 
            "1", 
            "32", 
            "0", 
            "7", 
            "0", 
            "5", 
            "0", 
            "0", 
            "1", 
            "0", 
            "0", 
            "0", 
            "0", 
            "0", 
            "0", 
            "1", 
            "0", 
            "0", 
            "0", 
            "0", 
            "0", 
            "32", 
            "50", 
            "52", 
            "27", 
            "52", 
            "3", 
            "3", 
            "0", 
            "0", 
            "1", 
            "3", 
            "17", 
            "2", 
            "15", 
            "0", 
            "0", 
            "1", 
            "48", 
            "21", 
            "27", 
            "7", 
            "6", 
            "6", 
            "2", 
            "4", 
            "0", 
            "0", 
            "0", 
            "8", 
            "2", 
            "0", 
            "0", 
            "3"
        ],
        "timestamp" : [ 
            1473244226641.0, 
            1473244254890.0, 
            1473244283134.0, 
            1473244311293.0, 
            1473244339536.0, 
            1473244420579.0, 
            1473246125416.0, 
            1473246153736.0, 
            1473246182281.0, 
            1473246210171.0, 
            1473246238506.0, 
            1473246266681.0, 
            1473246294915.0, 
            1473246325204.0, 
            1473246351579.0, 
            1473246379670.0, 
            1473246408000.0, 
            1473246436252.0, 
            1473246464504.0, 
            1473246492743.0, 
            1473246520906.0, 
            1473246549158.0, 
            1473246577414.0, 
            1473246605652.0, 
            1473246633917.0, 
            1473246695549.0, 
            1473246723868.0, 
            1473246752127.0, 
            1473246780382.0, 
            1473246808543.0, 
            1473246836795.0, 
            1473246865028.0, 
            1473246893295.0, 
            1473246921625.0, 
            1473246949790.0, 
            1473246978115.0, 
            1473247006374.0, 
            1473247034712.0, 
            1473247062773.0, 
            1473247091109.0, 
            1473247119278.0, 
            1473247147609.0, 
            1473247175787.0, 
            1473247204099.0, 
            1473247232287.0, 
            1473247260531.0, 
            1473247288785.0, 
            1473247346870.0, 
            1473247375027.0, 
            1473247414220.0, 
            1473247442496.0, 
            1473247470722.0, 
            1473247498963.0, 
            1473247527122.0, 
            1473247555416.0, 
            1473247583645.0, 
            1473247611975.0, 
            1473247640211.0, 
            1473247668447.0, 
            1473247696712.0, 
            1473247724866.0, 
            1473247753121.0, 
            1473247781412.0, 
            1473247809628.0, 
            1473247837876.0, 
            1473247866137.0, 
            1473247894452.0, 
            1473247922612.0, 
            1473247950961.0, 
            1473247979195.0
        ]
    },
    "created_at" : ISODate("2016-09-07T10:23:57.692Z"),
    "entries_number" : 70,
    "species" : [ 
        {
            "public_name" : "CO2",
            "code_name" : "particles"
        }
    ],
    "__v" : 0,
    "last_entry_at" : ISODate("2016-09-07T11:32:59.195Z")
}

As you can see, the arrays inside the data field can just keep growing forever and will eventually exceed the 16MB document limit.

So, how can I offload the contents of the data field to GridFS? Is this even possible?

I am using mongoose to store and read data. There are a couple of GridFS packages that work with mongoose, gridfs-stream and mongoose-gridfs, but I just have no idea how to integrate them with my existing code.
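
For instance, from the gridfs-stream README I can see roughly how a file would be written. Here is a sketch (the filename and payload are placeholders I made up), but I don't see how to map my ever-growing data object onto this file-oriented API:

var mongoose = require("mongoose");
var Grid = require("gridfs-stream");

var conn = mongoose.connection;

// gridfs-stream needs an open native connection and the driver.
conn.once("open", function () {
    var gfs = Grid(conn.db, mongoose.mongo);

    // Write a stream's data out as a single file in GridFS.
    var writeStream = gfs.createWriteStream({
        filename: "stream-data.json" // placeholder name
    });

    writeStream.on("close", function (file) {
        console.log("Stored in GridFS with id " + file._id);
    });

    writeStream.write(JSON.stringify({particles: [], timestamp: []}));
    writeStream.end();
});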

This is how I inject data into the data field:

var express = require('express');
var router = express.Router();

// Import dependencies.
var mongoose = require("mongoose");

// Import the User schema and the authentication middleware.
var Stream = mongoose.model("Stream");

// GET or POST request to push data to a stream.
// @format:
// http://127.0.0.1:8080/input/<public_key>?private_key=<private_key>&field1=<value>&field2=<value>
// @example:
// http://127.0.0.1:3000/input/ksdoLOZ99qpdL9?private_key=PSsoE6nXcHN7&particles=1.2
router.get("/", log);
router.get("/:publicKey", log);
router.post("/:publicKey", log);

function log (req, res) {

    // Get values from request arguments
    var publicKey = req.params.publicKey;

    // The private key might come in the header or as a GET var depending on the method used for sending data.
    var privateKey = req.headers['stream-private-key'] ? req.headers['stream-private-key'] : req.query.private_key;

    // Strip out cruft
    delete req.query.private_key;

    var data = {};

    if (req.method === 'GET') {
        data = req.query;
    }

    if (req.method === 'POST') {
        data = req.body;
    }

    // Check for public key
    if (!publicKey) {
        res.set('Content-Type', 'application/json');
        return res.status(404).send('stream not found');
    }

    // Check for private key
    if (!privateKey) {
        res.set('Content-Type', 'application/json');
        return res.status(403).send('forbidden: missing private key');
    }

    // Make sure they sent some data.
    // Check if the array object is empty then don't update the model.
    if (Object.keys(data).length === 0 && data.constructor === Object) {
        res.set('Content-Type', 'application/json');
        return res.status(200).send('no data received');
    }

    var updateQuery = {};

    // Look up the stream by its key pair and respond with an appropriate status for each case.
    Stream.findOne({
        public_key:publicKey,
        private_key:privateKey
    }, function(err, stream) {

        if (err) {
            console.log("Error retrieving stream: " + err);
            return res.sendStatus(500); 
        }

        if (stream === null) {
            console.log("Either no stream was found for this API key: " + privateKey + " or the stream doesn't have any variables set");
            res.set('Content-Type', 'application/json');
            return res.status(200).send('stream not found');
        }

        // Make sure the stream data (object) has keys in it.
        if (Object.keys(stream.data).length === 0) {
            res.set('Content-Type', 'application/json');
            return res.status(200).send('update failed');
        }

        // Build $push query with variables passed in POST request.
        // We check that the variables have already been registered, otherwise they'll be ignored.
        for (var property in stream.data) {
            if (data.hasOwnProperty(property) && stream.data.hasOwnProperty(property)) {
                updateQuery["data." + property] = data[property];
            } else {
                updateQuery["data." + property] = null;
            }
        }

        // Current timestamp.
        var timestamp = Date.now();

        // Insert date data.
        updateQuery["data.timestamp"] = timestamp;

        // Update stream with new values and increment entries_number
        stream.update({
            $push: updateQuery,
            $inc: {entries_number: 1},
            $set: {last_entry_at: timestamp}
        }, function(err, streamID) {

            if (err) {
                console.log("Error updating stream: " + err);
                return res.sendStatus(500);
            }

            console.log("New entry for stream with API key: " + privateKey);

            res.set('Content-Type', 'application/json');
            return res.status(200).send('success 1');
        });
    });
};

Any ideas how I can integrate mongoose-gridfs or gridfs-stream with my code above?

Comments:

  • I believe you misunderstand the concept of GridFS. Please read https://docs.mongodb.com/manual/core/gridfs/ It works with **files**, not documents. Nor does it add any magic to override BSON limits. It just splits a file of arbitrary size into small chunks and manages them. It still uses two normal collections with a predefined schema in the background. – Alex Blex Mar 24 '17 at 09:13
  • @AlexBlex so what should I do with my problem above? Should I use GridFS or not? – Run Mar 24 '17 at 09:42
  • Apparently the schema with arrays you are using does not fit the purpose and should be changed. E.g. you can create a `data` collection to store `{particle, timestamp, streamId}` documents (see the sketch after these comments), but it really depends on the use case. – Alex Blex Mar 24 '17 at 09:51
  • @AlexBlex thanks. Creating a `data` collection to store `{particle, timestamp, streamId}` documents is what I am thinking now, but will the documents in this new collection exceed 16MB one day? If so, I will be falling back into the same issue. – Run Mar 24 '17 at 09:56
  • By the way, I cannot create a new schema with `{particle, timestamp, streamId}` because `particle` is dynamic: it is called something else in other streams, and other streams/documents have more than one `particle`. – Run Mar 24 '17 at 10:06
  • That's exactly what I am saying: it depends on your case. I can't advise anything specific from just a single document. Please take my example only as an example of how the document can be refactored, without any context. You might be fine with GridFS if it fits your use case, but it is quite an architectural decision that should be based on a good understanding of your business. – Alex Blex Mar 24 '17 at 10:12
  • @AlexBlex thanks for the advice! – Run Mar 24 '17 at 10:17
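
A minimal sketch of the per-entry collection suggested in the comments above. The model, collection, and field names are assumptions, and a generic name/value pair stands in for the dynamic variable names (e.g. "particles"):

var mongoose = require("mongoose");

// One document per reading instead of ever-growing arrays in the stream
// document. "name" holds the dynamic variable name (e.g. "particles"),
// so streams with differently named or multiple variables fit as well.
var entrySchema = new mongoose.Schema({
    stream_id: {
        type: mongoose.Schema.Types.ObjectId,
        ref: "Stream",
        index: true,
        required: true
    },
    name: {
        type: String,
        required: true
    },
    value: {
        type: String
    },
    created_at: {
        type: Date,
        default: Date.now
    }
});

mongoose.model("Entry", entrySchema);

Logging a reading then becomes an insert into this collection instead of a $push into the stream document, so no single document grows without bound.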

0 Answers