1

We have a big database. We collect newsletters and I want to make a trending page. The goal is to make the page realtime and fast! We want to display trending newsletters from the past 2 hours, 4 hours, 24 hours, past week, and past month.

I've worked with MongoDB for a while and I try to keep things simple. I want a new collection, trending, that stores the visitors of the newsletter pages in a time bucket. On every visit, I want to add the information of the newsletter to the object that holds the trending newsletters for that time and $inc the hits field for statistics.

My objects are:

{ 
    "_id" : ObjectId("5d4b4ca5a6bba5f7ffb23b39"), 
    "bucket" : "last2hours", 
    "language" : "nl", 
    "time" : "2019-08-08_00", 
    "newsletters" : {
        "5d4b29ba8ddf870fe15628c7" : {
            "_id" : ObjectId("5d4b29ba8ddf870fe15628c7"), 
            "_slug" : "nieuwsbrief-dalstra-reizen-touring-december-2015", 
            "subject" : "Nieuwsbrief Dalstra Reizen Touring december 2015", 
            "date" : ISODate("2015-12-04T13:15:03.000+0000"), 
            "publisher" : {
                "_id" : ObjectId("557ebcc54c79597761fd71c2"), 
                "_slug" : "dalstra-nl", 
                "name" : "dalstra.nl", 
                "taal" : "nl"
            }, 
            "hits" : NumberInt(1)
        }, 
        "5d4b29af8ddf870fe15624ba" : {
            "_id" : ObjectId("5d4b29af8ddf870fe15624ba"), 
            "_slug" : "the-carolina-weddings-show", 
            "subject" : "The Carolina Weddings Show", 
            "date" : ISODate("2015-12-04T13:13:54.000+0000"), 
            "publisher" : {
                "_id" : ObjectId("503b950fffa67e2c790007d7"), 
                "_slug" : "livingsocialcom", 
                "name" : "Livingsocial.com", 
                "taal" : "nl"
            }, 
            "hits" : NumberInt(1)
        }, 
        "5d4b29ad8ddf870fe15623f4" : {
            "_id" : ObjectId("5d4b29ad8ddf870fe15623f4"), 
            "_slug" : "newport-gangster-tour", 
            "subject" : "Newport Gangster Tour", 
            "date" : ISODate("2015-12-04T13:13:22.000+0000"), 
            "publisher" : {
                "_id" : ObjectId("503b950fffa67e2c790007d7"), 
                "_slug" : "livingsocialcom", 
                "name" : "Livingsocial.com", 
                "taal" : "nl"
            }, 
            "hits" : NumberInt(1)
        }, 
        "5d4b29bb8ddf870fe15628f3" : {
            "_id" : ObjectId("5d4b29bb8ddf870fe15628f3"), 
            "_slug" : "springwise-daily-shoe-insoles-control-devices-through-kicking-and-more", 
            "subject" : "Springwise Daily | Shoe insoles control devices through kicking, and more.", 
            "date" : ISODate("2015-12-04T13:15:05.000+0000"), 
            "publisher" : {
                "_id" : ObjectId("5581f0b54c7959e82bfd71c2"), 
                "_slug" : "springwise-com", 
                "name" : "springwise.com", 
                "taal" : "nl"
            }, 
            "hits" : NumberInt(2)
        }
    }
}
{ 
    "_id" : ObjectId("5d4b4ca5a6bba5f7ffb23b3b"), 
    "bucket" : "last2hours", 
    "language" : "nl", 
    "time" : "2019-08-08_01", 
    "newsletters" : {
        "5d4b29ba8ddf870fe15628c7" : {
            "_id" : ObjectId("5d4b29ba8ddf870fe15628c7"), 
            "_slug" : "nieuwsbrief-dalstra-reizen-touring-december-2015", 
            "subject" : "Nieuwsbrief Dalstra Reizen Touring december 2015", 
            "date" : ISODate("2015-12-04T13:15:03.000+0000"), 
            "publisher" : {
                "_id" : ObjectId("557ebcc54c79597761fd71c2"), 
                "_slug" : "dalstra-nl", 
                "name" : "dalstra.nl", 
                "taal" : "nl"
            }, 
            "hits" : NumberInt(1)
        }, 
        "5d4b29af8ddf870fe15624ba" : {
            "_id" : ObjectId("5d4b29af8ddf870fe15624ba"), 
            "_slug" : "the-carolina-weddings-show", 
            "subject" : "The Carolina Weddings Show", 
            "date" : ISODate("2015-12-04T13:13:54.000+0000"), 
            "publisher" : {
                "_id" : ObjectId("503b950fffa67e2c790007d7"), 
                "_slug" : "livingsocialcom", 
                "name" : "Livingsocial.com", 
                "taal" : "nl"
            }, 
            "hits" : NumberInt(1)
        }, 
        "5d4b29ad8ddf870fe15623f4" : {
            "_id" : ObjectId("5d4b29ad8ddf870fe15623f4"), 
            "_slug" : "newport-gangster-tour", 
            "subject" : "Newport Gangster Tour", 
            "date" : ISODate("2015-12-04T13:13:22.000+0000"), 
            "publisher" : {
                "_id" : ObjectId("503b950fffa67e2c790007d7"), 
                "_slug" : "livingsocialcom", 
                "name" : "Livingsocial.com", 
                "taal" : "nl"
            }, 
            "hits" : NumberInt(1)
        }, 
        "5d4b29bb8ddf870fe15628f3" : {
            "_id" : ObjectId("5d4b29bb8ddf870fe15628f3"), 
            "_slug" : "springwise-daily-shoe-insoles-control-devices-through-kicking-and-more", 
            "subject" : "Springwise Daily | Shoe insoles control devices through kicking, and more.", 
            "date" : ISODate("2015-12-04T13:15:05.000+0000"), 
            "publisher" : {
                "_id" : ObjectId("5581f0b54c7959e82bfd71c2"), 
                "_slug" : "springwise-com", 
                "name" : "springwise.com", 
                "taal" : "nl"
            }, 
            "hits" : NumberInt(2)
        }
    }
}
{ 
    "_id" : ObjectId("5d4b4ca5a6bba5f7ffb23b3d"), 
    "bucket" : "last4hours", 
    "language" : "nl", 
    "time" : "2019-08-08_00", 
    "newsletters" : {
        "5d4b29ba8ddf870fe15628c7" : {
            "_id" : ObjectId("5d4b29ba8ddf870fe15628c7"), 
            "_slug" : "nieuwsbrief-dalstra-reizen-touring-december-2015", 
            "subject" : "Nieuwsbrief Dalstra Reizen Touring december 2015", 
            "date" : ISODate("2015-12-04T13:15:03.000+0000"), 
            "publisher" : {
                "_id" : ObjectId("557ebcc54c79597761fd71c2"), 
                "_slug" : "dalstra-nl", 
                "name" : "dalstra.nl", 
                "taal" : "nl"
            }, 
            "hits" : NumberInt(1)
        }, 
        "5d4b29af8ddf870fe15624ba" : {
            "_id" : ObjectId("5d4b29af8ddf870fe15624ba"), 
            "_slug" : "the-carolina-weddings-show", 
            "subject" : "The Carolina Weddings Show", 
            "date" : ISODate("2015-12-04T13:13:54.000+0000"), 
            "publisher" : {
                "_id" : ObjectId("503b950fffa67e2c790007d7"), 
                "_slug" : "livingsocialcom", 
                "name" : "Livingsocial.com", 
                "taal" : "nl"
            }, 
            "hits" : NumberInt(1)
        }, 
        "5d4b29ad8ddf870fe15623f4" : {
            "_id" : ObjectId("5d4b29ad8ddf870fe15623f4"), 
            "_slug" : "newport-gangster-tour", 
            "subject" : "Newport Gangster Tour", 
            "date" : ISODate("2015-12-04T13:13:22.000+0000"), 
            "publisher" : {
                "_id" : ObjectId("503b950fffa67e2c790007d7"), 
                "_slug" : "livingsocialcom", 
                "name" : "Livingsocial.com", 
                "taal" : "nl"
            }, 
            "hits" : NumberInt(1)
        }, 
        "5d4b29bb8ddf870fe15628f3" : {
            "_id" : ObjectId("5d4b29bb8ddf870fe15628f3"), 
            "_slug" : "springwise-daily-shoe-insoles-control-devices-through-kicking-and-more", 
            "subject" : "Springwise Daily | Shoe insoles control devices through kicking, and more.", 
            "date" : ISODate("2015-12-04T13:15:05.000+0000"), 
            "publisher" : {
                "_id" : ObjectId("5581f0b54c7959e82bfd71c2"), 
                "_slug" : "springwise-com", 
                "name" : "springwise.com", 
                "taal" : "nl"
            }, 
            "hits" : NumberInt(2)
        }
    }
}

The goal here is to only have to query the bucket language time. So, if I want to see the trending newsletters of the last 2 hours, I query {bucket: 'last2hours', language: 'nl', time: '2019-08-08_00'}, then I have all the information I need. No need for aggregation. This findOne query is fast.

So i made a method to update the trending collection:

public function setNewsletterTrendingStatistics($newsletter){

            // Buckets
            $trend_buckets = array(
                'last2hours'    => array('steps' => 2,  'step'=>'hour', 'format'=> 'Y-m-d_H'),
                'last4hours'    => array('steps' => 4,  'step'=>'hour', 'format'=> 'Y-m-d_H' ), 
                'last1day'      => array('steps' => 24, 'step'=>'hour', 'format'=> 'Y-m-d_H' ), 
                'lastweek'      => array('steps' => 7,  'step'=>'day',  'format'=> 'Y-m-d' ), 
                'lastmonth'     => array('steps' => 31, 'step'=>'day',  'format'=> 'Y-m-d' ), 
            );
            // $newsletter['date']->toDateTime()->format('U')
            $buckets = array();
            foreach($trend_buckets AS $bucket => $settings){

                for($i=0; $i<$settings['steps']; $i++){
                    $buckets[] = array(
                        'bucket'    => $bucket,
                        'time'      => date($settings['format'], strtotime('+'. $i . ' ' . $settings['step'])),
                        'language'  => $newsletter['publisher']['taal'],
                    );
                }

            }


            // Add the stats to each bucket

            foreach($buckets AS $bucket){
                $query      = array();
                $query      = $bucket;
                $update     =   array(
                                    '$set' => array(
                                        'newsletters.' . (string) $newsletter['_id'] .  '._id'      => $newsletter['_id'],
                                        'newsletters.' . (string) $newsletter['_id'] .  '._slug'    => $newsletter['_slug'],
                                        'newsletters.' . (string) $newsletter['_id'] .  '.subject'  => $newsletter['subject'],
                                        'newsletters.' . (string) $newsletter['_id'] .  '.date'         => $newsletter['date'],
                                        'newsletters.' . (string) $newsletter['_id'] .  '.publisher'    => array(
                                            '_id'       => $newsletter['publisher']['_id'],
                                            '_slug'     => $newsletter['publisher']['_slug'],
                                            'name'      => $newsletter['publisher']['name'],
                                            'taal'      => $newsletter['publisher']['taal'],
                                        ),
                                    ),
                                    '$inc' => array(
                                        'newsletters.' . (string) $newsletter['_id'] .  '.hits' => 1
                                    ),

                                );

                $options    =   array('upsert'=>true);
                $this->FW->mdb->{$this->config['collections']['newsletters_trending']}->updateOne($query, $update, $options);




            }

        }

First of all, is this a good approach? Is there a better approach? Second, I want to count unique hits, so I need to save an IP address. I want to count unique hits on the update query so I don't have to count on the findOne query. Whats the best way to achieve this? I know I can use addtoset for a unique array with IP addresses. But then I need to count these unique IP addresses.

Himanshu padia
  • 7,428
  • 1
  • 47
  • 45
Mad
  • 71
  • 1
  • 8

1 Answers1

0

So i ended up doing this:

I made buckets for each trending container (last 2 hours, last 4 hours, today, last week, last month) for every hour.

I fill this containers on every pageview with an update query $inc 1.

Every hour a cronjob combines these stats. so 2 hours fill 4 hours 4 hours fill today etc.

This seems like the best approche and are live stats.

Mad
  • 71
  • 1
  • 8