0

I'm trying to fetch 10,000 documents at a time from MongoDB, but I got:

Information :

Code :


package main

import (
    "context"
    "fmt"
    "net/http"
    "os"
    "time"

    "go.mongodb.org/mongo-driver/bson"
    "go.mongodb.org/mongo-driver/mongo"
    "go.mongodb.org/mongo-driver/mongo/options"
)

// database is the package-level handle to the MongoDB database in use; it is
// nil until main assigns it after connecting.
var database *mongo.Database

// main connects to MongoDB, reads every document from the "chat" collection of
// the "chat_data" database whose "timestamp" field falls inside the lookback
// window, decodes each one into a chat value, and prints the number decoded.
//
// A connection error panics; query and cursor errors are printed and end the
// program early. Documents that fail to decode are reported and skipped.
func main() {
	// lookbackSeconds is the width of the query window: 108000 s = 30 hours.
	// NOTE(review): this assumes "timestamp" is stored as Unix seconds; if it
	// is stored in milliseconds the filter matches far more documents than
	// intended — confirm against the collection.
	const lookbackSeconds = 108000

	// Keep the cancel function and defer it so the timeout's timer is
	// released as soon as main returns (go vet reports discarded cancels).
	connectCtx, cancelConnect := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancelConnect()

	client, err := mongo.Connect(connectCtx, options.Client().ApplyURI("mongodb://20.20.20.43:27017"))
	if err != nil {
		panic(err)
	}
	// Close the connection pool on exit. A fresh, bounded context is used
	// because the connect/query contexts may already have expired by then.
	defer func() {
		closeCtx, cancelClose := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancelClose()
		if err := client.Disconnect(closeCtx); err != nil {
			fmt.Println(err)
		}
	}()

	database = client.Database("chat_data")

	// This single deadline covers both the initial Find and every batch
	// fetched while iterating the cursor below.
	queryCtx, cancelQuery := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancelQuery()

	// mongodb batch option
	opt := options.Find()
	opt.SetBatchSize(15_000)
	opt.SetAllowPartialResults(false)

	// mongodb filter: documents newer than (now - lookback window)
	now := time.Now().Unix()
	filter := bson.M{"timestamp": bson.M{"$gte": now - lookbackSeconds}}

	cur, err := database.Collection("chat").Find(queryCtx, filter, opt)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer cur.Close(queryCtx)

	// chat is not declared in this snippet; it must be defined elsewhere in
	// the package.
	var chatText []chat
	for cur.Next(queryCtx) {
		var result chat
		if err := cur.Decode(&result); err != nil {
			// Report the bad document and keep going rather than aborting
			// the whole read.
			fmt.Println(err)
			continue
		}
		chatText = append(chatText, result)
	}
	// Next returns false both at end-of-data and on failure (including the
	// deadline expiring), so the cursor error must be checked explicitly.
	if err := cur.Err(); err != nil {
		fmt.Println(err)
		return
	}

	fmt.Println("done")
	fmt.Println(len(chatText))
}

Can I achieve this with MongoDB and the Go driver? The 30-second timeout is always reached.

Edit 1

I tried the same query in Python (with pymongo), and it needs only 0m2.159s to fetch 36k documents with that filter.

2 Answers

0

Try 7000, if it works try 12000, if it doesn't work try 4000, etc.

Make note of how long these requests take to figure out if the execution time is proportional to batch size.

D. SM
  • 13,584
  • 3
  • 12
  • 21
0

You are querying on just the timestamp field. If you create an index on that collection with the timestamp field first, you should get faster results, and get a free sort in the process.

Joe
  • 25,000
  • 3
  • 22
  • 44