1

I have code that successfully connects to RabbitMQ and consumes messages. But after a while, the consumers stop receiving messages, even though the connection is still up when this happens.

package rabbitmq

import (
    "context"
    "fmt"
    "os"
    "runtime"
    "time"

    "github.com/getsentry/sentry-go"
    log "github.com/sirupsen/logrus"
    "github.com/streadway/amqp"
)

// RabbitMQ bundles one AMQP connection with the state used to re-establish
// it and to re-register consumers after the connection has been closed.
type RabbitMQ struct {
    // conn is the current connection; connection() replaces it when it dials.
    conn             *amqp.Connection
    // queues holds the result of each QueueAttach declaration, keyed by queue
    // name. connection() replaces it with an empty map on every (re)connect.
    queues           map[string]amqp.Queue
    // connString is the AMQP URI assembled by Connect and dialed by connection().
    connString       string
    // rabbitCloseError is registered with conn.NotifyClose and drained by
    // Reconnector; connection() creates a new channel for every new connection.
    rabbitCloseError chan *amqp.Error
    // recoveryConsumer lists the consumers StartConsumer has registered so that
    // RecoverConsumers can start them again.
    recoveryConsumer []RecoveryConsumer
    // ch         *amqp.Channel
    // exchange_name string
}

// RecoveryConsumer records the arguments of one StartConsumer call so the
// same consumer can be started again by RecoverConsumers.
type RecoveryConsumer struct {
    // queueName is the queue consumed from; IfExist uses it as the lookup key.
    queueName   string
    // routingKey is stored and passed back to StartConsumer on recovery.
    routingKey  string
    // handler is invoked for every delivery received from the queue.
    handler     func(d amqp.Delivery)
    // concurrency is the value StartConsumer uses as its prefetch count.
    // StartConsumer converts its int argument to int8 when storing it here,
    // so values above 127 do not fit.
    concurrency int8
}

// Delivery is an alias for amqp.Delivery, so code using this package can
// name the delivery type through this package.
type (
    Delivery = amqp.Delivery
)

// IfExist reports whether queueName still has to be registered for recovery.
//
// Note the polarity: despite the name, it returns false when a recovery entry
// for queueName is already present and true when there is none. StartConsumer
// relies on exactly this behaviour to avoid registering a queue twice.
func (r *RabbitMQ) IfExist(queueName string) bool {
    registered := false
    for idx := 0; idx < len(r.recoveryConsumer) && !registered; idx++ {
        registered = r.recoveryConsumer[idx].queueName == queueName
    }
    return !registered
}

// RecoverConsumers starts every registered consumer again, each in its own
// goroutine.
//
// The "successfully recovered" line is logged as soon as the goroutine has
// been launched; it does not wait for StartConsumer to finish or succeed.
func (r *RabbitMQ) RecoverConsumers() {
    entries := r.recoveryConsumer
    for idx := range entries {
        entry := entries[idx]
        go r.StartConsumer(entry.queueName, entry.routingKey, entry.handler, int(entry.concurrency))
        log.Infof("Consumer for %v successfully recovered", entry.queueName)
    }
}

// Reconnector waits for close notifications of the current connection and,
// for each one, reports the error, re-dials and restarts the registered
// consumers. It is meant to run in its own goroutine (see Connect).
//
// The loop ends when the notification channel is closed without delivering
// an error, which is how the amqp library signals a graceful close. The
// previous implementation dereferenced the nil error in that case and
// panicked.
func (r *RabbitMQ) Reconnector() {
    const closedFormat = "[RabbitMQ] Connection Closed : {'Reason': '%v', 'Code': '%v', 'Recoverable': '%v', 'Server_Side': '%v'"

    for {
        // The field is read again on every iteration because connection()
        // installs a fresh notification channel for each new connection.
        err := <-r.rabbitCloseError
        if err == nil {
            // Closed channel, no error: nothing to recover from, and looping
            // on a closed channel would spin forever.
            log.Debug("[RabbitMQ] Connection closed without error, reconnector stopped")
            return
        }

        log.Errorf(closedFormat, err.Reason, err.Code, err.Recover, err.Server)
        log.Debug("Reconnecting after connection closed")
        sentry.CaptureException(fmt.Errorf(closedFormat, err.Reason, err.Code, err.Recover, err.Server))
        r.connection()
        r.RecoverConsumers()
    }
}

// Connect builds the AMQP URI from its arguments, opens the connection and
// starts the Reconnector goroutine that re-dials after the connection closes.
//
// host is "hostname" or "hostname:port". user, pass and virthost are inserted
// into the URI verbatim, so callers must percent-encode any characters that
// are special in a URI. An empty virthost or "/" selects the default vhost.
func (r *RabbitMQ) Connect(host string, user string, pass string, virthost string) {
    r.connString = "amqp://" + user + ":" + pass + "@" + host + "/"
    // The URI already ends in "/", which addresses the default vhost, so only
    // a different, non-empty vhost is appended. The previous condition used
    // "||" and was therefore always true, producing "host//" for "/".
    if virthost != "" && virthost != "/" {
        r.connString += virthost
    }
    r.connection()
    go r.Reconnector()
}

// connection dials RabbitMQ using r.connString unless a live connection is
// already present. On success it resets the declared-queue cache and installs
// a new close-notification channel for Reconnector.
//
// A dial failure is reported to Sentry and then terminates the process via
// log.Fatalf; this also applies to reconnect attempts.
func (r *RabbitMQ) connection() {
    if r.conn != nil {
        if !r.conn.IsClosed() {
            return
        }
        log.Info("Reconnecting to RabbitMQ...")
    }

    // The heartbeat interval is negotiated during the connection handshake,
    // so it has to be passed to DialConfig. Assigning r.conn.Config.Heartbeat
    // after Dial, as the code did before, had no effect on the live
    // connection. Locale is set explicitly because DialConfig, unlike Dial,
    // does not fill in a default.
    var err error
    r.conn, err = amqp.DialConfig(r.connString, amqp.Config{
        Heartbeat: 5 * time.Second,
        Locale:    "en_US",
    })
    if err != nil {
        sentry.CaptureException(err)
        log.Fatalf("%s: %s", "Failed to connect to RabbitMQ", err)
    }
    r.queues = make(map[string]amqp.Queue)

    // Buffered so the library can hand over the close error even if
    // Reconnector is not receiving at that very moment.
    r.rabbitCloseError = make(chan *amqp.Error, 1)
    r.conn.NotifyClose(r.rabbitCloseError)
    log.Debug("[RabbitMQ] Successfully connected to RabbitMQ")
    log.Infof("Number of Active Thread/Goroutine %v", runtime.NumGoroutine())
}

// CreateChannel opens a new channel on the current connection.
// If the channel cannot be opened the error is logged and nil is returned,
// so callers must check the result before using it.
func (r *RabbitMQ) CreateChannel() *amqp.Channel {
    channel, openErr := r.conn.Channel()
    if openErr == nil {
        return channel
    }
    log.Error(openErr)
    return nil
}

// QueueAttach declares the durable queue name on ch and stores the
// declaration result in r.queues under that name, where Publish looks it up.
//
// A failed declaration ends the process through log.Fatalf. The write to
// r.queues is not synchronised, so concurrent calls are not safe.
func (r *RabbitMQ) QueueAttach(ch *amqp.Channel, name string) {
    const (
        durable    = true
        autoDelete = false
        exclusive  = false
        noWait     = false
    )

    declared, declareErr := ch.QueueDeclare(name, durable, autoDelete, exclusive, noWait, nil)
    if declareErr != nil {
        log.Fatalf("%s: %s", "Failed to declare a queue", declareErr)
    }
    r.queues[name] = declared
}

// TempQueueAttach declares the queue name on ch without recording it in
// r.queues. The queue is declared with the same flags QueueAttach uses
// (durable, not auto-deleted, not exclusive).
//
// If the declaration fails, the channel is closed, the error is reported to
// Sentry and the process is terminated through log.Fatalf.
func (r *RabbitMQ) TempQueueAttach(ch *amqp.Channel, name string) {
    _, err := ch.QueueDeclare(
        name,  // name
        true,  // durable
        false, // delete when unused
        false, // exclusive
        false, // no-wait
        nil,   // arguments
    )
    if err == nil {
        return
    }

    // Best effort: the process is about to exit, so a close error is of no
    // further use.
    _ = ch.Close()
    // Report before log.Fatalf. The capture used to come after it and was
    // never reached; it also carried the message of the consume error.
    sentry.CaptureException(fmt.Errorf("%s: %s", "Failed to declare a temporary queue", err))
    log.Fatalf("%s: %s", "Failed to declare a temporary queue", err)
}

// Publish sends body as a persistent "application/json" message to queue
// through the default exchange on ch, wrapped in a Sentry span.
//
// The routing key is the name recorded by QueueAttach for queue. When no
// such record exists, for example because connection() has reset r.queues
// after a reconnect, the queue name itself is used. Previously the routing
// key was empty in that case and the message was silently discarded.
//
// A publish error is reported to Sentry and ends the process via log.Fatalf.
func (r *RabbitMQ) Publish(ch *amqp.Channel, queue string, body []byte) {
    span := sentry.StartSpan(context.TODO(), "publish message")
    defer span.Finish()

    routingKey := queue
    if declared, ok := r.queues[queue]; ok {
        routingKey = declared.Name
    }

    err := ch.Publish(
        "",         // exchange
        routingKey, // routing key
        false,      // mandatory
        false,      // immediate
        amqp.Publishing{
            Headers:      map[string]interface{}{},
            ContentType:  "application/json",
            DeliveryMode: amqp.Persistent,
            Timestamp:    time.Now().UTC(),
            Body:         body,
        })
    if err != nil {
        sentry.CaptureException(err)
        log.Fatalf("%s: %s", "Failed to publish a message", err)
    }

    log.Debugf("Send message: %s", string(body))
}

// StartConsumer registers the consumer for recovery (once per queue name),
// opens a dedicated channel, declares queueName and starts delivering its
// messages to handler.
//
// routingKey is only recorded for recovery; no binding is created here.
// concurrency is used as the channel's prefetch count. Deliveries are handed
// to handler one at a time from a single goroutine, so a handler that blocks
// stops all further consumption on this queue.
//
// If the channel cannot be opened the error is logged and the call returns;
// the consumer stays registered and is retried by the next recovery. A
// Consume failure is reported to Sentry and terminates the process.
func (r *RabbitMQ) StartConsumer(queueName string, routingKey string, handler func(d amqp.Delivery), concurrency int) {
    if r.IfExist(queueName) {
        // RecoveryConsumer stores the value as int8; clamp it so that a large
        // concurrency does not wrap around to a negative number.
        const maxRecorded = 1<<7 - 1
        recorded := concurrency
        if recorded > maxRecorded {
            recorded = maxRecorded
        }
        r.recoveryConsumer = append(r.recoveryConsumer, RecoveryConsumer{
            queueName:   queueName,
            routingKey:  routingKey,
            handler:     handler,
            concurrency: int8(recorded),
        })
    }

    ch, err := r.conn.Channel()
    if err != nil {
        // Without a channel nothing below can work; continuing would
        // dereference a nil channel.
        log.Error(err)
        return
    }

    // Prefetch as many messages as the requested concurrency.
    // NOTE(review): the consumer below uses auto-ack, and brokers may ignore
    // the prefetch limit for auto-ack consumers. Confirm this is intended.
    err = ch.Qos(concurrency, 0, false)
    if err != nil {
        sentry.CaptureException(err)
        log.Errorf("%s: %s", "Failed QOS", err)
    }
    r.QueueAttach(ch, queueName)

    msgs, err := ch.Consume(
        queueName, // queue
        "",        // consumer
        true,      // auto-ack
        false,     // exclusive
        false,     // no-local
        false,     // no-wait
        nil,       // args
    )
    if err != nil {
        sentry.CaptureException(err)
        log.Fatalf("%s: %s", "Failed consume message", err)
        // log.Fatalf normally ends the process; this is a backstop in case
        // the logger's exit function has been replaced.
        os.Exit(1)
    }

    go func() {
        for msg := range msgs {
            handler(msg)
        }
        // The range ends only when the delivery channel has been closed.
        // Log it so that a stopped consumer is visible.
        log.Warnf("Consumer for %v stopped: delivery channel closed", queueName)
    }()
}

// WaitMessage polls queueName on ch with auto-ack until a message arrives
// or timeout has elapsed, and returns the message body.
//
// It returns nil when no message arrived in time or when polling failed; a
// polling error is logged and reported to Sentry. The queue is always polled
// at least once, even for a zero or negative timeout.
//
// The timeout argument used to be ignored in favour of a fixed one-second
// window.
func (r *RabbitMQ) WaitMessage(ch *amqp.Channel, queueName string, timeout time.Duration) []byte {
    const pollInterval = 50 * time.Millisecond

    deadline := time.Now().Add(timeout)
    for {
        msg, ok, err := ch.Get(queueName, true)
        if err != nil {
            log.Errorf("Can't consume queue. Error: %s", err.Error())
            sentry.CaptureException(err)
            return nil
        }
        if ok {
            return msg.Body
        }

        remaining := time.Until(deadline)
        if remaining <= 0 {
            return nil
        }
        // Never sleep past the deadline.
        if remaining > pollInterval {
            remaining = pollInterval
        }
        time.Sleep(remaining)
    }
}

What can be the reason for this? I suspect that the cause is on the RabbitMQ side, but the client library does not show any error.

Because right after startup, the consumers listen and work successfully.

GurbaniX
  • 187
  • 2
  • 12
  • Interesting issue ..... – GurbaniX Sep 04 '22 at 17:56
  • 1
    To improve your chances of getting an answer I'd suggest providing a [minimal, reproducible, example](https://stackoverflow.com/help/minimal-reproducible-example). This can be hard to do but currently you have provided a lot of code, which may not even contain the issue (i.e. what you are seeing could be due to `handler(msg)` blocking). As a minimum I'd suggest adding a logging to narrow down the cause. – Brits Sep 04 '22 at 20:21

1 Answers1

0

The only thing I can advise you to try is to use heartbeats. This will detect whether a connection is dead due to, for example, a network failure.

You can take a look on it here: https://www.rabbitmq.com/heartbeats.html.

I'm not sure about this one, since it has been a long time since I used it, but if you put a try/catch around the part that receives messages, the error might show up in the catch when the connection dies.

I hope this helps you a little for solving your problem.

Jordy
  • 320
  • 2
  • 15
  • Connection isn't closed, when I checked RabbitMQ Consuming side, its ok I can see consumers for queue. – GurbaniX Sep 06 '22 at 20:06
  • @GurbaniX And there are no crashes/errors within the application? Have you tried it on multiple networks? – Jordy Sep 08 '22 at 07:26
  • There is no crash or error. It looks like some thread gets stuck, but I cannot find where. – GurbaniX Sep 23 '22 at 08:22
  • If you walk through it using breakpoints, can you see where it gets stuck? – Jordy Sep 26 '22 at 09:03
  • I added a context to the database operations that are executed by the goroutine. It seems the cause is a database operation that cannot complete, which makes the goroutine hang. – GurbaniX Dec 30 '22 at 12:42