1

I have a NestJS application using KafkaJS, deployed in an EKS cluster. I am using Amazon MSK as my Kafka cluster, with three brokers and SASL/SCRAM-SHA-512 authentication. The Kafka cluster uses the default configuration.

Here is my consumer client:

/** Connection timeout (ms) used when KAFKA_CONNECTION_TIMEOUT is unset or not a number. */
const DEFAULT_KAFKA_CONNECTION_TIMEOUT_MS = 30000;

/** Request timeout (ms) used when KAFKA_REQUEST_TIMEOUT is unset or not a number. */
const DEFAULT_KAFKA_REQUEST_TIMEOUT_MS = 25000;

/**
 * Parses a millisecond timeout from an environment variable.
 *
 * @param raw - Raw environment value (may be undefined or empty).
 * @param fallback - Value returned when `raw` is missing or does not parse to a finite number.
 * @returns The parsed timeout, or `fallback`.
 */
function readTimeoutMs(raw: string | undefined, fallback: number): number {
  if (!raw) {
    return fallback;
  }
  const parsed = Number(raw);
  // Number('abc') is NaN; fall back to the default instead of forwarding NaN.
  return Number.isFinite(parsed) ? parsed : fallback;
}

/**
 * Splits a comma-separated broker list into individual addresses.
 *
 * Surrounding whitespace is trimmed and empty entries (e.g. from a trailing
 * comma) are dropped, so "a:9096, b:9096," yields ["a:9096", "b:9096"].
 *
 * @param raw - Raw value of KAFKA_BROKER_URL.
 * @returns A non-empty array of broker addresses.
 * @throws Error when no broker address is configured.
 */
function readBrokerList(raw: string | undefined): string[] {
  const brokers = (raw === undefined ? [] : raw.split(','))
    .map((broker) => broker.trim())
    .filter((broker) => broker.length > 0);
  if (brokers.length === 0) {
    throw new Error('KAFKA_BROKER_URL must contain at least one broker address');
  }
  return brokers;
}

/**
 * Global module that registers a Kafka `ClientProxy` built from environment
 * variables:
 *
 * - KAFKA_BROKER_URL: comma-separated broker addresses (required)
 * - KAFKA_CONNECTION_TIMEOUT / KAFKA_REQUEST_TIMEOUT: timeouts in ms (optional)
 * - KAFKA_SSL: when exactly 'true', enables TLS and SASL authentication
 * - KAFKA_SASL_MECHANISM: SASL mechanism, defaults to 'scram-sha-512'
 * - KAFKA_USER / KAFKA_PASSWORD: SASL credentials
 */
@Global()
@Module({
  imports: [ConfigModule.forRoot()],
  providers: [
    {
      provide: '<application-name>',
      useFactory: () => {
        // Settings shared by the plain and the SSL/SASL client.
        const baseClient = {
          clientId: appConstants.KAFKA_CLIENT_ID,
          brokers: readBrokerList(process.env.KAFKA_BROKER_URL),
          connectionTimeout: readTimeoutMs(
            process.env.KAFKA_CONNECTION_TIMEOUT,
            DEFAULT_KAFKA_CONNECTION_TIMEOUT_MS
          ),
          requestTimeout: readTimeoutMs(process.env.KAFKA_REQUEST_TIMEOUT, DEFAULT_KAFKA_REQUEST_TIMEOUT_MS),
          logLevel: logLevel.DEBUG
        };

        // `any` is kept on purpose: the SASL mechanism and credentials come from
        // the environment as plain (possibly undefined) strings and are passed
        // through unvalidated, as before.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const kafkaClient: any =
          process.env.KAFKA_SSL === 'true'
            ? {
                ...baseClient,
                ssl: true,
                sasl: {
                  mechanism: process.env.KAFKA_SASL_MECHANISM || 'scram-sha-512',
                  username: process.env.KAFKA_USER,
                  password: process.env.KAFKA_PASSWORD
                }
              }
            : baseClient;

        return ClientProxyFactory.create({
          transport: Transport.KAFKA,
          options: {
            client: kafkaClient,
            consumer: {
              groupId: appConstants.KAFKA_GROUP_ID
            },
            subscribe: {
              fromBeginning: appConstants.KAFKA_FROM_BEGINNING
            }
          }
        });
      }
    }
  ],
  // NOTE(review): this token differs from the `provide` token above, presumably
  // a redaction artifact. The exported token must match a provider registered
  // in this module; confirm both use the same value in the real code.
  exports: ['<app-name>']
})
export class KafkaModule {}

Debug logs (broker URLs, client name, and group name redacted):

[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [Connection] Request SaslHandshake(key: 17, version: 1) {"timestamp":"2023-08-26T17:38:52.308Z","logger":"kafkajs","broker":"<<BROKER_2_URL>>","clientId":"<CLIENT_NAME>","correlationId":0,"expectResponse":true,"size":63}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [Connection] Response SaslHandshake(key: 17, version: 1) {"timestamp":"2023-08-26T17:38:52.308Z","logger":"kafkajs","broker":"<<BROKER_2_URL>>","clientId":"<CLIENT_NAME>","correlationId":0,"size":25,"data":{"errorCode":0,"enabledMechanisms":["SCRAM-SHA-512"]}}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [SaslAuthenticator-SCRAM-SHA-512] Exchanging first client message {"timestamp":"2023-08-26T17:38:52.308Z","logger":"kafkajs","broker":"<<BROKER_2_URL>>"}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [Connection] Request SaslAuthenticate(key: 36, version: 1) {"timestamp":"2023-08-26T17:38:52.308Z","logger":"kafkajs","broker":"<<BROKER_2_URL>>","clientId":"<CLIENT_NAME>","correlationId":1,"expectResponse":true,"size":100}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [Connection] Response SaslAuthenticate(key: 36, version: 1) {"timestamp":"2023-08-26T17:38:52.309Z","logger":"kafkajs","broker":"<<BROKER_2_URL>>","clientId":"<CLIENT_NAME>","correlationId":1,"size":116,"data":{"errorCode":0,"errorMessage":"","authBytes":{"type":"Buffer","data":[<REMOVED>]},"sessionLifetimeMs":"0"}}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [SaslAuthenticator-SCRAM-SHA-512] Sending final message {"timestamp":"2023-08-26T17:38:52.309Z","logger":"kafkajs","broker":"<<BROKER_2_URL>>"}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [Connection] Request SaslAuthenticate(key: 36, version: 1) {"timestamp":"2023-08-26T17:38:52.312Z","logger":"kafkajs","broker":"<<BROKER_2_URL>>","clientId":"<CLIENT_NAME>","correlationId":2,"expectResponse":true,"size":200}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [Connection] Response SaslAuthenticate(key: 36, version: 1) {"timestamp":"2023-08-26T17:38:52.313Z","logger":"kafkajs","broker":"<<BROKER_2_URL>>","clientId":"<CLIENT_NAME>","correlationId":2,"size":110,"data":{"errorCode":0,"errorMessage":"","authBytes":{"type":"Buffer","data":[<REMOVED>]},"sessionLifetimeMs":"0"}}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [SaslAuthenticator-SCRAM-SHA-512] SASL SCRAM SHA512 authentication successful {"timestamp":"2023-08-26T17:38:52.315Z","logger":"kafkajs","broker":"<<BROKER_2_URL>>"}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [Connection] Request GroupCoordinator(key: 10, version: 2) {"timestamp":"2023-08-26T17:38:52.316Z","logger":"kafkajs","broker":"<<BROKER_2_URL>>","clientId":"<CLIENT_NAME>","correlationId":3,"expectResponse":true,"size":83}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [Connection] Response GroupCoordinator(key: 10, version: 2) {"timestamp":"2023-08-26T17:38:52.316Z","logger":"kafkajs","broker":"<<BROKER_2_URL>>","clientId":"<CLIENT_NAME>","correlationId":3,"size":91,"data":{"throttleTime":0,"errorCode":0,"errorMessage":"NONE","coordinator":{"nodeId":3,"host":"<BROKER3_URL>","port":9096},"clientSideThrottleTime":0}}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [Cluster] Found group coordinator {"timestamp":"2023-08-26T17:38:52.316Z","logger":"kafkajs","nodeId":3}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [Connection] Request JoinGroup(key: 11, version: 5) {"timestamp":"2023-08-26T17:38:52.316Z","logger":"kafkajs","broker":"<BROKER_3_URL>","clientId":"<CLIENT_NAME>","correlationId":11,"expectResponse":true,"size":175}
[Nest] 8  - 08/26/2023, 5:38:52 PM   DEBUG [ClientKafka] DEBUG [Connection] Response JoinGroup(key: 11, version: 5) {"timestamp":"2023-08-26T17:38:52.317Z","logger":"kafkajs","broker":"<BROKER_3_URL>","clientId":"<CLIENT_NAME>","error":"This is not the correct coordinator for this group","correlationId":11,"payload":{"type":"Buffer","data":"[filtered]"}}
[Nest] 8  - 08/26/2023, 5:38:52 PM   ERROR [ClientKafka] ERROR [Consumer] Crash: KafkaJSNumberOfRetriesExceeded: This is not the correct coordinator for this group {"timestamp":"2023-08-26T17:38:52.317Z","logger":"kafkajs","groupId":"<GROUP_NAME>","retryCount":5,"stack":"KafkaJSNonRetriableError\n  Caused by: KafkaJSError: This is not the correct coordinator for this group\n    at /app/node_modules/kafkajs/src/consumer/consumerGroup.js:361:17\n    at runMicrotasks (<anonymous>)\n    at processTicksAndRejections (node:internal/process/task_queues:96:5)\n    at async Runner.start (/app/node_modules/kafkajs/src/consumer/runner.js:84:7)\n    at async start (/app/node_modules/kafkajs/src/consumer/index.js:243:7)"}

The same application, without any changes to the Kafka client or cluster, works without any errors. I have also deployed it to an EC2 server in the same network, and it works smoothly there.

All the things I have tried to fix the issue:

Increased the number of retries. Changed the client name and consumer group name.

I have reached out to AWS Support and they do not think clearing the zookeeper logs is going to help.

Sampath
  • 11
  • 2
  • The client has the required permissions on the Kafka cluster to produce, consume, describe, and describe_configs on the group name and the cluster. For the same group name, a consumer and a producer run from the Kafka command-line tools work without errors. – Sampath Aug 26 '23 at 17:56
  • The group coordinator is managed by the brokers, therefore Zookeeper operations indeed wouldn't help. Perhaps you should be using IAM connection for msk? – OneCricketeer Aug 27 '23 at 13:35

0 Answers