I am slowly working my way into the world of Docker Compose and would like to build a data pipeline. I think something is wrong with my connector: the CSV I later send to Kafka is read in, but the data is then never written to HDFS by the connector. What do I have wrong in the connector setup?
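For context, this is roughly how I feed the CSV into the topic (a sketch; the file name and the use of kafka-console-producer are just how I test it, not part of the compose file):

    # sketch: stream the CSV into csv-topic via the kafka service
    docker compose exec -T kafka kafka-console-producer \
      --bootstrap-server kafka:9092 --topic csv-topic < data.csv

Here is my docker-compose.yml: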

version: '3.9'

services:

  db:
    image: postgres:9.6
    container_name: kong-db
    environment:
      POSTGRES_USER: kong
      POSTGRES_DB: kong
      POSTGRES_PASSWORD: kongpass
    restart: on-failure
    volumes:
      - kong-data:/var/lib/postgresql/data
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U kong"]
      interval: 30s
      timeout: 10s
      retries: 3

  kong-migrations:
    image: kong
    container_name: kong-migration
    environment:
      KONG_DATABASE: postgres
      KONG_PG_HOST: db
      KONG_PG_PASSWORD: kongpass
    command: kong migrations bootstrap
    restart: on-failure
    depends_on:
      - db

  kong-gateway:
    image: kong
    container_name: kong-gateway
    environment:
      KONG_DATABASE: postgres
      KONG_PG_HOST: db
      KONG_PG_USER: kong
      KONG_PG_PASSWORD: kongpass
      KONG_PROXY_ACCESS_LOG: /dev/stdout
      KONG_ADMIN_ACCESS_LOG: /dev/stdout
      KONG_PROXY_ERROR_LOG: /dev/stderr
      KONG_ADMIN_ERROR_LOG: /dev/stderr
      KONG_ADMIN_LISTEN: 0.0.0.0:8001
      KONG_ADMIN_GUI_URL: http://localhost:8002
    restart: on-failure
    ports:
      - "8000:8000"
      - "8443:8443"
      - "8001:8001"
      - "8444:8444"
      - "8002:8002"
      - "8445:8445"
      - "8003:8003"
      - "8004:8004"
    depends_on:
      - kong-migrations
    healthcheck:
      test: ["CMD-SHELL", "curl -s -o /dev/null http://localhost:8001/status || echo 'error'"]
      interval: 30s
      timeout: 10s
      retries: 3

  kong-dashboard:
    image: pantsel/konga
    container_name: konga-dashboard
    ports:
      - 1337:1337
    restart: on-failure
    depends_on:
      - kong-gateway
      - db
    healthcheck:
      test: ["CMD-SHELL", "nc -z -w 1 localhost 1337"]
      interval: 30s
      timeout: 10s
      retries: 3
    # note: curl -fL -o NUL -w "%{http_code}" http://localhost:1337/register must return http_code 200!

  zookeeper:
    image: confluentinc/cp-zookeeper:latest
    environment:
      ZOOKEEPER_CLIENT_PORT: 2181
      ZOOKEEPER_TICK_TIME: 2000
    ports:
      - 22181:2181
    healthcheck:
      test: ["CMD", "nc", "-vz", "localhost", "2181"]
      interval: 30s
      timeout: 10s
      retries: 3

  kafka:
    image: confluentinc/cp-kafka:latest
    ports:
      - 29092:29092
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_CREATE_TOPICS: csv-topic:1:1
      #'csv-topic:1:1'
    depends_on:
      - zookeeper
    healthcheck:
      test: ["CMD", "nc", "-vz", "localhost", "9092"]
      interval: 30s
      timeout: 10s
      retries: 3

  kafka-hdfs-connector:
    image: confluentinc/cp-kafka-connect:latest
    container_name: kafka-hdfs-connector
    environment:
      CONNECT_BOOTSTRAP_SERVERS: kafka:9092
      CONNECT_REST_PORT: 8083
      CONNECT_GROUP_ID: connect-cluster
      CONNECT_CONFIG_STORAGE_TOPIC: connect-configs
      CONNECT_OFFSET_STORAGE_TOPIC: connect-offsets
      CONNECT_STATUS_STORAGE_TOPIC: connect-statuses
      CONNECT_KEY_CONVERTER: org.apache.kafka.connect.json.JsonConverter
      CONNECT_VALUE_CONVERTER: org.apache.kafka.connect.json.JsonConverter
      CONNECT_KEY_CONVERTER_SCHEMAS_ENABLE: false
      CONNECT_VALUE_CONVERTER_SCHEMAS_ENABLE: false
      CONNECT_PLUGIN_PATH: /usr/share/java,/etc/kafka-connect/jars
      CONNECT_HDFS_URL: hdfs://namenode:9000
      #8020
      CONNECT_HDFS_TOPIC_DIR: /kafka-data
      CONNECT_REST_ADVERTISED_HOST_NAME: kafka-con
      CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: 1
      CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: 1
      CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: 1
      CONNECTOR_CLASS: io.confluent.connect.hdfs.HdfsSinkConnector
      CONNECTOR_NAME: hdfs-connector
      CONNECTOR_TASKS_MAX: 1
      CONNECTOR_TOPICS: csv-topic
      CONNECTOR_HDFS_FLUSH_SIZE: 1000
      CONNECTOR_HDFS_COMPRESSION_TYPE: gzip
    depends_on:
      - kafka
      - namenode

  namenode:
    image: bde2020/hadoop-namenode
    #bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
    container_name: namenode
    restart: always
    ports:
      - 9870:9870
      - 9000:9000
    volumes:
      - hadoop_namenode:/hadoop/dfs/name
    environment:
      CLUSTER_NAME: test
    env_file:
      - ./hadoop.env

  datanode:
    image: bde2020/hadoop-datanode
    #bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
    container_name: datanode
    restart: always
    volumes:
      - hadoop_datanode:/hadoop/dfs/data
    environment:
      SERVICE_PRECONDITION: namenode:9870
    env_file:
      - ./hadoop.env

  jupyter-pyspark-notebook:
    image: jupyter/pyspark-notebook
    container_name: jupyter-pyspark-notebook
    ports:
      - "8888:8888"
    volumes:
      - hadoop_datanode:/home/jovyan/work:ro
    environment:
      SERVICE_PRECONDITION: datanode:9864
    depends_on:
      - kafka-hdfs-connector

volumes:
  kong-data:
    name: kong-db-volume
  hadoop_namenode:
    name: hadoop-nn-volume
  hadoop_datanode:
    name: hadoop-dn-volume
  hadoop_historyserver:
    name: hadoop-hm-volume

networks:
  default:
    name: kong-net
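Since the Connect REST port 8083 is not published to the host in this file, I check the worker from inside the container, roughly like this (a sketch; it assumes curl is available in the cp-kafka-connect image):

    # sketch: ask the Connect worker which connectors and plugins it knows about
    docker exec kafka-hdfs-connector curl -s http://localhost:8083/connectors
    docker exec kafka-hdfs-connector curl -s http://localhost:8083/connector-plugins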

I have tried checking the logs, changing the image versions, and making various changes to the environment variables, but nothing has helped.
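For completeness, this is the connector configuration I am aiming for with the CONNECTOR_* variables above, written out as an explicit REST call (a sketch; the property names are taken from the HDFS sink connector documentation):

    # sketch: register the HDFS sink through the Connect REST interface instead
    docker exec kafka-hdfs-connector curl -s -X POST http://localhost:8083/connectors \
      -H "Content-Type: application/json" \
      -d '{
        "name": "hdfs-connector",
        "config": {
          "connector.class": "io.confluent.connect.hdfs.HdfsSinkConnector",
          "tasks.max": "1",
          "topics": "csv-topic",
          "hdfs.url": "hdfs://namenode:9000",
          "topics.dir": "/kafka-data",
          "flush.size": "1000"
        }
      }'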

Mauz