0

I am trying to build a Kafka source connector for audio files. from my understanding, I have to read the audio files as a Byte array.

I am using the confluent-quick-start project as a skeleton for development. the connector is not working, I can't tell why, because I don't know how to make it print logs for errors. I need help to make this work, I am not an expert in java, you can probably tell by the code.

is my approach correct? and do I have to do anything to the pom.xml file or just leave it as is?

I have examined previously available projects and tried to apply the concept for audio files. the following are the classes:

AudioSourceConnectorConfig

package org.othman.example;

import org.apache.kafka.common.config.AbstractConfig;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.common.config.ConfigDef.Type;
import org.apache.kafka.common.config.ConfigDef.Importance;

import java.util.Map;


public class AudioSourceConnectorConfig extends AbstractConfig {

  public static final String FILENAME_CONFIG="fileName";
  private static final String FILENAME_DOC ="Enter the name of the audio file";

  public static final String TOPIC_CONFIG = "topic";
  private static final String TOPIC_DOC = "Enter the topic to write to..";



  public AudioSourceConnectorConfig(ConfigDef config, Map<String, String> parsedConfig) {
    super(config, parsedConfig);
  }

  public AudioSourceConnectorConfig(Map<String, String> parsedConfig) {
    this(conf(), parsedConfig);
  }

  public static ConfigDef conf() {
    return new ConfigDef()
        .define(FILENAME_CONFIG, Type.STRING, Importance.HIGH, FILENAME_DOC)
            .define(TOPIC_CONFIG, Type.STRING, Importance.HIGH, TOPIC_DOC);

  }

  public String getFilenameConfig(){
    return this.getString("fileName");
  }
  public String getTopicConfig(){
    return this.getString("topic");
  }
}

AudioSourceConnector

package org.othman.example;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.connect.connector.Task;
import org.apache.kafka.connect.source.SourceConnector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class AudioSourceConnector extends SourceConnector {
  private static Logger log = LoggerFactory.getLogger(AudioSourceConnector.class);
  private AudioSourceConnectorConfig config;
  private Map<String, String> configProps;

  @Override
  public String version() {
    return VersionUtil.getVersion();
  }

  @Override
  public void start(Map<String, String> map) {
    //config = new AudioSourceConnectorConfig(map);
    this.configProps = new HashMap(map);

    //TODO: Add things you need to do to setup your connector.
  }

  @Override
  public Class<? extends Task> taskClass() {
    //TODO: Return your task implementation.
    return AudioSourceTask.class;
  }

  @Override
  public List<Map<String, String>> taskConfigs(int maxTasks) {
    return (List) IntStream.range(0, maxTasks).mapToObj((i) -> {
      return new HashMap(this.configProps);
    }).collect(Collectors.toList());
    //TODO: Define the individual task configurations that will be executed.


  }

  @Override
  public void stop() {
    //TODO: Do things that are necessary to stop your connector.
    this.configProps=null;
  }

  @Override
  public ConfigDef config() {
    return AudioSourceConnectorConfig.conf();
  }
}

AudioSourceTask

package org.othman.example;

import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.source.SourceRecord;
import org.apache.kafka.connect.source.SourceTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

public class AudioSourceTask extends SourceTask {
  static final Logger log = LoggerFactory.getLogger(AudioSourceTask.class);
  AudioSourceConnectorConfig config;
  private Process inputProcess;
  byte [] audioFile;

  @Override
  public String version() {
    return VersionUtil.getVersion();
  }

  @Override
  public void start(Map<String, String> map) {
    //TODO: Do things here that are required to start your task.
    // This could be open a connection to a database, etc.
    this.config = new AudioSourceConnectorConfig(map);

    try{
      audioFile = Files.readAllBytes(Paths.get(this.config.getFilenameConfig()));
     // this.inputProcess = (new ProcessBuilder((new String[]{this.config.getFilenameConfig()}))).redirectError().start();
    }
    catch(IOException e){
      System.out.println("ERROR WHILE TRYING TO READ AUDIO FILE...");
      e.printStackTrace();
    }
  }

  @Override
  public List<SourceRecord> poll() throws InterruptedException {
    //TODO: Create SourceRecord objects that will be sent the kafka cluster.
    final ArrayList<SourceRecord> records = new ArrayList<>();
    SourceRecord record;
    for (int i=0;i < audioFile.length - 1;i++) {
      record= new SourceRecord(null, null, this.config.getTopicConfig(), 0, Schema.BYTES_SCHEMA, audioFile[i]);
      records.add(record);
    }
    return records;
  }

  @Override
  public void stop() {
    //TODO: Do whatever is required to stop your task.
  }
}
  • Have you seen https://github.com/C0urante/kafka-connect-sound? – Robin Moffatt Mar 23 '21 at 17:05
  • yes, I did, his code is about reading directly from a microphone, so I tried to use his approach to try and make it work for audio files. – AinzOwlGown Mar 23 '21 at 18:16
  • Relevant for debugging - https://stackoverflow.com/a/60862364/2308683 – OneCricketeer Mar 24 '21 at 05:06
  • If your purpose is to just read a file to a byte array, I would suggest simply using a standard producer within a shell script. There's not much benefit to deploying a connect server just to read a file into a topic once. Plus, Connect works best with structured data, not binary. Note: your code is currently attempting to send single byte messages from the file, not the entire file as one event – OneCricketeer Mar 24 '21 at 05:09
  • Other than that, "file transfers/processing" is frowned upon within Kafka topics... Use a shared filesystem; put your file there; send URI information to the consumers to then download + process files – OneCricketeer Mar 24 '21 at 05:13
  • Regarding my comment about a shell script - https://github.com/C0urante/kafka-tools#kafka-binary-producer – OneCricketeer Mar 24 '21 at 05:18

0 Answers0