0

Does anyone know whether this Pulsar NiFi connector can publish to a Delta Lake sink (the Delta Lake sink connector)? https://github.com/david-streamlio/pulsar-nifi-bundle

I am getting data into Pulsar with NiFi, and I would like to move this data from Pulsar to Delta Lake using this connector:

https://streamnative.io/blog/release/2022-08-17-announcing-the-delta-lake-sink-connector-for-apache-pulsar/

I am able to get data into the lakehouse if I publish with a Python script that has a JSON schema defined.

When I publish to the same queue with NiFi, the data is not persisted in the lakehouse. I have a flow definition that looks like this:

{"flowContents":{"identifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0","name":"demo","comments":"","position":{"x":0.0,"y":0.0},"processGroups":[],"remoteProcessGroups":[],"processors":[{"identifier":"bb9f85c9-02c9-3776-b3a2-e7f6c012ea71","name":"PublishPulsar","comments":"","position":{"x":1936.0,"y":-40.0},"bundle":{"group":"io.streamnative.connectors","artifact":"nifi-pulsar-nar","version":"1.15.2"},"style":{},"type":"org.apache.nifi.processors.pulsar.pubsub.PublishPulsar","properties":{"MAX_ASYNC_REQUESTS":"50","BATCH_INTERVAL":"10 ms","COMPRESSION_TYPE":"NONE","PULSAR_CLIENT_SERVICE":"e26e03f1-a9ba-3144-8427-60585fbd8e13","MESSAGE_ROUTING_MODE":"RoundRobinPartition","BLOCK_IF_QUEUE_FULL":"false","BATCHING_MAX_MESSAGES":"1000","MAPPED_MESSAGE_PROPERTIES":"","PENDING_MAX_MESSAGES":"1000","TOPIC":"persistent://public/default/raw.ro","MESSAGE_DEMARCATOR":null,"ASYNC_ENABLED":"false","MESSAGE_KEY":null,"BATCHING_ENABLED":"false"},"propertyDescriptors":{"MAX_ASYNC_REQUESTS":{"name":"MAX_ASYNC_REQUESTS","displayName":"Maximum Async Requests","identifiesControllerService":false,"sensitive":false},"BATCH_INTERVAL":{"name":"BATCH_INTERVAL","displayName":"Batch Interval","identifiesControllerService":false,"sensitive":false},"COMPRESSION_TYPE":{"name":"COMPRESSION_TYPE","displayName":"Compression Type","identifiesControllerService":false,"sensitive":false},"PULSAR_CLIENT_SERVICE":{"name":"PULSAR_CLIENT_SERVICE","displayName":"Pulsar Client Service","identifiesControllerService":true,"sensitive":false},"MESSAGE_ROUTING_MODE":{"name":"MESSAGE_ROUTING_MODE","displayName":"Message Routing Mode","identifiesControllerService":false,"sensitive":false},"BLOCK_IF_QUEUE_FULL":{"name":"BLOCK_IF_QUEUE_FULL","displayName":"Block if Message Queue Full","identifiesControllerService":false,"sensitive":false},"BATCHING_MAX_MESSAGES":{"name":"BATCHING_MAX_MESSAGES","displayName":"Batching Max Messages","identifiesControllerService":false,"sensitive":false},"MAPPED_MESSAGE_PROPERTIES":{"name":"MAPPED_MESSAGE_PROPERTIES","displayName":"Mapped Message Properties","identifiesControllerService":false,"sensitive":false},"PENDING_MAX_MESSAGES":{"name":"PENDING_MAX_MESSAGES","displayName":"Max Pending Messages","identifiesControllerService":false,"sensitive":false},"TOPIC":{"name":"TOPIC","displayName":"Topic Name","identifiesControllerService":false,"sensitive":false},"MESSAGE_DEMARCATOR":{"name":"MESSAGE_DEMARCATOR","displayName":"Message Demarcator","identifiesControllerService":false,"sensitive":false},"ASYNC_ENABLED":{"name":"ASYNC_ENABLED","displayName":"Async Enabled","identifiesControllerService":false,"sensitive":false},"MESSAGE_KEY":{"name":"MESSAGE_KEY","displayName":"Message Key","identifiesControllerService":false,"sensitive":false},"BATCHING_ENABLED":{"name":"BATCHING_ENABLED","displayName":"Batching Enabled","identifiesControllerService":false,"sensitive":false}},"schedulingPeriod":"0 sec","schedulingStrategy":"TIMER_DRIVEN","executionNode":"ALL","penaltyDuration":"30 sec","yieldDuration":"1 sec","bulletinLevel":"WARN","runDurationMillis":0,"concurrentlySchedulableTaskCount":1,"autoTerminatedRelationships":[],"scheduledState":"ENABLED","componentType":"PROCESSOR","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"},{"identifier":"ee8e6a42-aa33-3be1-80b5-f2fbb8e45dbd","name":"HandleHttpRequest","comments":"","position":{"x":1472.0000726201552,"y":-45.69454095191418},"bundle":{"group":"org.apache.nifi","artifact":"nifi-standard-nar","version":"1.15.2"},"style":{},"type":"org.apache.nifi.processors.standard.HandleHttpRequest","properties":{"multipart-request-max-size":"1 MB","parameters-to-attributes":null,"Allow POST":"true","Hostname":null,"Default URL Character Set":"UTF-8","Allow DELETE":"true","Additional HTTP Methods":null,"container-queue-size":"50","HTTP Context Map":"63974084-613e-3240-b6ed-b44b01a14a7b","multipart-read-buffer-size":"512 KB","SSL Context Service":null,"Allow OPTIONS":"false","Allowed Paths":"/ingest","Allow GET":"true","Allow HEAD":"false","Listening Port":"8585","Client Authentication":"No Authentication","Allow PUT":"true"},"propertyDescriptors":{"multipart-request-max-size":{"name":"multipart-request-max-size","displayName":"Multipart Request Max Size","identifiesControllerService":false,"sensitive":false},"parameters-to-attributes":{"name":"parameters-to-attributes","displayName":"Parameters to Attributes List","identifiesControllerService":false,"sensitive":false},"Allow POST":{"name":"Allow POST","displayName":"Allow POST","identifiesControllerService":false,"sensitive":false},"Hostname":{"name":"Hostname","displayName":"Hostname","identifiesControllerService":false,"sensitive":false},"Default URL Character Set":{"name":"Default URL Character Set","displayName":"Default URL Character Set","identifiesControllerService":false,"sensitive":false},"Allow DELETE":{"name":"Allow DELETE","displayName":"Allow DELETE","identifiesControllerService":false,"sensitive":false},"Additional HTTP Methods":{"name":"Additional HTTP Methods","displayName":"Additional HTTP Methods","identifiesControllerService":false,"sensitive":false},"container-queue-size":{"name":"container-queue-size","displayName":"Container Queue Size","identifiesControllerService":false,"sensitive":false},"HTTP Context Map":{"name":"HTTP Context Map","displayName":"HTTP Context Map","identifiesControllerService":true,"sensitive":false},"multipart-read-buffer-size":{"name":"multipart-read-buffer-size","displayName":"Multipart Read Buffer Size","identifiesControllerService":false,"sensitive":false},"SSL Context Service":{"name":"SSL Context Service","displayName":"SSL Context Service","identifiesControllerService":true,"sensitive":false},"Allow OPTIONS":{"name":"Allow OPTIONS","displayName":"Allow OPTIONS","identifiesControllerService":false,"sensitive":false},"Allowed Paths":{"name":"Allowed Paths","displayName":"Allowed Paths","identifiesControllerService":false,"sensitive":false},"Allow GET":{"name":"Allow GET","displayName":"Allow GET","identifiesControllerService":false,"sensitive":false},"Allow HEAD":{"name":"Allow HEAD","displayName":"Allow HEAD","identifiesControllerService":false,"sensitive":false},"Listening Port":{"name":"Listening Port","displayName":"Listening Port","identifiesControllerService":false,"sensitive":false},"Client Authentication":{"name":"Client Authentication","displayName":"Client Authentication","identifiesControllerService":false,"sensitive":false},"Allow PUT":{"name":"Allow PUT","displayName":"Allow PUT","identifiesControllerService":false,"sensitive":false}},"schedulingPeriod":"0 sec","schedulingStrategy":"TIMER_DRIVEN","executionNode":"ALL","penaltyDuration":"30 sec","yieldDuration":"1 sec","bulletinLevel":"WARN","runDurationMillis":0,"concurrentlySchedulableTaskCount":1,"autoTerminatedRelationships":[],"scheduledState":"ENABLED","componentType":"PROCESSOR","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"},{"identifier":"f6ccd600-0457-316b-a707-d2a87bf9efd7","name":"HandleHttpResponse","comments":"","position":{"x":1800.0000726201552,"y":218.30545904808582},"bundle":{"group":"org.apache.nifi","artifact":"nifi-standard-nar","version":"1.15.2"},"style":{},"type":"org.apache.nifi.processors.standard.HandleHttpResponse","properties":{"HTTP Context Map":"63974084-613e-3240-b6ed-b44b01a14a7b","Attributes to add to the HTTP Response (Regex)":null,"HTTP Status Code":"200"},"propertyDescriptors":{"HTTP Context Map":{"name":"HTTP Context Map","displayName":"HTTP Context Map","identifiesControllerService":true,"sensitive":false},"Attributes to add to the HTTP Response (Regex)":{"name":"Attributes to add to the HTTP Response (Regex)","displayName":"Attributes to add to the HTTP Response (Regex)","identifiesControllerService":false,"sensitive":false},"HTTP Status Code":{"name":"HTTP Status Code","displayName":"HTTP Status Code","identifiesControllerService":false,"sensitive":false}},"schedulingPeriod":"0 sec","schedulingStrategy":"TIMER_DRIVEN","executionNode":"ALL","penaltyDuration":"30 sec","yieldDuration":"1 sec","bulletinLevel":"WARN","runDurationMillis":0,"concurrentlySchedulableTaskCount":1,"autoTerminatedRelationships":[],"scheduledState":"ENABLED","componentType":"PROCESSOR","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"}],"inputPorts":[],"outputPorts":[],"connections":[{"identifier":"faa28273-5877-3957-adfa-3a7ed32a45d6","name":"","source":{"id":"f6ccd600-0457-316b-a707-d2a87bf9efd7","type":"PROCESSOR","groupId":"05f56bb4-a817-3052-aa89-d20e5dde41f0","name":"HandleHttpResponse","comments":""},"destination":{"id":"f42b716a-43e0-366d-93f4-745243738b0e","type":"FUNNEL","groupId":"05f56bb4-a817-3052-aa89-d20e5dde41f0","name":"Funnel","comments":""},"labelIndex":1,"zIndex":0,"selectedRelationships":["failure"],"backPressureObjectThreshold":10000,"backPressureDataSizeThreshold":"1 GB","flowFileExpiration":"0 sec","prioritizers":[],"bends":[],"loadBalanceStrategy":"DO_NOT_LOAD_BALANCE","partitioningAttribute":"","loadBalanceCompression":"DO_NOT_COMPRESS","componentType":"CONNECTION","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"},{"identifier":"07fe335e-da50-38af-8206-5ea1253fba89","name":"","source":{"id":"f6ccd600-0457-316b-a707-d2a87bf9efd7","type":"PROCESSOR","groupId":"05f56bb4-a817-3052-aa89-d20e5dde41f0","name":"HandleHttpResponse","comments":""},"destination":{"id":"bb9f85c9-02c9-3776-b3a2-e7f6c012ea71","type":"PROCESSOR","groupId":"05f56bb4-a817-3052-aa89-d20e5dde41f0","name":"PublishPulsar","comments":""},"labelIndex":1,"zIndex":0,"selectedRelationships":["success"],"backPressureObjectThreshold":10000,"backPressureDataSizeThreshold":"1 GB","flowFileExpiration":"0 sec","prioritizers":[],"bends":[],"loadBalanceStrategy":"DO_NOT_LOAD_BALANCE","partitioningAttribute":"","loadBalanceCompression":"DO_NOT_COMPRESS","componentType":"CONNECTION","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"},{"identifier":"88d2acbf-9fe9-3cd4-82aa-74b9742ad8ae","name":"","source":{"id":"bb9f85c9-02c9-3776-b3a2-e7f6c012ea71","type":"PROCESSOR","groupId":"05f56bb4-a817-3052-aa89-d20e5dde41f0","name":"PublishPulsar","comments":""},"destination":{"id":"b8ac3efc-6cba-3a96-aefb-ed7bd617b124","type":"FUNNEL","groupId":"05f56bb4-a817-3052-aa89-d20e5dde41f0","name":"Funnel","comments":""},"labelIndex":1,"zIndex":0,"selectedRelationships":["failure"],"backPressureObjectThreshold":10000,"backPressureDataSizeThreshold":"1 GB","flowFileExpiration":"0 sec","prioritizers":[],"bends":[],"loadBalanceStrategy":"DO_NOT_LOAD_BALANCE","partitioningAttribute":"","loadBalanceCompression":"DO_NOT_COMPRESS","componentType":"CONNECTION","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"},{"identifier":"6d135917-74dc-3170-b09f-841aba6c6fe6","name":"","source":{"id":"bb9f85c9-02c9-3776-b3a2-e7f6c012ea71","type":"PROCESSOR","groupId":"05f56bb4-a817-3052-aa89-d20e5dde41f0","name":"PublishPulsar","comments":""},"destination":{"id":"0e9582ec-28f9-3ee8-a366-ccc1833dbffb","type":"FUNNEL","groupId":"05f56bb4-a817-3052-aa89-d20e5dde41f0","name":"Funnel","comments":""},"labelIndex":1,"zIndex":0,"selectedRelationships":["success"],"backPressureObjectThreshold":10000,"backPressureDataSizeThreshold":"1 GB","flowFileExpiration":"0 sec","prioritizers":[],"bends":[],"loadBalanceStrategy":"DO_NOT_LOAD_BALANCE","partitioningAttribute":"","loadBalanceCompression":"DO_NOT_COMPRESS","componentType":"CONNECTION","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"},{"identifier":"53c0c89f-ca40-3a3b-bae2-6947f2e5124b","name":"","source":{"id":"ee8e6a42-aa33-3be1-80b5-f2fbb8e45dbd","type":"PROCESSOR","groupId":"05f56bb4-a817-3052-aa89-d20e5dde41f0","name":"HandleHttpRequest","comments":""},"destination":{"id":"f6ccd600-0457-316b-a707-d2a87bf9efd7","type":"PROCESSOR","groupId":"05f56bb4-a817-3052-aa89-d20e5dde41f0","name":"HandleHttpResponse","comments":""},"labelIndex":1,"zIndex":0,"selectedRelationships":["success"],"backPressureObjectThreshold":10000,"backPressureDataSizeThreshold":"1 GB","flowFileExpiration":"0 sec","prioritizers":[],"bends":[],"loadBalanceStrategy":"DO_NOT_LOAD_BALANCE","partitioningAttribute":"","loadBalanceCompression":"DO_NOT_COMPRESS","componentType":"CONNECTION","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"}],"labels":[],"funnels":[{"identifier":"f42b716a-43e0-366d-93f4-745243738b0e","position":{"x":2536.000072620155,"y":257.1899500148827},"componentType":"FUNNEL","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"},{"identifier":"b8ac3efc-6cba-3a96-aefb-ed7bd617b124","position":{"x":2600.0,"y":-88.0},"componentType":"FUNNEL","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"},{"identifier":"0e9582ec-28f9-3ee8-a366-ccc1833dbffb","position":{"x":2544.000072620155,"y":129.1899500148827},"componentType":"FUNNEL","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"}],"controllerServices":[{"identifier":"e26e03f1-a9ba-3144-8427-60585fbd8e13","name":"StandardPulsarClientService","comments":"","type":"org.apache.nifi.pulsar.StandardPulsarClientService","bundle":{"group":"io.streamnative.connectors","artifact":"nifi-pulsar-nar","version":"1.15.2"},"controllerServiceApis":[{"type":"org.apache.nifi.pulsar.PulsarClientService","bundle":{"group":"io.streamnative.connectors","artifact":"nifi-pulsar-client-service-nar","version":"1.15.2"}}],"properties":{"KEEP_ALIVE_INTERVAL":"30 sec","CONCURRENT_LOOKUP_REQUESTS":"5000","USE_TCP_NO_DELAY":"false","STATS_INTERVAL":"60 sec","MAXIMUM_LOOKUP_REQUESTS":"50000","OPERATION_TIMEOUT":"30 sec","CONNECTIONS_PER_BROKER":"1","LISTENER_THREADS":"1","AUTHENTICATION_SERVICE":null,"MAXIMUM_REJECTED_REQUESTS":"50","PULSAR_SERVICE_URL":"pulsar://pulsar:6650","IO_THREADS":"1","ALLOW_TLS_INSECURE_CONNECTION":"false","ENABLE_TLS_HOSTNAME_VERIFICATION":"false"},"propertyDescriptors":{"KEEP_ALIVE_INTERVAL":{"name":"KEEP_ALIVE_INTERVAL","displayName":"Keep Alive interval","identifiesControllerService":false,"sensitive":false},"CONCURRENT_LOOKUP_REQUESTS":{"name":"CONCURRENT_LOOKUP_REQUESTS","displayName":"Maximum concurrent lookup-requests","identifiesControllerService":false,"sensitive":false},"USE_TCP_NO_DELAY":{"name":"USE_TCP_NO_DELAY","displayName":"Use TCP no-delay flag","identifiesControllerService":false,"sensitive":false},"STATS_INTERVAL":{"name":"STATS_INTERVAL","displayName":"Stats interval","identifiesControllerService":false,"sensitive":false},"MAXIMUM_LOOKUP_REQUESTS":{"name":"MAXIMUM_LOOKUP_REQUESTS","displayName":"Maximum lookup requests","identifiesControllerService":false,"sensitive":false},"OPERATION_TIMEOUT":{"name":"OPERATION_TIMEOUT","displayName":"Operation Timeout","identifiesControllerService":false,"sensitive":false},"CONNECTIONS_PER_BROKER":{"name":"CONNECTIONS_PER_BROKER","displayName":"Maximum connects per Pulsar broker","identifiesControllerService":false,"sensitive":false},"LISTENER_THREADS":{"name":"LISTENER_THREADS","displayName":"Listener Threads","identifiesControllerService":false,"sensitive":false},"AUTHENTICATION_SERVICE":{"name":"AUTHENTICATION_SERVICE","displayName":"Pulsar Client Authentication Service","identifiesControllerService":true,"sensitive":false},"MAXIMUM_REJECTED_REQUESTS":{"name":"MAXIMUM_REJECTED_REQUESTS","displayName":"Maximum rejected requests per connection","identifiesControllerService":false,"sensitive":false},"PULSAR_SERVICE_URL":{"name":"PULSAR_SERVICE_URL","displayName":"Pulsar Service URL","identifiesControllerService":false,"sensitive":false},"IO_THREADS":{"name":"IO_THREADS","displayName":"I/O Threads","identifiesControllerService":false,"sensitive":false},"ALLOW_TLS_INSECURE_CONNECTION":{"name":"ALLOW_TLS_INSECURE_CONNECTION","displayName":"Allow TLS Insecure Connection","identifiesControllerService":false,"sensitive":false},"ENABLE_TLS_HOSTNAME_VERIFICATION":{"name":"ENABLE_TLS_HOSTNAME_VERIFICATION","displayName":"Enable TLS Hostname Verification","identifiesControllerService":false,"sensitive":false}},"componentType":"CONTROLLER_SERVICE","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"},{"identifier":"63974084-613e-3240-b6ed-b44b01a14a7b","name":"StandardHttpContextMap","comments":"","type":"org.apache.nifi.http.StandardHttpContextMap","bundle":{"group":"org.apache.nifi","artifact":"nifi-http-context-map-nar","version":"1.15.2"},"controllerServiceApis":[{"type":"org.apache.nifi.http.HttpContextMap","bundle":{"group":"org.apache.nifi","artifact":"nifi-standard-services-api-nar","version":"1.15.2"}}],"properties":{"Request Expiration":"1 min","Maximum Outstanding Requests":"5000"},"propertyDescriptors":{"Request Expiration":{"name":"Request Expiration","displayName":"Request Expiration","identifiesControllerService":false,"sensitive":false},"Maximum Outstanding Requests":{"name":"Maximum Outstanding Requests","displayName":"Maximum Outstanding Requests","identifiesControllerService":false,"sensitive":false}},"componentType":"CONTROLLER_SERVICE","groupIdentifier":"05f56bb4-a817-3052-aa89-d20e5dde41f0"}],"variables":{},"defaultFlowFileExpiration":"0 sec","defaultBackPressureObjectThreshold":10000,"defaultBackPressureDataSizeThreshold":"1 GB","flowFileConcurrency":"UNBOUNDED","flowFileOutboundPolicy":"STREAM_WHEN_AVAILABLE","componentType":"PROCESS_GROUP"},"externalControllerServices":{},"parameterContexts":{},"flowEncodingVersion":"1.0"}

My initial setup is like the MinIO example here: Delta Lake Sink Connector for Apache Pulsar with miniO throws (java.lang.IllegalArgumentException)

sejuba
  • 63
  • 5

0 Answers0