I'm evaluating Esper as a system for loss-less processing of billing data. It is expected that system can handle ~20000 events per second and run ~400 statements with continuos aggregation (without storing events in memory). In order to get expected performance I've started to send events in multiple threads and found that esper often looses data.
Simple example that shows data loss
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import com.espertech.esper.client.Configuration;
import com.espertech.esper.client.EPAdministrator;
import com.espertech.esper.client.EPRuntime;
import com.espertech.esper.client.EPServiceProvider;
import com.espertech.esper.client.EPServiceProviderManager;
import com.espertech.esper.client.EPStatement;
public class Example {
public static void main(String[] args) throws Exception {
new Example().run();
}
public void run() throws Exception {
Configuration config = new Configuration();
// use default configuration
EPServiceProvider epService = EPServiceProviderManager.getDefaultProvider(config);
EPAdministrator epAdministrator = epService.getEPAdministrator();
// simple schema
epAdministrator.getConfiguration().addEventType(LogLine.class);
// event for terminating context partition
createEPL(epAdministrator, "create schema TerminateEvent() ");
// Start context partition on LogLine event and terminate on TerminateEvent.
createEPL(epAdministrator, "create context InitCtx start LogLine end TerminateEvent");
// select to collect count of events per account_name.
EPStatement statement = createEPL(epAdministrator, "context InitCtx select context.id as partition_id, count(*), sum(bytes) from LogLine output last when terminated");
// register listener to output all newEvents properties values
statement.addListener((newEvents, oldEvents) -> {
String resultEvents = Arrays.stream(newEvents).map((event) -> {
return Arrays.stream(event.getEventType().getPropertyNames()).map((prop) -> {
return prop + "=" + event.get(prop);
}).collect(Collectors.joining(", "));
}).collect(Collectors.joining("]; ["));
System.out.println("=== results: [" + resultEvents + "]");
});
//lets use 4 threads for sending data
ExecutorService myexecutor = Executors.newFixedThreadPool(4);
List<CompletableFuture<Void>> listOfTasks = new ArrayList<>();
//get data to be processed
List<LogLine> list = getData();
for (int i = 1; i <= list.size(); i++) {
//concurrently send each logline
final LogLine logLine = list.get(i - 1);
CompletableFuture<Void> task = CompletableFuture.runAsync(() -> {
epService.getEPRuntime().sendEvent(logLine);
System.out.println("== sending data " + logLine);
}, myexecutor);
listOfTasks.add(task);
if (i % 4 == 0) {
// terminate context partition after every 4 events.
sendTerminateEvent(listOfTasks, epService.getEPRuntime());
}
}
// terminate context partition at the end of the execution.
sendTerminateEvent(listOfTasks, epService.getEPRuntime());
// shutdow all services.
myexecutor.shutdown();
epService.destroy();
}
private void sendTerminateEvent(List<CompletableFuture<Void>> listOfTasks, EPRuntime epRuntime) throws Exception {
// wait for all submitted tasks to finish
CompletableFuture[] array = listOfTasks.toArray(new CompletableFuture[listOfTasks.size()]);
CompletableFuture.allOf(array).get(1, TimeUnit.MINUTES);
listOfTasks.clear();
System.out.println("== sending terminate event.");
// send partition termination event
epRuntime.sendEvent(Collections.emptyMap(), "TerminateEvent");
}
private List<LogLine> getData() {
List<LogLine> dataEventsList = new ArrayList<>();
dataEventsList.add(new LogLine(0, 1));
dataEventsList.add(new LogLine(0, 2));
dataEventsList.add(new LogLine(0, 3));
dataEventsList.add(new LogLine(0, 4));
dataEventsList.add(new LogLine(0, 5));
dataEventsList.add(new LogLine(1, 1));
dataEventsList.add(new LogLine(1, 2));
dataEventsList.add(new LogLine(1, 3));
dataEventsList.add(new LogLine(1, 4));
dataEventsList.add(new LogLine(1, 5));
return dataEventsList;
}
private EPStatement createEPL(EPAdministrator admin, String statement) {
System.out.println("creating EPL: " + statement);
return admin.createEPL(statement);
}
public static class LogLine {
int account_id;
int bytes;
public LogLine(int account_id, int bytes) {
this.account_id = account_id;
this.bytes = bytes;
}
public int getAccount_id() {
return account_id;
}
public int getBytes() {
return bytes;
}
@Override
public String toString() {
return "[account_id=" + account_id + ", bytes=" + bytes + "]";
}
}
}
Execution output:
creating EPL: create schema TerminateEvent()
creating EPL: create context InitCtx start LogLine end TerminateEvent
creating EPL: context InitCtx select context.id as partition_id, count(*), sum(bytes) from LogLine output last when terminated
== data [account_id=0, bytes=3] was send
== data [account_id=0, bytes=1] was send
== data [account_id=0, bytes=4] was send
== data [account_id=0, bytes=2] was send
== sending terminate event.
=== results: [partition_id=0, count(*)=4, sum(bytes)=10]
== data [account_id=1, bytes=2] was send
== data [account_id=1, bytes=3] was send
== data [account_id=0, bytes=5] was send
== data [account_id=1, bytes=1] was send
== sending terminate event.
=== results: [partition_id=1, count(*)=2, sum(bytes)=6]
== data [account_id=1, bytes=5] was send
== data [account_id=1, bytes=4] was send
== sending terminate event.
=== results: [partition_id=2, count(*)=1, sum(bytes)=4]
There are correct results for the first partition, the next 2 partitions output invalid results:
// OK
actual [partition_id=0, count(*)=4, sum(bytes)=10]
expected [partition_id=0, count(*)=4, sum(bytes)=10]
// LOSS
actual [partition_id=1, count(*)=2, sum(bytes)=6]
expected [partition_id=1, count(*)=4, sum(bytes)=11]
// LOSS
actual [partition_id=2, count(*)=1, sum(bytes)=4]
expected [partition_id=2, count(*)=2, sum(bytes)=9]
What's wrong with this example code?
Enabling priority execution order didn't help
creating EPL: create schema TerminateEvent()
creating EPL: @Priority(1) create context InitCtx start LogLine end TerminateEvent
creating EPL: @Priority(0) context InitCtx select context.id as partition_id, count(*), sum(bytes) from LogLine output last when terminated
== data [account_id=0, bytes=3] was send
== data [account_id=0, bytes=4] was send
== data [account_id=0, bytes=1] was send
== data [account_id=0, bytes=2] was send
== sending terminate event.
=== results: [partition_id=0, count(*)=4, sum(bytes)=10]
== data [account_id=1, bytes=2] was send
== data [account_id=1, bytes=3] was send
== data [account_id=0, bytes=5] was send
== data [account_id=1, bytes=1] was send
== sending terminate event.
=== results: [partition_id=1, count(*)=2, sum(bytes)=6]
== data [account_id=1, bytes=5] was send
== data [account_id=1, bytes=4] was send
== sending terminate event.
=== results: [partition_id=2, count(*)=1, sum(bytes)=4]