0

I am trying to get the total number of features present in an Accumulo datastore. Here is my sample code:

package com.hps;

import java.util.Arrays;
import java.util.Map;

import org.geotools.data.DataStoreFinder;
import org.geotools.data.FeatureSource;
import org.geotools.data.FeatureStore;
import org.geotools.data.Query;
import org.geotools.data.Transaction;
import org.geotools.data.simple.SimpleFeatureIterator;
import org.geotools.data.simple.SimpleFeatureSource;
import org.geotools.factory.CommonFactoryFinder;
import org.geotools.factory.Hints;
import org.geotools.filter.text.cql2.CQL;
import org.geotools.filter.text.ecql.ECQL;
import org.geotools.geometry.jts.JTSFactoryFinder;
import org.locationtech.geomesa.accumulo.data.AccumuloAppendFeatureWriter;
import org.locationtech.geomesa.accumulo.data.AccumuloDataStore;
import org.locationtech.geomesa.utils.interop.SimpleFeatureTypes;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;
import org.opengis.filter.FilterFactory2;
import org.opengis.filter.sort.SortBy;
import org.opengis.filter.sort.SortOrder;

import com.hps.schemas.Event;
import com.hps.schemas.QueryAdvanced;
import com.hps.schemas.QueryBasic;
import com.vividsolutions.jts.geom.Coordinate;

/**
 * Small client wrapper around a GeoMesa {@link AccumuloDataStore}.
 *
 * <p>On construction it looks up the data store from the supplied configuration, builds the
 * feature type used by this client, creates the schema if it is not present yet, and opens an
 * append feature writer and a feature source for that type.
 */
public class GeomesaClient {
  private AccumuloDataStore ds = null;
  private AccumuloAppendFeatureWriter fw = null;
  private SimpleFeatureSource sfs = null;
  private FeatureSource fs = null;
  private String tableName = "";
  private boolean isFlushed = true;
  private long lastFlushTs = 0;
  private FeatureStore fst = null;
  private SimpleFeatureType sft;

  /**
   * Creates the client and makes sure the schema for its feature type exists.
   *
   * @param dsConf data store connection parameters; the value under {@code "tableName"} is also
   *     used as the feature type name
   * @throws IllegalArgumentException if no data store can be obtained from {@code dsConf}
   * @throws IllegalStateException if the feature type cannot be built
   * @throws Exception if the data store fails while listing types, creating the schema or
   *     opening the writer / feature source
   */
  public GeomesaClient(Map<String, String> dsConf) throws Exception {
    this.ds = (AccumuloDataStore) DataStoreFinder.getDataStore(dsConf);
    if (this.ds == null) {
      // Fail with a clear message instead of a NullPointerException further down.
      // The configuration itself is not echoed because it may contain credentials.
      throw new IllegalArgumentException(
          "No data store could be obtained from the supplied configuration");
    }
    this.tableName = dsConf.get("tableName");

    sft = createFeatureType();
    sft.getUserData().put(SimpleFeatureTypes.DEFAULT_DATE_KEY, "ts");

    // Only create the schema when the data store does not already list this type name.
    if (!Arrays.asList(this.ds.getTypeNames()).contains(sft.getTypeName())) {
      ds.createSchema(sft);
    }

    // NOTE(review): the writer opened here is never closed anywhere in this class - confirm
    // whether the owner of this client is expected to release it.
    this.fw = (AccumuloAppendFeatureWriter) (this.ds.getFeatureWriterAppend(sft.getTypeName(),
        Transaction.AUTO_COMMIT));
    this.sfs = ds.getFeatureSource(sft.getTypeName());
    this.fs = sfs;

    this.fst = (FeatureStore) sfs;
  }

  /**
   * Builds the feature type named after {@link #tableName} from a GeoMesa spec string.
   *
   * @return the created feature type, never {@code null}
   * @throws IllegalStateException if the spec cannot be turned into a feature type; the
   *     original failure is kept as the cause
   */
  private SimpleFeatureType createFeatureType() {
    StringBuilder spec = new StringBuilder();
    spec.append("r:Long:index=full:cardinality=high,");
    spec.append("*g:Point:srid=4326,");
    spec.append("di:Integer:index=full:cardinality=high,");
    spec.append("al:Float:index=join:cardinality=low,");
    spec.append("s:Float:index=join:cardinality=low,");
    spec.append("b:Float:index=join:cardinality=low,");
    spec.append("an:Float:index=join:cardinality=low,");
    spec.append("he:Float:index=join:cardinality=low,");
    spec.append("ve:Float:index=join:cardinality=low,");
    spec.append("t:Float:index=join:cardinality=low,");
    spec.append("m:Boolean:index=join:cardinality=low,");
    spec.append("i:Boolean:index=join:cardinality=low,");
    spec.append("ts:Long:index=join:cardinality=low;");
    spec.append("geomesa.indexes.enabled='attr_idx,records,z2'");
    try {
      return SimpleFeatureTypes.createType(tableName, spec.toString());
    } catch (Exception ex) {
      // Propagate instead of returning null: the constructor dereferences the result
      // immediately, so a null would only hide the real cause behind a NullPointerException.
      throw new IllegalStateException(
          "Could not create feature type '" + tableName + "'", ex);
    }
  }

  /**
   * Counts the features whose {@code r} attribute equals the given ride id.
   *
   * @param rideId ride id to count features for; {@code null} yields {@code 0}
   * @return the count reported by the data store's stats, narrowed to {@code int}
   * @throws Exception if the filter cannot be parsed or the count cannot be obtained
   */
  public int getRideCount(Long rideId) throws Exception {
    if (rideId == null) {
      return 0;
    }
    // NOTE(review): the trailing 'true' presumably requests an exact count, which would mean
    // running the filtered query rather than using estimated stats - confirm against the
    // GeoMesa stats API if count latency matters.
    Long count = (Long) (ds.stats().getCount(sft, CQL.toFilter("r=" + rideId), true).get());
    return count.intValue();
  }
}

The total number of features present is 2636698, and the getRideCount() function takes ~10 sec to complete. I have created indexes for my feature type (please refer to the function createFeatureType()); however, that doesn't seem to help in minimising the fetch duration. I am not able to understand the exact problem that is degrading performance. How can I get the feature collection size faster?

Here is my Query Plan

[2017-04-26 16:16:31,537] TRACE Explainer: Planning 'aj_v53' r = 31
0 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  - Planning 'aj_v53' r = 31
[2017-04-26 16:16:31,539] TRACE Explainer:   Original filter: r = 31
2 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Original filter: r = 31
[2017-04-26 16:16:31,540] TRACE Explainer:   Hints: density[false] bin[false] stats[false] map-aggregate[false] sampling[none]
3 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Hints: density[false] bin[false] stats[false] map-aggregate[false] sampling[none]
[2017-04-26 16:16:31,541] TRACE Explainer:   Sort: none
4 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Sort: none
[2017-04-26 16:16:31,542] TRACE Explainer:   Transforms: g=g
5 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Transforms: g=g
[2017-04-26 16:16:31,543] TRACE Explainer:   Strategy selection:
6 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Strategy selection:
[2017-04-26 16:16:31,565] TRACE Explainer:     Query processing took 19ms and produced 1 options
28 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -     Query processing took 19ms and produced 1 options
[2017-04-26 16:16:31,567] TRACE Explainer:     Filter plan: FilterPlan[AttributeIndex[r = 31][None]]
30 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -     Filter plan: FilterPlan[AttributeIndex[r = 31][None]]
[2017-04-26 16:16:31,568] TRACE Explainer:     Strategy selection took 2ms for 1 options
31 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -     Strategy selection took 2ms for 1 options
[2017-04-26 16:16:31,569] TRACE Explainer:   Strategy 1 of 1: AttributeIndex
32 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Strategy 1 of 1: AttributeIndex
[2017-04-26 16:16:31,570] TRACE Explainer:     Strategy filter: AttributeIndex[r = 31][None]
33 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -     Strategy filter: AttributeIndex[r = 31][None]
[2017-04-26 16:16:31,620] TRACE Explainer:     Plan: org.locationtech.geomesa.accumulo.index.BatchScanPlan
83 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -     Plan: org.locationtech.geomesa.accumulo.index.BatchScanPlan
[2017-04-26 16:16:31,621] TRACE Explainer:       Table: aj_v53_attr_v4
84 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -       Table: aj_v53_attr_v4
[2017-04-26 16:16:31,622] TRACE Explainer:       Deduplicate: false
85 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -       Deduplicate: false
[2017-04-26 16:16:31,622] TRACE Explainer:       Column Families (1): List(I)
85 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -       Column Families (1): List(I)
[2017-04-26 16:16:31,624] TRACE Explainer:       Ranges (1): [%01;%00;%00;800000000000001f%00;::%01;%00;%00;800000000000001f%01;)
87 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -       Ranges (1): [%01;%00;%00;800000000000001f%00;::%01;%00;%00;800000000000001f%01;)
[2017-04-26 16:16:31,626] TRACE Explainer:       Iterators (1):
89 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -       Iterators (1):
[2017-04-26 16:16:31,628] TRACE Explainer:         name:filter-transform-iter, priority:25, class:org.locationtech.geomesa.accumulo.iterators.KryoLazyFilterTransformIterator, properties:{sft=*g:Point:srid=4326,ts:Date;geomesa.index.dtg='ts',geomesa.table.sharing='true',geomesa.indices='attr:4:3,records:2:3,z2:3:3',geomesa.table.sharing.prefix='\u0001', tdefs=g=g, index=attr:4, tsft=*g:Point:srid=4326}
91 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -         name:filter-transform-iter, priority:25, class:org.locationtech.geomesa.accumulo.iterators.KryoLazyFilterTransformIterator, properties:{sft=*g:Point:srid=4326,ts:Date;geomesa.index.dtg='ts',geomesa.table.sharing='true',geomesa.indices='attr:4:3,records:2:3,z2:3:3',geomesa.table.sharing.prefix='\u0001', tdefs=g=g, index=attr:4, tsft=*g:Point:srid=4326}
[2017-04-26 16:16:31,629] TRACE Explainer:     Plan creation took 49ms
92 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -     Plan creation took 49ms
[2017-04-26 16:16:31,630] TRACE Explainer:   Query planning took 137ms
93 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Query planning took 137ms
[2017-04-26 16:16:31,644] TRACE Explainer: Planning 'aj_v53' r = 31
107 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  - Planning 'aj_v53' r = 31
[2017-04-26 16:16:31,644] TRACE Explainer:   Original filter: r = 31
107 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Original filter: r = 31
[2017-04-26 16:16:31,644] TRACE Explainer:   Hints: density[false] bin[false] stats[true] map-aggregate[false] sampling[none]
107 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Hints: density[false] bin[false] stats[true] map-aggregate[false] sampling[none]
[2017-04-26 16:16:31,644] TRACE Explainer:   Sort: none
107 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Sort: none
[2017-04-26 16:16:31,644] TRACE Explainer:   Transforms: None
107 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Transforms: None
[2017-04-26 16:16:31,644] TRACE Explainer:   Strategy selection:
107 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Strategy selection:
[2017-04-26 16:16:31,646] TRACE Explainer:     Query processing took 1ms and produced 1 options
109 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -     Query processing took 1ms and produced 1 options
[2017-04-26 16:16:31,646] TRACE Explainer:     Filter plan: FilterPlan[AttributeIndex[r = 31][None]]
109 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -     Filter plan: FilterPlan[AttributeIndex[r = 31][None]]
[2017-04-26 16:16:31,647] TRACE Explainer:     Strategy selection took 0ms for 1 options
110 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -     Strategy selection took 0ms for 1 options
[2017-04-26 16:16:31,647] TRACE Explainer:   Strategy 1 of 1: AttributeIndex
110 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Strategy 1 of 1: AttributeIndex
[2017-04-26 16:16:31,647] TRACE Explainer:     Strategy filter: AttributeIndex[r = 31][None]
110 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -     Strategy filter: AttributeIndex[r = 31][None]
[2017-04-26 16:16:31,656] TRACE Explainer:     Plan: org.locationtech.geomesa.accumulo.index.BatchScanPlan
119 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -     Plan: org.locationtech.geomesa.accumulo.index.BatchScanPlan
[2017-04-26 16:16:31,656] TRACE Explainer:       Table: aj_v53_attr_v4
119 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -       Table: aj_v53_attr_v4
[2017-04-26 16:16:31,656] TRACE Explainer:       Deduplicate: false
119 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -       Deduplicate: false
[2017-04-26 16:16:31,656] TRACE Explainer:       Column Families (1): List(I)
119 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -       Column Families (1): List(I)
[2017-04-26 16:16:31,657] TRACE Explainer:       Ranges (1): [%01;%00;%00;800000000000001f%00;::%01;%00;%00;800000000000001f%01;)
120 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -       Ranges (1): [%01;%00;%00;800000000000001f%00;::%01;%00;%00;800000000000001f%01;)
[2017-04-26 16:16:31,657] TRACE Explainer:       Iterators (1):
120 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -       Iterators (1):
[2017-04-26 16:16:31,657] TRACE Explainer:         name:stats-iter, priority:30, class:org.locationtech.geomesa.accumulo.iterators.KryoLazyStatsIterator, properties:{sft=*g:Point:srid=4326,ts:Date;geomesa.index.dtg='ts',geomesa.table.sharing='true',geomesa.indices='attr:4:3,records:2:3,z2:3:3',geomesa.table.sharing.prefix='\u0001', dupes=false, index=attr:4, geomesa.stats.string=Count()}
120 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -         name:stats-iter, priority:30, class:org.locationtech.geomesa.accumulo.iterators.KryoLazyStatsIterator, properties:{sft=*g:Point:srid=4326,ts:Date;geomesa.index.dtg='ts',geomesa.table.sharing='true',geomesa.indices='attr:4:3,records:2:3,z2:3:3',geomesa.table.sharing.prefix='\u0001', dupes=false, index=attr:4, geomesa.stats.string=Count()}
[2017-04-26 16:16:31,657] TRACE Explainer:     Plan creation took 9ms
120 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -     Plan creation took 9ms
[2017-04-26 16:16:31,658] TRACE Explainer:   Query planning took 17ms
121 [Thread-5] TRACE org.locationtech.geomesa.index.utils.Explainer  -   Query planning took 17ms
Suresh Prajapati
  • 3,991
  • 5
  • 26
  • 38
  • Hi Suresh, can you use the explain query capability to see what the GeoMesa query planner is doing? With that information, we'll be able to figure out if the extra indices are being used. The docs here (http://www.geomesa.org/documentation/user/datastores/explaining.html) are a bit terse, but should point you in the right direction for outputting the query plan. If you have questions about it, feel free to ask. – GeoJim Apr 22 '17 at 13:51
  • @GeoMesaJim Any idea how the feature count operation is executed? I saw a similar question about shapefiles which explains opening the shx file, reading the header, and getting the count. If that is the case here, could the operation be slow due to heavy I/O proportional to the number of files? – Suresh Prajapati Apr 25 '17 at 07:55
  • Yes, I do have an idea how the count operation is executed.;) Getting a count either requires running the query indicated by the filter and manually counting the results (this operation should take about as long as querying for those records) or using the estimated stats for a given filter. For estimated stats, not all queries can be optimized the same. – GeoJim Apr 25 '17 at 12:44
  • As a general note, it looks like you are able to scan 250k features per second, which is on the ok end for GeoMesa. If you provide the query plan, we can be sure that you're using the index you created. – GeoJim Apr 25 '17 at 12:45
  • @GeoMesaJim adding query plan. – Suresh Prajapati Apr 27 '17 at 08:21

0 Answers0