I have a column of high cardinality and i need to index that column, because i have to perform range queries on that column. I know that secondary indexes are not fit for high cardinality column in cassandra, so i tried to create materialized view on that table with that column as partition key, but range queries are not working without allow filtering on that view. Always perform allow filtering queries is not a good practice for large amount of data. What schema should I use?
CREATE TABLE testing_status.input_log_profile_1 (
cid text,
ctdon bigint,
ctdat bigint,
email text,
addrs set<frozen<udt_addrs>>,
asset set<frozen<udt_asset>>,
cntno set<frozen<udt_cntno>>,
dob frozen<udt_date>,
dvc set<frozen<udt_dvc>>,
eaka set<text>,
edmn text,
educ set<frozen<udt_educ>>,
error_status text,
gen tinyint,
hobby set<text>,
income set<frozen<udt_income>>,
interest set<text>,
lang set<frozen<udt_lang>>,
levnt set<frozen<udt_levnt>>,
like map<text, frozen<set<text>>>,
loc set<frozen<udt_loc>>,
mapp set<text>,
name frozen<udt_name>,
params map<text, frozen<set<text>>>,
prfsn set<frozen<udt_prfsn>>,
rel set<frozen<udt_rel>>,
rel_s tinyint,
skills_prfsn set<frozen<udt_skill_prfsn>>,
snw set<frozen<udt_snw>>,
sport set<text>,
status_id tinyint,
PRIMARY KEY (cid, ctdon, ctdat, email)
) WITH CLUSTERING ORDER BY (ctdon ASC, ctdat ASC, email ASC)
CREATE CUSTOM INDEX status_idx ON testing_status.input_log_profile_1 (status_id) USING 'org.apache.cassandra.index.sasi.SASIIndex';
CREATE CUSTOM INDEX err_idx ON testing_status.input_log_profile_1 (error_status) USING 'org.apache.cassandra.index.sasi.SASIIndex' WITH OPTIONS = {'mode': 'CONTAINS', 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer', 'case_sensitive': 'false'};
where status_id contains sequence of ids which is high cardinality field.
And my query is like
select * from input_log_profile_1 where cid='1_1' and status_id >= 1 and status_id <= 100 ;