This script generates the SQL query that computes the cumulative distribution function (CDF) of a column or expression for any table:
#!/bin/bash
# Positional arguments, captured under descriptive names:
#   $1 - table name, or a parenthesised derived table with an alias
#   $2 - column (or column expression) whose CDF is computed
#   $3 - optional comma-separated extra columns to include in the output
tableName="${1}"
cdfParam="${2}"
otherParams="${3}"
#######################################
# Print the usage help with worked examples and terminate the script.
# Arguments: none ($0 is used as the script name shown in the examples)
# Outputs:   help text on stderr, so it can never be mixed into the SQL
#            that the script writes to stdout (e.g. when piped to a client)
# Returns:   does not return; exits the shell with status 1
#######################################
usage() {
  {
    printf 'Usage: %s <Table name> <Column/Columns for CDF> [Additional parameters to output]\n' "$0"
    printf '\n'
    printf 'Get CDF of the Age column from the Persons table and print the Name and Gender of each person\n'
    printf 'e.g., %s Persons Age Name,Gender\n' "$0"
    printf '\n'
    printf 'Get CDF of the difference between Salary & Profit from the Persons table and print the Name and Gender of each person\n'
    printf "e.g., %s Persons 'Salary-Profit' Name,Gender\n" "$0"
    printf '\n'
    printf 'Get CDF of Age from a derived table called employeeTable\n'
    printf "e.g., %s '(select * from Employees) as employeeTable' Age\n" "$0"
  } >&2
  exit 1
}
# The table name and the CDF column are mandatory; show the help otherwise.
if (( $# < 2 )); then
  usage
fi

# Emit a single query. When extra output columns were supplied they are
# placed, comma-terminated, in front of the CDF column; otherwise that
# prefix expands to nothing. The subquery counts the rows of the same table
# whose CDF value is <= the aliased cdf_paramValue, and the result is ordered
# by the CDF expression in descending order.
printf '%s\n' "SELECT ${otherParams:+${otherParams},}${cdfParam} as cdf_paramValue, (SELECT count(*) FROM ${tableName} WHERE ${cdfParam} <= cdf_paramValue) AS cumulative_sum FROM ${tableName} ORDER BY (${cdfParam}) desc"