0

I created cumulative density visualization using this example of vega-lite as shown below:

var data = [{"student_name": "student 0", "e": "100.15", "d": "127.81"}, {"student_name": "student 1", "e": "100.30", "d": "189.94"}, {"student_name": "student 2", "e": "100.15", "d": "105.33"}, {"student_name": "student 3", "e": "99.41", "d": "85.36"}, {"student_name": "student 4", "e": "100.00", "d": "203.70"}, {"student_name": "student 5", "e": "100.15", "d": "139.05"}, {"student_name": "student 19", "e": "100.15", "d": "102.66"}, {"student_name": "student 20", "e": "95.71", "d": "52.96"}, {"student_name": "student 21", "e": "99.85", "d": "99.41"}, {"student_name": "student 22", "e": "98.96", "d": "100.44"}, {"student_name": "student 23", "e": "100.15", "d": "131.07"}, {"student_name": "student 24", "e": "99.56", "d": "76.92"}, {"student_name": "student 25", "e": "100.15", "d": "213.46"}, {"student_name": "student 26", "e": "100.15", "d": "311.24"}, {"student_name": "student 27", "e": "100.15", "d": "21.89"}, {"student_name": "student 28", "e": "96.60", "d": "6.36"}, {"student_name": "student 29", "e": "53.70", "d": "3.70"}, {"student_name": "student 30", "e": "96.75", "d": "46.60"}, {"student_name": "student 31", "e": "100.15", "d": "100.15"}, {"student_name": "student 32", "e": "100.30", "d": "115.68"}, {"student_name": "student 33", "e": "87.13", "d": "103.85"}, {"student_name": "student 34", "e": "100.15", "d": "104.14"}, {"student_name": "student 35", "e": "99.26", "d": "59.17"}, {"student_name": "student 36", "e": "100.15", "d": "171.30"}, {"student_name": "student 37", "e": "99.11", "d": "94.08"}, {"student_name": "student 38", "e": "81.66", "d": "57.40"}, {"student_name": "student 39", "e": "96.01", "d": "154.59"}, {"student_name": "student 90", "e": "1.04", "d": "1.33"}, {"student_name": "student 91", "e": "99.70", "d": "26.18"}, {"student_name": "student 92", "e": "96.30", "d": "78.11"}, {"student_name": "student 93", "e": "99.85", "d": "11.83"}, {"student_name": "student 94", "e": "100.15", "d": "172.93"}, {"student_name": "student 95", "e": "100.00", "d": "198.82"}, {"student_name": "student 96", "e": "100.15", "d": "155.92"}, {"student_name": "student 97", "e": "92.01", "d": "97.19"}, {"student_name": "student 98", "e": "98.52", "d": "71.30"}, {"student_name": "student 99", "e": "100.15", "d": "111.69"}, {"student_name": "student 100", "e": "0.30", "d": "0.30"}, {"student_name": "student 175", "e": "98.96", "d": "91.12"}, {"student_name": "student 176", "e": "100.00", "d": "226.04"}, {"student_name": "student 177", "e": "98.67", "d": "150.89"}, {"student_name": "student 178", "e": "97.49", "d": "68.79"}, {"student_name": "student 179", "e": "100.15", "d": "133.58"}, {"student_name": "student 180", "e": 0, "d": 0}, {"student_name": "student 181", "e": 0, "d": 0}, {"student_name": "student 182", "e": 0, "d": 0}, {"student_name": "student 183", "e": 0, "d": 0}, {"student_name": "student 184", "e": 0, "d": 0}, {"student_name": "student 185", "e": 0, "d": 0}, {"student_name": "student 186", "e": 0, "d": 0}, {"student_name": "student 187", "e": 0, "d": 0}, {"student_name": "student 188", "e": 0, "d": 0}, {"student_name": "student 189", "e": 0, "d": 0}, {"student_name": "student 190", "e": 0, "d": 0}, {"student_name": "student 191", "e": 0, "d": 0}];

createChart = function (data) {
  let max_d = d3.max(data, record => parseFloat(record.d));
  let max_e = d3.max(data, record => parseFloat(record.e));
  let max_y_scale_value_for_d = d3.max([100, max_d]);
  let max_y_scale_value_for_e = d3.max([100, max_e]);

  
  const plot = vl.markArea()
    .data(data)
    .transform([{ "calculate": "toNumber(datum.d)", "as": "d2" }
                , { "calculate": "toNumber(datum.e)", "as": "e2" }
                , {
                    "sort": [{"field": "d2"}],
                    "window": [{"op": "count", "field": "student_name", "as": "Cumulative Count"}],
                    "frame": [null, 0]
                  }
               ])
    .encode(
      vl.y().fieldQ('Cumulative Count')
      .title('# students'),
      vl.x()
      .fieldQ('d2')//.bin(true)
      .scale({ "domain": [0, max_y_scale_value_for_d] })
      .title('D')
    ).width(500).height(250); 
    
    return plot.toObject();
}

const chart_spec_json = this.createChart(data)
const opt = {
               renderer: "canvas",
               actions: false
            };
vegaEmbed("#stats", chart_spec_json, opt);
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/4.13.0/d3.min.js"></script>
<script src="https://unpkg.com/vega@5.21.0/build/vega.min.js"></script>
<script src="https://unpkg.com/vega-lite@5.2.0/build/vega-lite.min.js"></script>
<script src="https://www.unpkg.com/vega-embed@6.20.8/build/vega-embed.min.js"></script>
<script src="https://unpkg.com/vega-lite-api@5.0.0/build/vega-lite-api.min.js"></script>
<div id="stats" />

This is how it looks (You can also click "Run code snippet" button to see it in action):

enter image description here

Now, I wanted to convert it to bar chart. So I changed markArea() to markRect() and added bin(true):

var data = [{"student_name": "student 0", "e": "100.15", "d": "127.81"}, {"student_name": "student 1", "e": "100.30", "d": "189.94"}, {"student_name": "student 2", "e": "100.15", "d": "105.33"}, {"student_name": "student 3", "e": "99.41", "d": "85.36"}, {"student_name": "student 4", "e": "100.00", "d": "203.70"}, {"student_name": "student 5", "e": "100.15", "d": "139.05"}, {"student_name": "student 19", "e": "100.15", "d": "102.66"}, {"student_name": "student 20", "e": "95.71", "d": "52.96"}, {"student_name": "student 21", "e": "99.85", "d": "99.41"}, {"student_name": "student 22", "e": "98.96", "d": "100.44"}, {"student_name": "student 23", "e": "100.15", "d": "131.07"}, {"student_name": "student 24", "e": "99.56", "d": "76.92"}, {"student_name": "student 25", "e": "100.15", "d": "213.46"}, {"student_name": "student 26", "e": "100.15", "d": "311.24"}, {"student_name": "student 27", "e": "100.15", "d": "21.89"}, {"student_name": "student 28", "e": "96.60", "d": "6.36"}, {"student_name": "student 29", "e": "53.70", "d": "3.70"}, {"student_name": "student 30", "e": "96.75", "d": "46.60"}, {"student_name": "student 31", "e": "100.15", "d": "100.15"}, {"student_name": "student 32", "e": "100.30", "d": "115.68"}, {"student_name": "student 33", "e": "87.13", "d": "103.85"}, {"student_name": "student 34", "e": "100.15", "d": "104.14"}, {"student_name": "student 35", "e": "99.26", "d": "59.17"}, {"student_name": "student 36", "e": "100.15", "d": "171.30"}, {"student_name": "student 37", "e": "99.11", "d": "94.08"}, {"student_name": "student 38", "e": "81.66", "d": "57.40"}, {"student_name": "student 39", "e": "96.01", "d": "154.59"}, {"student_name": "student 90", "e": "1.04", "d": "1.33"}, {"student_name": "student 91", "e": "99.70", "d": "26.18"}, {"student_name": "student 92", "e": "96.30", "d": "78.11"}, {"student_name": "student 93", "e": "99.85", "d": "11.83"}, {"student_name": "student 94", "e": "100.15", "d": "172.93"}, {"student_name": "student 95", "e": "100.00", "d": "198.82"}, {"student_name": "student 96", "e": "100.15", "d": "155.92"}, {"student_name": "student 97", "e": "92.01", "d": "97.19"}, {"student_name": "student 98", "e": "98.52", "d": "71.30"}, {"student_name": "student 99", "e": "100.15", "d": "111.69"}, {"student_name": "student 100", "e": "0.30", "d": "0.30"}, {"student_name": "student 175", "e": "98.96", "d": "91.12"}, {"student_name": "student 176", "e": "100.00", "d": "226.04"}, {"student_name": "student 177", "e": "98.67", "d": "150.89"}, {"student_name": "student 178", "e": "97.49", "d": "68.79"}, {"student_name": "student 179", "e": "100.15", "d": "133.58"}, {"student_name": "student 180", "e": 0, "d": 0}, {"student_name": "student 181", "e": 0, "d": 0}, {"student_name": "student 182", "e": 0, "d": 0}, {"student_name": "student 183", "e": 0, "d": 0}, {"student_name": "student 184", "e": 0, "d": 0}, {"student_name": "student 185", "e": 0, "d": 0}, {"student_name": "student 186", "e": 0, "d": 0}, {"student_name": "student 187", "e": 0, "d": 0}, {"student_name": "student 188", "e": 0, "d": 0}, {"student_name": "student 189", "e": 0, "d": 0}, {"student_name": "student 190", "e": 0, "d": 0}, {"student_name": "student 191", "e": 0, "d": 0}];

createChart = function (data) {
  let max_d = d3.max(data, record => parseFloat(record.d));
    let max_e = d3.max(data, record => parseFloat(record.e));
    let max_y_scale_value_for_d = d3.max([100, max_d]);
    let max_y_scale_value_for_e = d3.max([100, max_e]);

  
  const plot = vl.markRect()
    .data(data)
    .transform([{ "calculate": "toNumber(datum.d)", "as": "d2" }
                , { "calculate": "toNumber(datum.e)", "as": "e2" }
                , {
                    "sort": [{"field": "d2"}],
                    "window": [{"op": "count", "field": "student_name", "as": "Cumulative Count"}],
                    "frame": [null, 0]
                  }
               ])
    .encode(
      vl.y().fieldQ('Cumulative Count')
      .title('# students'),
      vl.x()
      .fieldQ('d2').bin(true)
      .scale({ "domain": [0, max_y_scale_value_for_d] })
      .title('D')
      .bin({maxbins: 20})
    ).width(500).height(250); 
    
    return plot.toObject();
}

const chart_spec_json = this.createChart(data)
const opt = {
               renderer: "canvas",
               actions: false
            };
vegaEmbed("#stats", chart_spec_json, opt);
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/4.13.0/d3.min.js"></script>
<script src="https://unpkg.com/vega@5.21.0/build/vega.min.js"></script>
<script src="https://unpkg.com/vega-lite@5.2.0/build/vega-lite.min.js"></script>
<script src="https://www.unpkg.com/vega-embed@6.20.8/build/vega-embed.min.js"></script>
<script src="https://unpkg.com/vega-lite-api@5.0.0/build/vega-lite-api.min.js"></script>
<div id="stats" />

This is how it looks (You can also click "Run code snippet" button to see it in action):

enter image description here

The second plot shows no bars for range 240-300, presumably because there are no students with D value between 240 and 300. But why is this so? My understanding is that it should show bar of exact same height as that for 220-240 something like first chart. At least first chart does not show such gap at 240-300.

PS: Here is the observable notebook link.

Mahesha999
  • 22,693
  • 29
  • 116
  • 189

1 Answers1

2

The intended purpose of bin transform is to create a histogram which counts the number of elements on each bucket. As you said in the original post, there are buckets if no values.

Basically, you have the following data. I binned the d value first, then calculated the cumulative count.

Binned data points

If you use the line mark you almost have the desired chart but you want to use rect mark.

The rect mark requires the width and height of the rectangle, or the encoding channels x2 and y2. How can you provide the data to those channels? If you are using Vega-lite, you should use an ordinal scale in order to supply those values. Vega-lite is a simplification of the more complex grammar Vega and is not that flexible.

Fortunately, Vega-lite has the density transformation. From the title of your post, it seems to be what are you looking for. Here is the code using the density transformation.

var data = [{"student_name": "student 0", "e": "100.15", "d": "127.81"}, {"student_name": "student 1", "e": "100.30", "d": "189.94"}, {"student_name": "student 2", "e": "100.15", "d": "105.33"}, {"student_name": "student 3", "e": "99.41", "d": "85.36"}, {"student_name": "student 4", "e": "100.00", "d": "203.70"}, {"student_name": "student 5", "e": "100.15", "d": "139.05"}, {"student_name": "student 19", "e": "100.15", "d": "102.66"}, {"student_name": "student 20", "e": "95.71", "d": "52.96"}, {"student_name": "student 21", "e": "99.85", "d": "99.41"}, {"student_name": "student 22", "e": "98.96", "d": "100.44"}, {"student_name": "student 23", "e": "100.15", "d": "131.07"}, {"student_name": "student 24", "e": "99.56", "d": "76.92"}, {"student_name": "student 25", "e": "100.15", "d": "213.46"}, {"student_name": "student 26", "e": "100.15", "d": "311.24"}, {"student_name": "student 27", "e": "100.15", "d": "21.89"}, {"student_name": "student 28", "e": "96.60", "d": "6.36"}, {"student_name": "student 29", "e": "53.70", "d": "3.70"}, {"student_name": "student 30", "e": "96.75", "d": "46.60"}, {"student_name": "student 31", "e": "100.15", "d": "100.15"}, {"student_name": "student 32", "e": "100.30", "d": "115.68"}, {"student_name": "student 33", "e": "87.13", "d": "103.85"}, {"student_name": "student 34", "e": "100.15", "d": "104.14"}, {"student_name": "student 35", "e": "99.26", "d": "59.17"}, {"student_name": "student 36", "e": "100.15", "d": "171.30"}, {"student_name": "student 37", "e": "99.11", "d": "94.08"}, {"student_name": "student 38", "e": "81.66", "d": "57.40"}, {"student_name": "student 39", "e": "96.01", "d": "154.59"}, {"student_name": "student 90", "e": "1.04", "d": "1.33"}, {"student_name": "student 91", "e": "99.70", "d": "26.18"}, {"student_name": "student 92", "e": "96.30", "d": "78.11"}, {"student_name": "student 93", "e": "99.85", "d": "11.83"}, {"student_name": "student 94", "e": "100.15", "d": "172.93"}, {"student_name": "student 95", "e": "100.00", "d": "198.82"}, {"student_name": "student 96", "e": "100.15", "d": "155.92"}, {"student_name": "student 97", "e": "92.01", "d": "97.19"}, {"student_name": "student 98", "e": "98.52", "d": "71.30"}, {"student_name": "student 99", "e": "100.15", "d": "111.69"}, {"student_name": "student 100", "e": "0.30", "d": "0.30"}, {"student_name": "student 175", "e": "98.96", "d": "91.12"}, {"student_name": "student 176", "e": "100.00", "d": "226.04"}, {"student_name": "student 177", "e": "98.67", "d": "150.89"}, {"student_name": "student 178", "e": "97.49", "d": "68.79"}, {"student_name": "student 179", "e": "100.15", "d": "133.58"}, {"student_name": "student 180", "e": 0, "d": 0}, {"student_name": "student 181", "e": 0, "d": 0}, {"student_name": "student 182", "e": 0, "d": 0}, {"student_name": "student 183", "e": 0, "d": 0}, {"student_name": "student 184", "e": 0, "d": 0}, {"student_name": "student 185", "e": 0, "d": 0}, {"student_name": "student 186", "e": 0, "d": 0}, {"student_name": "student 187", "e": 0, "d": 0}, {"student_name": "student 188", "e": 0, "d": 0}, {"student_name": "student 189", "e": 0, "d": 0}, {"student_name": "student 190", "e": 0, "d": 0}, {"student_name": "student 191", "e": 0, "d": 0}];

createChart = function (data) {
  let max_d = d3.max(data, record => parseFloat(record.d));
  let max_e = d3.max(data, record => parseFloat(record.e));
  let max_y_scale_value_for_d = d3.max([100, max_d]);
  let max_y_scale_value_for_e = d3.max([100, max_e]);

  
  const plot = vl.markRect()
    .data(data)
    .transform([ { "calculate": "toNumber(datum.d)", "as": "d2" }
                ,{ "calculate": "toNumber(datum.e)", "as": "e2" }
                ,{
                    "density": "d2",
                    "cumulative": true,
                    "counts":true,
                    "steps": 20
                 }
               ])
    .encode(
      vl.y().fieldQ('density')
      .title('# students'),
      vl.x()
      .fieldO('value')//.bin(true)
      .axis({labelAngle:0,format:"d"})
      .title('D')
    ).width(500).height(250); 
    
    return plot.toObject();
}

const chart_spec_json = this.createChart(data)
const opt = {
               renderer: "canvas",
               actions: false
            };
vegaEmbed("#stats", chart_spec_json, opt);
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/4.13.0/d3.min.js"></script>
<script src="https://unpkg.com/vega@5.21.0/build/vega.min.js"></script>
<script src="https://unpkg.com/vega-lite@5.2.0/build/vega-lite.min.js"></script>
<script src="https://www.unpkg.com/vega-embed@6.20.8/build/vega-embed.min.js"></script>
<script src="https://unpkg.com/vega-lite-api@5.0.0/build/vega-lite-api.min.js"></script>
<div id="stats" />
Marcelo
  • 4,234
  • 1
  • 18
  • 18
  • I have some doubts. **Q1.** What is `value` field in `vl.x().fieldO('value')`? Is it explicitly populated by `density` transform based on `steps` and `counts` specified? **Q2.** What is `"steps":20` here? It doesnt seem to be a bin size (which is16 your last plot)? Somehow, I managed to create the desired plot as shown in [this](https://observablehq.com/d/8d2d7579bec62e2f#cdf_in_js_with_minbins) notebook cell. In this I calculated cdf in plain JS (that is outside vega-lite) and plotted plain bar chart without any transform. Is it possible to imitate the same with pure vega-lite-api code? ... – Mahesha999 Jun 13 '22 at 08:26
  • ( ... continued from last comment) Notice in [this](https://observablehq.com/d/8d2d7579bec62e2f#cdf_in_js_with_minbins) notebook cell, the bars occupy whole ranges (`0-10`, `10-20` etc) and ticks (that is `0, 10, 20, ...`) appear at the end of bars and not at the center. Also, can you please share the code for your first plot? Also, what do you mean by "if you use the line mark"? Just changing `markCircle` to `markLine`? – Mahesha999 Jun 13 '22 at 08:29
  • **A1.** value field in created by the density transformation. See vega-lite [docs](https://vega.github.io/vega-lite/docs/density.html#usage). **A2.** Steps is " The exact number of samples to take along the extent domain for plotting the density". I also created a solution using js and `d3.bin` and it only created 16 buckets when I specified 20. That could be a hint why it is only using 16 buckets. The [notebook](https://observablehq.com/d/a0f2ebf37b0adafb) also has the code for the first chart. – Marcelo Jun 13 '22 at 17:56