1

I am trying to create box plots with whiskers in d3plus, to compare a metric between "this" provider and "all" providers in my database. I have already calculated the five-number summary from very large arrays in a pre-processing stage, and I am passing these values into the data variable. If you run the attached snippet (modified from the d3plus website example), you can see what the code and the plots look like. However, in reality, there are some extreme outliers in the "All" category, which I have noted in my code as a commented-out line: 663373.22 for "max" (for "All") and -18427.39 for "min" (again, for "All").

I saw there is a mute method I can use for .y, but I am not sure how I would make it work with a function.

Is there a fairly straightforward way to "suppress" the drawing of these extreme outliers (which would collapse both box plots to flat lines if allowed), or any extreme outliers that are, say, higher than (or lower than) some multiple of the IQR?

<script src="//d3plus.org/js/d3.js"></script>
<script src="//d3plus.org/js/d3plus.js"></script>

<div id="viz"></div>

<script>
  // Pre-computed five-number summaries, listed as [provider, statistic, value]
  // rows and expanded into the record shape handed to d3plus below.
  // The "All" extremes shown here (-1000 / 1000) stand in for the real ones,
  // which are min -18427.39 and max 663373.22.
  var data = [
    ["This", "min", -1055.79],
    ["This", "q1", -172.819],
    ["This", "med", -46.795],
    ["This", "q3", 8.378],
    ["This", "max", 1033.15],
    ["All", "min", -1000],
    ["All", "q1", -111.60999999999999],
    ["All", "med", -13.92],
    ["All", "q3", 124.22],
    ["All", "max", 1000]
  ].map(function (row) {
    return {"provider": row[0], "name": row[1], "value": row[2]};
  });

  // One box per provider on the x axis, built from the "value" of each
  // statistic; the UI control switches between scatter and box rendering.
  var visualization = d3plus.viz()
    .container("#viz")
    .data(data)
    .type("box")
    .id("name")
    .x("provider")
    .y("value")
    .ui([
      {
        "label": "Visualization Type",
        "method": "type",
        "value": ["scatter", "box"]
      }
    ])
    .draw()
</script>
nvergos
  • 432
  • 3
  • 15

1 Answers1

2

From what I understand of the docs, mute is used as an object key whose value can be a string, a function, or an array. So if you wanted to mute all data points whose name is q1, you would write .y({value:'value',mute:'q1'}).

As for the solution, you need a function to determine outliers. I found and edited a function that returns the acceptable value range (IQR * multiple); you can modify the multiple variable to suit your needs. Then you use a simple function, isOutlier, to check whether the passed-in value is outside that range.

<script src="//d3plus.org/js/d3.js"></script>
<script src="//d3plus.org/js/d3plus.js"></script>

<div id="viz"></div>

<script>
  var data = [
    {"provider": "This", "name":"min", "value": -1055.79},
    {"provider": "This", "name":"q1", "value": -172.819},
    {"provider": "This", "name":"med", "value": -46.795},
    {"provider": "This", "name":"q3", "value": 8.378},
    {"provider": "This", "name":"max", "value": 1033.15},
    {"provider": "All", "name":"min", "value": -1000},
    {"provider": "All", "name":"q1", "value": -111.60999999999999},
    {"provider": "All", "name":"med", "value": -13.92},
    {"provider": "All", "name":"q3", "value": 124.22},
    {"provider": "All", "name":"max", "value": 1000}
  ];

  // Acceptable [min, max] window computed from every value in the data set.
  // It is stored under its own name: assigning it to `outlierRange` would
  // overwrite the hoisted outlierRange() function with its own result.
  var acceptableRange = outlierRange(data.map(d => d.value));

  // True when a value falls outside the acceptable window.
  var isOutlier = val => val < acceptableRange[0] || val > acceptableRange[1];

  // The real "All" extremes are max 663373.22 and min -18427.39; swap them
  // into the data above to see the outlier handling in action.
  var visualization = d3plus.viz()
    .container("#viz")
    .data(data)
    .type("box")
    .id("name")
    .x("provider")
    // `mute` is given the predicate so that outlying values are suppressed
    // (based on the d3plus docs' description of mute — verify against your
    // d3plus version).
    .y({value: 'value', mute: isOutlier})
    .ui([{
        "label": "Visualization Type",
        "method": "type",
        "value": ["scatter","box"]
      }])
    .draw();
  
/**
 * Computes the range outside of which a value is considered an outlier.
 *
 * The range is [q1 - multiple * IQR, q3 + multiple * IQR], where q1 and q3 are
 * the first and third quartiles of the input and IQR = q3 - q1.
 *
 * Quartiles are taken from the sorted values: when the sample size is a
 * multiple of four, each quartile is the mean of the two values straddling the
 * quarter boundary; otherwise it is the value at the next position after the
 * boundary.
 *
 * @param {number[]} someArray - Numeric sample. It is copied, never mutated.
 * @param {number} [multiple=0.5] - How many IQRs beyond the quartiles are
 *   still acceptable.
 * @returns {number[]} Two-element array [minValue, maxValue]. With fewer than
 *   four values no quartiles can be estimated, so the unbounded range
 *   [-Infinity, Infinity] is returned and nothing is classed as an outlier.
 */
function outlierRange(someArray, multiple = 0.5) {
  const count = someArray.length;

  if (count < 4) {
    return [-Infinity, Infinity];
  }

  // Sort a copy numerically; the default sort would compare as strings.
  const values = [...someArray].sort((a, b) => a - b);

  let q1;
  let q3;

  if (count % 4 === 0) {
    // Quarter boundaries fall between two elements: average the neighbours.
    // Positions are 1-based in the textbook rule, hence the "- 1" here.
    const lowerIndex = count / 4;
    const upperIndex = (3 * count) / 4;
    q1 = (values[lowerIndex - 1] + values[lowerIndex]) / 2;
    q3 = (values[upperIndex - 1] + values[upperIndex]) / 2;
  } else {
    // 1-based position ceil(n * p) is 0-based index floor(n * p) whenever
    // n * p is not an integer, which is always the case in this branch.
    q1 = values[Math.floor(count / 4)];
    q3 = values[Math.floor((3 * count) / 4)];
  }

  const iqr = q3 - q1;
  const minValue = q1 - iqr * multiple;
  const maxValue = q3 + iqr * multiple;

  return [minValue, maxValue];
}




</script>
Eric Guan
  • 15,474
  • 8
  • 50
  • 61