0

I have a small function that is intended to take two lists of numbers and to compare them using their respective histograms overlaid and using a ratio plot. The bin width of the ratio plot is inconsistent with the bin width of the overlaid histograms. How could the ratio plot be made to have bin widths identical to those of the overlaid histograms?

import numpy
import matplotlib.pyplot
import datavision # sudo pip install datavision
import shijian    # sudo pip install shijian

def main():

    a = numpy.random.normal(2, 2, size = 120)
    b = numpy.random.normal(2, 2, size = 120)

    save_histogram_comparison_matplotlib(
        values_1      = a,
        values_2      = b,
        label_1       = "a",
        label_2       = "b",
        normalize     = True,
        label_ratio_x = "frequency",
        label_y       = "",
        title         = "comparison of a and b",
        filename      = "test.png"
    )

def save_histogram_comparison_matplotlib(
    values_1       = None,
    values_2       = None,
    filename       = None,
    number_of_bins = None,
    normalize      = True,
    label_x        = "",
    label_y        = None,
    label_ratio_x  = "frequency",
    label_ratio_y  = "ratio",
    title          = None,
    label_1        = "1",
    label_2        = "2",
    overwrite      = True,
    LaTeX          = False
    ):

    matplotlib.pyplot.ioff()
    if LaTeX is True:
        matplotlib.pyplot.rc("text", usetex = True)
        matplotlib.pyplot.rc("font", family = "serif")
    if number_of_bins is None:
        number_of_bins_1 = datavision.propose_number_of_bins(values_1)
        number_of_bins_2 = datavision.propose_number_of_bins(values_2)
        number_of_bins   = int((number_of_bins_1 + number_of_bins_2) / 2)
    if filename is None:
        filename = shijian.propose_filename(
            filename  = title.replace(" ", "_") + ".png",
            overwrite = overwrite
        )

    values = []
    values.append(values_1)
    values.append(values_2)
    figure, (axis_1, axis_2) = matplotlib.pyplot.subplots(nrows = 2)
    ns, bins, patches = axis_1.hist(
        values,
        normed   = normalize,
        histtype = "stepfilled",
        bins     = number_of_bins,
        alpha    = 0.5,
        label    = [label_1, label_2]
    )
    axis_1.legend()
    axis_2.bar(
        bins[:-1],
        ns[0] / ns[1],
        alpha = 1,
    )
    axis_1.set_xlabel(label_x)
    axis_1.set_ylabel(label_y)
    axis_2.set_xlabel(label_ratio_x)
    axis_2.set_ylabel(label_ratio_y)
    matplotlib.pyplot.title(title)
    matplotlib.pyplot.savefig(filename)
    matplotlib.pyplot.close()

if __name__ == "__main__":
    main()

EDIT: temporary scratchboard because coding in comments isn't reasonable

import numpy
import matplotlib.pyplot
import datavision
import shijian

def main():

    a = numpy.random.normal(2, 2, size = 120)
    b = numpy.random.normal(2, 2, size = 120)

    save_histogram_comparison_matplotlib(
        values_1      = a,
        values_2      = b,
        label_1       = "a",
        label_2       = "b",
        normalize     = True,
        label_ratio_x = "frequency",
        label_y       = "",
        title         = "comparison of a and b",
        filename      = "test.png"
    )

def save_histogram_comparison_matplotlib(
    values_1       = None,
    values_2       = None,
    filename       = None,
    number_of_bins = None,
    normalize      = True,
    label_x        = "",
    label_y        = None,
    label_ratio_x  = "frequency",
    label_ratio_y  = "ratio",
    title          = None,
    label_1        = "1",
    label_2        = "2",
    overwrite      = True,
    LaTeX          = False
    ):

    matplotlib.pyplot.ioff()
    if LaTeX is True:
        matplotlib.pyplot.rc("text", usetex = True)
        matplotlib.pyplot.rc("font", family = "serif")
    if number_of_bins is None:
        number_of_bins_1 = datavision.propose_number_of_bins(values_1)
        number_of_bins_2 = datavision.propose_number_of_bins(values_2)
        number_of_bins   = int((number_of_bins_1 + number_of_bins_2) / 2)
    if filename is None:
        filename = shijian.propose_filename(
            filename  = title.replace(" ", "_") + ".png",
            overwrite = overwrite
        )

    bar_width = 1
    values = []
    values.append(values_1)
    values.append(values_2)
    figure, (axis_1, axis_2) = matplotlib.pyplot.subplots(nrows = 2)
    ns, bins, patches = axis_1.hist(
        values,
        normed   = normalize,
        histtype = "stepfilled",
        bins     = number_of_bins,
        alpha    = 0.5,
        label    = [label_1, label_2],
        rwidth   = bar_width
    )
    axis_1.legend()
    axis_2.bar(
        bins[:-1],
        ns[0] / ns[1],
        alpha = 1,
        width = bar_width
    )
    axis_1.set_xlabel(label_x)
    axis_1.set_ylabel(label_y)
    axis_2.set_xlabel(label_ratio_x)
    axis_2.set_ylabel(label_ratio_y)
    matplotlib.pyplot.title(title)
    matplotlib.pyplot.savefig(filename)
    matplotlib.pyplot.close()

if __name__ == "__main__":
    main()
d3pd
  • 7,935
  • 24
  • 76
  • 127
  • 1
    Have you tried passing `rwidth=1` to `hist` and `width=1` to `bar`? Assuming that the width of each bin is `1`. the `r` in `rwidth` means "relative". – Andras Deak -- Слава Україні Jan 20 '16 at 13:46
  • @AndrasDeak Thanks for your comment. I wasn't familiar with these options. Assuming I've got the histogram overlay plotted, how can I then know what value to give to `bar` for `width`? – d3pd Jan 20 '16 at 14:15
  • Since it seems that you want to plot your bars back-to-back, you probably need to set `rwidth=1` in `hist` and correspondingly `width=bins[1]-bins[0]` in `bar` or something similar. You might have to also adjust the alignment of the bars in either plots. – Andras Deak -- Слава Україні Jan 20 '16 at 14:18

1 Answers1

1

You need the rwidth parameter in your axis_1.hist(..) call

You can adjust rwidth and bins to match your axis_2.bar(...) call (default width in bar is 0.8).

e.g.

matplotlib.pyplot.hist(a,bins=6,rwidth=0.8)

enter image description here

Lee
  • 29,398
  • 28
  • 117
  • 170
  • Hey there. Thanks for your help on this. I'm not sure I follow. I've tried this (code shown above in the "scratchboard" section -- feel free to edit) and the bin widths are not equal. Assuming I've got the histogram overlay plotted, how can I then know what value to give to `bar` for `width`? – d3pd Jan 20 '16 at 14:18