0

I am using matplotlib and a stackedbarchart program for it that someone wrote to graph a stacked bar chart.

My graph:

x-axis has 8 income distributions, one for each bar

The y-axis is the % of people in each income distribution. Person type-a is the first stack, person type-b is the second stack, and person type-c is the third stack.

My barchart is center aligned, and I am trying to figure out how to space out the bars so the graph looks better and so the labels are easier to read. Any suggestions, or clarifications?

The program is stackedBarGraph.py and the code looks like this, where widths is an array of 8 values, each giving the width of one bar.

Let me know if you need any more information (I tried to keep everything relevant). Thanks!

Full code (I hope it's not too difficult to read):

   from __future__ import division
from pylab import * 
import seaborn as sns
import pandas as pd
import numpy as np
from stackedbars import StackedBarGrapher

data = csv2rec('coa.csv', delimiter=',')

x = data['totalgrantaid']
y = data['studenteffort']
z = data['parentcontim']
g = data['parentincomeim']

df = pd.DataFrame(dict(grant = x, stud = y, par = z, income = g))

#organize the data to graph
income_brackets = [(0, 25000), (25000, 50000), (50000, 75000), (75000, 100000), (100000, 150000), (150000,200000), (200000,250000), (250000,300000)]
source = {'grant' : [], 'stud': [], 'par': []}
for lower, upper in income_brackets:
    for key in source:
        source[key].append(median(df.query('income > {} and income < {}'.format(lower, upper))[key]))

#set the widths
source2 = {'grant' : [], 'stud': [], 'par': []}
for lower, upper in income_brackets:
    for key in source2:
        source2[key].append(pd.DataFrame(df.query('income > {} and income < {}'.format(lower,upper))).count()[key])

total = pd.DataFrame(df.query('income > 0 and income < 300000')['grant']).count()
total = total/10

#graph specifications
d_widths = [(source2['grant'][i]/total)[0] for i in range(8)]
d_colors = ['r','g','b']
d_labels = ('<25000', '25000-\n50000', '50000-\n75000', '75000-\n100000', '100000-\n150000', '150000-\n200000', '200000-\n250000', '250000-\n300000')
d = np.array([[source[k][i] for k in ('grant', 'stud', 'par')] for i in range(8)])

#the graph
fig = plt.figure()
ax1 = fig.add_subplot(111)
mygraph = StackedBarGrapher()
mygraph.stackedBarPlot(ax1,d,d_colors, edgeCols=['#000000']*3,widths = d_widths,  showFirst = 8, xLabels=d_labels,scale=True)

Stackedbarchart program:

    def stackedBarPlot(self,
                       ax,                                 # axes to plot onto
                       data,                               # data to plot
                       cols,                               # colors for each level
                       xLabels = None,                     # bar specific labels
                       yTicks = 6.,                        # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
                       edgeCols=None,                      # colors for edges
                       showFirst=-1,                       # only plot the first <showFirst> bars
                       scale=False,                        # scale bars to same height
                       widths=None,                        # set widths for each bar
                       heights=None,                       # set heights for each bar
                       ylabel='',                          # label for y axis
                       xlabel=''                           # label for x axis
                       ):
        """Draw a stacked bar chart of ``data`` onto ``ax``.

        Args:
            ax: Axes object to draw on; all bars, ticks, limits and labels
                are applied to this object.
            data: 2-D array-like with one row per bar and one column per
                stacked level.
            cols: One colour per level, bottom level first.
            xLabels: Optional tick labels, one per bar. When None the x axis
                gets no ticks.
            yTicks: The string "none" (no y ticks), a number of evenly
                spaced ticks to generate, or a pair
                ``[tick_positions, tick_labels]``.
            edgeCols: One edge colour per level; defaults to "none" for all.
            showFirst: Plot only the first ``showFirst`` bars; -1 plots all.
            scale: If true, every bar is normalised to a total height of 1
                and generated tick labels run from 0 to 100.
            widths: Optional width per bar; bars are laid out edge to edge.
                Defaults to unit widths.
            heights: Optional total height per bar (ignored, with a printed
                warning, when ``scale`` is set).
            ylabel: Label for the y axis; skipped when empty.
            xlabel: Label for the x axis; skipped when empty.
        """
        #----------------------------------------------------------------------
        # data fixeratering

        # restrict everything to the first <showFirst> bars if requested
        if showFirst != -1:
            showFirst = np.min([showFirst, np.shape(data)[0]])
            data = data[:showFirst]
            if heights is not None:
                heights = heights[:showFirst]
            if widths is not None:
                widths = widths[:showFirst]

        # Work on a float copy laid out as (levels, bars); the float cast is
        # needed on every path because the scaling below divides in place.
        data_copy = np.copy(data).transpose().astype('float')
        data_shape = np.shape(data_copy)

        # determine the number of bars and corresponding levels from the shape of the data
        num_bars = data_shape[1]
        levels = data_shape[0]

        if widths is None:
            widths = np.array([1] * num_bars)
            x = np.arange(num_bars)
        else:
            # centre each bar so that it touches its left neighbour;
            # "/2." keeps the division exact for integer widths
            x = [0]
            for i in range(1, len(widths)):
                x.append(x[i-1] + (widths[i-1] + widths[i])/2.)

        # stack the data --
        # replace the value in each level by the cumulative sum of all preceding levels
        data_stack = np.cumsum(data_copy, axis=0)

        # scale the data if needed
        if scale or heights is not None:
            # copy the bar totals first: dividing by a view of the array
            # that is being modified in place is fragile
            bar_totals = data_stack[levels-1].copy()
            data_copy /= bar_totals
            data_stack /= bar_totals
            if scale:
                if heights is not None:
                    print("WARNING: setting scale and heights does not make sense.")
                    heights = None
            else:
                for i in range(num_bars):
                    data_copy[:, i] *= heights[i]
                    data_stack[:, i] *= heights[i]

        #----------------------------------------------------------------------
        # ticks

        # decide this once, by value, before yTicks is replaced by generated ticks
        show_y_ticks = not (isinstance(yTicks, str) and yTicks == "none")

        if show_y_ticks:
            # it is either a set of ticks or the number of auto ticks to make
            try:
                len(yTicks[1])
                real_ticks = True
            except (TypeError, IndexError, KeyError):
                real_ticks = False

            if not real_ticks:
                yTicks = float(yTicks)
                if scale:
                    # make the ticks line up to 100 %
                    y_ticks_at = np.arange(yTicks)/(yTicks-1)
                    y_tick_labels = np.array(["%0.0f"%(i * 100) for i in y_ticks_at])
                else:
                    # space the ticks along the y axis
                    y_ticks_at = np.arange(yTicks)/(yTicks-1)*np.max(data_stack)
                    y_tick_labels = np.array([str(i) for i in y_ticks_at])
                yTicks = (y_ticks_at, y_tick_labels)

        #----------------------------------------------------------------------
        # plot

        if edgeCols is None:
            edgeCols = ["none"]*len(cols)

        # bars: every level sits on the running total of the levels below it
        for i in range(levels):
            ax.bar(x,
                   data_copy[i],
                   bottom=data_stack[i-1] if i > 0 else None,
                   color=cols[i], alpha=0.7,
                   edgecolor=edgeCols[i],
                   width=widths,
                   linewidth=0.5,
                   align='center'
                   )

        # borders
        for side in ("top", "right", "bottom", "left"):
            ax.spines[side].set_visible(False)

        # make ticks if necessary (on `ax` itself, not on pyplot's current axes)
        if show_y_ticks:
            ax.tick_params(axis='y', which='both', labelsize=8, direction="out")
            ax.yaxis.tick_left()
            ax.set_yticks(yTicks[0])
            ax.set_yticklabels(yTicks[1])
        else:
            ax.set_yticks([])

        if xLabels is not None:
            ax.tick_params(axis='x', which='both', labelsize=8, direction="out")
            ax.xaxis.tick_bottom()
            ax.set_xticks(x)
            ax.set_xticklabels(xLabels, rotation='horizontal')
        else:
            ax.set_xticks([])

        # limits: from the left edge of the first bar to the right edge of
        # the last one, whatever the individual widths are
        ax.set_xlim(x[0] - widths[0]/2., x[-1] + widths[-1]/2.)
        ax.set_ylim(0, np.max(data_stack))

        # labels
        if xlabel != '':
            ax.set_xlabel(xlabel)
        if ylabel != '':
            ax.set_ylabel(ylabel)

What it looks like so far

As3adTintin
  • 2,406
  • 12
  • 33
  • 59
  • Can you provide your full code? – Ffisegydd Apr 28 '14 at 19:33
  • @Ffisegydd For sure! I updated the question w/ all the code. I hope it makes sense/ I can clarify any questions – As3adTintin Apr 28 '14 at 19:40
  • Could you post what the plot looks like now? I assume you could just update the widths to not be as large. – wflynny Apr 28 '14 at 19:53
  • 1
    Additionally, some of your code could benefit from list comprehensions and the like, such as `d_widths = [(source2['grant'][i]/total)[0] for i in range(8)]` or `d = np.array([[source[k][i] for k in ('grant', 'stud', 'par')] for i in range(8)])`. – wflynny Apr 28 '14 at 19:55
  • @Bill Thanks for that improvement, Bill. I'm new to python and will look into that (it looks like that'll make my life way simpler). I've also added a picture, regarding the original question about spacing. – As3adTintin Apr 28 '14 at 19:57
  • I've tried using `align = 'edge'` too, but then the bars overlap – As3adTintin Apr 28 '14 at 20:39
  • Have you tried just setting a width? Like width = 0.35? I pulled it from [this example](http://matplotlib.org/examples/pylab_examples/bar_stacked.html) – mauve Apr 29 '14 at 12:57
  • @mauve Thanks for your input. If you can see the picture I attached in the original question, I have set custom widths for each bar, however I am still trying to figure out how to coordinate that with spacing. Does that make sense? – As3adTintin Apr 29 '14 at 13:44
  • It does. Perhaps try multiplying the widths by 0.75 and see how it looks. i.e., as it currently stands, they are exactly as wide as their range on the x-axis and if you want space between them, you need to make them a % of that value. – mauve Apr 29 '14 at 13:58
  • oh interesting. i'll play with that and get back. that would be such a great solution – As3adTintin Apr 29 '14 at 14:00
  • @mauve I tried doing that, in addition to setting the ax.set_xlim. Unfortunately it just adds space on the right and not in between the bars. – As3adTintin Apr 29 '14 at 14:07

1 Answers1

0

Alright thanks everyone for the input (and Bill for showing me how to use list comprehensions effectively).

I was able to alter the program to achieve what I wanted (I think). I added two new parameters, `xaxlim` and `axspacing`, and used them in the parts of the program shown below:

def stackedBarPlot(self,
                   ax,                                 # axes to plot onto
                   data,                               # data to plot
                   cols,                               # colors for each level
                   xLabels = None,                     # bar specific labels
                   yTicks = 6.,                        # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
                   edgeCols=None,                      # colors for edges
                   showFirst=-1,                       # only plot the first <showFirst> bars
                   scale=False,                        # scale bars to same height
                   widths=None,                        # set widths for each bar
                   heights=None,                       # set heights for each bar
                   ylabel='',                          # label for y axis
                   xlabel='',                          # label for x axis
                   xaxlim=None,                        # explicit x limits handed to ax.set_xlim; None derives them from the bars
                   axspacing=0,                        # extra gap added between the centres of neighbouring bars
                   ):

.

    if widths is None:
        widths = np.array([1] * num_bars)
        # unit-width bars: neighbouring centres sit one bar width plus one
        # gap apart, so axspacing is honoured for the default widths too
        x = np.arange(num_bars) * (1 + axspacing)
    else:
        # walk left to right: each centre lies half the previous bar plus
        # half the current bar plus the requested gap beyond the previous
        # centre ("/2." keeps the division exact for integer widths)
        x = [0]
        for prev_width, width in zip(widths[:-1], widths[1:]):
            x.append(x[-1] + (prev_width + width)/2. + axspacing)

.

    # limits
    ax.set_ylim(0, np.max(data_stack))
    if xaxlim is None:
        # Span exactly from the left edge of the first bar to the right edge
        # of the last bar. The bar centres in x already include any spacing,
        # so no extra margin is left over on the right.
        ax.set_xlim(x[0] - widths[0]/2., x[-1] + widths[-1]/2.)
    else:
        ax.set_xlim(xaxlim)
As3adTintin
  • 2,406
  • 12
  • 33
  • 59
  • the ax.set_xlim still adds a little extra space to the right, but if I make the facecolor white and bgcolor white, it becomes unnoticeable. – As3adTintin Apr 29 '14 at 14:34