-2

I am using the following code:

# Mean of `credit_application` per (CRG, month): group by the CRG column and by
# the `yearmonth` column parsed as a '%Y%m' date, then flatten the result into
# a frame whose value column is named 'probability of application'.
tmp = (merged_df_2014.groupby(['CRG', pd.to_datetime(merged_df_2014['yearmonth'], format='%Y%m')])
       ['credit_application'].mean().reset_index(name='probability of application')
      )

# Draw one line per CRG value over time.
# NOTE(review): CRG holds floats (0.0, 1.0, ...) in the data shown below, so
# seaborn presumably treats `hue` as a numeric variable; with the default
# legend="auto" it may then draw a "brief" legend of evenly spaced sample
# values rather than the actual levels -- confirm against the
# seaborn.lineplot documentation (`legend` parameter).
ax = sns.lineplot(data=tmp, x='yearmonth', y='probability of application', hue='CRG')
# Rotate the x tick labels and format them as year-month.
ax.tick_params(axis='x', rotation=45)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))

on the following data (tmp.to_dict()):

{'CRG': {0: 0.0,
  1: 0.0,
  2: 0.0,
  3: 0.0,
  4: 0.0,
  5: 0.0,
  6: 0.0,
  7: 0.0,
  8: 0.0,
  9: 0.0,
  10: 0.0,
  11: 0.0,
  12: 1.0,
  13: 1.0,
  14: 1.0,
  15: 1.0,
  16: 1.0,
  17: 1.0,
  18: 1.0,
  19: 1.0,
  20: 1.0,
  21: 1.0,
  22: 1.0,
  23: 1.0,
  24: 2.0,
  25: 2.0,
  26: 2.0,
  27: 2.0,
  28: 2.0,
  29: 2.0,
  30: 2.0,
  31: 2.0,
  32: 2.0,
  33: 2.0,
  34: 2.0,
  35: 2.0,
  36: 3.0,
  37: 3.0,
  38: 3.0,
  39: 3.0,
  40: 3.0,
  41: 3.0,
  42: 3.0,
  43: 3.0,
  44: 3.0,
  45: 3.0,
  46: 3.0,
  47: 3.0,
  48: 4.0,
  49: 4.0,
  50: 4.0,
  51: 4.0,
  52: 4.0,
  53: 4.0,
  54: 4.0,
  55: 4.0,
  56: 4.0,
  57: 4.0,
  58: 4.0,
  59: 4.0,
  60: 5.0,
  61: 5.0,
  62: 5.0,
  63: 5.0,
  64: 5.0,
  65: 5.0,
  66: 5.0,
  67: 5.0,
  68: 5.0,
  69: 5.0,
  70: 5.0,
  71: 5.0,
  72: 7.0,
  73: 7.0,
  74: 7.0,
  75: 7.0,
  76: 7.0,
  77: 7.0,
  78: 7.0,
  79: 7.0,
  80: 7.0,
  81: 7.0,
  82: 7.0,
  83: 7.0},
 'yearmonth': {0: Timestamp('2014-01-01 00:00:00'),
  1: Timestamp('2014-02-01 00:00:00'),
  2: Timestamp('2014-03-01 00:00:00'),
  3: Timestamp('2014-04-01 00:00:00'),
  4: Timestamp('2014-05-01 00:00:00'),
  5: Timestamp('2014-06-01 00:00:00'),
  6: Timestamp('2014-07-01 00:00:00'),
  7: Timestamp('2014-08-01 00:00:00'),
  8: Timestamp('2014-09-01 00:00:00'),
  9: Timestamp('2014-10-01 00:00:00'),
  10: Timestamp('2014-11-01 00:00:00'),
  11: Timestamp('2014-12-01 00:00:00'),
  12: Timestamp('2014-01-01 00:00:00'),
  13: Timestamp('2014-02-01 00:00:00'),
  14: Timestamp('2014-03-01 00:00:00'),
  15: Timestamp('2014-04-01 00:00:00'),
  16: Timestamp('2014-05-01 00:00:00'),
  17: Timestamp('2014-06-01 00:00:00'),
  18: Timestamp('2014-07-01 00:00:00'),
  19: Timestamp('2014-08-01 00:00:00'),
  20: Timestamp('2014-09-01 00:00:00'),
  21: Timestamp('2014-10-01 00:00:00'),
  22: Timestamp('2014-11-01 00:00:00'),
  23: Timestamp('2014-12-01 00:00:00'),
  24: Timestamp('2014-01-01 00:00:00'),
  25: Timestamp('2014-02-01 00:00:00'),
  26: Timestamp('2014-03-01 00:00:00'),
  27: Timestamp('2014-04-01 00:00:00'),
  28: Timestamp('2014-05-01 00:00:00'),
  29: Timestamp('2014-06-01 00:00:00'),
  30: Timestamp('2014-07-01 00:00:00'),
  31: Timestamp('2014-08-01 00:00:00'),
  32: Timestamp('2014-09-01 00:00:00'),
  33: Timestamp('2014-10-01 00:00:00'),
  34: Timestamp('2014-11-01 00:00:00'),
  35: Timestamp('2014-12-01 00:00:00'),
  36: Timestamp('2014-01-01 00:00:00'),
  37: Timestamp('2014-02-01 00:00:00'),
  38: Timestamp('2014-03-01 00:00:00'),
  39: Timestamp('2014-04-01 00:00:00'),
  40: Timestamp('2014-05-01 00:00:00'),
  41: Timestamp('2014-06-01 00:00:00'),
  42: Timestamp('2014-07-01 00:00:00'),
  43: Timestamp('2014-08-01 00:00:00'),
  44: Timestamp('2014-09-01 00:00:00'),
  45: Timestamp('2014-10-01 00:00:00'),
  46: Timestamp('2014-11-01 00:00:00'),
  47: Timestamp('2014-12-01 00:00:00'),
  48: Timestamp('2014-01-01 00:00:00'),
  49: Timestamp('2014-02-01 00:00:00'),
  50: Timestamp('2014-03-01 00:00:00'),
  51: Timestamp('2014-04-01 00:00:00'),
  52: Timestamp('2014-05-01 00:00:00'),
  53: Timestamp('2014-06-01 00:00:00'),
  54: Timestamp('2014-07-01 00:00:00'),
  55: Timestamp('2014-08-01 00:00:00'),
  56: Timestamp('2014-09-01 00:00:00'),
  57: Timestamp('2014-10-01 00:00:00'),
  58: Timestamp('2014-11-01 00:00:00'),
  59: Timestamp('2014-12-01 00:00:00'),
  60: Timestamp('2014-01-01 00:00:00'),
  61: Timestamp('2014-02-01 00:00:00'),
  62: Timestamp('2014-03-01 00:00:00'),
  63: Timestamp('2014-04-01 00:00:00'),
  64: Timestamp('2014-05-01 00:00:00'),
  65: Timestamp('2014-06-01 00:00:00'),
  66: Timestamp('2014-07-01 00:00:00'),
  67: Timestamp('2014-08-01 00:00:00'),
  68: Timestamp('2014-09-01 00:00:00'),
  69: Timestamp('2014-10-01 00:00:00'),
  70: Timestamp('2014-11-01 00:00:00'),
  71: Timestamp('2014-12-01 00:00:00'),
  72: Timestamp('2014-01-01 00:00:00'),
  73: Timestamp('2014-02-01 00:00:00'),
  74: Timestamp('2014-03-01 00:00:00'),
  75: Timestamp('2014-04-01 00:00:00'),
  76: Timestamp('2014-05-01 00:00:00'),
  77: Timestamp('2014-06-01 00:00:00'),
  78: Timestamp('2014-07-01 00:00:00'),
  79: Timestamp('2014-08-01 00:00:00'),
  80: Timestamp('2014-09-01 00:00:00'),
  81: Timestamp('2014-10-01 00:00:00'),
  82: Timestamp('2014-11-01 00:00:00'),
  83: Timestamp('2014-12-01 00:00:00')},
 'probability of application': {0: 0.029411764705882353,
  1: 0.029411764705882353,
  2: 0.0058823529411764705,
  3: 0.03488372093023256,
  4: 0.029411764705882353,
  5: 0.01764705882352941,
  6: 0.028901734104046242,
  7: 0.023529411764705882,
  8: 0.047337278106508875,
  9: 0.04046242774566474,
  10: 0.04093567251461988,
  11: 0.04142011834319527,
  12: 0.03676470588235294,
  13: 0.03676470588235294,
  14: 0.051470588235294115,
  15: 0.014705882352941176,
  16: 0.058823529411764705,
  17: 0.029197080291970802,
  18: 0.021897810218978103,
  19: 0.0364963503649635,
  20: 0.021897810218978103,
  21: 0.043795620437956206,
  22: 0.0364963503649635,
  23: 0.021897810218978103,
  24: 0.08870967741935484,
  25: 0.04032258064516129,
  26: 0.07258064516129033,
  27: 0.0967741935483871,
  28: 0.07258064516129033,
  29: 0.08064516129032258,
  30: 0.056451612903225805,
  31: 0.072,
  32: 0.06349206349206349,
  33: 0.03937007874015748,
  34: 0.06299212598425197,
  35: 0.15625,
  36: 0.050359712230215826,
  37: 0.05755395683453238,
  38: 0.07553956834532374,
  39: 0.06115107913669065,
  40: 0.07885304659498207,
  41: 0.06785714285714285,
  42: 0.07857142857142857,
  43: 0.06382978723404255,
  44: 0.09574468085106383,
  45: 0.06028368794326241,
  46: 0.0711743772241993,
  47: 0.06761565836298933,
  48: 0.08955223880597014,
  49: 0.11940298507462686,
  50: 0.08955223880597014,
  51: 0.014925373134328358,
  52: 0.16417910447761194,
  53: 0.1791044776119403,
  54: 0.13432835820895522,
  55: 0.04477611940298507,
  56: 0.13432835820895522,
  57: 0.05970149253731343,
  58: 0.1044776119402985,
  59: 0.08955223880597014,
  60: 0.16666666666666666,
  61: 0.08333333333333333,
  62: 0.125,
  63: 0.20833333333333334,
  64: 0.041666666666666664,
  65: 0.125,
  66: 0.2916666666666667,
  67: 0.125,
  68: 0.16666666666666666,
  69: 0.16666666666666666,
  70: 0.16666666666666666,
  71: 0.16666666666666666,
  72: 0.0916030534351145,
  73: 0.11363636363636363,
  74: 0.06060606060606061,
  75: 0.07633587786259542,
  76: 0.10606060606060606,
  77: 0.1076923076923077,
  78: 0.1297709923664122,
  79: 0.1,
  80: 0.15384615384615385,
  81: 0.11450381679389313,
  82: 0.10687022900763359,
  83: 0.09448818897637795}}

and I am getting this plot: (image not included)

As you can see, the legend does not display all 7 values of the categorical variable "CRG" (0, 1, 2, 3, 4, 5, 7). Instead, it shows only 5 entries and somehow "averages" the labels, creating non-existent values such as "1.5" and "4.5".

How can I fix this?

Nick
  • 2,924
  • 4
  • 36
  • 43

0 Answers