4

I am trying to draw sequence logos using matplotlib. The entire code is available on gist

The relevant portion is:

class Scale(matplotlib.patheffects.RendererBase):
    def __init__(self, sx, sy=None):
        self._sx = sx
        self._sy = sy

    def draw_path(self, renderer, gc, tpath, affine, rgbFace):
        affine = affine.identity().scale(self._sx, self._sy)+affine
        renderer.draw_path(gc, tpath, affine, rgbFace)


def draw_logo(all_scores):
   fig = plt.figure()
   fig.set_size_inches(len(all_scores),2.5)
   ax = fig.add_subplot(111)
   ax.set_xticks(range(len(all_scores)))

   xshift = 0
   trans_offset = transforms.offset_copy(ax.transAxes, 
                                  fig=fig, 
                                  x=0, 
                                  y=0, 
                                  units='points')


   for scores in all_scores:
       yshift = 0
       for base, score in scores:
           txt = ax.text(0, 
                        0, 
                       base, 
                      transform=trans_offset,
                      fontsize=80, 
                      color=COLOR_SCHEME[base],
                      weight='bold',
                      ha='center',
                      family='sans-serif'
                      )
        txt.set_clip_on(False) 
        txt.set_path_effects([Scale(1.0, score)])
        fig.canvas.draw()
        window_ext = txt.get_window_extent(txt._renderer)
        yshift = window_ext.height*score
        trans_offset = transforms.offset_copy(txt._transform, fig=fig, y=yshift, units='points')
    xshift += window_ext.width
    trans_offset = transforms.offset_copy(ax.transAxes, fig=fig, x=xshift, units='points')


   ax.set_yticks(range(0,3))


   seaborn.despine(ax=ax, offset=30, trim=True)
   ax.set_xticklabels(range(1,len(all_scores)+1), rotation=90)
   ax.set_yticklabels(np.arange(0,3,1))
   plt.show()


ALL_SCORES1 = [[('C', 0.02247014831444764),
          ('T', 0.057903843733384308),
          ('A', 0.10370837683591219),
          ('G', 0.24803586793255664)],
         [('T', 0.046608227674354567),
          ('G', 0.048827667087419063),
          ('A', 0.084338697696451109),
          ('C', 0.92994511407402669)],
         [('G', 0.0),
          ('T', 0.011098351287382456),
          ('A', 0.022196702574764911),
          ('C', 1.8164301607015951)],
         [('C', 0.020803153636453006),
          ('T', 0.078011826136698756),
          ('G', 0.11268374886412044),
          ('A', 0.65529933954826969)],
         [('T', 0.017393530660176126),
          ('A', 0.030438678655308221),
          ('G', 0.22611589858228964),
          ('C', 0.45078233627623127)],
         [('G', 0.022364103549245576),
          ('A', 0.043412671595594352),
          ('T', 0.097349627214363091),
          ('C', 0.1657574733649966)],
         [('C', 0.03264675899941203),
          ('T', 0.045203204768416654),
          ('G', 0.082872542075430544),
          ('A', 1.0949220710572034)],
         [('C', 0.0),
          ('T', 0.0076232429756614498),
          ('A', 0.011434864463492175),
          ('G', 1.8867526364762088)],
         [('C', 0.0018955903000026028),
          ('T', 0.0094779515000130137),
          ('A', 0.35637097640048931),
          ('G', 0.58005063180079641)],
         [('A', 0.01594690817903021),
          ('C', 0.017541598996933229),
          ('T', 0.2774762023151256),
          ('G', 0.48638069946042134)],
         [('A', 0.003770051401807444),
          ('C', 0.0075401028036148881),
          ('T', 0.011310154205422331),
          ('G', 1.8624053924928772)],
         [('C', 0.036479877757360731),
          ('A', 0.041691288865555121),
          ('T', 0.072959755514721461),
          ('G', 1.1517218549109602)],
         [('G', 0.011831087684038642),
          ('T', 0.068620308567424126),
          ('A', 0.10174735408273231),
          ('C', 1.0009100180696691)],
         [('C', 0.015871770937774379),
          ('T', 0.018757547471915176),
          ('A', 0.32176408355669878),
          ('G', 0.36505073156881074)],
         [('A', 0.022798100897300954),
          ('T', 0.024064662058262118),
          ('G', 0.24571286522646588),
          ('C', 0.34070495229855319)]]

ALL_SCORES2 = [[('A', 0.01653482213365913),
          ('G', 0.026710097292833978),
          ('C', 0.035613463057111966),
          ('T', 0.057235922770358522)],
         [('C', 0.020055669245080433),
          ('G', 0.023816107228533015),
          ('A', 0.031336983195438178),
          ('T', 0.058913528407423782)],
         [('T', 0.018666958185377256),
          ('G', 0.084001311834197651),
          ('A', 0.093334790926886277),
          ('C', 0.30333807051238043)],
         [('C', 0.0),
          ('G', 0.0),
          ('A', 0.32027512306044359),
          ('T', 0.82203948252180525)],
         [('C', 0.012698627658037786),
          ('A', 0.053334236163758708),
          ('T', 0.096509570201087178),
          ('G', 0.10920819785912497)],
         [('C', 0.0),
          ('G', 0.089472611853783468),
          ('A', 0.1930724782107959),
          ('T', 0.22132698721725386)],
         [('C', 0.020962390607965918),
          ('A', 0.026202988259957396),
          ('G', 0.066380903591892068),
          ('T', 0.07336836712788071)],
         [('G', 0.0),
          ('A', 0.10236420974570831),
          ('C', 0.15354631461856247),
          ('T', 0.29173799777526871)],
         [('G', 0.027681850851852024),
          ('C', 0.089966015268519078),
          ('A', 0.089966015268519078),
          ('T', 0.53287562889815143)],
         [('A', 0.034165612000664765),
          ('C', 0.06833122400132953),
          ('G', 0.072601925501412631),
          ('T', 0.28186629900548432)],
         [('G', 0.0),
          ('A', 0.037325935579058833),
          ('C', 0.23328709736911771),
          ('T', 0.72785574379164719)],
         [('A', 0.017470244196759552),
          ('C', 0.062892879108334396),
          ('G', 0.094339318662501587),
          ('T', 0.19916078384305891)],
         [('G', 0.0),
          ('A', 0.096447131567581681),
          ('C', 0.15844885900388422),
          ('T', 0.48223565783790845)],
         [('G', 0.0),
          ('A', 0.069291952024925829),
          ('C', 0.20787585607477749),
          ('T', 0.46425607856700307)],
         [('G', 0.0),
          ('A', 0.0),
          ('C', 0.21713201856318373),
          ('T', 1.1495224512168551)],
         [('G', 0.0),
          ('A', 0.048934292002649343),
          ('T', 0.27263391258618919),
          ('C', 0.42642740173737281)],
         [('A', 0.0),
          ('G', 0.053607190685875404),
          ('C', 0.2054942309625224),
          ('T', 0.69689347891638032)],
         [('G', 0.0),
          ('A', 0.0),
          ('C', 0.31312908494534769),
          ('T', 0.84220926295645249)],
         [('G', 0.0),
          ('C', 0.068079835765814778),
          ('A', 0.068079835765814778),
          ('T', 1.3207488138568066)],
         [('G', 0.020257705570431345),
          ('A', 0.020257705570431345),
          ('C', 0.048618493369035232),
          ('T', 0.055371061892512348)],
         [('G', 0.0),
          ('A', 0.076286510680262556),
          ('C', 0.20538675952378382),
          ('T', 0.34622339462580698)]]

Output for `ALL_SCORE2': enter image description here

Desired output: enter image description here

As seen in the notebook, the xticklabels do not align well with the alphabets. I would want to be able to apply offset_copy transforms on the xticks too, so that the centers of the alphabets align with the ticks.

Update

I have wrapped this up as a python package in pyseqlogo

rightskewed
  • 624
  • 2
  • 11
  • 24

3 Answers3

9

I would suggest to work in data coordinates and unit coordinates throughout the script.

gact.py
Script to plot a letter at a given position with a given scale. One obstacle is that the letters created by TextPath have their lower left corner at relative position (0,0) and that they don't span the complete range up to (1,1). We therefore need to shift the TextPath to the left such that the lower center of the letter is at (0,0) and introduce a globscale scaling parameter, which makes the letter 1 unit in height. Unfortunately this depends on the font being used, so for a different font, one has to adapt the x coordinate and the globscale parameter again.

import matplotlib as mpl
from matplotlib.text import TextPath
from matplotlib.patches import PathPatch
from matplotlib.font_manager import FontProperties

fp = FontProperties(family="Arial", weight="bold") 
globscale = 1.35
LETTERS = { "T" : TextPath((-0.305, 0), "T", size=1, prop=fp),
            "G" : TextPath((-0.384, 0), "G", size=1, prop=fp),
            "A" : TextPath((-0.35, 0), "A", size=1, prop=fp),
            "C" : TextPath((-0.366, 0), "C", size=1, prop=fp) }
COLOR_SCHEME = {'G': 'orange', 
                'A': 'red', 
                'C': 'blue', 
                'T': 'darkgreen'}

def letterAt(letter, x, y, yscale=1, ax=None):
    text = LETTERS[letter]

    t = mpl.transforms.Affine2D().scale(1*globscale, yscale*globscale) + \
        mpl.transforms.Affine2D().translate(x,y) + ax.transData
    p = PathPatch(text, lw=0, fc=COLOR_SCHEME[letter],  transform=t)
    if ax != None:
        ax.add_artist(p)
    return p

running code

import matplotlib.pyplot as plt
import gact

fig, ax = plt.subplots(figsize=(10,3))

all_scores = ALL_SCORES2
x = 1
maxi = 0
for scores in all_scores:
    y = 0
    for base, score in scores:
        gact.letterAt(base, x,y, score, ax)
        y += score
    x += 1
    maxi = max(maxi, y)

plt.xticks(range(1,x))
plt.xlim((0, x)) 
plt.ylim((0, maxi)) 
plt.tight_layout()      
plt.show()

enter image description here

ImportanceOfBeingErnest
  • 321,279
  • 53
  • 665
  • 712
  • 2
    this looks great! How did you decide the `TextPath` coordinates? – rightskewed Mar 06 '17 at 22:19
  • I edited the answer to hopefully be more clear about the coordinates. If you need a higher accuracy, you might need to measure the height of the letters more accurtely to account for in the `globscale` parameter. – ImportanceOfBeingErnest Mar 07 '17 at 07:20
  • Yeah this isn't very accurate with `TextPath`: I found that each letter needs a different `globscale` parameter to be the same height, so I wouldn't use this function for e.g. a scientific paper... – Cokes Jun 22 '17 at 17:59
1

I was able to workaround by using the screen coordinates:

def draw_logo(all_scores, fontfamily='Arial', size=80):
    mpl.rcParams['font.family'] = fontfamily

    fig, ax = plt.subplots(figsize=(len(all_scores), 2.5))

    font = FontProperties()
    font.set_size(size)
    font.set_weight('bold')

    #font.set_family(fontfamily)

    ax.set_xticks(range(1,len(all_scores)+1))    
    ax.set_yticks(range(0,3))
    ax.set_xticklabels(range(1,len(all_scores)+1), rotation=90)
    ax.set_yticklabels(np.arange(0,3,1))    
    seaborn.despine(ax=ax, trim=True)

    trans_offset = transforms.offset_copy(ax.transData, 
                                      fig=fig, 
                                      x=1, 
                                      y=0, 
                                      units='dots')

   for index, scores in enumerate(all_scores):
      yshift = 0
      for base, score in scores:
         txt = ax.text(index+1, 
                      0, 
                      base, 
                      transform=trans_offset,
                      fontsize=80, 
                      color=COLOR_SCHEME[base],
                      ha='center',
                      fontproperties=font,

                     )
        txt.set_path_effects([Scale(1.0, score)])
        fig.canvas.draw()
        window_ext = txt.get_window_extent(txt._renderer)
        yshift = window_ext.height*score
        trans_offset = transforms.offset_copy(txt._transform, 
                                              fig=fig,
                                              y=yshift,
                                              units='points')
    trans_offset = transforms.offset_copy(ax.transData, 
                                          fig=fig, 
                                          x=1, 
                                          y=0, 
                                          units='points')    
plt.show()

Examples:

enter image description here

enter image description here

Link to Jupyter Notebook

pyseqlogo

dmslogo

rightskewed
  • 624
  • 2
  • 11
  • 24
  • 1
    I recommend against working directly in screen coordinates, it will break if you change the figure size or dpi. – tacaswell Apr 01 '17 at 20:47
  • @tacaswell The figure size is dependent on the motif length. I want to keep dpi fixed. – rightskewed Apr 05 '17 at 17:14
  • 1
    @rightskewed Do you know how to eliminate the space of the stacked letter? Thanks. – J.G May 14 '18 at 18:46
  • I have same question as @JiaGuo – a06e May 15 '20 at 12:22
  • I am trying to call this from Julia, so it is very awkward to define a custom type like `Scale.` Is there a way to bypass defining a `Scale` class? Any other approach to do the scaling? Thanks! – a06e May 15 '20 at 12:38
1

I simplified the code to make it generalizable to DPI/screen coordinates. The major change is that it draws the first character and then measures its screen width/height and then uses those parameters for the rest.

from matplotlib.patheffects import RendererBase
from matplotlib.transforms import offset_copy
from matplotlib.patches import Rectangle

class Scale(RendererBase):
    def __init__(self, sx, sy=None):
        self._sx = sx
        self._sy = sy

    def draw_path(self, renderer, gc, tpath, affine, rgbFace):
        affine = affine.identity().scale(self._sx, self._sy) + affine
        renderer.draw_path(gc, tpath, affine, rgbFace)

iupac = "XACMGRSVTWYHKDBN"

iupac_colors = ['black'] * 16
iupac_colors[1<<0] = 'blue'
iupac_colors[1<<1] = 'red'
iupac_colors[1<<2] = 'green'
iupac_colors[1<<3] = 'gold'     

fig, ax = plt.subplots()
fig.set_size_inches(4, 1)


w=25
ax.set_xlim(0, w)

xoffset=int((w-pwm.shape[1])/2)

wscale=1
for i in range(pwm.shape[1]):
    yshift = 0
    for j in range(4):
        base=iupac[(1<<j)]
        color=iupac_colors[(1<<j)]
        scale=pwm[j, i]

        t = ax.text(i+xoffset, yshift, base, ha='left', va='baseline', color=color, fontsize=80, family='monospace', weight='bold')

        if i==0 and j==0:
            fig.canvas.draw()
            ext = t.get_window_extent(t._renderer)

            x0, x1, y0, y1 = ext.x0, ext.x1, ext.y0, ext.y1
            nx0, ny0 = ax.transData.inverted().transform((x0, y0))
            nx1, ny1 = ax.transData.inverted().transform((x1, y1))

            wscale=1.0/(nx1-nx0)
            hscale=1.0/(ny1)
            height=ext.height

        t.set_path_effects([Scale(wscale, scale*hscale)])

        w=0
        h=(height)*scale*hscale

        x, y = fig.transFigure.inverted().transform((w, h))

        yshift += y

ax.set_xticks(np.arange(xoffset, xoffset+pwm.shape[1])+0.5)
ax.set_xticklabels(np.arange(pwm.shape[1])+1)
sns.despine(ax=ax)

Sample output

Dave
  • 5,108
  • 16
  • 30
  • 40
  • I am trying to call this from Julia, so it is very awkward to define a custom type like Scale. Is there a way to bypass defining a Scale class? Any other approach to do the scaling? Thanks! – a06e May 15 '20 at 15:18