oplaTech About Teaching Archive
Oplatek's external memory

Vim rocks - publish code on Web

How to publish your source code using Vim? Open your source code

:TOhtml
and hit enter. It produces html source code of source code. I copied it and pasted here. Pretty nice, right? I guess gist is to complicated;) This is just copy paste! ~/school/dotplot/dotplot.py.html

#!/usr/bin/env python
# Author: Ondrej Platek 2012; contact: ondrej.platek at seznam.cz
# Code is provided as is with no warranty. Use it on your own risk.
# Use the source code according GPL3 and distribute the code also with this 3 line header
import numpy as np
#import matplotlib.pyplot as plt
#import matplotlib as mpl
#mpl.use('Agg')
#import pylab
#import matplotlib.cm as cm
from PIL import Image

def dotplot(s1,s2,win_size,threshold,outfile=None,outorigfile=None):
print 'computing dotplot base matrix'
m=dotplotArr(s1,s2)
print 'plotting'
plotMatplot(m,outorigfile)
print 'computing dotplot slide window matrix'
wm=dotplotSlideWinArr(m,win_size, threshold)
print 'plotting'
plotMatplot(wm,outfile)
return (m,wm)

def dotplotArr(s1,s2):
''' Quite dense function dotplot computes 2D array m
with size |s1|*|s2| where m[i,j]=1 if s1[i]==s2[j] (intended for strings s1, s2).
For understanding the function, see how the diagonal is computed with diagind index.
On diagonal is 1 if s1[i]==s2[i]. All 1s on diagonal => s1==s2.
Return 2D array of Booleans stored in one Byte.
'''
if len(s1)>len(s2):
s1,s2=s2,s1
# |s1| <= |s2|
m=np.zeros((len(s1),len(s2)),dtype=np.bool)
for i in range(len(s2)):
shift_s2=s2[i:]+s2[:i]
for j in range(len(s1)):
if shift_s2[j]==s1[j]:
diagind=np.mod(i+j,len(s1))
m[diagind,j]=True
return m

def dotplotSlideWinArr(m,win_size,threshold):
''' Complexity is O(|m|)=O(|w|*|h|) assuming cumsum takes O(|m|).
Return 2D array of numpy.uint16'
'''
h,w=m.shape
assert win_size<=h and win_size<=w, 'Window is bigger than the basic dotplot array!'
assert win_size<256, 'win_size*win_size has to be less that max value of numpy.uint16!'
wn=np.zeros( (h-win_size+1,w-win_size+1), dtype=np.uint16)
h_wn,w_wn=wn.shape

# 2 more arrays of size m
cumsum_w=np.zeros((h,w),dtype=np.uint64)
cumsum_h=np.zeros((h,w),dtype=np.uint64)
np.cumsum(m,axis=1,dtype=np.uint64,out=cumsum_w)
np.cumsum(m,axis=0,dtype=np.uint64,out=cumsum_h)

# computing wn[0,0]
for i in range(win_size):
wn[0,0]+=cumsum_w[i,win_size-1]
# computing first row wn[0,:]
for j in range(1,w_wn):
wn[0,j]=wn[0,j-1]-cumsum_h[win_size-1,j-1]+cumsum_h[win_size-1,j-1+win_size]
# computing first column wn[:,0]
for i in range(1,h_wn):
wn[i,0]=wn[i-1,0]-cumsum_w[i-1,win_size-1]+cumsum_w[i-1+win_size,win_size-1]
# computing the rest
for i in range(1,h_wn):
for j in range(1,w_wn):
col_before=cumsum_h[i+win_size-1,j-1]-cumsum_h[i-1,j-1]
col_after=cumsum_h[i+win_size-1,j-1+win_size]-cumsum_h[i-1,j-1+win_size]
wn[i,j]=wn[i,j-1]-col_before+col_after

# substracting threshold -> interested in values that pass the threshold (otherwise 0)
for i in range(h_wn):
for j in range(w_wn):
wn[i,j]=max(0,wn[i,j]-threshold)

return wn

#def plotMatplot(m,name=None):
# minm=np.min(m)
# maxm=np.max(m)
# fig=plt.figure()
# ax=fig.add_subplot(111)
# cax=ax.imshow(m,interpolation='nearest',cmap=cm.gist_gray)
# cbar = fig.colorbar(cax, ticks=[minm,maxm])
# cbar.ax.set_yticklabels([str(minm),str(maxm)])
# if name==None:
# plt.show()
# print 'Figure drawned.'
# else:
# plt.savefig(name)
# print 'Figure %s saved ' % name

def display_PIL(m,name,win_size=None):
if win_size==None:
maxm=np.max(m)
else:
maxm=win_size*win_size
if maxm > 255:
k=maxm/255
i=Image.fromarray(np.uint8(m/k))
else:
k=255/maxm
i=Image.fromarray(np.uint8(m*k))
if name==None:
i.show()
else:
i.save(name)
print 'Figure %s saved ' % name
del i # force GC


if __name__=='__main__':
print 'No tests'