永远相信美好的事情终将发生

Python 中进行 lowess 平滑

  模拟得到的数据,有时会存在较大的局部波动,导致画出来的曲线不够平滑,此时可以通过 lowess 平滑进行处理。 lowess(locally weighted scatterplot smoothing,局部加权回归散点平滑法)的主要思想是提取一定比例的局部数据,在这部分局部数据中拟合多项式回归曲线,从而使曲线更加平滑。
  下面记录lowess平滑的不同实现方法:

Python

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from __future__ import division
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

x=np.linspace(0,3.14*3,100)
y=np.sin(x) + np.random.normal(loc=0.0,scale=0.1,size=len(x))

# statsmodels.api
import statsmodels.api as sm
lowess=sm.nonparametric.lowess
y_sm=lowess(y,x,frac=0.1)
plt.plot(x,y,lw=1,color='gray',label='y')
plt.plot(y_sm[:,0],y_sm[:,1],lw=1,color='g',label='sm')


# Python seaborn.lmplot()
import seaborn as sns
d=np.hstack((x.reshape(-1,1),y.reshape(-1,1)))
df=DataFrame(d,columns=['xdata','ydata'])
sns.lmplot(x='xdata', y='ydata', data=df,lowess=True) # 实际是调用 statsmodels,且使用默认参数 frac=0.667


# 自定义函数 np.convolve()
def smooth(y, box_pts):
box = np.ones(box_pts)/box_pts
y_smooth = np.convolve(y, box, mode='same')
return y_smooth

y_np3=smooth(y,3) # 点的个数
y_np67=smooth(y,67)
plt.plot(x,y_np3,color='cyan',lw=1)
plt.plot(x,y_np67,color='orange',lw=1)


# Savitzky-Golay filter 平滑
from scipy.signal import savgol_filter
y_sg=savgol_filter(y, 21, 3) # window size 21, polynomial order 3
plt.plot(x,y_sg,color='g',lw=1)


# 周期性信号 傅里叶变换
import scipy.fftpack

N=len(x)
w = scipy.fftpack.rfft(y)
f = scipy.fftpack.rfftfreq(N, x[1]-x[0])
spectrum = w**2

cutoff_idx = spectrum < (spectrum.max()/5)
w2 = w.copy()
w2[cutoff_idx] = 0
y_fft = scipy.fftpack.irfft(w2)
plt.plot(x,y_fft,color='g',lw=1)


# 样条插值
import numpy as np
from scipy.interpolate import splev, splrep
xnew=x
spl=splrep(x,y,k=3) # 3次
y_spl=splev(xnew,spl)
plt.plot(xnew,y_spl,color='g',lw=1)


# Rbf 插值
from scipy.interpolate import Rbf
xnew=x
rbf=Rbf(x,y)
y_rbf=rbf(xnew)
plt.plot(xnew,y_rbf,color='g',lw=1)


# use fitpack2 method
from scipy.interpolate import Rbf, InterpolatedUnivariateSpline
xnew=x
ius = InterpolatedUnivariateSpline(x, y)
y_ius=rbf(xnew)
plt.plot(xnew,y_ius,color='g',lw=1)


# KernelReg
from statsmodels.nonparametric.kernel_regression import KernelReg

xnew=x
# The third parameter specifies the type of the variable x;
# 'c' stands for continuous;
# 'u' stands for discrete(unordered)
kr = KernelReg(y,x,'c')
y_kr= kr.fit(xnew)[0]
plt.plot(xnew,y_kr,color='g',lw=1)

######################
# plot
fig,ax=plt.subplots(2,5,figsize=(18,9))
ax=ax.flatten()
for a in ax:
a.set_xticklabels([])
a.set_yticklabels([])
a.plot(x,sin(x),lw=1,color='gray')
#a.plot(x,y,lw=1,color='gray')
a.set_ylim(-1.1,1.1)

ax[0].plot(x,y_sm[:,1],color='g',lw=1) # statsmodels lowess
ax[1].plot(x,y_np3,color='g',lw=1) # 自定义函数
ax[2].plot(x,y_np67,color='g',lw=1) # 自定义函数
ax[3].plot(x,y_sg,color='g',lw=1) # savgol_filter
ax[4].plot(x,y_fft,color='g',lw=1) # fft
ax[5].plot(x,y_spl,color='g',lw=1) # bspline
ax[6].plot(x,y_rbf,color='g',lw=1) # Rbf
ax[7].plot(x,y_ius,color='g',lw=1) # ius
ax[8].plot(x,y_kr,color='g',lw=1) # KernelReg
plt.tight_layout()

另一个例子

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def smooth(x,window_len=11,window='hanning'):
"""smooth the data using a window with requested size.

This method is based on the convolution of a scaled window with the signal.
The signal is prepared by introducing reflected copies of the signal
(with the window size) in both ends so that transient parts are minimized
in the begining and end part of the output signal.

input:
x: the input signal
window_len: the dimension of the smoothing window; should be an odd integer
window: the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
flat window will produce a moving average smoothing.

output:
the smoothed signal

example:

t=linspace(-2,2,0.1)
x=sin(t)+randn(len(t))*0.1
y=smooth(x)

see also:

numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
scipy.signal.lfilter

TODO: the window parameter could be the window itself if an array instead of a string
NOTE: length(output) != length(input), to correct this: return y[(window_len/2-1):-(window_len/2)] instead of just y.
"""

if x.ndim != 1:
raise ValueError("smooth only accepts 1 dimension arrays.")

if x.size < window_len:
raise ValueError("Input vector needs to be bigger than window size.")

if window_len<3:
return x

if not window in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
raise ValueError("Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

s=np.r_[2*x[0]-x[window_len:1:-1], x, 2*x[-1]-x[-1:-window_len:-1]]
#print(len(s))
if window == 'flat': #moving average
w=np.ones(window_len,'d')
else:
w=getattr(np, window)(window_len)

y = np.convolve(w/w.sum(), s, mode='same')
return y[window_len-1:-window_len+1]

#########
import numpy as np

t=np.linspace(-4,4,100)
x=np.sin(t)
xn=x+np.random.randn(len(t))*0.1
y=smooth(x)

ws=31
windows=['flat', 'hanning', 'hamming', 'bartlett', 'blackman']

subplot(211)
plot(ones(ws))

for w in windows[1:]:
eval('plot(%s(%d) )'%(w,ws))

plt.axis([0,30,0,1.1])
plt.legend(windows)
plt.title("The smoothing windows")

subplot(212)
plot(x)
plot(xn)
for w in windows:
plot(smooth(xn,10,w))
l=['original signal', 'signal with noise'] + windows
plt.legend(l)
plt.title("Smoothing a noisy signal")

2d example

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import numpy as np
from scipy.interpolate import Rbf

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import cm

x = np.random.rand(100)*4.0-2.0
y = np.random.rand(100)*4.0-2.0
z = x*np.exp(-x**2-y**2)
ti = np.linspace(-2.0, 2.0, 100)
XI, YI = np.meshgrid(ti, ti)

# use RBF
rbf = Rbf(x, y, z, epsilon=2)
ZI = rbf(XI, YI)

# plot the result
plt.pcolor(XI, YI, ZI, cmap='jet')
plt.scatter(x, y, 100, z, cmap='jet')

2d example 2

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from scipy import signal

def gauss_kern(size, sizey=None):
""" Returns a normalized 2D gauss kernel array for convolutions """
size = int(size)
if not sizey:
sizey = size
else:
sizey = int(sizey)
x, y = np.mgrid[-size:size+1, -sizey:sizey+1]
g = np.exp(-(x**2/float(size) + y**2/float(sizey)))
return g / g.sum()

def blur_image(im, n, ny=None) :
""" blurs the image by convolving with a gaussian kernel of typical
size n. The optional keyword argument ny allows for a different
size in the y direction.
"""
g = gauss_kern(n, sizey=ny)
improc = signal.convolve(im, g, mode='valid')
return(improc)

# part 2: 2d
X, Y = np.mgrid[-70:70, -70:70]
Z = np.cos((X**2+Y**2)/200.)+ np.random.normal(size=X.shape)
Z2 = blur_image(Z, 3)
plt.figure()
plt.imshow(Z)
plt.figure()
plt.imshow(Z2)
plt.show()

R 中的实现

1
lowess(x,y)

Refer: