基于GM(0,N)的时间序列预测R实现
本人新手数据分析师一枚,最近由于工作原因,需要使用灰色预测GM(0,N)模型进行预测分析,但是在网上搜基本没有搜到相关的R代码,只能自己根据灰色预测GM(0,N)理论进行编写,这里主要介绍灰色预测GM(0,N)模型的两种参数估计的R代码实现。
数据说明
预测目标:x7
训练集:traindata
year |
x1 |
x2 |
x3 |
x4 |
x5 |
x6 |
x7 |
1985 |
473424 |
613.8 |
224042 |
102.30 |
19.2 |
19.01 |
11671 |
1986 |
497698 |
616.3 |
224962 |
102.71 |
18.8 |
18.71 |
11942 |
1987 |
495632 |
612.3 |
223502 |
102.06 |
20.5 |
17.84 |
10923 |
1988 |
393917 |
710.9 |
260205 |
92.69 |
24.5 |
14.91 |
10601 |
1989 |
334555 |
712.6 |
260092 |
92.09 |
25.6 |
14.26 |
10158 |
1990 |
365547 |
681.8 |
248838 |
87.07 |
26.1 |
13.99 |
9541 |
1991 |
411201 |
618.4 |
225707 |
95.14 |
25.2 |
14.48 |
8957 |
1992 |
342052 |
589.1 |
215040 |
90.64 |
24.8 |
14.72 |
8673 |
1993 |
304740 |
512.9 |
187204 |
78.91 |
22.0 |
16.61 |
8519 |
1994 |
302116 |
560.8 |
204676 |
86.27 |
22.5 |
16.23 |
9100 |
1995 |
288631 |
576.6 |
210476 |
99.94 |
21.9 |
16.68 |
9644 |
1996 |
232778 |
588.6 |
215434 |
102.01 |
20.6 |
17.74 |
10522 |
传统GM(0,N)模型参数估计R代码实现(累加法)
R实现
# 累加法
gm0n = function(data, factor){
data1 = data[, -1]
m = length(data1)
h = length(factor)
obje = cumsum(factor)
data1 = cumsum(data1)
Y = as.matrix(obje[2:h])
data1 = data1[, -m]
data1$hr = c(rep(1, h))
B = as.matrix(data1[-1,])
uk = solve(t(B)%*%B)%*%t(B)%*%Y
uk2 = matrix(uk, ncol = 1)
yc = c()
ycr = c()
for(r in 1:h){
ak = c()
for(i in 1:(m-1)){
ak[i] = uk2[i]*data1[r, i]
yc[r] = sum(ak)
}
}
ycr = c(ycr, yc)
ycr = ycr + uk2[m]
yc1 = c()
yc1[1] = ycr[1]
for(l in 2:h){
yc1[l] = ycr[l]-ycr[l-1]
}
percent = 100 * abs(yc1 - factor) / factor
precent_mean = mean(percent)
residual = yc1 - factor
fit = NULL
fit$percent = percent
fit$precent_mean = precent_mean
fit$residual = residual
fit$predict = yc1
fit$uk2 = uk2
cat("相对误差 =", percent, '\n', '\n')
cat("残差 =", residual, '\n', '\n')
cat("平均相对误差 =", precent_mean, "%", '\n', '\n')
cat("相对精度 =", 100-precent_mean, "%", '\n', '\n')
cat("预测值 =", yc1, '\n')
a = data$year
plot(yc1, col = 'red', type = 'b', ylim = c(0.6*max(c(yc1, factor)), 1.1*max(c(yc1, factor))), pch = 16, xlab = '时间', ylab = '', xaxt = "n")
points(factor, col = 'blue', type = 'b', pch = 4)
legend('topright',c('预测值', '真实值'), pch = c(16, 4), col = c('red', 'blue'), bty = "n")
axis(1, at = 1:length(a), labels = a)
yc1
fit
}
fit1 = gm0n(traindata, traindata$x7)
预测结果:训练集traindata预测精度99.23%
traindata预测结果:
年份 |
真实值 |
预测值 |
残差 |
误差% |
1985 |
11671 |
12056.87 |
385.87 |
3.31 |
1986 |
11942 |
11561.40 |
-380.60 |
3.19 |
1987 |
10923 |
10901.08 |
-21.92 |
0.20 |
1988 |
10601 |
10624.62 |
23.62 |
0.22 |
1989 |
10158 |
10170.80 |
12.80 |
0.13 |
1990 |
9541 |
9503.07 |
-37.93 |
0.40 |
1991 |
8957 |
8985.87 |
28.87 |
0.32 |
1992 |
8673 |
8650.91 |
-22.09 |
0.25 |
1993 |
8519 |
8530.52 |
11.52 |
0.14 |
1994 |
9100 |
9085.54 |
-14.46 |
0.16 |
1995 |
9644 |
9690.32 |
46.32 |
0.48 |
1996 |
10522 |
10475.69 |
-46.31 |
0.44 |

累乘法参数估计R代码实现
R实现
# 累乘法
gm0n = function(data, factor){
data1 = data[, -1]
data1 = cumsum(data1)
obje = cumsum(factor)
m = length(data1)
h = length(factor)
bkr = c()
for(g in 1:(m-1)){
bk = c()
for(j in 1:m){
ak = c()
for(k in 1:h){
ak[k] = choose(h-k+j-1, j-1)*(data1[, g][k])
bk[j] = sum(ak)
}
}
bkr = c(bkr, bk)
}
Bk = matrix(c(bkr, rep(1, m)), nrow = m, ncol = m, byrow = F)
yk = c()
for(j in 1:m){
ak1 = c()
for(k in 1:h){
ak1[k] = choose(h-k+j-1, j-1)*(obje[k])
yk[j] = sum(ak1)
}
}
Yk = matrix(yk, nrow = m, ncol = 1, byrow = F)
#uk = solve(t(Bk)%*%Bk)%*%t(Bk)%*%Yk
uk = solve(Bk)%*%Yk
uk2 = matrix(uk, ncol = 1)
yc = c()
ycr = c()
for(r in 1:h){
ak = c()
for(i in 1:(m-1)){
ak[i] = uk2[i]*data1[r, i]
yc[r] = sum(ak)
}
}
ycr = c(ycr, yc)
ycr = ycr + uk2[m]
yc1 = c()
yc1[1] = ycr[1]
for(l in 2:h){ #运用后减运算还原得模型输入序列x0预测序列
yc1[l] = ycr[l]-ycr[l-1]
}
percent =-= 100 * abs(yc1 - factor) / factor # 计算相对误差
precent_mean = mean(percent) # 计算平均相对误差
residual = yc1 - factor # 计算残差序列
fit = NULL
fit$percent = percent
fit$precent_mean = precent_mean
fit$residual = residual
fit$predict = yc1
fit$uk2 = uk2
cat("相对误差 =", percent, '\n', '\n')
cat("残差 =", residual, '\n', '\n')
cat("平均相对误差 =", precent_mean, "%", '\n', '\n')
cat("相对精度 =", 100-precent_mean, "%", '\n', '\n')
cat("预测值 =", yc1, '\n')
#画出序列预测值、真实值图像
a = data$year
plot(yc1, col = 'red', type = 'b', main = '预测值与实际值对比',
ylim = c(0.6*max(c(yc1, factor)), 1.1*max(c(yc1, factor))),
pch = 16, xlab = '时间', ylab = '', xaxt = "n")
points(factor, col = 'blue', type = 'b', pch = 4)
legend('topright',c('预测值', '真实值'), pch = c(16, 4), col = c('red', 'blue'), bty = "n")
axis(1, at = 1:length(a), labels = a)
yc1
fit
}
fit1 = gm0n(traindata, traindata$x7)
预测结果:训练集traindata预测精度98.27%
traindata预测结果:
年份 |
真实值 |
预测值 |
残差 |
误差% |
1985 |
11671 |
11417.54 |
-253.46 |
2.17 |
1986 |
11942 |
11794.12 |
-147.88 |
1.24 |
1987 |
10923 |
11170.59 |
247.59 |
2.27 |
1988 |
10601 |
10493.92 |
-107.08 |
1.01 |
1989 |
10158 |
10116.04 |
-41.96 |
0.41 |
1990 |
9541 |
9381.88 |
-159.12 |
1.67 |
1991 |
8957 |
9221.83 |
264.83 |
2.96 |
1992 |
8673 |
8757.02 |
84.02 |
0.97 |
1993 |
8519 |
8282.93 |
-236.07 |
2.77 |
1994 |
9100 |
9000.32 |
-99.68 |
1.10 |
1995 |
9644 |
9956.95 |
312.95 |
3.25 |
1996 |
10522 |
10622.35 |
100.35 |
0.95 |

两个参数估计方法最终的预测结果没有太大的差别,预测精度都达到95%以上,当建立完模型之后,可采用下列函数直接进行预测
# 预测函数
predict = function(data, uk){
data1 = data[, -1]
data1 = cumsum(data1)
yc = c()
ycr = c()
for(r in 1:length(data1[,1])){
ak = c()
for(i in 1:length(data1)){
ak[i] = uk[i]*data1[r, i]
yc[r] = sum(ak)
}
}
ycr = c(ycr, yc)
ycr = ycr + uk[length(uk)]
yc1 = c()
yc1[1] = ycr[1]
for(l in 2:length(data1[, 1])){ #运用后减运算还原得模型输入序列x0预测序列
yc1[l] = ycr[l]-ycr[l-1]
}
yc1
}
testdata = subset(traindata, select = -x7)
fit2 = predict(testdata, fit1$uk2)
由于本人经验有限,如有不足之处请指正,谢谢!
|