R 语言 特征提取 变量筛选 好用的包和语句

论坛 期权论坛 脚本     
匿名技术用户   2020-12-27 09:40   11   0

一个是Boruta, 一个是carat

双重降维(Boruta包和caret包)

#correlation of texture
setwd("C:/Users/Administrator/Desktop/mission/correlation")
library(readxl)
T2WI<- read_excel("Texture features and eGFR.xlsx",
sheet = "T2WI")
SWI<- read_excel("Texture features and eGFR.xlsx",
sheet = "SWI")
BOLD<- read_excel("Texture features and eGFR.xlsx",
sheet = "BOLD")
blood<- read_excel("Texture features and eGFR.xlsx",
sheet = "blood")

##step 1
#T2WI
T2WI_selected<-c()
T2WI_feature<-T2WI[,-c(1,2)]
for (feature in names(T2WI_feature)){
result_temp <- kruskal.test(T2WI[[feature]]~T2WI$Group)
if (result_temp$p.value<0.05) {
T2WI_selected<-c(T2WI_selected,feature)
}
}
#22
#SWI
SWI_selected<-c()
SWI_feature<-SWI[,-c(1,2)]
for (feature in names(SWI_feature)){
result_temp <- kruskal.test(SWI[[feature]]~SWI$Group)
if (result_temp$p.value<0.05) {
SWI_selected<-c(SWI_selected,feature)
}
}
#7
#BOLD
BOLD_selected<-c()
BOLD_feature<-BOLD[,-c(1,2)]
for (feature in names(BOLD_feature)){
result_temp <- kruskal.test(BOLD[[feature]]~BOLD$Group)
print(result_temp$p.value)
if (result_temp$p.value<0.05) {
BOLD_selected<-c(BOLD_selected,feature)
}
}
#94

#step2 skip for now
#分别计算T2WI、SWI、BOLD各纹理特征与eGFR的相关性,剔除与eGFR相关性r绝对值< 0.3的特征
target1<-T2WI[T2WI_selected]
target2<-SWI[SWI_selected]
target3<-BOLD[BOLD_selected]

#<>0.3
sum(cor(x = cbind(target1,target2,target3),y = blood$eGFR)<0.3)
length(cor(x = cbind(target1,target2,target3),y = blood$eGFR))

sum(cor(x = target1,y = blood$eGFR)>0.3)
sum(cor(x = target2,y = blood$eGFR)>0.3)
sum(cor(x = target3,y = blood$eGFR)>0.3)


####step 3
####selecting the important variables for group variable of egfr
library(Boruta)

#for T2WI
data1<-cbind(T2WI[c(2)],target1)
set.seed(888)
Boruta.train<-Boruta(Group~.,data=data1,maxRuns=10000,doTrace=2)
#Nonsense attributes should be rejected
print(Boruta.train)
plot(Boruta.train, xlab = "", xaxt = "n")
lz<-lapply(1:ncol(Boruta.train$ImpHistory),function(i) Boruta.train$ImpHistory[is.finite(Boruta.train$ImpHistory[,i]),i])
names(lz) <- colnames(Boruta.train$ImpHistory)
Labels <- sort(sapply(lz,median))
axis(side = 1,las=2,labels = names(Labels),at = 1:ncol(Boruta.train$ImpHistory), cex.axis = 0.7)

final.boruta <- TentativeRoughFix(Boruta.train)
print(final.boruta)
plot(final.boruta, xlab = "", xaxt = "n")
lz<-lapply(1:ncol(final.boruta$ImpHistory),function(i) final.boruta$ImpHistory[is.finite(final.boruta$ImpHistory[,i]),i])
names(lz) <- colnames(final.boruta$ImpHistory)
Labels <- sort(sapply(lz,median))
axis(side = 1,las=2,labels = names(Labels),at = 1:ncol(final.boruta$ImpHistory), cex.axis = 0.7)

data1_select1<-getSelectedAttributes(final.boruta, withTentative = F)

#for SWI
data2<-cbind(SWI[c(2)],target2)
set.seed(999)
Boruta2.train2<-Boruta(Group~.,data=data2,maxRuns=10000,doTrace=2)
#Nonsense attributes should be rejected
print(Boruta2.train2)
plot(Boruta2.train2, xlab = "", xaxt = "n")
lz<-lapply(1:ncol(Boruta2.train2$ImpHistory),function(i) Boruta2.train2$ImpHistory[is.finite(Boruta2.train2$ImpHistory[,i]),i])
names(lz) <- colnames(Boruta2.train2$ImpHistory)
Labels <- sort(sapply(lz,median))
axis(side = 1,las=2,labels = names(Labels),at = 1:ncol(Boruta2.train2$ImpHistory), cex.axis = 0.7)

final.Boruta2 <- TentativeRoughFix(Boruta2.train2)
print(final.Boruta2)
plot(final.Boruta2, xlab = "", xaxt = "n")
lz<-lapply(1:ncol(final.Boruta2$ImpHistory),function(i) final.Boruta2$ImpHistory[is.finite(final.Boruta2$ImpHistory[,i]),i])
names(lz) <- colnames(final.Boruta2$ImpHistory)
Labels <- sort(sapply(lz,median))
axis(side = 1,las=2,labels = names(Labels),at = 1:ncol(final.Boruta2$ImpHistory), cex.axis = 0.7)

data2_select2<-getSelectedAttributes(final.Boruta2, withTentative = F)


#for BOLD
data3<-cbind(BOLD[c(2)],target3)
set.seed(999)
Boruta3.train3<-Boruta(Group~.,data=data3,maxRuns=10000,doTrace=2)
#Nonsense attributes should be rejected
print(Boruta3.train3)
plot(Boruta3.train3, xlab = "", xaxt = "n")
lz<-lapply(1:ncol(Boruta3.train3$ImpHistory),function(i) Boruta3.train3$ImpHistory[is.finite(Boruta3.train3$ImpHistory[,i]),i])
names(lz) <- colnames(Boruta3.train3$ImpHistory)
Labels <- sort(sapply(lz,median))
axis(side = 1,las=2,labels = names(Labels),at = 1:ncol(Boruta3.train3$ImpHistory), cex.axis = 0.7)

final.Boruta3 <- TentativeRoughFix(Boruta3.train3)
print(final.Boruta3)
plot(final.Boruta3, xlab = "", xaxt = "n")
lz<-lapply(1:ncol(final.Boruta3$ImpHistory),function(i) final.Boruta3$ImpHistory[is.finite(final.Boruta3$ImpHistory[,i]),i])
names(lz) <- colnames(final.Boruta3$ImpHistory)
Labels <- sort(sapply(lz,median))
axis(side = 1,las=2,labels = names(Labels),at = 1:ncol(final.Boruta3$ImpHistory), cex.axis = 0.7)

data3_select3<-getSelectedAttributes(final.Boruta3, withTentative = F)

data1$data1_select1


分享到 :
0 人收藏
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

积分:7942463
帖子:1588486
精华:0
期权论坛 期权论坛
发布
内容

下载期权论坛手机APP