-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcheck_MCAR.R
More file actions
51 lines (35 loc) · 1.54 KB
/
check_MCAR.R
File metadata and controls
51 lines (35 loc) · 1.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Test whether the missing values in `data` are compatible with the MCAR
# (missing completely at random) assumption and plot the missingness pattern.
#
# Arguments:
#   data          Data frame (or matrix) of variables; columns are coerced to
#                 numeric for the low-variability filter.
#   LABEL         Factor of class labels; only used to report class sizes.
#   nIt           Currently unused; kept for backward compatibility.
#   thrVar        Variance threshold: columns whose variance (rounded to three
#                 decimals) is below this value are dropped before testing.
#   imputed_data  Already-imputed version of `data`, forwarded to
#                 TestMCARNormality(). The default refers to an object named
#                 `imp` that must be reachable from the function's
#                 environment; it is only evaluated when `data` contains NAs.
#
# Returns:
#   The result of TestMCARNormality() when `data` has missing values (or the
#   error condition object if the test failed); NULL when `data` is complete
#   and there is nothing to test.
check_MCAR <- function(data, LABEL, nIt = 50, thrVar = 0.025,
                       imputed_data = imp) {
  library(MissMech)
  library(VIM)
  library(pracma)
  source(file.path('.', 'RF_on_RF_and_Boruta.R'))

  cat('Data Imputation', '\n')

  # Compute the covariance matrix of the variables; its diagonal holds each
  # variable's variance, used to drop variables with too little variability.
  resCov <- round(
    cov(apply(data, 2, as.numeric), use = "pairwise.complete.obs"),
    3
  )
  idxDel <- which(diag(resCov) < thrVar)
  if (length(idxDel) > 0) {
    cat('Removed for too low variability: ', '\n',
        colnames(data)[idxDel], '\n')
    # drop = FALSE keeps a two-dimensional object even if one column is left.
    data <- data[, -idxDel, drop = FALSE]
    # NOTE(review): the same columns are NOT removed from `imputed_data`;
    # confirm that callers pass an imputed set matching the filtered columns.
  }

  # Report the number of samples per class (no output if LABEL has no levels).
  for (lev in levels(LABEL)) {
    cat("class ", lev, " has ", sum(LABEL == lev, na.rm = TRUE), "samples\n")
  }

  # Stays NULL when there are no missing values, so the function can still
  # return instead of failing on an undefined object.
  res <- NULL
  if (any(is.na(data))) {
    # The imputed data (original note: obtained with missForest in increasing
    # order) is passed in by the caller because the distribution-free
    # imputation inside the test often hangs.
    res <- tryCatch(
      TestMCARNormality(data = data, del.lesscases = 1,
                        imputed.data = imputed_data),
      error = function(e) {
        print(e)
        e
      }
    )
    # Report the actual outcome instead of unconditionally claiming MCAR.
    if (inherits(res, "error")) {
      cat("MCAR test failed; see the error above\n")
    } else {
      cat("MCAR test completed; inspect the returned p-values\n")
    }
  } else {
    cat("No missing values found: MCAR test skipped\n")
  }

  # Change the plot size only for the duration of this call.
  old_par <- par(pin = c(0, 0))
  on.exit(par(old_par), add = TRUE)

  VIM::aggr(data, col = c("skyblue", "red", "orange"), numbers = TRUE,
            sortVars = TRUE, cex.axis = .7,
            ylab = c("Proportion of missingness", "Missingness Pattern"))

  res
}