These scripts are associated with the following publication: “Classification and Ranking of Fermi LAT Gamma-ray Sources from the 3FGL Catalog Using Machine Learning Techniques”, Saz Parkinson, P. M. (HKU/LSR, SCIPP), Xu, H. (HKU), Yu, P. L. H. (HKU), Salvetti, D. (INAF-Milan), Marelli, M. (INAF-Milan), and Falcone, A. D. (Penn State), The Astrophysical Journal, 2016, in press (http://arxiv.org/abs/1602.00385)
NB: You are welcome to use these scripts for your own purposes, but if you do so, we kindly ask that you cite the above publication.
load(".RData")
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
set.seed(1)
k <- 10
for (i in 1:k) {
Model <- randomForest(pulsarness~., data = FGL3_train[-Block_index[i, ], ], importance = TRUE)
predictions_FGL3_train_CV[((i-1)*(nrow(FGL3_train_CV)/k) + 1):((i)*(nrow(FGL3_train_CV)/k))] <-
predict(Model, newdata = FGL3_train[Block_index[i, ], ], type = "Prob")[, 2]
print(i)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
## [1] 6
## [1] 7
## [1] 8
## [1] 9
## [1] 10
Best_threshold_train_CV <- ROC_threshold_plots_tables(FGL3_train_CV$pulsarness, predictions_FGL3_train_CV)
rf.full.FGL3 <- randomForest(pulsarness~., data = FGL3_train, importance = TRUE)
# The importance of each variable
importance(rf.full.FGL3)
## Non-Pulsar Pulsar MeanDecreaseAccuracy
## Spectral_Index 25.56036 17.632979 29.46206
## Variability_Index 31.64328 18.641224 33.66110
## Flux_Density 13.86492 7.538865 14.86319
## Unc_Energy_Flux100 32.74000 23.883328 36.91021
## Signif_Curve 33.10258 29.794806 40.35179
## hr12 10.67109 5.647244 12.24037
## hr23 19.39184 10.596104 21.76137
## hr34 9.85729 9.927413 12.63419
## hr45 19.86705 21.374217 24.13753
## MeanDecreaseGini
## Spectral_Index 41.850994
## Variability_Index 20.837394
## Flux_Density 9.430220
## Unc_Energy_Flux100 28.467787
## Signif_Curve 63.025025
## hr12 9.666265
## hr23 13.250555
## hr34 6.010382
## hr45 19.210694
varImpPlot(rf.full.FGL3)
predictions_FGL3_train <- predict(rf.full.FGL3, newdata = FGL3_train, type = "Prob")[, 2]
predictions_FGL3_test <- predict(rf.full.FGL3, newdata = FGL3_test, type = "Prob")[,2 ]
ROC_threshold_plots_tables(FGL3_train$pulsarness, predictions_FGL3_train, FGL3_test$pulsarness, predictions_FGL3_test,
threshold = Best_threshold_train_CV[1])
## real_category
## Predict_class_train Non-Pulsar Pulsar
## AGN 1215 0
## Pulsar 2 116
## real_category
## Predict_class_test Non-Pulsar Pulsar
## AGN 504 2
## Pulsar 17 48
## [1] 0.172
cat("Best threshold from cross-validation:", Best_threshold_train_CV)
## Best threshold from cross-validation: 0.172 0.9695222 0.9310345
FGL3_results$RF_P <- predict(rf.full.FGL3, newdata = FGL3_tidy, type = "Prob")[,2 ]
FGL3_results$RF_Pred <- ifelse(FGL3_results$RF_P > Best_threshold_train_CV[1],
c("PSR"), c("AGN"))
par(mfrow = c(1,1))
varImpPlot(rf.full.FGL3, pch = 19, type = 1, main = "")
Environ <- ls()
Environ <- Environ[Environ != "FGL3_tidy"
& Environ != "FGL3_results"
& Environ != "FGL3_test"
& Environ != "FGL3_train"
& Environ != "predictions_FGL3_train_CV"
& Environ != "Block_index"
& Environ != "FGL3_train_CV"
& Environ != "ROC_threshold_plots_tables"
& Environ != "FGL3_Pulsars_train"
& Environ != "FGL3_Pulsars_test"
& Environ != "rf.full.FGL3"]
rm(list = Environ)
save.image()