These scripts are associated with the following publication: “Classification and Ranking of Fermi LAT Gamma-ray Sources from the 3FGL Catalog Using Machine Learning Techniques”, Saz Parkinson, P. M. (HKU/LSR, SCIPP), Xu, H. (HKU), Yu, P. L. H. (HKU), Salvetti, D. (INAF-Milan), Marelli, M. (INAF-Milan), and Falcone, A. D. (Penn State), The Astrophysical Journal, 2016, in press (http://arxiv.org/abs/1602.00385)
NB: You are welcome to use these scripts for your own purposes, but if you do so, we kindly ask that you cite the above publication.
load(".RData")
library(randomForest)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
set.seed(1)
rf.P <- randomForest(pulsarness ~., data = FGL3_Pulsars_train, importance = TRUE)
print(rf.P)
##
## Call:
## randomForest(formula = pulsarness ~ ., data = FGL3_Pulsars_train, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 10.1%
## Confusion matrix:
## MSP YNG class.error
## MSP 43 4 0.08510638
## YNG 6 46 0.11538462
importance(rf.P)
## MSP YNG MeanDecreaseAccuracy
## GLAT 11.8362611 15.4978565 15.4138510
## Spectral_Index 3.3411886 0.9654453 3.0658757
## Variability_Index -1.0281422 -3.4615356 -3.1882980
## Conf_95_PosAng 0.3733218 -1.4002385 -0.5706251
## Flux_Density 10.5865542 4.4787057 10.1160192
## Unc_Energy_Flux100 26.6749033 25.3375128 30.3358989
## Signif_Curve 8.1572730 4.7320722 8.7531204
## hr12 -3.1783394 -0.4783802 -2.7191745
## hr23 5.7989452 4.5546635 6.6805685
## hr34 1.9491817 3.7728334 3.6962043
## hr45 -1.0843239 4.3592551 2.1444392
## MeanDecreaseGini
## GLAT 8.600107
## Spectral_Index 2.055341
## Variability_Index 1.415482
## Conf_95_PosAng 1.481118
## Flux_Density 3.950428
## Unc_Energy_Flux100 18.073279
## Signif_Curve 3.764994
## hr12 1.242599
## hr23 3.412330
## hr34 2.615104
## hr45 2.240045
varImpPlot(rf.P)
predictions_FGL3_Pulsars_train <- predict(rf.P,
newdata = FGL3_Pulsars_train, type = "Prob")[,2 ]
predictions_FGL3_Pulsars_test <- predict(rf.P,
newdata = FGL3_Pulsars_test, type = "Prob")[,2 ]
Best_threshold_train <- ROC_threshold_plots_tables(FGL3_Pulsars_train$pulsarness,
predictions_FGL3_Pulsars_train,
FGL3_Pulsars_test$pulsarness,
predictions_FGL3_Pulsars_test,
cat1 = "YNG", cat2 = "MSP")
## real_category
## Predict_class_train MSP YNG
## MSP 47 0
## YNG 0 52
## real_category
## Predict_class_test MSP YNG
## MSP 16 2
## YNG 2 23
FGL3_results$RF_PSR_P <- round(predict(rf.P,
newdata = FGL3_tidy, type = "Prob")[,2 ],
digits = 3)
FGL3_results$RF_PSR_Pred <- ifelse(FGL3_results$RF_PSR_P > Best_threshold_train[1],
c("YNG"), c("MSP"))
FGL3_results$RF_PSR_Pred[(FGL3_results$LR_Pred=="AGN")&(FGL3_results$RF_Pred=="AGN")]=""
par(mfrow = c(1,1))
varImpPlot(rf.P, pch = 19, type = 1, main = "")
Environ <- ls()
Environ <- Environ[Environ != "FGL3_tidy"
& Environ != "FGL3_results"
& Environ != "FGL3_test"
& Environ != "FGL3_train"
& Environ != "predictions_FGL3_train_CV"
& Environ != "Block_index"
& Environ != "FGL3_train_CV"
& Environ != "ROC_threshold_plots_tables"
& Environ != "FGL3_Pulsars_train"
& Environ != "FGL3_Pulsars_test"]
rm(list = Environ)
save.image()