These scripts are associated with the following publication: “Classification and Ranking of Fermi LAT Gamma-ray Sources from the 3FGL Catalog Using Machine Learning Techniques”, Saz Parkinson, P. M. (HKU/LSR, SCIPP), Xu, H. (HKU), Yu, P. L. H. (HKU), Salvetti, D. (INAF-Milan), Marelli, M. (INAF-Milan), and Falcone, A. D. (Penn State), The Astrophysical Journal, 2016, in press (http://arxiv.org/abs/1602.00385)
NB: You are welcome to use these scripts for your own purposes, but if you do so, we kindly ask that you cite the above publication.
load(".RData")
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
null <- glm(pulsarness ~1, family = binomial, data = FGL3_train)
glm.step.backward.AIC <- step(glm(pulsarness ~., family = binomial, data = FGL3_train),
scope = list(lower = null, upper = glm(pulsarness ~.,
family = binomial, data = FGL3_train)),
direction = "backward")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Start: AIC=147.46
## pulsarness ~ Spectral_Index + Variability_Index + Flux_Density +
## Unc_Energy_Flux100 + Signif_Curve + hr12 + hr23 + hr34 +
## hr45
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## - hr12 1 128.34 146.34
## <none> 127.46 147.46
## - hr23 1 130.94 148.94
## - Signif_Curve 1 131.61 149.61
## - Flux_Density 1 136.41 154.41
## - hr34 1 138.76 156.76
## - Spectral_Index 1 143.38 161.38
## - hr45 1 156.84 174.84
## - Unc_Energy_Flux100 1 173.80 191.80
## - Variability_Index 1 223.21 241.21
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##
## Step: AIC=146.34
## pulsarness ~ Spectral_Index + Variability_Index + Flux_Density +
## Unc_Energy_Flux100 + Signif_Curve + hr23 + hr34 + hr45
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Df Deviance AIC
## <none> 128.34 146.34
## - hr23 1 130.95 146.95
## - Signif_Curve 1 134.10 150.10
## - Flux_Density 1 136.45 152.45
## - hr34 1 140.34 156.34
## - Spectral_Index 1 148.98 164.98
## - hr45 1 158.06 174.06
## - Unc_Energy_Flux100 1 173.90 189.90
## - Variability_Index 1 223.37 239.37
print(summary(glm.step.backward.AIC))
##
## Call:
## glm(formula = pulsarness ~ Spectral_Index + Variability_Index +
## Flux_Density + Unc_Energy_Flux100 + Signif_Curve + hr23 +
## hr34 + hr45, family = binomial, data = FGL3_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0784 -0.0647 -0.0190 -0.0043 4.0956
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 151.7880 23.5877 6.435 1.23e-10 ***
## Spectral_Index -5.8615 1.8620 -3.148 0.00164 **
## Variability_Index -5.5473 0.9253 -5.995 2.03e-09 ***
## Flux_Density 0.7541 0.2726 2.766 0.00567 **
## Unc_Energy_Flux100 3.7512 0.6978 5.376 7.64e-08 ***
## Signif_Curve 1.2275 0.5569 2.204 0.02750 *
## hr23 1.7846 1.1325 1.576 0.11506
## hr34 -4.2981 1.3213 -3.253 0.00114 **
## hr45 -4.3291 0.9729 -4.450 8.59e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 788.05 on 1332 degrees of freedom
## Residual deviance: 128.34 on 1324 degrees of freedom
## AIC: 146.34
##
## Number of Fisher Scoring iterations: 9
predictions_FGL3_train <- predict(glm.step.backward.AIC, FGL3_train, type = "response")
predictions_FGL3_test <- predict(glm.step.backward.AIC, FGL3_test, type = "response")
Best_threshold_train <- ROC_threshold_plots_tables(FGL3_train$pulsarness,
predictions_FGL3_train,
FGL3_test$pulsarness,
predictions_FGL3_test)
## real_category
## Predict_class_train Non-Pulsar Pulsar
## AGN 1167 4
## Pulsar 50 112
## real_category
## Predict_class_test Non-Pulsar Pulsar
## AGN 492 1
## Pulsar 29 49
FGL3_results$LR_P <- round(predict(glm.step.backward.AIC, FGL3_tidy,
type = "response"), digits = 3)
FGL3_results$LR_Pred <- ifelse(FGL3_results$LR_P > Best_threshold_train[1],
"PSR", "AGN")
Environ <- ls()
Environ <- Environ[Environ != "FGL3_tidy"
& Environ != "FGL3_results"
& Environ != "FGL3_test"
& Environ != "FGL3_train"
& Environ != "predictions_FGL3_train_CV"
& Environ != "Block_index"
& Environ != "FGL3_train_CV"
& Environ != "ROC_threshold_plots_tables"
& Environ != "FGL3_Pulsars_train"
& Environ != "FGL3_Pulsars_test"]
rm(list = Environ)
save.image()