These scripts are associated with the following publication: “Classification and Ranking of Fermi LAT Gamma-ray Sources from the 3FGL Catalog Using Machine Learning Techniques”, Saz Parkinson, P. M. (HKU/LSR, SCIPP), Xu, H. (HKU), Yu, P. L. H. (HKU), Salvetti, D. (INAF-Milan), Marelli, M. (INAF-Milan), and Falcone, A. D. (Penn State), The Astrophysical Journal, 2016, in press (http://arxiv.org/abs/1602.00385)

NB: You are welcome to use these scripts for your own purposes, but if you do so, we kindly ask that you cite the above publication.

AGN vs PSR classification using Logistic Regression (LR) model (backward stepwise selection)

Load workspace from previous step

load(".RData")

Load pROC package

library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
null <- glm(pulsarness ~1, family = binomial, data = FGL3_train)
glm.step.backward.AIC <- step(glm(pulsarness ~., family = binomial, data = FGL3_train),
                              scope = list(lower = null, upper = glm(pulsarness ~., 
                              family = binomial, data = FGL3_train)),
                              direction = "backward")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Start:  AIC=147.46
## pulsarness ~ Spectral_Index + Variability_Index + Flux_Density + 
##     Unc_Energy_Flux100 + Signif_Curve + hr12 + hr23 + hr34 + 
##     hr45
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                      Df Deviance    AIC
## - hr12                1   128.34 146.34
## <none>                    127.46 147.46
## - hr23                1   130.94 148.94
## - Signif_Curve        1   131.61 149.61
## - Flux_Density        1   136.41 154.41
## - hr34                1   138.76 156.76
## - Spectral_Index      1   143.38 161.38
## - hr45                1   156.84 174.84
## - Unc_Energy_Flux100  1   173.80 191.80
## - Variability_Index   1   223.21 241.21
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## 
## Step:  AIC=146.34
## pulsarness ~ Spectral_Index + Variability_Index + Flux_Density + 
##     Unc_Energy_Flux100 + Signif_Curve + hr23 + hr34 + hr45
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
##                      Df Deviance    AIC
## <none>                    128.34 146.34
## - hr23                1   130.95 146.95
## - Signif_Curve        1   134.10 150.10
## - Flux_Density        1   136.45 152.45
## - hr34                1   140.34 156.34
## - Spectral_Index      1   148.98 164.98
## - hr45                1   158.06 174.06
## - Unc_Energy_Flux100  1   173.90 189.90
## - Variability_Index   1   223.37 239.37
print(summary(glm.step.backward.AIC))
## 
## Call:
## glm(formula = pulsarness ~ Spectral_Index + Variability_Index + 
##     Flux_Density + Unc_Energy_Flux100 + Signif_Curve + hr23 + 
##     hr34 + hr45, family = binomial, data = FGL3_train)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0784  -0.0647  -0.0190  -0.0043   4.0956  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        151.7880    23.5877   6.435 1.23e-10 ***
## Spectral_Index      -5.8615     1.8620  -3.148  0.00164 ** 
## Variability_Index   -5.5473     0.9253  -5.995 2.03e-09 ***
## Flux_Density         0.7541     0.2726   2.766  0.00567 ** 
## Unc_Energy_Flux100   3.7512     0.6978   5.376 7.64e-08 ***
## Signif_Curve         1.2275     0.5569   2.204  0.02750 *  
## hr23                 1.7846     1.1325   1.576  0.11506    
## hr34                -4.2981     1.3213  -3.253  0.00114 ** 
## hr45                -4.3291     0.9729  -4.450 8.59e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 788.05  on 1332  degrees of freedom
## Residual deviance: 128.34  on 1324  degrees of freedom
## AIC: 146.34
## 
## Number of Fisher Scoring iterations: 9
predictions_FGL3_train <- predict(glm.step.backward.AIC, FGL3_train, type = "response")
predictions_FGL3_test <- predict(glm.step.backward.AIC, FGL3_test, type = "response")

Get Training Set threshold, generate ROC plots, and print contingency tables

Best_threshold_train <- ROC_threshold_plots_tables(FGL3_train$pulsarness, 
                                                   predictions_FGL3_train, 
                                                   FGL3_test$pulsarness, 
                                                   predictions_FGL3_test)

##                    real_category
## Predict_class_train Non-Pulsar Pulsar
##              AGN          1167      4
##              Pulsar         50    112
##                   real_category
## Predict_class_test Non-Pulsar Pulsar
##             AGN           492      1
##             Pulsar         29     49

Add LR Prediction Probabilities to FGL3_results data frame:

FGL3_results$LR_P <- round(predict(glm.step.backward.AIC, FGL3_tidy,
                                   type = "response"), digits = 3)

Add LR Prediction category to FGL3_results data frame:

FGL3_results$LR_Pred <- ifelse(FGL3_results$LR_P > Best_threshold_train[1],
                               "PSR", "AGN")

Clean up and Set aside required data sets

Environ <- ls()
Environ <- Environ[Environ != "FGL3_tidy"
                   & Environ != "FGL3_results"
                   & Environ != "FGL3_test" 
                   & Environ != "FGL3_train" 
                   & Environ != "predictions_FGL3_train_CV" 
                   & Environ != "Block_index" 
                   & Environ != "FGL3_train_CV"
                   & Environ != "ROC_threshold_plots_tables"
                   & Environ != "FGL3_Pulsars_train"
                   & Environ != "FGL3_Pulsars_test"]
rm(list = Environ)

Save current workspace for subsequent steps

save.image()