Single Iteration Data Analysis

Note:

The data in this example are stored in files whose names end in .demo.txt (for example, admissions.demo.txt). For your own analysis, omit the .demo part of each file name.

Admissions Data Overview

Admissions data shows every admission to the facility over the entire run, as well as whether or not the admitted patient imported the disease.

Warning:

After some iterations of the simulation, the last row of data in this file is not complete. If you see errors when you read in the data, check the .txt file and, if necessary, delete the last record.

# Load the admissions demo file (admissions.demo.txt, comma-delimited)

# Keep text columns as character vectors rather than factors
admissions_df <- read.table(
  file = "../data/admissions.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Show structure 
cat('Admissions data:')
Admissions data:
head(admissions_df)
   time patientid importation
1 -1.00        75       false
2  0.00        76       false
3  0.15        77       false
4  0.25        78       false
5  0.70        79       false
6  0.96        80       false

Clinical Detection Data Overview

This dataset shows the time and patient ID of each detection via the clinical (symptoms-based) route. DetectionCount is a check column. In some edge cases a patient may be detected, treated, recover, and then be detected again, in which case the detection count for that patient would be >1.

# Load the clinical detection demo file (comma-delimited, with a header row)

clinical_detections_df <- read.table(
  file = "../data/clinicalDetection.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

#show sample of rows:
head(clinical_detections_df)
   Time DetectedPatientID DetectionCount
1  2.66                83              1
2 24.98               107              1
3 26.09               131              1
4 28.37               116              1
5 29.24               139              1
6 33.95                96              1
#show descriptive stats
summary(clinical_detections_df)
      Time         DetectedPatientID DetectionCount
 Min.   :   2.66   Min.   :   83     Min.   :1     
 1st Qu.:1038.26   1st Qu.: 3795     1st Qu.:1     
 Median :2262.56   Median : 8179     Median :1     
 Mean   :2363.93   Mean   : 8462     Mean   :1     
 3rd Qu.:3457.09   3rd Qu.:12349     3rd Qu.:1     
 Max.   :5464.87   Max.   :19628     Max.   :1     

Daily Population Counts

The output file daily_population_stats.txt shows the total number of patients in the facility on a given day, and the number of those who are colonized, detected, and isolated. By default, it starts at the end of the burn-in period. There is another sanity check embedded in this file: it should always be the case that detected == isolated. We isolate detected patients, and we don’t isolate anybody else.

# Load the daily population demo file (comma-delimited, with a header row)
daily_pop_df <- read.table(
  file = "../data/daily_population_stats.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Show structure and summary
cat('Daily population stats:')
Daily population stats:
head(daily_pop_df)
  time total_population colonized detected isolated
1 3651               99        44        7        7
2 3651               99        44        7        7
3 3652              100        43        7        7
4 3652              100        43        7        7
5 3653              102        45        7        7
6 3653              102        45        7        7
cat('\nSummary statistics:')

Summary statistics:
summary(daily_pop_df)
      time      total_population   colonized        detected    
 Min.   :3651   Min.   : 71.0    Min.   :15.00   Min.   : 4.00  
 1st Qu.:4036   1st Qu.: 90.0    1st Qu.:28.00   1st Qu.:14.00  
 Median :4422   Median : 96.0    Median :32.00   Median :18.00  
 Mean   :4422   Mean   : 96.2    Mean   :32.68   Mean   :17.64  
 3rd Qu.:4807   3rd Qu.:102.0    3rd Qu.:38.00   3rd Qu.:21.00  
 Max.   :5193   Max.   :126.0    Max.   :54.00   Max.   :35.00  
    isolated    
 Min.   : 4.00  
 1st Qu.:14.00  
 Median :18.00  
 Mean   :17.64  
 3rd Qu.:21.00  
 Max.   :35.00  

Daily Population Time Series

# Line chart of the daily total, colonized, and detected counts over time
library(ggplot2)

# Uses the time, total_population, colonized, and detected columns;
# the constant color labels become the legend entries
ggplot(data = daily_pop_df, mapping = aes(x = time)) +
  geom_line(mapping = aes(y = total_population, color = "Population")) +
  geom_line(mapping = aes(y = colonized, color = "Colonized")) +
  geom_line(mapping = aes(y = detected, color = "Detected")) +
  labs(
    title = "Daily Population Time Series",
    x = "Time",
    y = "Count",
    color = "Legend"
  ) +
  theme_minimal()

Distribution of Daily Population Values

These are the distributions of the daily samples of the simulation's total population, colonized count, and detected count.

# Distribution of the daily total population (one bin per unit count)
ggplot(data = daily_pop_df, mapping = aes(x = total_population)) +
  geom_histogram(binwidth = 1, fill = "skyblue", color = "black") +
  labs(
    title = "Histogram of Total Population",
    x = "total_population",
    y = "Frequency"
  ) +
  theme_minimal()

# Distribution of the daily colonized count
ggplot(data = daily_pop_df, mapping = aes(x = colonized)) +
  geom_histogram(binwidth = 1, fill = "orange", color = "black") +
  labs(
    title = "Histogram of Colonized",
    x = "colonized",
    y = "Frequency"
  ) +
  theme_minimal()

# Distribution of the daily detected count
ggplot(data = daily_pop_df, mapping = aes(x = detected)) +
  geom_histogram(binwidth = 1, fill = "purple", color = "black") +
  labs(
    title = "Histogram of Detected",
    x = "detected",
    y = "Frequency"
  ) +
  theme_minimal()

Decolonization Events

These represent patients whose colonization with the organism has ceased.

# Load the decolonization demo file (comma-delimited, with a header row)
decolonization_df <- read.table(
  file = "../data/decolonization.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Show structure and summary
cat('Decolonization events:')
Decolonization events:
head(decolonization_df)
   time decolonized_patient_id
1 14.72                     78
2 23.87                    112
3 28.40                    124
4 30.08                    110
5 30.40                    125
6 45.75                    100
cat('\nSummary statistics:')

Summary statistics:
summary(decolonization_df)
      time         decolonized_patient_id
 Min.   :  14.72   Min.   :   78         
 1st Qu.:1307.01   1st Qu.: 4530         
 Median :2695.80   Median : 9473         
 Mean   :2654.49   Mean   : 9431         
 3rd Qu.:3913.38   3rd Qu.:13925         
 Max.   :5465.38   Max.   :19624         

Detection Verification Events

# Load the detection verification demo file (comma-delimited, with a header row).
# Text columns are read as factors here (stringsAsFactors = TRUE), so
# summary() reports per-level counts for them.
detection_verification_df <- read.table(
  file = "../data/detection_verification.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Show structure and summary
cat('Detection verification events:')
Detection verification events:
head(detection_verification_df)
   time patient_id   source colonized detection_count
1 10.12         79 CLINICAL      true               1
2 10.99         88 CLINICAL      true               1
3 17.79        130 CLINICAL      true               1
4 26.91        140 CLINICAL      true               1
5 27.87        118 CLINICAL      true               1
6 33.23        137 CLINICAL      true               1
cat('\nSummary statistics:')

Summary statistics:
summary(detection_verification_df)
      time           patient_id             source     colonized  
 Min.   :  10.12   Min.   :   79   CLINICAL    :1266   true:2913  
 1st Qu.:2673.47   1st Qu.: 9480   SURVEILLANCE:1647              
 Median :4056.75   Median :14263                                  
 Mean   :3622.80   Mean   :12767                                  
 3rd Qu.:4791.95   3rd Qu.:16841                                  
 Max.   :5474.36   Max.   :19347                                  
 detection_count 
 Min.   :0.0000  
 1st Qu.:0.0000  
 Median :0.0000  
 Mean   :0.4346  
 3rd Qu.:1.0000  
 Max.   :1.0000  

Surveillance Events

This is every surveillance test run after the end of the burn-in period.

# Load the surveillance demo file (comma-delimited, with a header row)
surveillance_df <- read.table(
  file = "../data/surveillance.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Show structure and summary
cat('Surveillance events:')
Surveillance events:
head(surveillance_df)
     Time Patient Colonized Detected
1 3664.08   13233      true     true
2 3664.24   13234     false    false
3 3664.61   13235      true     true
4 3664.68   13236     false    false
5 3664.94   13237      true     true
6 3664.94   13238     false    false
cat('\nSummary statistics:')

Summary statistics:
summary(surveillance_df)
      Time         Patient       Colonized           Detected        
 Min.   :3664   Min.   :13233   Length:7106        Length:7106       
 1st Qu.:4081   1st Qu.:14701   Class :character   Class :character  
 Median :4481   Median :16148   Mode  :character   Mode  :character  
 Mean   :4491   Mean   :16163                                        
 3rd Qu.:4901   3rd Qu.:17614                                        
 Max.   :5335   Max.   :19150                                        

Patient-to-Patient transmission

# Load the transmissions demo file (transmissions.demo.txt, comma-delimited).
# read.table() warns if the final record in the file is incomplete.
transmission_df <- read.table(
  file = "../data/transmissions.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
: number of items read is not a multiple of the number of columns
# Show structure and summary
cat('Transmission events:')
Transmission events:
head(transmission_df)
   time from_patientID to_patientID
1  6.45            104           79
2  7.94            110          108
3  8.71            100           76
4  9.12             79           78
5 12.64             80          121
6 13.07            112          109
cat('\nSummary statistics:')

Summary statistics:
summary(transmission_df)
      time         from_patientID   to_patientID  
 Min.   :   6.45   Min.   :   75   Min.   :   75  
 1st Qu.:1140.95   1st Qu.: 4021   1st Qu.: 4028  
 Median :2273.90   Median : 8083   Median : 8114  
 Mean   :2311.97   Mean   : 8157   Mean   : 8173  
 3rd Qu.:3452.97   3rd Qu.:12165   3rd Qu.:12152  
 Max.   :4881.69   Max.   :17098   Max.   :17166  
                                   NA's   :1