Single Iteration Data Analysis

Note:

The data files in this example are named with a .demo.txt suffix (for example, admissions.demo.txt). When analyzing your own output, omit the .demo part of the file name.

Admissions Data Overview

The admissions data show every admission to the facility over the entire run, as well as whether or not the admitted patient imported the disease.

Warning:

After some iterations of the simulation, the last row of data in this file is not complete. If you see errors when you read in the data, check the .txt file and, if necessary, delete the last record.

# Load the admissions log from the comma-delimited demo file
# (admissions.demo.txt); text columns are kept as character, not factor.
admissions_df <- read.table(
  "../data/admissions.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Show structure 
cat('Admissions data:')
Admissions data:
head(admissions_df)
   time patientid importation
1 -1.00        75       false
2  0.00        76       false
3  0.05        77       false
4  0.06        78       false
5  0.20        79       false
6  1.46        80       false

Clinical Detection Data Overview

This dataset shows the time and patient ID of each detection via the clinical (symptom-based) route. DetectionCount is a check column: in some edge cases a patient may be detected, treated, recover, and be detected again, in which case the detection count for that patient would be >1.

# Load the clinical detection events from the comma-delimited demo file
# (clinicalDetection.demo.txt).
clinical_detections_df <- read.table(
  "../data/clinicalDetection.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

#show sample of rows:
head(clinical_detections_df)
   Time DetectedPatientID DetectionCount
1 26.83               151              1
2 30.60               107              1
3 30.89               137              1
4 31.02               111              1
5 33.95               108              1
6 44.13               198              1
#show descriptive stats
summary(clinical_detections_df)
      Time         DetectedPatientID DetectionCount
 Min.   :  26.83   Min.   :  107     Min.   :1     
 1st Qu.:1388.93   1st Qu.: 4909     1st Qu.:1     
 Median :2651.58   Median : 9331     Median :1     
 Mean   :2695.46   Mean   : 9516     Mean   :1     
 3rd Qu.:4014.90   3rd Qu.:14249     3rd Qu.:1     
 Max.   :5467.43   Max.   :19151     Max.   :1     

Daily Population Counts

The output file daily_population_stats.txt shows the total number of patients in the facility on a given day, and the number of those who are colonized, detected, and isolated. By default, it starts at the end of the burn-in period. There is another sanity check embedded in this file: it should always be the case that detected == isolated, because we isolate detected patients and we don’t isolate anybody else.

# Load the daily population counts from the comma-delimited demo file
# (daily_population_stats.demo.txt).
daily_pop_df <- read.table(
  "../data/daily_population_stats.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Show structure and summary
cat('Daily population stats:')
Daily population stats:
head(daily_pop_df)
  Time total_population colonized detected isolated
1 3651               96        44        4        4
2 3651               96        44        4        4
3 3652               93        45        6        6
4 3652               93        45        6        6
5 3653               90        43        6        6
6 3653               90        43        6        6
cat('\nSummary statistics:')

Summary statistics:
summary(daily_pop_df)
      Time      total_population   colonized        detected     
 Min.   :3651   Min.   : 67.00   Min.   :20.00   Min.   : 0.000  
 1st Qu.:4107   1st Qu.: 87.00   1st Qu.:33.00   1st Qu.: 3.000  
 Median :4563   Median : 92.00   Median :38.00   Median : 5.000  
 Mean   :4563   Mean   : 92.63   Mean   :37.67   Mean   : 5.101  
 3rd Qu.:5019   3rd Qu.: 99.00   3rd Qu.:42.00   3rd Qu.: 7.000  
 Max.   :5475   Max.   :128.00   Max.   :56.00   Max.   :11.000  
    isolated     
 Min.   : 0.000  
 1st Qu.: 3.000  
 Median : 5.000  
 Mean   : 5.101  
 3rd Qu.: 7.000  
 Max.   :11.000  

Daily Population Time Series

# Time-series plot of total population, colonized and detected counts.
library(ggplot2)

# Expects daily_pop_df to provide the columns Time, total_population,
# colonized and detected. Each series is drawn as its own line layer; the
# constant string mapped to colour becomes that line's legend entry.
ggplot(data = daily_pop_df, mapping = aes(x = Time)) +
  geom_line(mapping = aes(y = total_population, colour = "Population")) +
  geom_line(mapping = aes(y = colonized, colour = "Colonized")) +
  geom_line(mapping = aes(y = detected, colour = "Detected")) +
  labs(
    title = "Daily Population Time Series",
    x = "Time",
    y = "Count",
    colour = "Legend"
  ) +
  theme_minimal()

Distribution of Daily Population Values

These are the distributions of the daily samples of the simulation’s total population and of the colonized and detected counts.

# Distribution of the daily total population, in bins one unit wide.
ggplot(data = daily_pop_df, mapping = aes(x = total_population)) +
  geom_histogram(binwidth = 1, fill = "skyblue", colour = "black") +
  labs(
    title = "Histogram of Total Population",
    x = "total_population",
    y = "Frequency"
  ) +
  theme_minimal()

# Distribution of the daily colonized count, in bins one unit wide.
ggplot(data = daily_pop_df, mapping = aes(x = colonized)) +
  geom_histogram(binwidth = 1, fill = "orange", colour = "black") +
  labs(
    title = "Histogram of Colonized",
    x = "colonized",
    y = "Frequency"
  ) +
  theme_minimal()

# Distribution of the daily detected count, in bins one unit wide.
ggplot(data = daily_pop_df, mapping = aes(x = detected)) +
  geom_histogram(binwidth = 1, fill = "purple", colour = "black") +
  labs(
    title = "Histogram of Detected",
    x = "detected",
    y = "Frequency"
  ) +
  theme_minimal()

Decolonization Events

These represent patients whose colonization with the organism has ceased.

# Load the decolonization events from the comma-delimited demo file
# (decolonization.demo.txt).
decolonization_df <- read.table(
  "../data/decolonization.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Show structure and summary
cat('Decolonization events:')
Decolonization events:
head(decolonization_df)
   time decolonized_patient_id
1 23.89                    159
2 28.70                     96
3 30.87                     75
4 39.25                    114
5 48.79                    183
6 57.77                    194
cat('\nSummary statistics:')

Summary statistics:
summary(decolonization_df)
      time         decolonized_patient_id
 Min.   :  23.89   Min.   :   75         
 1st Qu.:1475.74   1st Qu.: 5145         
 Median :2817.95   Median : 9883         
 Mean   :2775.86   Mean   : 9751         
 3rd Qu.:4091.78   3rd Qu.:14463         
 Max.   :5470.71   Max.   :19207         

Detection Verification Events

# Load the detection verification events from the comma-delimited demo file
# (detection_verification.demo.txt). Unlike the other reads, text columns are
# converted to factors here so that summary() tabulates them by level.
detection_verification_df <- read.table(
  "../data/detection_verification.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Show structure and summary
cat('Detection verification events:')
Detection verification events:
head(detection_verification_df)
   time patient_id   source colonized detection_count
1 26.83        151 CLINICAL      true               1
2 30.60        107 CLINICAL      true               1
3 30.89        137 CLINICAL      true               1
4 31.02        111 CLINICAL      true               1
5 33.95        108 CLINICAL      true               1
6 44.13        198 CLINICAL      true               1
cat('\nSummary statistics:')

Summary statistics:
summary(detection_verification_df)
      time           patient_id         source     colonized   detection_count
 Min.   :  26.83   Min.   :  107   CLINICAL:1541   true:1541   Min.   :1      
 1st Qu.:1388.93   1st Qu.: 4909                               1st Qu.:1      
 Median :2651.58   Median : 9331                               Median :1      
 Mean   :2695.46   Mean   : 9516                               Mean   :1      
 3rd Qu.:4014.90   3rd Qu.:14249                               3rd Qu.:1      
 Max.   :5467.43   Max.   :19151                               Max.   :1      

Surveillance Events

This is every surveillance test run after the end of the burn-in period.

# Read surveillance.demo.txt (comma-delimited), if there is anything to read.
#
# The file can be empty when no surveillance tests were recorded, and it may
# not exist at all. file.size() returns NA for a missing file, and `if (NA > 0)`
# is an error, so existence is checked first and the size comparison is wrapped
# in isTRUE().
surveillance_file <- "../data/surveillance.demo.txt"
has_surveillance <- file.exists(surveillance_file) &&
  isTRUE(file.size(surveillance_file) > 0)

if (has_surveillance) {
  surveillance_df <- read.table(
    surveillance_file,
    header = TRUE,
    sep = ",",
    stringsAsFactors = FALSE
  )

  # Only the value of the last expression in a braced block is auto-printed,
  # so print each table explicitly; otherwise the head() output is lost.
  cat("Surveillance events:\n")
  print(head(surveillance_df))
  cat("\nSummary statistics:\n")
  print(summary(surveillance_df))
} else {
  cat("No surveillance events in this dataset.\n")
}
No surveillance events in this dataset.

Patient-to-Patient transmission

# Load the patient-to-patient transmission events from the comma-delimited
# demo file (transmissions.demo.txt).
# NOTE: the last record of a simulation output file may be incomplete. When it
# is, read.table() warns that the number of items read is not a multiple of
# the number of columns, and the partial row shows up as NA values in
# summary(). Delete the partial last record from the .txt file, or drop it
# afterwards with transmission_df[complete.cases(transmission_df), ].
transmission_df <- read.table(
  "../data/transmissions.demo.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
: number of items read is not a multiple of the number of columns
# Show structure and summary
cat('Transmission events:')
Transmission events:
head(transmission_df)
   time from_patientID to_patientID
1  6.96            100           92
2  7.71             99           75
3  9.26            110          108
4  9.77             90           89
5 10.34            100          107
6 12.39             92           76
cat('\nSummary statistics:')

Summary statistics:
summary(transmission_df)
      time         from_patientID   to_patientID  
 Min.   :   6.96   Min.   :   90   Min.   :   75  
 1st Qu.:1310.46   1st Qu.: 4580   1st Qu.: 4641  
 Median :2582.28   Median : 9087   Median : 9117  
 Mean   :2539.28   Mean   : 8972   Mean   : 8992  
 3rd Qu.:3792.70   3rd Qu.:13426   3rd Qu.:13456  
 Max.   :5069.72   Max.   :17848   Max.   :17878  
                   NA's   :1       NA's   :1