# Build the path to the raw data file, resolved relative to the project root
# by here::here().
data_location <- here::here("fluanalysis", "raw_data", "SympAct_Any_Pos.Rda")

# Read the serialized R object stored at that path.
# NOTE(review): the file carries an .Rda extension but is read with readRDS();
# confirm it was written with saveRDS() rather than save().
rawdata1 <- readRDS(data_location)
wrangling
Load the data
Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.
Running Code
When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:
Load Data File
Load necessary packages
library(plyr) # for data processing/cleaning
library(dplyr) #for data processing/cleaning
Warning: package 'dplyr' was built under R version 4.2.2
Attaching package: 'dplyr'
The following objects are masked from 'package:plyr':
arrange, count, desc, failwith, id, mutate, rename, summarise,
summarize
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(tidyselect)# for datawrangling
Warning: package 'tidyselect' was built under R version 4.2.2
library(tidymodels)#for modeling
Warning: package 'tidymodels' was built under R version 4.2.2
── Attaching packages ────────────────────────────────────── tidymodels 1.0.0 ──
✔ broom 1.0.1 ✔ rsample 1.1.1
✔ dials 1.1.0 ✔ tibble 3.1.8
✔ ggplot2 3.4.0 ✔ tidyr 1.2.0
✔ infer 1.0.4 ✔ tune 1.0.1
✔ modeldata 1.1.0 ✔ workflows 1.1.3
✔ parsnip 1.0.4 ✔ workflowsets 1.0.0
✔ purrr 0.3.4 ✔ yardstick 1.1.0
✔ recipes 1.0.5
Warning: package 'dials' was built under R version 4.2.2
Warning: package 'ggplot2' was built under R version 4.2.2
Warning: package 'infer' was built under R version 4.2.2
Warning: package 'modeldata' was built under R version 4.2.2
Warning: package 'parsnip' was built under R version 4.2.2
Warning: package 'recipes' was built under R version 4.2.2
Warning: package 'rsample' was built under R version 4.2.2
Warning: package 'tune' was built under R version 4.2.2
Warning: package 'workflows' was built under R version 4.2.2
Warning: package 'workflowsets' was built under R version 4.2.2
Warning: package 'yardstick' was built under R version 4.2.2
── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ dplyr::arrange() masks plyr::arrange()
✖ purrr::compact() masks plyr::compact()
✖ dplyr::count() masks plyr::count()
✖ dplyr::desc() masks plyr::desc()
✖ purrr::discard() masks scales::discard()
✖ dplyr::failwith() masks plyr::failwith()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::id() masks plyr::id()
✖ dplyr::lag() masks stats::lag()
✖ dplyr::mutate() masks plyr::mutate()
✖ dplyr::rename() masks plyr::rename()
✖ recipes::step() masks stats::step()
✖ dplyr::summarise() masks plyr::summarise()
✖ dplyr::summarize() masks plyr::summarize()
• Use tidymodels_prefer() to resolve common conflicts.
library(tidyr) #for data processing/cleaning
library(skimr) #for nice visualization of data
Warning: package 'skimr' was built under R version 4.2.2
library(here) #to set paths
here() starts at C:/Data/GitHub/MADA23/betelihemgetachew-MADA-portfolio2
Attaching package: 'here'
The following object is masked from 'package:plyr':
here
library(gmodels)#to look at the tables
Warning: package 'gmodels' was built under R version 4.2.2
library(ggplot2) #to plot histograms and charts
The `echo: false` option disables the printing of code (only output is displayed).
The `head()`, `glimpse()`, and `summary()` calls below give an overview of what the data look like, which variables they contain, and where values are missing.
head(rawdata1)
DxName1
1 Influenza like illness - Clinical Dx
2 Acute tonsillitis, unspecified
3 Influenza like illness - Clinical Dx
4 Influenza like illness - Clinical Dx
5 Acute pharyngitis, unspecified
6 Influenza like illness - Clinical Dx
DxName2 DxName3 DxName4 DxName5
1 <NA> <NA> <NA> <NA>
2 Influenza like illness - Clinical Dx <NA> <NA> <NA>
3 Acute pharyngitis, unspecified <NA> <NA> <NA>
4 Unspecified asthma with (acute) exacerbation <NA> <NA> <NA>
5 Influenza like illness - Clinical Dx <NA> <NA> <NA>
6 <NA> <NA> <NA> <NA>
Unique.Visit ActivityLevel ActivityLevelF SwollenLymphNodes ChestCongestion
1 340_17632125 10 10 Yes No
2 340_17794836 6 6 Yes Yes
3 342_17737773 2 2 Yes Yes
4 342_17806002 2 2 Yes Yes
5 342_17610918 5 5 Yes No
6 343_17543967 3 3 No No
ChillsSweats NasalCongestion CoughYN Sneeze Fatigue SubjectiveFever Headache
1 No No Yes No Yes Yes Yes
2 No Yes Yes No Yes Yes Yes
3 Yes Yes No Yes Yes Yes Yes
4 Yes Yes Yes Yes Yes Yes Yes
5 Yes No No No Yes Yes Yes
6 Yes No Yes Yes Yes Yes Yes
Weakness WeaknessYN CoughIntensity CoughYN2 Myalgia MyalgiaYN RunnyNose
1 Mild Yes Severe Yes Mild Yes No
2 Severe Yes Severe Yes Severe Yes No
3 Severe Yes Mild Yes Severe Yes Yes
4 Severe Yes Moderate Yes Severe Yes Yes
5 Moderate Yes None No Mild Yes No
6 Moderate Yes Moderate Yes Moderate Yes No
AbPain ChestPain Diarrhea EyePn Insomnia ItchyEye Nausea EarPn Hearing
1 No No No No No No No No No
2 No No No No No No No Yes Yes
3 Yes Yes No No Yes No Yes No No
4 No No No No Yes No Yes Yes No
5 No No No Yes Yes No Yes No No
6 No Yes Yes No No No Yes No No
Pharyngitis Breathless ToothPn Vision Vomit Wheeze BodyTemp
1 Yes No No No No No 98.3
2 Yes No No No No No 100.4
3 Yes Yes Yes No No No 100.8
4 Yes No No No No Yes 98.8
5 Yes No No No No No 100.5
6 Yes Yes No No No Yes 98.4
RapidFluA RapidFluB
1 Presumptive Negative For Influenza A Presumptive Negative For Influenza B
2 <NA> <NA>
3 Presumptive Negative For Influenza A Presumptive Negative For Influenza B
4 Presumptive Negative For Influenza A Presumptive Negative For Influenza B
5 <NA> <NA>
6 <NA> <NA>
PCRFluA PCRFluB TransScore1 TransScore1F TransScore2 TransScore2F TransScore3
1 <NA> <NA> 1 1 1 1 1
2 <NA> <NA> 3 3 2 2 1
3 <NA> <NA> 4 4 3 3 2
4 <NA> <NA> 5 5 4 4 3
5 <NA> <NA> 0 0 0 0 0
6 <NA> <NA> 2 2 2 2 2
TransScore3F TransScore4 TransScore4F ImpactScore ImpactScore2 ImpactScore3
1 1 0 0 7 6 3
2 1 2 2 8 7 4
3 2 4 4 14 13 9
4 3 4 4 12 11 7
5 0 0 0 11 10 6
6 2 1 1 12 11 7
ImpactScoreF ImpactScore2F ImpactScore3F ImpactScoreFD TotalSymp1 TotalSymp1F
1 7 6 3 7 8 8
2 8 7 4 8 11 11
3 14 13 9 14 18 18
4 12 11 7 12 17 17
5 11 10 6 11 11 11
6 12 11 7 12 14 14
TotalSymp2 TotalSymp3
1 8 8
2 10 9
3 17 16
4 16 15
5 11 11
6 14 14
glimpse(rawdata1)
Rows: 735
Columns: 63
$ DxName1 <fct> "Influenza like illness - Clinical Dx", "Acute tonsi…
$ DxName2 <fct> NA, "Influenza like illness - Clinical Dx", "Acute p…
$ DxName3 <fct> NA, NA, NA, NA, NA, NA, NA, NA, "Fever, unspecified"…
$ DxName4 <fct> NA, NA, NA, NA, NA, NA, NA, NA, "Other fatigue", NA,…
$ DxName5 <fct> NA, NA, NA, NA, NA, NA, NA, NA, "Headache", NA, NA, …
$ Unique.Visit <chr> "340_17632125", "340_17794836", "342_17737773", "342…
$ ActivityLevel <int> 10, 6, 2, 2, 5, 3, 4, 0, 0, 5, 9, 1, 3, 6, 5, 2, 2, …
$ ActivityLevelF <fct> 10, 6, 2, 2, 5, 3, 4, 0, 0, 5, 9, 1, 3, 6, 5, 2, 2, …
$ SwollenLymphNodes <fct> Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, No, Yes, Y…
$ ChestCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y…
$ ChillsSweats <fct> No, No, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, …
$ NasalCongestion <fct> No, Yes, Yes, Yes, No, No, No, Yes, Yes, Yes, Yes, Y…
$ CoughYN <fct> Yes, Yes, No, Yes, No, Yes, Yes, Yes, Yes, Yes, No, …
$ Sneeze <fct> No, No, Yes, Yes, No, Yes, No, Yes, No, No, No, No, …
$ Fatigue <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye…
$ SubjectiveFever <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, Yes…
$ Headache <fct> Yes, Yes, Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes…
$ Weakness <fct> Mild, Severe, Severe, Severe, Moderate, Moderate, Mi…
$ WeaknessYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye…
$ CoughIntensity <fct> Severe, Severe, Mild, Moderate, None, Moderate, Seve…
$ CoughYN2 <fct> Yes, Yes, Yes, Yes, No, Yes, Yes, Yes, Yes, Yes, Yes…
$ Myalgia <fct> Mild, Severe, Severe, Severe, Mild, Moderate, Mild, …
$ MyalgiaYN <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Ye…
$ RunnyNose <fct> No, No, Yes, Yes, No, No, Yes, Yes, Yes, Yes, No, No…
$ AbPain <fct> No, No, Yes, No, No, No, No, No, No, No, Yes, Yes, N…
$ ChestPain <fct> No, No, Yes, No, No, Yes, Yes, No, No, No, No, Yes, …
$ Diarrhea <fct> No, No, No, No, No, Yes, No, No, No, No, No, No, No,…
$ EyePn <fct> No, No, No, No, Yes, No, No, No, No, No, Yes, No, Ye…
$ Insomnia <fct> No, No, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Yes, Y…
$ ItchyEye <fct> No, No, No, No, No, No, No, No, No, No, No, No, Yes,…
$ Nausea <fct> No, No, Yes, Yes, Yes, Yes, No, No, Yes, Yes, Yes, Y…
$ EarPn <fct> No, Yes, No, Yes, No, No, No, No, No, No, No, Yes, Y…
$ Hearing <fct> No, Yes, No, No, No, No, No, No, No, No, No, No, No,…
$ Pharyngitis <fct> Yes, Yes, Yes, Yes, Yes, Yes, Yes, No, No, No, Yes, …
$ Breathless <fct> No, No, Yes, No, No, Yes, No, No, No, Yes, No, Yes, …
$ ToothPn <fct> No, No, Yes, No, No, No, No, No, Yes, No, No, Yes, N…
$ Vision <fct> No, No, No, No, No, No, No, No, No, No, No, No, No, …
$ Vomit <fct> No, No, No, No, No, No, Yes, No, No, No, Yes, Yes, N…
$ Wheeze <fct> No, No, No, Yes, No, Yes, No, No, No, No, No, Yes, N…
$ BodyTemp <dbl> 98.3, 100.4, 100.8, 98.8, 100.5, 98.4, 102.5, 98.4, …
$ RapidFluA <fct> Presumptive Negative For Influenza A, NA, Presumptiv…
$ RapidFluB <fct> Presumptive Negative For Influenza B, NA, Presumptiv…
$ PCRFluA <fct> NA, NA, NA, NA, NA, NA, Influenza A Not Detected, N…
$ PCRFluB <fct> NA, NA, NA, NA, NA, NA, Influenza B Not Detected, N…
$ TransScore1 <dbl> 1, 3, 4, 5, 0, 2, 2, 5, 4, 4, 2, 3, 2, 5, 3, 5, 1, 5…
$ TransScore1F <fct> 1, 3, 4, 5, 0, 2, 2, 5, 4, 4, 2, 3, 2, 5, 3, 5, 1, 5…
$ TransScore2 <dbl> 1, 2, 3, 4, 0, 2, 2, 4, 3, 3, 1, 2, 2, 4, 2, 4, 1, 4…
$ TransScore2F <fct> 1, 2, 3, 4, 0, 2, 2, 4, 3, 3, 1, 2, 2, 4, 2, 4, 1, 4…
$ TransScore3 <dbl> 1, 1, 2, 3, 0, 2, 2, 3, 2, 2, 0, 1, 1, 3, 1, 3, 1, 3…
$ TransScore3F <fct> 1, 1, 2, 3, 0, 2, 2, 3, 2, 2, 0, 1, 1, 3, 1, 3, 1, 3…
$ TransScore4 <dbl> 0, 2, 4, 4, 0, 1, 1, 4, 3, 3, 2, 2, 2, 4, 3, 4, 0, 4…
$ TransScore4F <fct> 0, 2, 4, 4, 0, 1, 1, 4, 3, 3, 2, 2, 2, 4, 3, 4, 0, 4…
$ ImpactScore <int> 7, 8, 14, 12, 11, 12, 8, 7, 10, 7, 13, 17, 11, 13, 9…
$ ImpactScore2 <int> 6, 7, 13, 11, 10, 11, 7, 6, 9, 6, 12, 16, 10, 12, 8,…
$ ImpactScore3 <int> 3, 4, 9, 7, 6, 7, 3, 3, 6, 4, 7, 11, 6, 8, 4, 4, 5, …
$ ImpactScoreF <fct> 7, 8, 14, 12, 11, 12, 8, 7, 10, 7, 13, 17, 11, 13, 9…
$ ImpactScore2F <fct> 6, 7, 13, 11, 10, 11, 7, 6, 9, 6, 12, 16, 10, 12, 8,…
$ ImpactScore3F <fct> 3, 4, 9, 7, 6, 7, 3, 3, 6, 4, 7, 11, 6, 8, 4, 4, 5, …
$ ImpactScoreFD <fct> 7, 8, 14, 12, 11, 12, 8, 7, 10, 7, 13, 17, 11, 13, 9…
$ TotalSymp1 <dbl> 8, 11, 18, 17, 11, 14, 10, 12, 14, 11, 15, 20, 13, 1…
$ TotalSymp1F <fct> 8, 11, 18, 17, 11, 14, 10, 12, 14, 11, 15, 20, 13, 1…
$ TotalSymp2 <dbl> 8, 10, 17, 16, 11, 14, 10, 11, 13, 10, 14, 19, 13, 1…
$ TotalSymp3 <dbl> 8, 9, 16, 15, 11, 14, 10, 10, 12, 9, 13, 18, 12, 16,…
summary(rawdata1)
DxName1
Influenza like illness - Clinical Dx :328
Influenza - Virus Identified :131
Fever, unspecified :101
Cough : 66
Acute pharyngitis, unspecified : 50
Acute upper respiratory infection, unspecified: 22
(Other) : 37
DxName2
Influenza - Virus Identified :126
Influenza like illness - Clinical Dx:115
Fever, unspecified : 45
Cough : 41
Acute pharyngitis, unspecified : 31
(Other) : 97
NA's :280
DxName3
Influenza - Virus Identified : 23
Influenza like illness - Clinical Dx: 14
Cough : 10
Fever, unspecified : 6
Acute pharyngitis, unspecified : 4
(Other) : 52
NA's :626
DxName4
Influenza - Virus Identified : 3
Acute upper respiratory infection, unspecified: 2
Encounter for immunization : 2
Influenza like illness - Clinical Dx : 2
Acute pharyngitis, unspecified : 1
(Other) : 9
NA's :716
DxName5
Acute suppurative otitis media without spontaneous rupture of ear drum, right ear : 0
Encounter for immunization : 0
Headache : 1
Other infectious mononucleosis without complication : 0
Strain of other flexor muscle, fascia and tendon at forearm level, right arm, subsequent encounter: 0
NA's :734
Unique.Visit ActivityLevel ActivityLevelF SwollenLymphNodes
Length:735 Min. : 0.000 3 :125 No :421
Class :character 1st Qu.: 3.000 5 : 97 Yes:314
Mode :character Median : 4.000 4 : 95
Mean : 4.463 2 : 80
3rd Qu.: 6.000 7 : 68
Max. :10.000 6 : 66
(Other):204
ChestCongestion ChillsSweats NasalCongestion CoughYN Sneeze Fatigue
No :326 No :131 No :170 No : 75 No :340 No : 64
Yes:409 Yes:604 Yes:565 Yes:660 Yes:395 Yes:671
SubjectiveFever Headache Weakness WeaknessYN CoughIntensity CoughYN2
No :230 No :115 None : 49 No : 49 None : 47 No : 47
Yes:505 Yes:620 Mild :224 Yes:686 Mild :156 Yes:688
Moderate:341 Moderate:360
Severe :121 Severe :172
Myalgia MyalgiaYN RunnyNose AbPain ChestPain Diarrhea EyePn
None : 79 No : 79 No :211 No :642 No :501 No :636 No :622
Mild :214 Yes:656 Yes:524 Yes: 93 Yes:234 Yes: 99 Yes:113
Moderate:327
Severe :115
Insomnia ItchyEye Nausea EarPn Hearing Pharyngitis Breathless
No :316 No :553 No :477 No :573 No :705 No :121 No :438
Yes:419 Yes:182 Yes:258 Yes:162 Yes: 30 Yes:614 Yes:297
ToothPn Vision Vomit Wheeze BodyTemp
No :569 No :716 No :656 No :514 Min. : 97.20
Yes:166 Yes: 19 Yes: 79 Yes:221 1st Qu.: 98.20
Median : 98.50
Mean : 98.94
3rd Qu.: 99.30
Max. :103.10
NA's :5
RapidFluA
Positive for Influenza A :169
Presumptive Negative For Influenza A:159
NA's :407
RapidFluB PCRFluA
Positive for Influenza B : 26 Influenza A Detected :120
Presumptive Negative For Influenza B:302 Influenza A Not Detected: 33
NA's :407 Assay Invalid : 0
Indeterminate : 1
NA's :581
PCRFluB TransScore1 TransScore1F TransScore2
Influenza B Detected : 9 Min. :0.000 0: 13 Min. :0.000
Influenza B Not Detected:145 1st Qu.:3.000 1: 53 1st Qu.:2.000
Assay Invalid : 0 Median :4.000 2:107 Median :3.000
NA's :581 Mean :3.473 3:157 Mean :2.917
3rd Qu.:5.000 4:210 3rd Qu.:4.000
Max. :5.000 5:195 Max. :4.000
TransScore2F TransScore3 TransScore3F TransScore4 TransScore4F
0: 13 Min. :0.000 0: 24 Min. :0.000 0: 50
1: 89 1st Qu.:1.000 1:166 1st Qu.:2.000 1:103
2:138 Median :2.000 2:222 Median :3.000 2:154
3:201 Mean :2.148 3:323 Mean :2.576 3:230
4:294 3rd Qu.:3.000 3rd Qu.:4.000 4:198
Max. :3.000 Max. :4.000
ImpactScore ImpactScore2 ImpactScore3 ImpactScoreF ImpactScore2F
Min. : 2.000 Min. : 2.000 Min. : 0.00 8 :105 7 :107
1st Qu.: 8.000 1st Qu.: 7.000 1st Qu.: 3.00 9 :104 8 :102
Median : 9.000 Median : 8.000 Median : 5.00 10 : 88 9 : 90
Mean : 9.514 Mean : 8.581 Mean : 5.06 7 : 84 10 : 86
3rd Qu.:11.000 3rd Qu.:10.000 3rd Qu.: 7.00 11 : 82 6 : 85
Max. :18.000 Max. :17.000 Max. :13.00 12 : 58 11 : 59
(Other):214 (Other):206
ImpactScore3F ImpactScoreFD TotalSymp1 TotalSymp1F TotalSymp2
4 :134 8 :105 Min. : 5.00 12 : 86 Min. : 4.00
5 :112 9 :104 1st Qu.:11.00 13 : 84 1st Qu.:10.00
3 :108 10 : 88 Median :13.00 14 : 80 Median :12.00
6 :102 7 : 84 Mean :12.99 11 : 72 Mean :12.43
7 : 66 11 : 82 3rd Qu.:15.00 10 : 62 3rd Qu.:15.00
2 : 64 12 : 58 Max. :23.00 15 : 61 Max. :22.00
(Other):149 (Other):214 (Other):290
TotalSymp3
Min. : 3.00
1st Qu.:10.00
Median :12.00
Mean :11.66
3rd Qu.:14.00
Max. :21.00
# Coerce the loaded object to a plain data frame before wrangling.
rawdata1 <- as.data.frame(rawdata1)

# Drop every column whose name contains one of the listed substrings, plus the
# visit identifier. contains() matches case-insensitively by default, which is
# why the pattern 'Dxname' removes the DxName1..DxName5 columns.
rawdata2 <- rawdata1 %>%
  select(
    -contains(c('Score', 'Total', 'FluA', 'FluB', 'Dxname', 'Activity')),
    -c('Unique.Visit')
  )

# Removing NAs: keep only the rows with no missing value in any remaining
# column.
processed_data <- rawdata2[complete.cases(rawdata2), ]

# Write the cleaned data to the processed-data folder, again resolved relative
# to the project root.
save_data_location <- here::here("fluanalysis", "Processed_data", "Processed_data.RDS")
saveRDS(processed_data, file = save_data_location)