Describing & Interpreting Data

library(xlsx)
## Loading required package: rJava
## Loading required package: xlsxjars
library(ggplot2)
# Question 1
# Import the data in the assignment from Excel.
hospital.data <- read.xlsx("homework1data.xlsx", 1) ; hospital.data
##   X7 X3 X5 X3.1 X1 X5.1 X10 X3.2 X4 X4.1
## 1  7  5  8    3  4    1  15    4  5    8
## 2  5  3  2    3  5    9   4    5  6    9
## 3  5  3  6    3  2    6   4    5  5    4
## 4  5  8  4    6 13    4   6    3  2    3
## 5  2  4  6    6  6    8   6    3  4    4
## 6  5 10  4    6  3    9   3    9  4    7
## 7 10 14  4    6  5   10   4    4  9    4
## 8  4  3  6    8  5    7   6    1  3   12
## 9 11  5  2    1  4    4   5    6  4    2
# Flatten the hospital.data list into a vector.
hospital.data <- unlist(hospital.data) ; hospital.data
##   X71   X72   X73   X74   X75   X76   X77   X78   X79   X31   X32   X33 
##     7     5     5     5     2     5    10     4    11     5     3     3 
##   X34   X35   X36   X37   X38   X39   X51   X52   X53   X54   X55   X56 
##     8     4    10    14     3     5     8     2     6     4     6     4 
##   X57   X58   X59 X3.11 X3.12 X3.13 X3.14 X3.15 X3.16 X3.17 X3.18 X3.19 
##     4     6     2     3     3     3     6     6     6     6     8     1 
##   X11   X12   X13   X14   X15   X16   X17   X18   X19 X5.11 X5.12 X5.13 
##     4     5     2    13     6     3     5     5     4     1     9     6 
## X5.14 X5.15 X5.16 X5.17 X5.18 X5.19  X101  X102  X103  X104  X105  X106 
##     4     8     9    10     7     4    15     4     4     6     6     3 
##  X107  X108  X109 X3.21 X3.22 X3.23 X3.24 X3.25 X3.26 X3.27 X3.28 X3.29 
##     4     6     5     4     5     5     3     3     9     4     1     6 
##   X41   X42   X43   X44   X45   X46   X47   X48   X49 X4.11 X4.12 X4.13 
##     5     6     5     2     4     4     9     3     4     8     9     4 
## X4.14 X4.15 X4.16 X4.17 X4.18 X4.19 
##     3     4     7     4    12     2
# Question 2
# Make a histogram of duration of hospital stays using a width of one day using traditional R.
hist(hospital.data, main = "Histogram of Hospital Stays", xlab = "Days", col = "light blue", breaks = c(0:15), xlim = c(0, 16), ylim = c(0, 25))
axis(1, at = seq(0, 15, by = 1))
abline(v = mean(hospital.data), col = "red", lty = 2, lwd = 1.5)     # Draw vertical line of mean
abline(v = median(hospital.data), col = "purple", lty = 2, lwd = 1.5)     # Draw vertical line of median

# Make a histogram of the same data using ggplot.
hospital.data1 <- data.frame(hospital.data)     # Convert data to data frame for use with ggplot
mean(unlist(hospital.data1))
## [1] 5.4
# Plot the data
ggplot(hospital.data1, aes(x = hospital.data1)) + 
  geom_histogram(binwidth = 5, fill = "light blue", color = "black", breaks = seq(0, 16, by = 1)) +
  labs(title = "Histogram of Hospital Stays", x = "Days", y = "Frequency") + ylim(c(0, 25)) +
  scale_x_continuous(breaks = seq(0, 16, by = 1)) + theme(plot.title = element_text(hjust = 0.5)) + 
  geom_vline(xintercept = mean(unlist(hospital.data1)), colour = "red", lty = 2) + 
  geom_vline(xintercept = median(unlist(hospital.data1)), colour = "purple", lty = 2)

# Question 3
# Display mean, median, and std. dev. of hospital.data.
mean(hospital.data) ; median(hospital.data) ; sd(hospital.data)
## [1] 5.4
## [1] 5
## [1] 2.827633
# Display first and third quartiles, and minimum and maximum of hospital.data.
quantile(hospital.data)     # Display all quartiles
##   0%  25%  50%  75% 100% 
##    1    4    5    6   15
quantile(hospital.data)[2]     # Display first quartile
## 25% 
##   4
quantile(hospital.data)[4]     # Display third quartile
## 75% 
##   6
min(hospital.data)     # Display minimum
## [1] 1
max(hospital.data)     # Display maximum
## [1] 15
# Question 4
# Find probability of patients in hospital for less than a week.
pnorm(7, mean = 5, sd = 3)
## [1] 0.7475075
# Find probability of patients in hospital for more than seven days using a sample size = 10.
pnorm(7, mean = 5, sd = (3 / sqrt(10)), lower.tail = FALSE)
## [1] 0.01750749
1 - pnorm(7, mean = 5, sd = (3 / sqrt(10)))
## [1] 0.01750749