library(xlsx)
## Loading required package: rJava
## Loading required package: xlsxjars
library(ggplot2)
# Question 1
# Import the data in the assignment from the first worksheet of the Excel file.
# The sheet appears to hold only observations (no header row): with the default
# header = TRUE the first row of values was turned into column names
# (X7, X3, X5, X3.1, ...) and silently dropped from the data. header = FALSE
# keeps that row as data; columns are then auto-named by read.xlsx.
# NOTE: the printed results recorded in this file predate this fix, so
# re-run the script to refresh them.
hospital.data <- read.xlsx("homework1data.xlsx", sheetIndex = 1, header = FALSE)
hospital.data
## X7 X3 X5 X3.1 X1 X5.1 X10 X3.2 X4 X4.1
## 1 7 5 8 3 4 1 15 4 5 8
## 2 5 3 2 3 5 9 4 5 6 9
## 3 5 3 6 3 2 6 4 5 5 4
## 4 5 8 4 6 13 4 6 3 2 3
## 5 2 4 6 6 6 8 6 3 4 4
## 6 5 10 4 6 3 9 3 9 4 7
## 7 10 14 4 6 5 10 4 4 9 4
## 8 4 3 6 8 5 7 6 1 3 12
## 9 11 5 2 1 4 4 5 6 4 2
# Collapse the data frame, column by column, into one named numeric vector
# so the plotting and summary functions below can use it directly.
hospital.data <- unlist(hospital.data, use.names = TRUE)
hospital.data
## X71 X72 X73 X74 X75 X76 X77 X78 X79 X31 X32 X33
## 7 5 5 5 2 5 10 4 11 5 3 3
## X34 X35 X36 X37 X38 X39 X51 X52 X53 X54 X55 X56
## 8 4 10 14 3 5 8 2 6 4 6 4
## X57 X58 X59 X3.11 X3.12 X3.13 X3.14 X3.15 X3.16 X3.17 X3.18 X3.19
## 4 6 2 3 3 3 6 6 6 6 8 1
## X11 X12 X13 X14 X15 X16 X17 X18 X19 X5.11 X5.12 X5.13
## 4 5 2 13 6 3 5 5 4 1 9 6
## X5.14 X5.15 X5.16 X5.17 X5.18 X5.19 X101 X102 X103 X104 X105 X106
## 4 8 9 10 7 4 15 4 4 6 6 3
## X107 X108 X109 X3.21 X3.22 X3.23 X3.24 X3.25 X3.26 X3.27 X3.28 X3.29
## 4 6 5 4 5 5 3 3 9 4 1 6
## X41 X42 X43 X44 X45 X46 X47 X48 X49 X4.11 X4.12 X4.13
## 5 6 5 2 4 4 9 3 4 8 9 4
## X4.14 X4.15 X4.16 X4.17 X4.18 X4.19
## 3 4 7 4 12 2
# Question 2
# Base-graphics histogram of length of stay, one bin per day
# (bin edges at 0, 1, ..., 15).
hist(
  hospital.data,
  breaks = 0:15,
  xlim = c(0, 16),
  ylim = c(0, 25),
  col = "light blue",
  main = "Histogram of Hospital Stays",
  xlab = "Days"
)
# Put a tick mark at every whole day on the x axis.
axis(side = 1, at = 0:15)
# Dashed reference lines: mean in red, median in purple.
abline(v = mean(hospital.data), lty = 2, lwd = 1.5, col = "red")
abline(v = median(hospital.data), lty = 2, lwd = 1.5, col = "purple")
# Make a histogram of the same data using ggplot.
# ggplot needs a data frame; data.frame() names the single column after the
# vector, i.e. `hospital.data`.
hospital.data1 <- data.frame(hospital.data)
mean(hospital.data1[["hospital.data"]])
## [1] 5.4
# Plot the data. The x aesthetic maps to the column `hospital.data` (not to the
# data frame object itself). Bins are defined solely by `breaks` (one per day);
# a separate binwidth would be ignored because `breaks` overrides it.
ggplot(hospital.data1, aes(x = hospital.data)) +
  geom_histogram(
    breaks = seq(0, 16, by = 1),
    fill = "light blue",
    color = "black"
  ) +
  labs(title = "Histogram of Hospital Stays", x = "Days", y = "Frequency") +
  ylim(c(0, 25)) +
  scale_x_continuous(breaks = seq(0, 16, by = 1)) +
  theme(plot.title = element_text(hjust = 0.5)) + # centre the title
  # Dashed reference lines: mean in red, median in purple.
  geom_vline(
    xintercept = mean(hospital.data1[["hospital.data"]]),
    colour = "red",
    linetype = 2
  ) +
  geom_vline(
    xintercept = median(hospital.data1[["hospital.data"]]),
    colour = "purple",
    linetype = 2
  )
# Question 3
# Centre and spread of the stays: mean, median, and standard deviation.
mean(hospital.data)
## [1] 5.4
median(hospital.data)
## [1] 5
sd(hospital.data)
## [1] 2.827633
# Minimum, quartiles, and maximum in a single call.
quantile(hospital.data)
## 0% 25% 50% 75% 100%
## 1 4 5 6 15
# First quartile (25th percentile).
quantile(hospital.data, probs = 0.25)
## 25%
## 4
# Third quartile (75th percentile).
quantile(hospital.data, probs = 0.75)
## 75%
## 6
# Shortest and longest observed stay.
min(hospital.data)
## [1] 1
max(hospital.data)
## [1] 15
# Question 4
# Probability that a single stay is shorter than 7 days, modelling the stay
# as normal with mean 5 and standard deviation 3.
pnorm(q = 7, mean = 5, sd = 3)
## [1] 0.7475075
# Probability that the mean stay of a sample of 10 patients exceeds 7 days;
# the sample mean has standard error 3 / sqrt(10).
pnorm(q = 7, mean = 5, sd = 3 / sqrt(10), lower.tail = FALSE)
## [1] 0.01750749
# Same upper-tail probability, written as the complement of the lower tail.
1 - pnorm(q = 7, mean = 5, sd = 3 / sqrt(10))
## [1] 0.01750749