-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcode
More file actions
124 lines (86 loc) · 3.38 KB
/
code
File metadata and controls
124 lines (86 loc) · 3.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Importing data and loading packages ----
# Install pacman on first use. requireNamespace() only checks that the package
# is available; unlike require() it does not attach it, which is all that is
# needed here because pacman is always called through `::`.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# Attach the packages used by the rest of the script.
pacman::p_load(
  gt, car, dplyr, lessR, ggplot2, magrittr, janitor, plotrix, broom,
  kableExtra
)

# Read the raw birth-weight data.
birthweight <- rio::import(here::here("Birth_baby_weight.csv"))
# Data manipulation ----
# Convert the weights to kilograms, keep only the analysis columns and
# restrict to gestation lengths strictly between 200 and 350 days.
bweight <- birthweight %>%
  mutate(
    birthwt = bwt_ounce * 0.02835,     # ounces -> kilograms
    mweight = mweight_pounds * 0.4536  # pounds -> kilograms
  ) %>%
  select(
    Gender, birthwt, gestation_days, mother_age, mweight,
    # The smoking-status bar plot in the descriptive section reads
    # bweight$smoking_status, so the column has to survive this select().
    # any_of() keeps the pipeline running if the raw file lacks the column.
    any_of("smoking_status")
  ) %>%
  filter(gestation_days > 200 & gestation_days < 350)

# Quick sanity checks: first rows, presence of missing values, dimensions.
head(bweight, 5)
any(is.na(bweight))
dim(bweight)

# Recode Gender as a factor with readable labels.
# NOTE(review): the new labels are assigned in the sorted order of the raw
# codes, so this assumes the first raw level denotes male -- confirm against
# the data dictionary.
bweight$Gender <- as.factor(bweight$Gender)
levels(bweight$Gender) <- c("male", "female")
str(bweight)
# Descriptive statistics of the data ----
summary(bweight)

# Share of births by sex, drawn as an exploded 3D pie chart whose slices are
# labelled with rounded percentages.
table1 <- table(bweight$Gender)
piepercent <- paste0(round(table1 / sum(table1) * 100), "%")
plotrix::pie3D(
  table1,
  labels = piepercent,
  explode = 0.25,
  radius = 1.5,
  col = c("purple", "orange"),
  main = "Births according to sex ",
  col.main = "blue"
)
legend(
  "topright",
  legend = c("males", "females"),
  fill = c("purple", "orange"),
  cex = 0.8
)

# Number of mothers per smoking status.
# NOTE(review): this needs bweight to still carry a smoking_status column --
# confirm it is retained by the select() in the data-manipulation step.
table2 <- table(bweight$smoking_status)
barplot(
  table2,
  col = c("lightgreen", "red"),
  main = "Mother's smoking status",
  ylab = "Number of mothers",
  col.main = "blue",
  col.lab = "darkblue"
)
# Correlation and Linear regression analysis ----
# Scatter plot of birth weight against gestation length, overlaid with a
# least-squares line.
ggplot(data = bweight, mapping = aes(x = gestation_days, y = birthwt)) +
  geom_point(col = "green", size = 2) +
  geom_smooth(method = "lm", col = "blue") +
  labs(
    title = "Baby's birth weight Vs Gestation period",
    x = "Gestation period",
    y = "Birth Weight"
  )

# Pearson correlation between birth weight and gestation length.
cor.test(bweight$birthwt, bweight$gestation_days, method = "pearson")

# Fitting a simple linear model.
# The data frame is passed through `data =` so the coefficients are named
# after the columns and the fitted model can be used with predict(newdata =).
model1 <- lm(birthwt ~ gestation_days, data = bweight)
summary(model1)

# Distribution of the residuals as a rough normality check.
hist(residuals(model1))
# Birth weight against the mother's weight ----
ggplot(data = bweight, mapping = aes(x = mweight, y = birthwt)) +
  geom_point(col = "green", size = 2) +
  labs(
    title = "Baby's birth weight Vs mother weight",
    x = "Mother's weight",
    y = "Birth Weight"
  )

# Pearson correlation between birth weight and the mother's weight.
cor.test(bweight$birthwt, bweight$mweight, method = "pearson")

# Simple linear model of birth weight on the mother's weight.
model2 <- lm(birthwt ~ mweight, data = bweight)
summary(model2)

# Residual diagnostics. Residuals are plotted against the fitted values:
# plotted against the observed response they are correlated by construction,
# and the two vectors differ in length whenever lm() drops incomplete rows.
plot(
  fitted(model2), residuals(model2),
  xlab = "Fitted values",
  ylab = "Residuals"
)
hist(residuals(model2))
# Comparison of birth weight between the sexes ----
# Normality of birth weight.
shapiro.test(bweight$birthwt)

# Homogeneity of variance across the two Gender groups.
car::leveneTest(birthwt ~ Gender, data = bweight)

# Two-sided, pooled-variance two-sample t-test. Argument names are written
# out in full rather than relying on partial matching, and TRUE replaces the
# reassignable shorthand T.
t.test(
  birthwt ~ Gender,
  data = bweight,
  alternative = "two.sided",
  var.equal = TRUE
)

boxplot(
  birthwt ~ Gender,
  data = bweight,
  main = "BOX PLOT OF BIRTH WEIGHT BY GENDER",
  ylab = "Birth weight(kg)"
)
# Fitting a multiple linear regression model ----
# Birth weight explained by the mother's weight and the gestation length.
model3 <- lm(
  formula = birthwt ~ mweight + gestation_days,
  data = bweight
)
summary(model3)

# Same model with Gender appended as a third predictor.
model <- update(model3, . ~ . + Gender)
summary(model)