-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathAnalyse first and last measurements
76 lines (63 loc) · 2.79 KB
/
Analyse first and last measurements
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#RUN FILE "Initial data cleaning"
#CHOOSE FIRST WEIGHT CATEGORY RECORDED PER YEAR WHERE THERE ARE MULTIPLE MEASUREMENTS WITHIN A YEAR
#alldata <- alldata[order(AssessmentDate),]
#d<-duplicated(alldata[,c("ChildID","AcademicYear")],fromLast=FALSE)
#table(d)
# (~3% of children) have multiple measurements within a year
#head(alldata[d,],n=10)
#alldata<-alldata[!d,]
#DRAW IN BMI THRESHOLD TO WORK OUT CENTILES FOR OVERWEIGHT, OBESE, SEVERELY OBESE
# DERIVE A WEIGHT CATEGORY FROM THE BMI CENTILE
# Category codes: 1 = normal, 2 = overweight, 3 = obese, 4 = underweight,
# 5 = severely obese. Centile bands are closed on the left:
#   centile <  2.275            -> 4 (underweight)
#   2.275  <= centile < 90.879  -> 1 (normal)
#   90.879 <= centile < 97.725  -> 2 (overweight)
#   97.725 <= centile < 99.617  -> 3 (obese)
#   centile >= 99.617           -> 5 (severely obese)
# A missing centile gives a missing (NA) category.
# findInterval() returns 0..4 (how many cut points are <= the centile), so
# adding 1 turns it into an index into the vector of category codes.
alldata$weightcat <- c(4, 1, 2, 3, 5)[
  findInterval(alldata$Centile_BMI, c(2.275, 90.879, 97.725, 99.617)) + 1
]
table(alldata$weightcat)
# Constant 1 on every record
alldata$NoRecords <- 1
# NOTE(review): attach() places a snapshot of alldata on the search path, so
# columns added to alldata after this point are not visible through it. Nothing
# in this script visibly depends on it; kept in case later scripts do - confirm
# before removing.
attach(alldata)
###################################################################################################
# FOR COMPARING FIRST AND LAST MEASUREMENTS
# Builds champ2: the selected child columns plus the BMI, centile, weight
# category and date of the child's earliest ("start") and latest ("end")
# assessment, and the span in days between the two.
# as.data.frame(): setDT(alldata) further down this script turns alldata into
# a data.table by reference; copying to a plain data.frame keeps the
# data.frame-style column indexing below working if the script is re-run in
# the same session.
champ2 <- as.data.frame(alldata)
library(dplyr)
d <- champ2[, c("ChildID", "BMI", "Centile_BMI", "weightcat", "AssessmentDate")]

# Start BMI / centile / weight category: the row with each child's earliest
# AssessmentDate (all measurements considered, not only the first per year).
b <- d %>%
  group_by(ChildID) %>%
  slice(which.min(AssessmentDate)) %>%
  data.frame()
colnames(b) <- c("ChildID", "startBMI", "startCentile", "startcat", "startDate")
# Join key is explicit so a coincidentally shared column name can never be
# picked up as an extra key.
champ2 <- merge(champ2, b, by = "ChildID")

# End BMI / centile / weight category: the row with each child's latest
# AssessmentDate.
b2 <- d %>%
  group_by(ChildID) %>%
  slice(which.max(AssessmentDate)) %>%
  data.frame()
colnames(b2) <- c("ChildID", "endBMI", "endCentile", "endcat", "endDate")
champ2 <- merge(champ2, b2, by = "ChildID")

# Keep columns 1, 6 and 10 of the merged table plus the eight start/end
# columns. The start/end columns used to be selected as positions 20:27, which
# is only correct while the merged table has exactly 27 columns; selecting them
# by name gives the same result in that case and keeps working when alldata
# gains columns.
# NOTE(review): positions 6 and 10 are still positional because their names are
# not visible in this script. Column 1 is ChildID (merge() puts the key first);
# presumably one of 6/10 is IsAccountRegistered, which the t-tests below need -
# confirm and replace with names.
champ2 <- champ2[, c(
  names(champ2)[c(1, 6, 10)],
  "startBMI", "startCentile", "startcat", "startDate",
  "endBMI", "endCentile", "endcat", "endDate"
)]
# The merged table has one row per measurement; after dropping the
# measurement-level columns, identical rows are collapsed.
champ2 <- unique(champ2)

# Days between the first and last assessment
champ2$span <- as.numeric(
  difftime(champ2$endDate, champ2$startDate, units = "days"),
  units = "days"
)
# span is 0 when the earliest and latest assessment dates coincide, so this
# keeps only children measured on at least two different dates.
champ2 <- champ2[champ2$span > 0, ]
champ2 <- unique(champ2)
# Number of children remaining
length(unique(champ2$ChildID))
#####################################################################################################
# CHANGE IN BMI CENTILE BETWEEN FIRST AND LAST MEASUREMENT
# centdiff is start minus end, so a positive value means the centile fell.
champ2$centdiff <- champ2$startCentile - champ2$endCentile
# Annualised change: centile points per 365.25-day year (span is in days)
champ2$anncent <- champ2$centdiff / champ2$span * 365.25
# Two-sample t-tests by account registration (t.test() defaults to
# var.equal = FALSE). The formulas use bare column names so that `data` is
# actually used and the printed output is labelled with the column names.
t.test(span ~ IsAccountRegistered, data = champ2)
t.test(anncent ~ IsAccountRegistered, data = champ2)
library(data.table)
# NUMBER OF MEASUREMENTS PER CHILD, COMPARED BY ACCOUNT REGISTRATION
# VAR1 is a constant column. It was previously used as an always-true row
# filter (VAR1 == 1); the filter is gone but the column is kept so alldata has
# the same columns as before.
alldata$VAR1 <- 1
# setDT() converts alldata to a data.table in place. count.2 numbers each
# child's rows 1..N in their current order; the trailing [] prints the table.
setDT(alldata)[, count.2 := seq_len(.N), by = ChildID][]
# One row per child: the child's last row, where count.2 equals the child's
# total number of records.
# NOTE(review): the name `c` shadows base::c as a variable; kept because other
# scripts may read it - consider renaming.
c <- alldata %>%
  group_by(ChildID) %>%
  slice(n()) %>%
  data.frame()
# Bare column names in the formula so that `data` is actually used
t.test(count.2 ~ IsAccountRegistered, data = c)
# Mean number of records per child, registered vs not registered
mean(c$count.2[c$IsAccountRegistered == 1])
mean(c$count.2[c$IsAccountRegistered == 0])
# Chi-squared test of association between account registration and gender
# NOTE(review): alldata appears to hold one row per measurement, so children
# with several measurements are counted more than once here - confirm whether a
# one-row-per-child table was intended.
tbl <- table(alldata$IsAccountRegistered, alldata$Gender)
chisq.test(tbl)
# Unpaired, one-sided (alternative = "greater") Wilcoxon rank-sum test of days
# by account registration, without continuity correction.
# NOTE(review): `multi` is not created anywhere in this script; it must already
# exist in the session - confirm which script defines it.
wilcox.test(
  multi$days ~ multi$IsAccountRegistered,
  data = multi,
  paired = FALSE,
  correct = FALSE,
  alternative = "greater"
)