-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathInitial data cleaning
34 lines (29 loc) · 1.41 KB
/
Initial data cleaning
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Load the raw LMS table and discard the measurements that have no BMI value.
# NOTE(review): the working directory is a user-specific absolute path, so the
# script only runs unchanged on a machine that has this folder.
setwd("c:/Users/mqbpjhr4/Documents")
alldata <- read.table("LMStableedited.csv", header = TRUE, sep = ",")

# Number of records as read from the file.
nrow(alldata)

# Keep only the records whose BMI is present.
alldata <- alldata[!is.na(alldata$BMI), , drop = FALSE]

# Number of records left (original author's note: 44 records removed).
nrow(alldata)
# Treat the child identifier as a categorical variable.
alldata$ChildID <- as.factor(alldata$ChildID)

# Keep assessments taken at 4 to 11 completed years of age. ageattest is
# presumably recorded in weeks (it is divided by 52.12) -- TODO confirm.
# %in% never returns NA, so a record with a missing age is dropped; indexing
# with a logical NA would instead insert a row made up entirely of NA values.
alldata <- alldata[floor(alldata$ageattest / 52.12) %in% 4:11, ]

# Number of distinct children that still have at least one record. levels()
# would print every identifier and would still list children whose records
# were removed by the filters above.
# (Original author's note read: 63339 children.)
length(unique(alldata$ChildID))

# Round fractional postcode deciles up to the next whole number.
alldata$PostcodeDecile <- ceiling(alldata$PostcodeDecile)
# Remove repeated records, working on a sorted copy (alldata2) so the row
# counts before and after can be compared.
alldata2 <- alldata[order(alldata$ChildID, alldata$AssessmentDate), ]

# Show the column names; the duplicate check below selects columns by position.
names(alldata2)

# Flag every row that repeats an earlier row's values in columns 1 and 4
# (presumably ChildID and AssessmentDate -- verify against the names above).
d <- duplicated(alldata2[, c(1, 4)])

# Inspect the first flagged rows and the top of the sorted table.
head(alldata2[d, ], n = 10)
head(alldata2, n = 20)

# Drop the flagged rows, keeping the first occurrence of each combination.
alldata2 <- alldata2[!d, ]

# Number of rows dropped (original author's note: 134 duplicate entries
# removed).
nrow(alldata) - nrow(alldata2)

# Continue with the de-duplicated table.
alldata <- alldata2
library(lubridate)

# Both date columns get "-01" appended before parsing, so they are presumably
# stored as year-month strings -- TODO confirm against the CSV.
alldata$DateOfBirth <- as.Date(paste0(alldata$DateOfBirth, "-01"))
alldata$AssessmentDate <- as.Date(paste0(alldata$AssessmentDate, "-01"))

# Calendar year of the assessment.
alldata$assyr <- year(alldata$AssessmentDate)

# NOTE(review): attach() puts a snapshot of alldata on the search path; later
# changes to alldata are not reflected in that snapshot. Left in place in case
# other scripts rely on the attached column names.
attach(alldata)

# Identifier-like columns are categorical, not numeric.
alldata$ChildID <- as.factor(alldata$ChildID)
alldata$SchoolCode <- as.factor(alldata$SchoolCode)
alldata$IsAccountRegistered <- as.factor(alldata$IsAccountRegistered)

# Age at assessment in completed years: the gap between the two dates in
# weeks, divided by 52.12 weeks per year and rounded down.
alldata$ageattestyr <- as.integer(floor(
  difftime(alldata$AssessmentDate, alldata$DateOfBirth, units = "weeks") / 52.12
))
# Remove the 2011/2012 and 2012/2013 academic years. %in% never yields NA, so
# a record with a missing AcademicYear is kept unchanged; indexing with a
# logical NA would instead turn it into a row made up entirely of NA values.
alldata <- alldata[!(alldata$AcademicYear %in% c("2011/2012", "2012/2013")), ]

# Store AcademicYear as a factor holding only the years still present.
# droplevels() has no method for character vectors, which is what read.table()
# returns by default from R 4.0.0 onwards; factor() accepts both a character
# and a factor column and discards unused levels.
alldata$AcademicYear <- factor(alldata$AcademicYear)