分布查看
# Install pacman only when it is missing, so that re-running the script does
# not download and reinstall the package every time.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
library(pacman)

# NOTE(review): `data` must already exist in the session; it is not created
# anywhere in this section.
class(data$qa003) # class of the column to be binned
hist(data$qa003) # histogram of its distribution
# Summary statistics of the column. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
summary(data$qa003, na.rm = TRUE)
按照分割点分割数值变量
# epikit supplies age_categories(); dplyr supplies %>% and mutate(), which the
# code below uses but which loading epikit alone does not attach.
pacman::p_load(epikit, dplyr)

# Option 1: bin qa003 using explicitly listed break points.
data1 <- data %>%
  mutate(
    dis_cat = age_categories( # create new column
      qa003, # numeric column to make groups from
      breakers = c(0, 5, 10, 15, 20, 30, 40, 50, 60, 70, 100, 140)
    )
  )

# Option 2: bin qa003 into evenly spaced groups of width 50 between 0 and 150.
# This overwrites the data1 created by option 1. The original was missing the
# closing parenthesis of mutate(), which swallowed the table() call below.
data1 <- data %>%
  mutate(
    dis_cat = age_categories(
      qa003,
      lower = 0,
      upper = 150,
      by = 50
    )
  )

# Cross-tabulate raw values against their assigned group (including NA) to
# check that the grouping is correct.
table(data1$qa003, data1$dis_cat, useNA = "always")
按照分位数分割数值变量
# Quantiles of qa003 at custom probabilities (NA values removed).
quantile(data$qa003, probs = c(0, 0.25, 0.5, 0.75, 0.90), na.rm = TRUE)

# Split into groups at every 10th percentile.
# Install janitor only when it is missing instead of on every run.
if (!requireNamespace("janitor", quietly = TRUE)) {
  install.packages("janitor")
}
library(janitor)

# Keep the binned column in data1. Previously the whole pipeline, including
# tabyl(), was assigned to data1, so data1 ended up holding the frequency
# table and nothing was printed.
data1 <- data %>%
  mutate(
    qa003cut = cut(
      qa003,
      # unique() guards against tied deciles: cut() stops with
      # "'breaks' are not unique" when two quantiles coincide. With ties,
      # fewer than 10 groups are produced.
      breaks = unique(
        quantile(qa003, probs = seq(0, 1, by = 0.1), na.rm = TRUE)
      ),
      include.lowest = TRUE
    )
  )

# Frequency table of the decile groups.
janitor::tabyl(data1, qa003cut)
平均分组(每组样本量大致相等)
# Split qa003 into 10 groups of (approximately) equal size.
data1 <- data %>%
  mutate(enveng = ntile(qa003, 10))

# Frequency table of the group sizes.
dattab <- data1 %>%
  janitor::tabyl(enveng)

# Minimum and maximum qa003 within each group.
# NOTE(review): if qa003 contains NA, those rows presumably land in an NA
# group whose min/max come out as Inf/-Inf with a warning -- confirm against
# the data.
ranges <- data1 %>%
  group_by(enveng) %>%
  summarise(
    min = min(qa003, na.rm = TRUE),
    max = max(qa003, na.rm = TRUE)
  )

# Combine counts and ranges into one table, matched on the group variable.
left_join(dattab, ranges, by = "enveng")
参考文献
https://epirhandbook.com/en/cleaning-data-and-core-functions.html
转自:“科研写作成长记”微信公众号
如有侵权,请联系本站删除!