# 使用 `aggregate` 在一次调用中对多个变量应用多个函数

```r
x <- read.table(text = "  id1 id2 val1 val2
1   a   x    1    9
2   a   x    2    4
3   a   y    3    5
4   a   y    4    9
5   b   x    1    7
6   b   y    4    4
7   b   x    3    9
8   b   y    2    8", header = TRUE)
```

```r
# calculate mean
aggregate(. ~ id1 + id2, data = x, FUN = mean)

# count rows
aggregate(. ~ id1 + id2, data = x, FUN = length)
```

```r
do.call("rbind", aggregate(. ~ id1 + id2, data = x, FUN = function(x) data.frame(m = mean(x), n = length(x))))
```

```r
#      m   n
# id1  1   2
# id2  1   1
#      1.5 2
#      2   2
#      3.5 2
#      3   2
#      6.5 2
#      8   2
#      7   2
#      6   2
# Warning message:
#   In rbind(id1 = c(1L, 2L, 1L, 2L), id2 = c(1L, 1L, 2L, 2L), val1 = list( :
#   number of columns of result is not a multiple of vector length (arg 1)
```

```r
> aggregate(. ~ id1+id2, data = x, FUN = function(x) c(mn = mean(x), n = length(x) ) )
#   id1 id2 val1.mn val1.n val2.mn val2.n
# 1   a   x     1.5    2.0     6.5    2.0
# 2   b   x     2.0    2.0     8.0    2.0
# 3   a   y     3.5    2.0     7.0    2.0
# 4   b   y     3.0    2.0     6.0    2.0
```

```r
str( aggregate(. ~ id1+id2, data = x, FUN = function(x) c(mn = mean(x), n = length(x) ) ) )
'data.frame':   4 obs. of  4 variables:
 $ id1 : Factor w/ 2 levels "a","b": 1 2 1 2
 $ id2 : Factor w/ 2 levels "x","y": 1 1 2 2
 $ val1: num [1:4, 1:2] 1.5 2 3.5 3 2 2 2 2
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : NULL
  .. ..$ : chr  "mn" "n"
 $ val2: num [1:4, 1:2] 6.5 8 7 6 2 2 2 2
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : NULL
  .. ..$ : chr  "mn" "n"
```

```r
str( do.call(data.frame, aggregate(. ~ id1+id2, data = x, FUN = function(x) c(mn = mean(x), n = length(x) ) ) ) )
'data.frame':   4 obs. of  6 variables:
 $ id1    : Factor w/ 2 levels "a","b": 1 2 1 2
 $ id2    : Factor w/ 2 levels "x","y": 1 1 2 2
 $ val1.mn: num  1.5 2 3.5 3
 $ val1.n : num  2 2 2 2
 $ val2.mn: num  6.5 8 7 6
 $ val2.n : num  2 2 2 2
```

```r
aggregate(cbind(val1, val2) ~ id1 + id2, data = x, FUN = function(x) c(mn = mean(x), n = length(x) ) )
```

```r
> DT
   id1 id2 val1 val2
1:   a   x    1    9
2:   a   x    2    4
3:   a   y    3    5
4:   a   y    4    9
5:   b   x    1    7
6:   b   y    4    4
7:   b   x    3    9
8:   b   y    2    8

> DT[,.(mean(val1),mean(val2),.N),by=.(id1,id2)] # simplest
   id1 id2  V1  V2 N
1:   a   x 1.5 6.5 2
2:   a   y 3.5 7.0 2
3:   b   x 2.0 8.0 2
4:   b   y 3.0 6.0 2

> DT[,.(val1.m=mean(val1),val2.m=mean(val2),count=.N),by=.(id1,id2)] # named
   id1 id2 val1.m val2.m count
1:   a   x    1.5    6.5     2
2:   a   y    3.5    7.0     2
3:   b   x    2.0    8.0     2
4:   b   y    3.0    6.0     2

> DT[,c(lapply(.SD,mean),count=.N),by=.(id1,id2)] # mean over all columns
   id1 id2 val1 val2 count
1:   a   x  1.5  6.5     2
2:   a   y  3.5  7.0     2
3:   b   x  2.0  8.0     2
4:   b   y  3.0  6.0     2
```

```r
x$count <- 1
agg <- aggregate(. ~ id1 + id2, data = x,FUN = sum)
agg
#   id1 id2 val1 val2 count
# 1   a   x    3   13     2
# 2   b   x    4   16     2
# 3   a   y    7   14     2
# 4   b   y    6   12     2

agg[c("val1", "val2")] <- agg[c("val1", "val2")] / agg$count
agg
#   id1 id2 val1 val2 count
# 1   a   x  1.5  6.5     2
# 2   b   x  2.0  8.0     2
# 3   a   y  3.5  7.0     2
# 4   b   y  3.0  6.0     2
```

```r
x.mean <- aggregate(. ~ id1+id2, p, mean)
x.len <- aggregate(. ~ id1+id2, p, length)

merge(x.mean, x.len, by = c("id1", "id2"))

  id1 id2 val1.x val2.x val1.y val2.y
1   a   x    1.5    6.5      2      2
2   a   y    3.5    7.0      2      2
3   b   x    2.0    8.0      2      2
4   b   y    3.0    6.0      2      2
```

```r
x %>% group_by(id1, id2) %>% summarise_each(funs(mean, n()))
```

```
  id1 id2 val1_mean val2_mean val1_n val2_n
1   a   x       1.5       6.5      2      2
2   a   y       3.5       7.0      2      2
3   b   x       2.0       8.0      2      2
4   b   y       3.0       6.0      2      2
```

```r
# inclusion
x %>% group_by(id1, id2) %>% summarise_each(funs(mean, n()), val1, val2)

# exclusion
x %>% group_by(id1, id2) %>% summarise_each(funs(mean, n()), -val2)
```

```r
aggregate(cbind(val1, val2) ~ id1 + id2, data = x, FUN = plyr::each(avg = mean, n = length))
```