将一个矢量拆分成R中的块

``x <- 1:10 n <- 3 chunk <- function(x,n) split(x, factor(sort(rank(x)%%n))) chunk(x,n) \$`0` [1] 1 2 3 \$`1` [1] 4 5 6 7 \$`2` [1] 8 9 10` `

` `split(d, ceiling(seq_along(d)/20))` `

` `> d <- rpois(73,5) > d [1] 3 1 11 4 1 2 3 2 4 10 10 2 7 4 6 6 2 1 1 2 3 8 3 10 7 4 [27] 3 4 4 1 1 7 2 4 6 0 5 7 4 6 8 4 7 12 4 6 8 4 2 7 6 5 [53] 4 5 4 5 5 8 7 7 7 6 2 4 3 3 8 11 6 6 1 8 4 > max <- 20 > x <- seq_along(d) > d1 <- split(d, ceiling(x/max)) > d1 \$`1` [1] 3 1 11 4 1 2 3 2 4 10 10 2 7 4 6 6 2 1 1 2 \$`2` [1] 3 8 3 10 7 4 3 4 4 1 1 7 2 4 6 0 5 7 4 6 \$`3` [1] 8 4 7 12 4 6 8 4 2 7 6 5 4 5 4 5 5 8 7 7 \$`4` [1] 7 6 2 4 3 3 8 11 6 6 1 8 4` `
` `chunk2 <- function(x,n) split(x, cut(seq_along(x), n, labels = FALSE))` `

` `chunk.2 <- function(x, n, force.number.of.groups = TRUE, len = length(x), groups = trunc(len/n), overflow = len%%n) { if(force.number.of.groups) { f1 <- as.character(sort(rep(1:n, groups))) f <- as.character(c(f1, rep(n, overflow))) } else { f1 <- as.character(sort(rep(1:groups, n))) f <- as.character(c(f1, rep("overflow", overflow))) } g <- split(x, f) if(force.number.of.groups) { g.names <- names(g) g.names.ordered <- as.character(sort(as.numeric(g.names))) } else { g.names <- names(g[-length(g)]) g.names.ordered <- as.character(sort(as.numeric(g.names))) g.names.ordered <- c(g.names.ordered, "overflow") } return(g[g.names.ordered]) }` `

` `> x <- 1:10; n <- 3 > chunk.2(x, n, force.number.of.groups = FALSE) \$`1` [1] 1 2 3 \$`2` [1] 4 5 6 \$`3` [1] 7 8 9 \$overflow [1] 10 > chunk.2(x, n, force.number.of.groups = TRUE) \$`1` [1] 1 2 3 \$`2` [1] 4 5 6 \$`3` [1] 7 8 9 10` `

` `set.seed(42) x <- rnorm(1:1e7) n <- 3` `

` `> system.time(chunk(x, n)) # your function user system elapsed 29.500 0.620 30.125 > system.time(chunk.2(x, n, force.number.of.groups = TRUE)) user system elapsed 5.360 0.300 5.663` `

` `library(ggplot2) x <- 1:10 n <- 3 cut_number(x, n) # labels = FALSE if you just want an integer result #> [1] [1,4] [1,4] [1,4] [1,4] (4,7] (4,7] (4,7] (7,10] (7,10] (7,10] #> Levels: [1,4] (4,7] (7,10] # if you want it split into a list: split(x, cut_number(x, n)) #> \$`[1,4]` #> [1] 1 2 3 4 #> #> \$`(4,7]` #> [1] 5 6 7 #> #> \$`(7,10]` #> [1] 8 9 10` `

` `> x <- 1:10 > n <- 3` `

` `> chunk <- function(x, n) split(x, sort(rank(x) %% n)) > chunk(x,n) \$`0` [1] 1 2 3 \$`1` [1] 4 5 6 7 \$`2` [1] 8 9 10` `

` `> my.chunk <- function(x, n) split(x, sort(rep(letters[1:n], each=n, len=length(x)))) > my.chunk(x, n) \$a [1] 1 2 3 4 \$b [1] 5 6 7 \$c [1] 8 9 10` `

` `> my.other.chunk <- function(x, n) split(x, sort(rep(c("tom", "dick", "harry"), each=n, len=length(x)))) > my.other.chunk(x, n) \$dick [1] 1 2 3 \$harry [1] 4 5 6 \$tom [1] 7 8 9 10` `
` `simplified version... n = 3 split(x, sort(x%%n))` `

` `split(x,cut(x,quantile(x,(0:n)/n), include.lowest=TRUE, labels=FALSE))` `

1. 所有的大块都是统一的，除了最后一个;
2. 最后最后会变小，从不比块大小大。
` `chunk <- function(x,n) { f <- sort(rep(1:(trunc(length(x)/n)+1),n))[1:length(x)] return(split(x,f)) } #Test n<-c(1,2,3,4,5,6,7,8,9,10,11) c<-chunk(n,5) q<-lapply(c, function(r) cat(r,sep=",",collapse="|") ) #output 1,2,3,4,5,|6,7,8,9,10,|11,|` `

`split(x,matrix(1:n,n,length(x))[1:length(x)])`

`split(x,rep(1:n, ceiling(length(x)/n),length.out = length(x)))`

` `chunk <- function(x,n){ numOfVectors <- floor(length(x)/n) elementsPerVector <- c(rep(n,numOfVectors-1),n+length(x) %% n) elemDistPerVector <- rep(1:numOfVectors,elementsPerVector) split(x,factor(elemDistPerVector)) } set.seed(1) x <- rnorm(10) n <- 3 chunk(x,n) \$`1` [1] -0.6264538 0.1836433 -0.8356286 \$`2` [1] 1.5952808 0.3295078 -0.8204684 \$`3` [1] 0.4874291 0.7383247 0.5757814 -0.3053884` `

` `chunk <- function(x,y){ split(x, factor(sort(rank(row.names(x))%%y))) }` `

` `chunk <- function(x, n) { if((length(x)%%n)==0) {return(matrix(x, nrow=n))} else {return(matrix(append(x, rep(NA, n-(length(x)%%n))), nrow=n))} }` `

` `chunk <- function(x, n) (mapply(function(a, b) (x[a:b]), seq.int(from=1, to=length(x), by=n), pmin(seq.int(from=1, to=length(x), by=n)+(n-1), length(x)), SIMPLIFY=FALSE))` `

`split()`一样，它会返回一个列表，但是它不会浪费时间或者带有标签的空间，因此可能会更高效。

` `library(data.table) split_dt <- function(x,y) { for(i in seq(from=1,to=nrow(get(x)),by=y)) {df_ <<- get(x)[i:(i + y)]; assign(paste0("df_",i),df_,inherits=TRUE)} rm(df_,inherits=TRUE) }` `