# 将R因子自动扩展为每个因子水平的1/0指标变量的集合

`df.original <-data.frame(eggs = c("foo", "foo", "bar", "bar"), ham = c(1,2,3,4))`

`df.desired <- data.frame(foo = c(1,1,0,0), bar=c(0,0,1,1), ham=c(1,2,3,4))`

### 8 Solutions collected from the web for “将R因子自动扩展为每个因子水平的1/0指标变量的集合”

` `model.matrix( ~ Species - 1, data=iris )` `

` `R> library(ade4) R> df <-data.frame(eggs = c("foo", "foo", "bar", "bar"), ham = c("red","blue","green","red")) R> acm.disjonctif(df) eggs.bar eggs.foo ham.blue ham.green ham.red 1 0 1 0 0 1 2 0 1 1 0 0 3 1 0 0 1 0 4 1 0 0 0 1` `

` `require(reshape2) > dcast(df.original, ham ~ eggs, length) Using ham as value column: use value_var to override. ham bar foo 1 1 0 1 2 2 0 1 3 3 1 0 4 4 1 0` `

` `> with(df.original, data.frame(model.matrix(~eggs+0), ham)) eggsbar eggsfoo ham 1 0 1 1 2 0 1 2 3 1 0 3 4 1 0 4` `

` `library(nnet) with(df.original, data.frame(class.ind(eggs), ham)) bar foo ham 1 0 1 1 2 0 1 2 3 1 0 3 4 1 0 4` `

#' Expand every factor column of a data frame into 0/1 indicator columns.
#'
#' Numeric columns are kept as they are; each factor column is replaced by the
#' disjunctive table built by `ade4::acm.disjonctif()` (one indicator column
#' per level, named `<column>.<level>`). Columns that are neither numeric nor
#' factor are dropped. Note that character columns are not factors: since
#' R 4.0 `data.frame()` no longer converts strings to factors, so convert such
#' columns with `factor()` first if they should be expanded.
#'
#' @param df A data.frame.
#' @return A data.frame holding the numeric columns of `df` followed by the
#'   indicator columns of its factor columns. When `df` has no factor column,
#'   only the numeric columns are returned.
dummy <- function(df) {
  stopifnot(is.data.frame(df))

  # `drop = FALSE` keeps a data.frame (and the column name) even when exactly
  # one column matches; without it a single factor column would collapse to a
  # bare vector that acm.disjonctif() cannot process.
  numeric_part <- df[, vapply(df, is.numeric, logical(1)), drop = FALSE]
  factor_part <- df[, vapply(df, is.factor, logical(1)), drop = FALSE]

  # Nothing to expand: return early instead of failing inside acm.disjonctif().
  if (ncol(factor_part) == 0L) {
    return(numeric_part)
  }

  # Fail loudly when the dependency is missing; require() would only return
  # FALSE and let the call below die with "could not find function".
  if (!requireNamespace("ade4", quietly = TRUE)) {
    stop("Package 'ade4' is required to expand factor columns.", call. = FALSE)
  }

  data.frame(numeric_part, ade4::acm.disjonctif(factor_part))
}

` `df <-data.frame(eggs = c("foo", "foo", "bar", "bar"), ham = c("red","blue","green","red"), x=rnorm(4)) dummy(df) df2 <-data.frame(eggs = c("foo", "foo", "bar", "bar"), ham = c("red","blue","green","red")) dummy(df2)` `

#' Explode categorical factor columns into one indicator column per level.
#'
#' A factor column is considered categorical, and is exploded, when its number
#' of levels is at most `max_factor_level_fraction` times the column length.
#' Inspired by the `acm.disjonctif()` function in the ade4 package.
#'
#' @param data The data.frame in which categorical variables will be exploded.
#' @param values Two values: `values[1]` is written where the observation is
#'   not equal to a level, `values[2]` where it is equal to that level.
#' @param max_factor_level_fraction Maximum number of levels as a fraction of
#'   the column length. Set to 1 to explode all factors.
#' @return `data` unchanged when no column qualifies. Otherwise a data.frame
#'   with the numeric columns of `data` followed by the indicator columns,
#'   named `<column>_<level>`; every other column (including factors with too
#'   many levels) is dropped.
explode_factors <- function(data, values = c(-0.8, 0.8),
                            max_factor_level_fraction = 0.2) {
  stopifnot(is.data.frame(data))

  # vapply() always yields a logical vector, even for a zero-column data
  # frame, where sapply() would return an empty list and break the subsetting.
  is_exploder <- vapply(
    data,
    function(col) {
      is.factor(col) && nlevels(col) <= max_factor_level_fraction * length(col)
    },
    logical(1)
  )
  exploders <- colnames(data)[is_exploder]
  if (length(exploders) == 0L) {
    return(data)
  }

  exploded <- lapply(exploders, function(exp) {
    col <- data[[exp]]  # `[[` always extracts the column vector itself
    codes <- as.integer(col)
    # Rows with a missing factor value match no level, so they keep
    # values[1] in every indicator column.
    observed <- which(!is.na(codes))
    dummies <- matrix(values[1], length(col), nlevels(col))
    # Matrix indexing by (row, level code) pairs marks the matching level.
    dummies[cbind(observed, codes[observed])] <- values[2]
    colnames(dummies) <- paste(exp, levels(col), sep = "_")
    dummies
  })

  # Only keep numeric data, then append the exploded indicator matrices.
  numeric_part <- data[vapply(data, is.numeric, logical(1))]
  do.call(cbind, c(list(numeric_part), exploded))
}

` `df.original <-data.frame(eggs = c("foo", "foo", "bar", "bar"), ham = c(1,2,3,4)) df.original # eggs ham # 1 foo 1 # 2 foo 2 # 3 bar 3 # 4 bar 4 # Create the dummy boolean variables using the model.matrix() function. > mm <- model.matrix(~eggs-1, df.original) > mm # eggsbar eggsfoo # 1 0 1 # 2 0 1 # 3 1 0 # 4 1 0 # attr(,"assign") # [1] 1 1 # attr(,"contrasts") # attr(,"contrasts")\$eggs # [1] "contr.treatment" # Remove the "eggs" prefix from the column names as the OP desired. colnames(mm) <- gsub("eggs","",colnames(mm)) mm # bar foo # 1 0 1 # 2 0 1 # 3 1 0 # 4 1 0 # attr(,"assign") # [1] 1 1 # attr(,"contrasts") # attr(,"contrasts")\$eggs # [1] "contr.treatment" # Combine the matrix back with the original dataframe. result <- cbind(df.original, mm) result # eggs ham bar foo # 1 foo 1 0 1 # 2 foo 2 0 1 # 3 bar 3 1 0 # 4 bar 4 1 0 # At this point, you can select out the columns that you want.` `
• 如何禁用R中的科学记数法？
• 向geom_polygon添加自定义图像填充ggplot
• 如何检查函数调用是否导致警告？
• 为什么apply（）返回一个转置的xts矩阵？
• 在R中快速读取非常大的表格作为数据框
• 错误：ggplot2中的stat_count（）
• 如何手动更改ggplot2中的图例中的键标签
• 具有动态数量变量的公式
• 在data.table中过滤掉重复/非唯一的行
• 为什么对于一个整数向量x，as（x，“numeric”）会触发加载一个额外的S4强制方法？
• 在不规则的网格上绘制轮廓