Skip to contents

Combine several data frames by using id variables to match rows

Usage

CbindIdMatch(
  ...,
  addName = names(x),
  sep = "_",
  idNames = sapply(x, function(x) names(x)[1]),
  idNames1 = idNames,
  addLast = FALSE
)

Arguments

...

Several data frames as several input parameters or a list of data frames

addName

NULL or vector of strings used to name columns according to origin frame

sep

A character string used as separator between addName and the original column name when addName is applied

idNames

Names of the id variable within each data frame

idNames1

Names of the variables in the first data frame that correspond to the id variable within each data frame

addLast

When TRUE, addName is added at the end of the column names (as a suffix) instead of at the beginning

Value

A single data frame

Details

The first data frame is the basis, and the other frames are matched to it by using id variables. By default, the id variable is the first variable in each frame, and a corresponding variable with the same name is assumed to exist in the first frame. An id variable is not needed if the number of rows is one or the same as in the first frame. In that case, the corresponding element of idNames can be set to a zero-length string ("").

See also

RbindAll (same example data)

Author

Øyvind Langsrud

Examples

zA <- data.frame(idA = 1:10, idB = rep(10 * (1:5), 2), idC = rep(c(100, 200), 5), 
                 idC2 = c(100, rep(200, 9)), idC3 = rep(100, 10), 
                 idD = 99, x = round(rnorm(10), 3), xA = round(runif(10), 2))
zB <- data.frame(idB = 10 * (1:5), x = round(rnorm(5), 3), xB = round(runif(5), 2))
zC <- data.frame(idC = c(100, 200), x = round(rnorm(2), 3), xC = round(runif(2), 2))
zD <- data.frame(idD = 99, x = round(rnorm(1), 3), xD = round(runif(1), 2))
CbindIdMatch(zA, zB, zC, zD)
#> Warning: Column names not unique
#>    idA idB idC idC2 idC3 idD      x   xA      x   xB     x   xC      x   xD
#> 1    1  10 100  100  100  99 -1.400 0.29 -1.863 0.68 0.724 0.39 -0.481 0.53
#> 2    2  20 200  200  100  99  0.255 0.68 -0.522 0.50 2.354 0.46 -0.481 0.53
#> 3    3  30 100  200  100  99 -2.437 0.74 -0.053 0.64 0.724 0.39 -0.481 0.53
#> 4    4  40 200  200  100  99 -0.006 0.20  0.543 0.66 2.354 0.46 -0.481 0.53
#> 5    5  50 100  200  100  99  0.622 0.98 -0.914 0.10 0.724 0.39 -0.481 0.53
#> 6    6  10 200  200  100  99  1.148 0.74 -1.863 0.68 2.354 0.46 -0.481 0.53
#> 7    7  20 100  200  100  99 -1.822 0.05 -0.522 0.50 0.724 0.39 -0.481 0.53
#> 8    8  30 200  200  100  99 -0.247 0.53 -0.053 0.64 2.354 0.46 -0.481 0.53
#> 9    9  40 100  200  100  99 -0.244 0.70  0.543 0.66 0.724 0.39 -0.481 0.53
#> 10  10  50 200  200  100  99 -0.283 0.69 -0.914 0.10 2.354 0.46 -0.481 0.53
CbindIdMatch(a = zA, b = zB, c = zC, d = zD, idNames = c("", "idB", "idC", ""))
#>    a_idA a_idB a_idC a_idC2 a_idC3 a_idD    a_x a_xA    b_x b_xB   c_x c_xC
#> 1      1    10   100    100    100    99 -1.400 0.29 -1.863 0.68 0.724 0.39
#> 2      2    20   200    200    100    99  0.255 0.68 -0.522 0.50 2.354 0.46
#> 3      3    30   100    200    100    99 -2.437 0.74 -0.053 0.64 0.724 0.39
#> 4      4    40   200    200    100    99 -0.006 0.20  0.543 0.66 2.354 0.46
#> 5      5    50   100    200    100    99  0.622 0.98 -0.914 0.10 0.724 0.39
#> 6      6    10   200    200    100    99  1.148 0.74 -1.863 0.68 2.354 0.46
#> 7      7    20   100    200    100    99 -1.822 0.05 -0.522 0.50 0.724 0.39
#> 8      8    30   200    200    100    99 -0.247 0.53 -0.053 0.64 2.354 0.46
#> 9      9    40   100    200    100    99 -0.244 0.70  0.543 0.66 0.724 0.39
#> 10    10    50   200    200    100    99 -0.283 0.69 -0.914 0.10 2.354 0.46
#>    d_idD    d_x d_xD
#> 1     99 -0.481 0.53
#> 2     99 -0.481 0.53
#> 3     99 -0.481 0.53
#> 4     99 -0.481 0.53
#> 5     99 -0.481 0.53
#> 6     99 -0.481 0.53
#> 7     99 -0.481 0.53
#> 8     99 -0.481 0.53
#> 9     99 -0.481 0.53
#> 10    99 -0.481 0.53
CbindIdMatch(a = zA, b = zB, c = zC, d = zD, idNames1 = c("", "idB", "idC2", ""))
#>    a_idA a_idB a_idC a_idC2 a_idC3 a_idD    a_x a_xA    b_x b_xB   c_x c_xC
#> 1      1    10   100    100    100    99 -1.400 0.29 -1.863 0.68 0.724 0.39
#> 2      2    20   200    200    100    99  0.255 0.68 -0.522 0.50 2.354 0.46
#> 3      3    30   100    200    100    99 -2.437 0.74 -0.053 0.64 2.354 0.46
#> 4      4    40   200    200    100    99 -0.006 0.20  0.543 0.66 2.354 0.46
#> 5      5    50   100    200    100    99  0.622 0.98 -0.914 0.10 2.354 0.46
#> 6      6    10   200    200    100    99  1.148 0.74 -1.863 0.68 2.354 0.46
#> 7      7    20   100    200    100    99 -1.822 0.05 -0.522 0.50 2.354 0.46
#> 8      8    30   200    200    100    99 -0.247 0.53 -0.053 0.64 2.354 0.46
#> 9      9    40   100    200    100    99 -0.244 0.70  0.543 0.66 2.354 0.46
#> 10    10    50   200    200    100    99 -0.283 0.69 -0.914 0.10 2.354 0.46
#>    d_idD    d_x d_xD
#> 1     99 -0.481 0.53
#> 2     99 -0.481 0.53
#> 3     99 -0.481 0.53
#> 4     99 -0.481 0.53
#> 5     99 -0.481 0.53
#> 6     99 -0.481 0.53
#> 7     99 -0.481 0.53
#> 8     99 -0.481 0.53
#> 9     99 -0.481 0.53
#> 10    99 -0.481 0.53
CbindIdMatch(a = zA, b = zB, c = zC, d = zD, idNames1 = c("", "idB", "idC3", ""))
#> Warning: All rows not used
#>    a_idA a_idB a_idC a_idC2 a_idC3 a_idD    a_x a_xA    b_x b_xB   c_x c_xC
#> 1      1    10   100    100    100    99 -1.400 0.29 -1.863 0.68 0.724 0.39
#> 2      2    20   200    200    100    99  0.255 0.68 -0.522 0.50 0.724 0.39
#> 3      3    30   100    200    100    99 -2.437 0.74 -0.053 0.64 0.724 0.39
#> 4      4    40   200    200    100    99 -0.006 0.20  0.543 0.66 0.724 0.39
#> 5      5    50   100    200    100    99  0.622 0.98 -0.914 0.10 0.724 0.39
#> 6      6    10   200    200    100    99  1.148 0.74 -1.863 0.68 0.724 0.39
#> 7      7    20   100    200    100    99 -1.822 0.05 -0.522 0.50 0.724 0.39
#> 8      8    30   200    200    100    99 -0.247 0.53 -0.053 0.64 0.724 0.39
#> 9      9    40   100    200    100    99 -0.244 0.70  0.543 0.66 0.724 0.39
#> 10    10    50   200    200    100    99 -0.283 0.69 -0.914 0.10 0.724 0.39
#>    d_idD    d_x d_xD
#> 1     99 -0.481 0.53
#> 2     99 -0.481 0.53
#> 3     99 -0.481 0.53
#> 4     99 -0.481 0.53
#> 5     99 -0.481 0.53
#> 6     99 -0.481 0.53
#> 7     99 -0.481 0.53
#> 8     99 -0.481 0.53
#> 9     99 -0.481 0.53
#> 10    99 -0.481 0.53
CbindIdMatch(zA, zB, zC, zD, addName = c("", "bbb", "ccc", "ddd"), sep = ".", addLast = TRUE)
#>    idA idB idC idC2 idC3 idD      x   xA  x.bbb xB.bbb x.ccc xC.ccc  x.ddd
#> 1    1  10 100  100  100  99 -1.400 0.29 -1.863   0.68 0.724   0.39 -0.481
#> 2    2  20 200  200  100  99  0.255 0.68 -0.522   0.50 2.354   0.46 -0.481
#> 3    3  30 100  200  100  99 -2.437 0.74 -0.053   0.64 0.724   0.39 -0.481
#> 4    4  40 200  200  100  99 -0.006 0.20  0.543   0.66 2.354   0.46 -0.481
#> 5    5  50 100  200  100  99  0.622 0.98 -0.914   0.10 0.724   0.39 -0.481
#> 6    6  10 200  200  100  99  1.148 0.74 -1.863   0.68 2.354   0.46 -0.481
#> 7    7  20 100  200  100  99 -1.822 0.05 -0.522   0.50 0.724   0.39 -0.481
#> 8    8  30 200  200  100  99 -0.247 0.53 -0.053   0.64 2.354   0.46 -0.481
#> 9    9  40 100  200  100  99 -0.244 0.70  0.543   0.66 0.724   0.39 -0.481
#> 10  10  50 200  200  100  99 -0.283 0.69 -0.914   0.10 2.354   0.46 -0.481
#>    xD.ddd
#> 1    0.53
#> 2    0.53
#> 3    0.53
#> 4    0.53
#> 5    0.53
#> 6    0.53
#> 7    0.53
#> 8    0.53
#> 9    0.53
#> 10   0.53
try(CbindIdMatch(X = zA, Y = zA[, 4:5], Z = zC, idNames = NULL)) # Error
#> Error in CbindIdMatch(X = zA, Y = zA[, 4:5], Z = zC, idNames = NULL) : 
#>   Element of idNames/idNames1 missing when needed
CbindIdMatch(X = zA, Y = zA[, 4:5], Z = zD, idNames = NULL)      # Ok since equal NROW or NROW==1
#>    X_idA X_idB X_idC X_idC2 X_idC3 X_idD    X_x X_xA Y_idC2 Y_idC3 Z_idD    Z_x
#> 1      1    10   100    100    100    99 -1.400 0.29    100    100    99 -0.481
#> 2      2    20   200    200    100    99  0.255 0.68    200    100    99 -0.481
#> 3      3    30   100    200    100    99 -2.437 0.74    200    100    99 -0.481
#> 4      4    40   200    200    100    99 -0.006 0.20    200    100    99 -0.481
#> 5      5    50   100    200    100    99  0.622 0.98    200    100    99 -0.481
#> 6      6    10   200    200    100    99  1.148 0.74    200    100    99 -0.481
#> 7      7    20   100    200    100    99 -1.822 0.05    200    100    99 -0.481
#> 8      8    30   200    200    100    99 -0.247 0.53    200    100    99 -0.481
#> 9      9    40   100    200    100    99 -0.244 0.70    200    100    99 -0.481
#> 10    10    50   200    200    100    99 -0.283 0.69    200    100    99 -0.481
#>    Z_xD
#> 1  0.53
#> 2  0.53
#> 3  0.53
#> 4  0.53
#> 5  0.53
#> 6  0.53
#> 7  0.53
#> 8  0.53
#> 9  0.53
#> 10 0.53
CbindIdMatch(list(a = zA, b = zB, c = zC, d = zD))               # List is alternative input
#>    a_idA a_idB a_idC a_idC2 a_idC3 a_idD    a_x a_xA    b_x b_xB   c_x c_xC
#> 1      1    10   100    100    100    99 -1.400 0.29 -1.863 0.68 0.724 0.39
#> 2      2    20   200    200    100    99  0.255 0.68 -0.522 0.50 2.354 0.46
#> 3      3    30   100    200    100    99 -2.437 0.74 -0.053 0.64 0.724 0.39
#> 4      4    40   200    200    100    99 -0.006 0.20  0.543 0.66 2.354 0.46
#> 5      5    50   100    200    100    99  0.622 0.98 -0.914 0.10 0.724 0.39
#> 6      6    10   200    200    100    99  1.148 0.74 -1.863 0.68 2.354 0.46
#> 7      7    20   100    200    100    99 -1.822 0.05 -0.522 0.50 0.724 0.39
#> 8      8    30   200    200    100    99 -0.247 0.53 -0.053 0.64 2.354 0.46
#> 9      9    40   100    200    100    99 -0.244 0.70  0.543 0.66 0.724 0.39
#> 10    10    50   200    200    100    99 -0.283 0.69 -0.914 0.10 2.354 0.46
#>       d_x d_xD
#> 1  -0.481 0.53
#> 2  -0.481 0.53
#> 3  -0.481 0.53
#> 4  -0.481 0.53
#> 5  -0.481 0.53
#> 6  -0.481 0.53
#> 7  -0.481 0.53
#> 8  -0.481 0.53
#> 9  -0.481 0.53
#> 10 -0.481 0.53