These functions allow convenient sorting and ordering of collections of (ff) vectors organized in (ffdf) data.frames.

dforder(x, ...)
dfsort(x, ...)
ramdforder(x, ...)
ramdfsort(x, ...)
ffdforder(x, ...)
ffdfsort(x, ...)

Arguments

x

a data.frame (for dforder, dfsort, ramdforder, ramdfsort) or an ffdf object (for ffdforder, ffdfsort)

...

further arguments passed to sort, ramsort or ffsort (for objects with one column) or passed to order, ramorder or fforder (for objects with multiple columns)

Value

the order functions return an (ff) vector of integer order positions; the sort functions return a sorted clone of the (ffdf) input data.frame

Author

Jens Oehlschlägel

Examples

   x <- ff(sample(1e5, 1e6, TRUE))
   y <- ff(sample(1e5, 1e6, TRUE))
   z <- ff(sample(1e5, 1e6, TRUE))
   d <- ffdf(x, y, z)
   d2 <- ffdfsort(d)
   d2
#> ffdf (all open) dim=c(1000000,3), dimorder=c(1,2) row.names=NULL
#> ffdf virtual mapping
#>   PhysicalName VirtualVmode PhysicalVmode  AsIs VirtualIsMatrix
#> x            x      integer       integer FALSE           FALSE
#> y            y      integer       integer FALSE           FALSE
#> z            z      integer       integer FALSE           FALSE
#>   PhysicalIsMatrix PhysicalElementNo PhysicalFirstCol PhysicalLastCol
#> x            FALSE                 1                1               1
#> y            FALSE                 2                1               1
#> z            FALSE                 3                1               1
#>   PhysicalIsOpen
#> x           TRUE
#> y           TRUE
#> z           TRUE
#> ffdf data
#>              x      y      z
#> 1            1   6615  30965
#> 2            1   7359  79059
#> 3            1  14282  58430
#> 4            1  20089  23350
#> 5            1  34376  89218
#> 6            1  38423  76774
#> 7            1  66715  48333
#> 8            1  82647  26881
#> :            :      :      :
#> 999993   99999  59653  15826
#> 999994   99999  62171  52869
#> 999995  100000  26002  19089
#> 999996  100000  26391  47977
#> 999997  100000  33869  34739
#> 999998  100000  49939  69262
#> 999999  100000  86725  51496
#> 1000000 100000  88874  69694
   d
#> ffdf (all open) dim=c(1000000,3), dimorder=c(1,2) row.names=NULL
#> ffdf virtual mapping
#>   PhysicalName VirtualVmode PhysicalVmode  AsIs VirtualIsMatrix
#> x            x      integer       integer FALSE           FALSE
#> y            y      integer       integer FALSE           FALSE
#> z            z      integer       integer FALSE           FALSE
#>   PhysicalIsMatrix PhysicalElementNo PhysicalFirstCol PhysicalLastCol
#> x            FALSE                 1                1               1
#> y            FALSE                 2                1               1
#> z            FALSE                 3                1               1
#>   PhysicalIsOpen
#> x           TRUE
#> y           TRUE
#> z           TRUE
#> ffdf data
#>             x     y     z
#> 1        4662 34054 67462
#> 2       12741  7535  3722
#> 3       72745 42862  7916
#> 4       56115 55806 41932
#> 5       22418 92434 19875
#> 6       26381 29523 58946
#> 7       52511 61774 57458
#> 8       89632 72436 69970
#> :           :     :     :
#> 999993  84413  8519 63803
#> 999994  44491 27628  4882
#> 999995  97347 83352 70409
#> 999996   8554 94765  3009
#> 999997  82615 21585 10041
#> 999998  56799 31087 66617
#> 999999  43267 70988 90342
#> 1000000 32926 52272 58094
   d2 <- d[1:2]
   i <- ffdforder(d2)
   d[i,]
#> ffdf (all open) dim=c(1000000,3), dimorder=c(1,2) row.names=NULL
#> ffdf virtual mapping
#>   PhysicalName VirtualVmode PhysicalVmode  AsIs VirtualIsMatrix
#> x            x      integer       integer FALSE           FALSE
#> y            y      integer       integer FALSE           FALSE
#> z            z      integer       integer FALSE           FALSE
#>   PhysicalIsMatrix PhysicalElementNo PhysicalFirstCol PhysicalLastCol
#> x            FALSE                 1                1               1
#> y            FALSE                 2                1               1
#> z            FALSE                 3                1               1
#>   PhysicalIsOpen
#> x           TRUE
#> y           TRUE
#> z           TRUE
#> ffdf data
#>              x      y      z
#> 1            1   6615  30965
#> 2            1   7359  79059
#> 3            1  14282  58430
#> 4            1  20089  23350
#> 5            1  34376  89218
#> 6            1  38423  76774
#> 7            1  66715  48333
#> 8            1  82647  26881
#> :            :      :      :
#> 999993   99999  59653  15826
#> 999994   99999  62171  52869
#> 999995  100000  26002  19089
#> 999996  100000  26391  47977
#> 999997  100000  33869  34739
#> 999998  100000  49939  69262
#> 999999  100000  86725  51496
#> 1000000 100000  88874  69694
   rm(x, y, z, i, d, d2)
   gc()
#>           used (Mb) gc trigger  (Mb) max used  (Mb)
#> Ncells 1156094 61.8    1994352 106.6  1994352 106.6
#> Vcells 2151717 16.5    8388608  64.0  8318871  63.5