# elong <- tidyr::gather(economics, variable, value, -date)
# plot_ly(group2NA(elong, "variable"), x = ~date, y = ~value)
#
# @import data.table
#
group2NA <- function(data, groupNames = "group", nested = NULL, ordered = NULL,
                     retrace.first = inherits(data, "GeomPolygon")) {
  # Separate groups of rows with a row of NAs so that each group is drawn as a
  # visually distinct path.
  #
  # data:          a data frame (or data.table).
  # groupNames:    names of columns defining the groups to separate.
  # nested:        names of columns the groups are nested within; sorted first.
  # ordered:       names of columns used to order rows inside each group.
  # retrace.first: if TRUE, repeat the first row of each group at its end
  #                (closes the path) before the NA separator.
  #
  # Returns an object with the same class as `data`: rows sorted by
  # c(nested, groupNames, ordered), with an all-NA row between consecutive
  # groups and no trailing NA row. The caller's `data` is never modified.
  if (NROW(data) == 0) return(data)

  # evaluate the lazy default now, while `data` still carries its own class
  retrace <- force(retrace.first)
  datClass <- class(data)
  wasDT <- data.table::is.data.table(data)

  # drop columns with a repeated name, keeping the first occurrence
  data <- if (wasDT) {
    data[, unique(names(data)), with = FALSE]
  } else {
    data[!duplicated(names(data))]
  }

  # silently ignore variables that are not columns of data
  groupNames <- groupNames[groupNames %in% names(data)]
  nested <- nested[nested %in% names(data)]
  ordered <- ordered[ordered %in% names(data)]

  # data.table sorts by reference, so always sort a private copy
  sortedCopy <- function(cols) {
    dt <- data.table::copy(data)
    data.table::setDT(dt)
    data.table::setorderv(dt, cols)
    dt
  }

  # give the result the class the input had
  # NOTE(review): attributes tied to other classes (e.g. dplyr grouping
  # metadata) are not rebuilt here -- confirm callers pass ungrouped data
  restore <- function(d) {
    if (!wasDT) data.table::setDF(d)
    structure(d, class = datClass)
  }

  # if group doesn't exist, just arrange before returning
  if (!length(groupNames)) {
    if (!length(ordered)) return(data)
    return(restore(sortedCopy(c(nested, ordered))))
  }

  # first, arrange everything
  dt <- sortedCopy(c(nested, groupNames, ordered))

  # groups are defined by nested + groupNames only; `ordered` just sorts rows
  # within a group and must not introduce separators of its own
  groupId <- data.table::rleidv(dt, cols = c(nested, groupNames))
  rowsByGroup <- split(seq_len(nrow(dt)), groupId)

  # one index vector for the whole table: each group's rows, optionally its
  # first row again, then NA (an NA index yields an all-NA row)
  idx <- unlist(
    lapply(rowsByGroup, function(i) c(i, if (retrace) i[1L], NA_integer_)),
    use.names = FALSE
  )

  # the separator after the last group is never needed
  idx <- idx[-length(idx)]

  # TODO: how to drop the NAs separating the nested values? Does it even matter?
  restore(dt[idx])
}
73
77
74
78
0 commit comments