Skip to content

Commit 1d2f008

Browse files
author
Matt Summersgill
committed
Re-writing function group2NA() using data.table, importing data.table to namespace
1 parent d20c4d6 commit 1d2f008

File tree

5 files changed

+32
-20
lines changed

5 files changed

+32
-20
lines changed

.Rbuildignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,6 @@ Untitled*
1313
rsconnect/
1414
revdep/
1515
^LICENSE\.md$
16+
^packrat/
17+
^\.Rprofile$
18+
Matt/

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,6 @@ inst/examples/*/rsconnect/*
1313
Untitled*
1414
rsconnect/
1515
revdep/
16+
packrat/lib*/
17+
packrat/src/
18+
matt/*

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ Imports:
4141
tibble,
4242
lazyeval (>= 0.2.0),
4343
crosstalk,
44-
purrr
44+
purrr,
45+
data.table
4546
Suggests:
4647
MASS,
4748
maps,

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ export(transmute)
201201
export(transmute_)
202202
export(ungroup)
203203
import(ggplot2)
204+
import(data.table)
204205
importFrom(dplyr,arrange)
205206
importFrom(dplyr,arrange_)
206207
importFrom(dplyr,distinct)

R/group2NA.R

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,18 @@
2121
# elong <- tidyr::gather(economics, variable, value, -date)
2222
# plot_ly(group2NA(elong, "variable"), x = ~date, y = ~value)
2323
#
24-
24+
# @import data.table
25+
#
2526
group2NA <- function(data, groupNames = "group", nested = NULL, ordered = NULL,
2627
retrace.first = inherits(data, "GeomPolygon")) {
2728
if (NROW(data) == 0) return(data)
28-
data <- data[!duplicated(names(data))]
29+
30+
if(data.table::is.data.table(data)){
31+
data <- data[,unique(names(data)),with=FALSE]
32+
} else {
33+
data <- data[!duplicated(names(data))]
34+
}
35+
2936
# a few workarounds since dplyr clobbers classes that we rely on in ggplotly
3037
retrace <- force(retrace.first)
3138
datClass <- class(data)
@@ -34,41 +41,38 @@ group2NA <- function(data, groupNames = "group", nested = NULL, ordered = NULL,
3441
groupNames <- groupNames[groupNames %in% names(data)]
3542
nested <- nested[nested %in% names(data)]
3643
ordered <- ordered[ordered %in% names(data)]
37-
# ignore any already existing groups
38-
data <- dplyr::ungroup(data)
44+
45+
# ignore any already existing groups (not required with data.table?)
3946

4047
# if group doesn't exist, just arrange before returning
4148
if (!length(groupNames)) {
4249
if (length(ordered)) {
43-
data <- dplyr::arrange_(data, c(nested, ordered))
50+
data.table::setDT(data,key = c(nested, ordered))
4451
}
4552
return(data)
4653
}
54+
4755
allVars <- c(nested, groupNames, ordered)
48-
for (i in allVars) {
49-
data <- dplyr::group_by_(data, i, add = TRUE)
50-
}
51-
# first, arrange everything
52-
data <- dplyr::do(data, dplyr::arrange_(., allVars))
53-
data <- dplyr::ungroup(data)
54-
for (i in c(nested, groupNames)) {
55-
data <- dplyr::group_by_(data, i, add = TRUE)
56-
}
57-
# TODO: this is slow, can it be done with dplyr::slice()?
56+
57+
## first, arrange everything (not required with data.table?)
58+
59+
# TODO: this is slow
5860
d <- if (retrace.first) {
59-
dplyr::do(data, rbind.data.frame(., .[1,], NA))
61+
data.table::setDT(data)[, index := .GRP, keyby = allVars][, .SD[c(1:(.N),1,(.N+1))], keyby = index][,index := NULL]
6062
} else {
61-
dplyr::do(data, rbind.data.frame(., NA))
63+
data.table::setDT(data)[, index := .GRP, keyby = allVars][, .SD[1:(.N+1)], keyby = index][,index := NULL]
6264
}
65+
6366
# TODO: how to drop the NAs separating the nested values? Does it even matter?
6467
# d <- dplyr::ungroup(d)
6568
# for (i in nested) {
6669
# d <- dplyr::group_by_(dplyr::ungroup(d), i, add = TRUE)
6770
# }
6871
# d <- dplyr::do(d, .[seq_len(NROW(.)),])
69-
n <- NROW(d)
70-
if (all(is.na(d[n, ]))) d <- d[-n, ]
72+
73+
if (all(is.na(d[.N, ]))) d <- d[-.N,]
7174
structure(d, class = datClass)
75+
7276
}
7377

7478

0 commit comments

Comments
 (0)