Skip to content

Commit 25a6e7b

Browse files
authored
Merge pull request #1 from msummersgill/working.data.table
Tune group2NA to eliminate unnecessary copies
2 parents 283da0b + be66550 commit 25a6e7b

File tree

2 files changed

+36
-19
lines changed

2 files changed

+36
-19
lines changed

R/group2NA.R

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,52 +25,69 @@
2525
#
2626
group2NA <- function(data, groupNames = "group", nested = NULL, ordered = NULL,
2727
retrace.first = inherits(data, "GeomPolygon")) {
28+
2829
if (NROW(data) == 0) return(data)
2930

31+
## make copy and eliminate duplicated column names
32+
## If data is already an internal copy and does not need to be protected from in place modifications, then
33+
## the copy being created here could be eliminated for all cases except where column names are duplicated --
34+
## wouldn't save much time, but could lower the amount of memory allocation required for plotly calls.
35+
3036
if(data.table::is.data.table(data)){
3137
data <- data[,unique(names(data)),with=FALSE]
3238
} else {
3339
data <- data[!duplicated(names(data))]
3440
}
3541

36-
# a few workarounds since dplyr clobbers classes that we rely on in ggplotly
42+
## store class information from function input
3743
retrace <- force(retrace.first)
3844
datClass <- class(data)
3945

40-
# sanitize variable names
46+
## sanitize variable names
4147
groupNames <- groupNames[groupNames %in% names(data)]
4248
nested <- nested[nested %in% names(data)]
4349
ordered <- ordered[ordered %in% names(data)]
4450

45-
# ignore any already existing groups (not required w/data.table?)
46-
47-
# if group doesn't exist, just arrange before returning
51+
## if group doesn't exist, just arrange before returning
4852
if (!length(groupNames)) {
4953
if (length(ordered)) {
50-
data.table::setDT(data,key = c(nested, ordered))
54+
return(
55+
structure(
56+
data.table::setDT(data,key = c(nested, ordered)),
57+
class = datClass)
58+
)
59+
} else {
60+
return(data)
5161
}
52-
return(data)
5362
}
5463

5564
allVars <- c(nested, groupNames, ordered)
5665

57-
# TODO: better now
58-
d <- if (retrace.first) {
59-
data.table::setDT(data, key = allVars)[ data[, .I[c(seq_along(.I), 1L, .N+1L)], by=allVars]$V1 ]
66+
## if retrace.first is TRUE, repeat the first row of each group and add an empty row of NAs after each group.
67+
## if retrace.first is FALSE, just add an empty row to each group.
68+
## delete the final row of NAs and return the result with the original class
69+
70+
## IMPORTANT: does it matter if operating with data.table's setDT() clobbers the row names attribute?
71+
if (retrace.first) {
72+
return(
73+
data.table::setDT(data, key = allVars)[ data[, .I[c(seq_along(.I), 1L, .N+1L)], by=allVars]$V1 ][-.N,] %>%
74+
structure(class = datClass)
75+
)
6076
} else {
61-
data.table::setDT(data, key = allVars)[ data[, .I[c(seq_along(.I), 1L, .N+1L)], by=allVars]$V1 ]
77+
return(
78+
structure(
79+
data.table::setDT(data, key = allVars)[ data[, .I[c(seq_along(.I), 1L, .N+1L)], by=allVars]$V1 ][-.N,],
80+
class = datClass)
81+
)
6282
}
6383

64-
# TODO: how to drop the NAs separating the nested values? Does it even matter?
84+
## IMPORTANT: does this still need to be done?
85+
## TODO: how to drop the NAs separating the nested values? Does it even matter?
6586
# d <- dplyr::ungroup(d)
6687
# for (i in nested) {
6788
# d <- dplyr::group_by_(dplyr::ungroup(d), i, add = TRUE)
6889
# }
6990
# d <- dplyr::do(d, .[seq_len(NROW(.)),])
70-
71-
if (all(is.na(d[.N, ]))) d <- d[-.N,]
72-
structure(d, class = datClass)
73-
7491
}
7592

7693

plotly.Rproj

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Version: 1.0
22

3-
RestoreWorkspace: Default
4-
SaveWorkspace: Default
5-
AlwaysSaveHistory: Default
3+
RestoreWorkspace: No
4+
SaveWorkspace: No
5+
AlwaysSaveHistory: No
66

77
EnableCodeIndexing: Yes
88
UseSpacesForTab: Yes

0 commit comments

Comments
 (0)