@@ -2,10 +2,18 @@ import { Vector } from "../vector/vector";
2
2
import { StructVector } from "../vector/struct" ;
3
3
import { VirtualVector } from "../vector/virtual" ;
4
4
5
// Callback passed to `scan`: called with a row index and the column vectors that index refers to.
+ export type NextFunc = ( idx : number , cols : Vector [ ] ) => void ;
6
// Row test passed to `filter`: takes the same arguments as NextFunc and returns true for rows to keep.
+ export type PredicateFunc = ( idx : number , cols : Vector [ ] ) => boolean ;
7
+
5
8
export abstract class DataFrame {
9
// Stores the array supplied by the subclass as the readonly `lengths` property.
// `count()` sums its entries; FilteredDataFrame reads entry `batch` as the row count of that batch.
+ constructor ( readonly lengths : Uint32Array ) { }
6
10
// Column vectors exposed by the concrete frame.
public abstract columns : Vector < any > [ ] ;
7
11
// Returns the column vectors for batch number `batch`.
public abstract getBatch ( batch : number ) : Vector [ ] ;
8
- public abstract scan ( next : ( idx : number , cols : Vector [ ] ) => void ) : void ;
12
// Visits rows, handing each row index and its column vectors to `next`.
+ public abstract scan ( next : NextFunc ) : void ;
13
// Returns a FilteredDataFrame that wraps this frame together with `predicate`.
// Nothing is evaluated here; the predicate is only stored on the new frame.
+ public filter ( predicate : PredicateFunc ) : DataFrame {
14
+ return new FilteredDataFrame ( this , predicate ) ;
15
+ }
16
+
9
17
static from ( table : Vector < any > ) : DataFrame {
10
18
// There are two types of Vectors we might want to make into
11
19
// a ChunkedDataFrame:
@@ -31,23 +39,26 @@ export abstract class DataFrame {
31
39
return new SimpleDataFrame ( [ table ] ) ;
32
40
}
33
41
}
42
+
43
/**
 * Total number of rows in the frame, computed as the sum of every entry in
 * `lengths`.
 *
 * @returns The sum of `lengths`; 0 when `lengths` is empty.
 */
count(): number {
  // Seed the fold with 0: TypedArray#reduce throws a TypeError on an empty
  // array when no initial value is supplied.
  return this.lengths.reduce((acc, val) => acc + val, 0);
}
34
46
}
35
47
36
48
class SimpleDataFrame extends DataFrame {
37
49
readonly lengths : Uint32Array ;
38
50
// Builds a frame over `columns`, which is also stored as the public `columns` property.
// Throws an Error when any column after the first has a different length from the first.
constructor ( public columns : Vector < any > [ ] ) {
39
- super ( ) ;
51
// The base class receives lengths [0, columns[0].length]; the second entry is the one this class's scan loop reads as its bound.
+ super ( new Uint32Array ( [ 0 , columns [ 0 ] . length ] ) ) ;
40
52
if ( ! this . columns . slice ( 1 ) . every ( ( v ) => v . length === this . columns [ 0 ] . length ) ) {
41
53
throw new Error ( "Attempted to create a DataFrame with un-aligned vectors" ) ;
42
54
}
43
- this . lengths = new Uint32Array ( [ 0 , this . columns [ 0 ] . length ] ) ;
44
55
}
45
56
46
57
// Takes no batch argument and always returns the full `columns` array.
public getBatch ( ) {
47
58
return this . columns ;
48
59
}
49
60
50
- public scan ( next : ( idx : number , cols : Vector [ ] ) => void ) {
61
+ public scan ( next : NextFunc ) {
51
62
for ( let idx = - 1 ; ++ idx < this . lengths [ 1 ] ; ) {
52
63
next ( idx , this . columns )
53
64
}
@@ -62,24 +73,16 @@ class SimpleDataFrame extends DataFrame {
62
73
63
74
class ChunkedDataFrame extends DataFrame {
64
75
public columns : Vector < any > [ ] ;
65
- readonly lengths : Uint32Array ;
66
76
// Builds a frame over `virtuals`; the base-class lengths come from ChunkedDataFrame.getLengths(virtuals),
// which also performs the alignment check that the removed lines below used to do inline.
constructor ( private virtuals : VirtualVector < any > [ ] ) {
67
- super ( ) ;
68
- const offsets = virtuals [ 0 ] . offsets ;
69
- if ( ! this . virtuals . slice ( 1 ) . every ( ( v ) => v . aligned ( virtuals [ 0 ] ) ) ) {
70
- throw new Error ( "Attempted to create a DataFrame with un-aligned vectors" ) ;
71
- }
72
- this . lengths = new Uint32Array ( offsets . length ) ;
73
- offsets . forEach ( ( offset , i ) => {
74
- this . lengths [ i ] = offsets [ i + 1 ] - offset ; ;
75
- } ) ;
77
+ super ( ChunkedDataFrame . getLengths ( virtuals ) ) ;
78
// NOTE(review): `virtuals` is already a `private` parameter property, so this assignment looks redundant — confirm before removing.
+ this . virtuals = virtuals ;
76
79
}
77
80
78
81
// Returns, for each virtual vector, the entry at index `batch` of its `vectors` array.
getBatch ( batch : number ) : Vector [ ] {
79
82
return this . virtuals . map ( ( virt ) => virt . vectors [ batch ] ) ;
80
83
}
81
84
82
- scan ( next : ( idx : number , cols : Vector [ ] ) => void ) {
85
+ scan ( next : NextFunc ) {
83
86
for ( let batch = - 1 ; ++ batch < this . lengths . length ; ) {
84
87
const length = this . lengths [ batch ] ;
85
88
@@ -106,4 +109,69 @@ class ChunkedDataFrame extends DataFrame {
106
109
}
107
110
}
108
111
}
112
+
113
// Computes the `lengths` array handed to the base constructor.
// Throws an Error when any virtual vector after the first reports it is not `aligned` with the first.
// NOTE(review): an empty `virtuals` array passes the check and then reads `virtuals[0].vectors` — presumably callers never pass one; confirm.
+ private static getLengths ( virtuals : VirtualVector < any > [ ] ) : Uint32Array {
114
+ if ( ! virtuals . slice ( 1 ) . every ( ( v ) => v . aligned ( virtuals [ 0 ] ) ) ) {
115
+ throw new Error ( "Attempted to create a DataFrame with un-aligned vectors" ) ;
116
+ }
117
// One entry per element of the first virtual vector's `vectors`, holding that element's length.
+ return new Uint32Array ( virtuals [ 0 ] . vectors . map ( ( v ) => v . length ) ) ;
118
+ }
119
+ }
120
+
121
/**
 * A lazily evaluated view over another DataFrame that only exposes the rows
 * for which `predicate` returns true.
 *
 * No rows are materialised: `scan` and `count` walk the parent's batches and
 * evaluate the predicate on every row each time they are called.
 *
 * `lengths` is the parent's array, i.e. the unfiltered per-batch sizes, which
 * is why `count` is overridden here instead of summing `lengths`.
 */
class FilteredDataFrame extends DataFrame {
  /** The parent's column vectors; filtering rows does not change the columns. */
  public columns: Vector<any>[];

  /**
   * @param parent    Frame whose rows are being filtered.
   * @param predicate Row test; called with the row index inside the current
   *                  batch and that batch's column vectors.
   */
  constructor(readonly parent: DataFrame, private predicate: PredicateFunc) {
    super(parent.lengths);
    // Previously `columns` was declared but never assigned, leaving it
    // undefined on every filtered frame.
    this.columns = parent.columns;
  }

  /** Returns the parent's column vectors for `batch` (unfiltered). */
  getBatch(batch: number): Vector[] {
    return this.parent.getBatch(batch);
  }

  /**
   * Calls `next` for every row of the parent that satisfies the predicate.
   *
   * @param next Receives the row index inside its batch and the batch's
   *             column vectors.
   */
  scan(next: NextFunc) {
    // inlined version of this:
    // this.parent.scan((idx, columns) => {
    //     if (this.predicate(idx, columns)) next(idx, columns);
    // });
    for (let batch = -1; ++batch < this.parent.lengths.length;) {
      const length = this.parent.lengths[batch];

      // load batches
      const columns = this.parent.getBatch(batch);

      // yield all indices that pass the predicate
      for (let idx = -1; ++idx < length;) {
        if (this.predicate(idx, columns)) next(idx, columns);
      }
    }
  }

  /**
   * Counts the rows of the parent that satisfy the predicate.
   *
   * @returns Number of rows for which the predicate returned true.
   */
  count(): number {
    // inlined version of this:
    // let sum = 0;
    // this.parent.scan((idx, columns) => {
    //     if (this.predicate(idx, columns)) ++sum;
    // });
    // return sum;
    let sum = 0;
    for (let batch = -1; ++batch < this.parent.lengths.length;) {
      const length = this.parent.lengths[batch];

      // load batches
      const columns = this.parent.getBatch(batch);

      // tally all indices that pass the predicate
      for (let idx = -1; ++idx < length;) {
        if (this.predicate(idx, columns)) ++sum;
      }
    }
    return sum;
  }

  /**
   * Narrows this view further. The result wraps the original parent with a
   * predicate that requires both this frame's predicate and `predicate` to
   * hold, so chained filters do not nest frames.
   */
  filter(predicate: PredicateFunc): DataFrame {
    return new FilteredDataFrame(
      this.parent,
      (idx, cols) => this.predicate(idx, cols) && predicate(idx, cols)
    );
  }
}
0 commit comments