1
1
import { Vector } from "../vector/vector" ;
2
- import { StructVector } from "../vector/struct" ;
2
+ import { StructVector , StructRow } from "../vector/struct" ;
3
3
import { VirtualVector } from "../vector/virtual" ;
4
4
5
5
import { Predicate } from "./predicate"
6
6
7
7
/**
 * Callback signature accepted by `scan`.
 *
 * NOTE(review): presumably `idx` is a row index local to the current batch and
 * `cols` is that batch's column vectors — confirm against the `scan`
 * implementations, which are not fully visible here.
 */
export type NextFunc = (idx: number, cols: Array<Vector>) => void;
8
8
9
- export class DataFrame {
9
/**
 * A single row of a DataFrame batch, exposed as a `StructRow`.
 *
 * The vectors handed to the constructor are wrapped in a fresh `StructVector`
 * (as its `columns`), and the row is positioned at `idx` within it.
 */
export class DataFrameRow extends StructRow<any> {
  /**
   * @param batches Vectors passed to `StructVector` as its `columns`.
   * @param idx Row position forwarded to the `StructRow` constructor.
   */
  constructor(batches: Vector[], idx: number) {
    const rowSource = new StructVector({ columns: batches });
    super(rowSource, idx);
  }
}
14
+
15
/**
 * Operations shared by `DataFrame` and the filtered view returned by `filter`.
 */
export interface DataFrameOps {
  /** Column vectors, grouped per batch (outer index = batch). */
  readonly batches: Array<Array<Vector>>;

  /** One entry per batch; `DataFrame` fills it with each batch's row length. */
  readonly lengths: Uint32Array;

  /** Returns a new `DataFrameOps` narrowed by `predicate`. */
  filter(predicate: Predicate): DataFrameOps;

  /** Invokes `next` during a pass over the data. */
  scan(next: NextFunc): void;

  /** Returns a row count as a number. */
  count(): number;
}
22
+
23
+ export class DataFrame extends Vector < DataFrameRow > implements DataFrameOps {
10
24
readonly lengths : Uint32Array ;
11
- public columns : Vector < any > [ ] ;
12
- constructor ( readonly batches : Vector < any > [ ] [ ] ) {
25
+ constructor ( readonly batches : Vector [ ] [ ] ) {
26
+ super ( ) ;
13
27
// for each batch
14
28
this . lengths = new Uint32Array ( batches . map ( ( batch ) => {
15
29
// verify that every vector has the same length, and return that
@@ -23,7 +37,17 @@ export class DataFrame {
23
37
} ) ) ;
24
38
}
25
39
26
- public filter ( predicate : Predicate ) : DataFrame {
40
/**
 * Looks up the row at a global index counted across all batches in order.
 *
 * Walks `lengths` batch by batch, subtracting each batch's length from `idx`
 * until the remainder falls inside a batch.
 *
 * @param idx Zero-based row index across every batch.
 * @returns A `DataFrameRow` built from the owning batch's vectors and the
 *   batch-local index, or `null` when `idx` is negative, NaN, or at/after the
 *   total number of rows.
 */
get(idx: number): DataFrameRow | null {
  // Rejects negative indices and NaN, neither of which can address a row.
  if (!(idx >= 0)) return null;

  const batchCount = this.lengths.length;
  let batch = 0;

  // Bounds-check `batch` before reading `lengths[batch]`, and use `>=`:
  // valid local indices are 0..length-1, so an index equal to the batch
  // length belongs to the next batch (this also skips empty batches).
  while (batch < batchCount && idx >= this.lengths[batch]) {
    idx -= this.lengths[batch++];
  }

  if (batch === batchCount) return null;

  return new DataFrameRow(this.batches[batch], idx);
}
49
+
50
/**
 * Wraps this frame in a `FilteredDataFrame` that applies `predicate`.
 *
 * @param predicate Predicate handed to the filtered view.
 * @returns The filtered view, typed as `DataFrameOps`.
 */
filter(predicate: Predicate): DataFrameOps {
  const filtered = new FilteredDataFrame(this, predicate);
  return filtered;
}
29
53
@@ -50,11 +74,11 @@ export class DataFrame {
50
74
const length = this . lengths [ batch ] ;
51
75
52
76
// load batches
53
- this . columns = this . batches [ batch ] ;
77
+ const columns = this . batches [ batch ] ;
54
78
55
79
// yield all indices
56
80
for ( let idx = - 1 ; ++ idx < length ; ) {
57
- yield idx ;
81
+ yield new DataFrameRow ( columns , idx ) ;
58
82
}
59
83
}
60
84
}
@@ -85,10 +109,12 @@ export class DataFrame {
85
109
}
86
110
}
87
111
88
- class FilteredDataFrame extends DataFrame {
89
- public columns : Vector < any > [ ] ;
90
- constructor ( readonly parent : DataFrame , private predicate : Predicate ) {
91
- super ( parent . batches ) ;
112
+ class FilteredDataFrame implements DataFrameOps {
113
+ readonly lengths : Uint32Array ;
114
+ readonly batches : Vector [ ] [ ] ;
115
/**
 * Builds a filtered view that shares the parent's `batches` and `lengths`.
 *
 * @param parent Frame whose batches and lengths are reused by reference.
 * @param predicate Predicate stored on the view for use by its operations.
 */
constructor(readonly parent: DataFrameOps, private predicate: Predicate) {
  const { batches, lengths } = parent;
  this.batches = batches;
  this.lengths = lengths;
}
93
119
94
120
scan ( next : NextFunc ) {
@@ -133,7 +159,7 @@ class FilteredDataFrame extends DataFrame {
133
159
return sum ;
134
160
}
135
161
136
- filter ( predicate : Predicate ) : DataFrame {
162
+ filter ( predicate : Predicate ) : DataFrameOps {
137
163
return new FilteredDataFrame (
138
164
this . parent ,
139
165
this . predicate . and ( predicate )
0 commit comments