Skip to content

Commit edcbdbe

Browse files
author
Brian Hulette
committed
cleanup
1 parent 20717d5 commit edcbdbe

File tree

2 files changed

+175
-164
lines changed

2 files changed

+175
-164
lines changed

js/src/table.ts

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,17 @@ function columnsFromBatches(batches: Vector[][]) {
5454

5555
export class Table implements DataFrame {
5656
static from(sources?: Iterable<Uint8Array | Buffer | string> | object | string) {
57-
let batches: Vector<any>[][] = [[]];
57+
let batches: Vector[][] = [];
5858
if (sources) {
59-
batches = Array.from(read(sources));
59+
batches = [];
60+
for (let batch of read(sources)) {
61+
batches.push(batch);
62+
}
6063
}
6164
return new Table({ batches });
6265
}
6366
static async fromAsync(sources?: AsyncIterable<Uint8Array | Buffer | string>) {
64-
let batches: Vector<any>[][] = [[]];
67+
let batches: Vector[][] = [];
6568
if (sources) {
6669
batches = [];
6770
for await (let batch of readAsync(sources)) {
@@ -119,34 +122,34 @@ export class Table implements DataFrame {
119122
count_by = new Col(count_by);
120123
}
121124

122-
// the last batch will have the most complete dictionary, use it's data
123-
// vector as our count by keys
125+
// Assume that all dictionary batches are deltas, which means that the
126+
// last record batch has the most complete dictionary
124127
count_by.bind(this.batches[this.batches.length - 1]);
125128
if (!(count_by.vector instanceof DictionaryVector)) {
126-
throw new Error("countBy currently only supports dictionary-encoded columns");
129+
throw new Error('countBy currently only supports dictionary-encoded columns');
127130
}
128131

129-
let keys: Vector = (count_by.vector as DictionaryVector<any>).data;
132+
let data: Vector = (count_by.vector as DictionaryVector<any>).data;
130133
// TODO: Adjust array byte width based on overall length
131134
// (e.g. if this.length <= 255 use Uint8Array, etc...)
132-
let counts: Uint32Array = new Uint32Array(keys.length);
133-
135+
let counts: Uint32Array = new Uint32Array(data.length);
134136

135137
for (let batch = -1; ++batch < this.lengths.length;) {
136138
const length = this.lengths[batch];
137139

138140
// load batches
139141
const columns = this.batches[batch];
140142
count_by.bind(columns);
143+
const keys: Vector = (count_by.vector as DictionaryVector<any>).keys;
141144

142145
// yield all indices
143146
for (let idx = -1; ++idx < length;) {
144-
let key = (count_by.vector as DictionaryVector<any>).getKey(idx)
147+
let key = keys.get(idx);
145148
if (key !== null) { counts[key]++; }
146149
}
147150
}
148151

149-
return new CountByResult(keys, new Uint32Vector({data: counts}))
152+
return new CountByResult(data, new Uint32Vector({data: counts}));
150153
}
151154
*[Symbol.iterator]() {
152155
for (let batch = -1; ++batch < this.lengths.length;) {
@@ -220,16 +223,17 @@ class FilteredDataFrame implements DataFrame {
220223
count_by = new Col(count_by);
221224
}
222225

223-
// the last batch will have the most complete dictionary, use it's data
224-
// vector as our count by keys
226+
// Assume that all dictionary batches are deltas, which means that the
227+
// last record batch has the most complete dictionary
225228
count_by.bind(this.parent.batches[this.parent.batches.length - 1]);
226229
if (!(count_by.vector instanceof DictionaryVector)) {
227-
throw new Error("countBy currently only supports dictionary-encoded columns");
230+
throw new Error('countBy currently only supports dictionary-encoded columns');
228231
}
229232

230-
let keys: Vector = (count_by.vector as DictionaryVector<any>).data;
231-
let counts: Uint32Array = new Uint32Array(keys.length);
232-
233+
const data: Vector = (count_by.vector as DictionaryVector<any>).data;
234+
// TODO: Adjust array byte width based on overall length
235+
// (e.g. if this.length <= 255 use Uint8Array, etc...)
236+
const counts: Uint32Array = new Uint32Array(data.length);
233237

234238
for (let batch = -1; ++batch < this.parent.lengths.length;) {
235239
const length = this.parent.lengths[batch];
@@ -238,28 +242,29 @@ class FilteredDataFrame implements DataFrame {
238242
const columns = this.parent.batches[batch];
239243
const predicate = this.predicate.bind(columns);
240244
count_by.bind(columns);
245+
const keys: Vector = (count_by.vector as DictionaryVector<any>).keys;
241246

242247
// yield all indices
243248
for (let idx = -1; ++idx < length;) {
244-
let key = (count_by.vector as DictionaryVector<any>).getKey(idx)
249+
let key = keys.get(idx);
245250
if (key !== null && predicate(idx, columns)) { counts[key]++; }
246251
}
247252
}
248253

249-
return new CountByResult(keys, new Uint32Vector({data: counts}))
254+
return new CountByResult(data, new Uint32Vector({data: counts}));
250255
}
251256
}
252257

253258
export class CountByResult extends Table implements DataFrame {
254-
constructor(readonly keys: Vector, readonly counts: Vector<number|null>) {
255-
super({batches: [[keys, counts]]});
259+
constructor(readonly values: Vector, readonly counts: Vector<number|null>) {
260+
super({batches: [[values, counts]]});
256261
}
257262

258263
asJSON(): Object {
259264
let result: {[key: string]: number|null} = {};
260265

261266
for (let i = -1; ++i < this.length;) {
262-
result[this.keys.get(i)] = this.counts.get(i);
267+
result[this.values.get(i)] = this.counts.get(i);
263268
}
264269

265270
return result;

0 commit comments

Comments
 (0)