Skip to content
This repository was archived by the owner on Aug 17, 2024. It is now read-only.

Commit cf7ebcf

Browse files
committed
modif on joins
1 parent 5758f80 commit cf7ebcf

File tree

3 files changed

+44
-39
lines changed

3 files changed

+44
-39
lines changed

examples/titanic_analysis.js

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ DataFrame.fromCSV('http://vincentarelbundock.github.io/Rdatasets/csv/COUNT/titan
5555
const cleanCountByGroup = countByGroup.rename('aggregation', 'count').sortBy('count', true);
5656

5757
// And now show the result
58-
cleanCountByGroup.show(100);
58+
cleanCountByGroup.show(300);
5959
// | class | age | sex | survived | count |
6060
// ------------------------------------------------------------
6161
// | 3rd class | adults | man | no | 387 |

src/dataframe.js

Lines changed: 20 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -174,26 +174,33 @@ class DataFrame {
174174
return [array.map(row => new Row(row, columns)), columns];
175175
}
176176

177-
_joinByType(gdf1, gdf2, type) {
178-
if (type === 'out' || type === 'in') {
179-
const gdf2Hashs = gdf2.listHashs();
180-
return gdf1.toCollection().map(({group, hash}) => {
181-
const isContained = gdf2Hashs.includes(hash);
182-
console.log(gdf2.get(hash).group.listColumns());
183-
const filterCondition = (bool) => bool ? group : false;
184-
return type === 'out' ? filterCondition(!isContained) : filterCondition(isContained);
185-
}).filter(group => group);
186-
}
187-
return gdf1.toCollection().map(({group}) => group);
177+
_joinByType(gdf1, gdf2, type, newColumns) {
178+
const gdf2Hashs = gdf2.listHashs();
179+
return gdf1.toCollection().map(({group, hash}) => {
180+
const isContained = gdf2Hashs.includes(hash);
181+
if (gdf2.get(hash)) {
182+
const gdf2Collection = gdf2.get(hash).group.toCollection();
183+
const combinedGroup = group.toCollection().map(row => {
184+
return gdf2Collection.map(row2 => Object.assign({}, row2, row));
185+
}).reduce((p, n) => [...p, ...n], []);
186+
group = this.__newInstance__(
187+
combinedGroup,
188+
newColumns
189+
);
190+
}
191+
const filterCondition = (bool) => bool ? group : false;
192+
if (type !== 'out' && type !== 'in') return group;
193+
return type === 'out' ? filterCondition(!isContained) : filterCondition(isContained);
194+
}).filter(group => group);
188195
}
189196

190197
_join(dfToJoin, on, types) {
191198
const newColumns = [...new Set([...this.listColumns(), ...dfToJoin.listColumns()])];
192199
const gdf = this.groupBy(...on);
193200
const gdfToJoin = dfToJoin.groupBy(...on);
194201
return [this.__newInstance__([], newColumns), ...iter([
195-
...this._joinByType(gdf, gdfToJoin, types[0]),
196-
...this._joinByType(gdfToJoin, gdf, types[1]),
202+
...this._joinByType(gdf, gdfToJoin, types[0], newColumns),
203+
...this._joinByType(gdfToJoin, gdf, types[1], newColumns),
197204
], group => group.restructure(newColumns))].reduce((p, n) => p.union(n));
198205
}
199206

tests/dataframe-test.js

Lines changed: 23 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -587,55 +587,53 @@ test('DataFrame rows can be ', (assert) => {
587587

588588
assert.deepEqual(
589589
df5.innerJoin(df6, 'id').sortBy('id').toArray(), [
590-
[1, 0, undefined],
591-
[1, undefined, 0],
592-
[3, 1, undefined],
593-
[3, undefined, 6],
594-
[8, 1, undefined],
595-
[8, undefined, 2],
590+
[1, 0, 0],
591+
[3, 1, 6],
592+
[8, 1, 2],
596593
], 'inner joined.'
597594
);
598595

599596
assert.deepEqual(
600597
df5.fullJoin(df6, 'id').sortBy('id').toArray(), [
601-
[1, 0, undefined],
602-
[1, undefined, 0],
598+
[1, 0, 0],
599+
[1, 0, 0],
603600
[2, undefined, 1],
604-
[3, 1, undefined],
605-
[3, undefined, 6],
601+
[3, 1, 6],
602+
[3, 1, 6],
606603
[6, undefined, 1],
607-
[8, 1, undefined],
608-
[8, undefined, 2],
604+
[8, 1, 2],
605+
[8, 1, 2],
609606
], 'full joined.'
610607
);
608+
611609
assert.deepEqual(
612610
df5.outerJoin(df6, 'id').sortBy('id').toArray(), [
613611
[2, undefined, 1],
614612
[6, undefined, 1],
615613
], 'outer joined.'
616614
);
617615

616+
df5.leftJoin(df6, 'id').sortBy('id').show()
617+
618618
assert.deepEqual(
619-
df5.leftJoin(df6, 'id').sortBy('id').toArray(), [
620-
[1, 0, undefined],
621-
[1, undefined, 0],
622-
[3, 1, undefined],
623-
[3, undefined, 6],
624-
[8, 1, undefined],
625-
[8, undefined, 2],
619+
df6.leftJoin(df5, 'id').sortBy('id').toArray(), [
620+
[1, 0, 0],
621+
[2, undefined, 1],
622+
[3, 1, 6],
623+
[6, undefined, 1],
624+
[8, 1, 2],
626625
], 'left joined.'
627626
);
628627

628+
df5.rightJoin(df6, 'id').sortBy('id').show()
629+
629630
assert.deepEqual(
630631
df5.rightJoin(df6, 'id').sortBy('id').toArray(), [
631-
[1, 0, undefined],
632-
[1, undefined, 0],
632+
[1, 0, 0],
633633
[2, undefined, 1],
634-
[3, 1, undefined],
635-
[3, undefined, 6],
634+
[3, 1, 6],
636635
[6, undefined, 1],
637-
[8, 1, undefined],
638-
[8, undefined, 2],
636+
[8, 1, 2],
639637
], 'right joined.'
640638
);
641639

0 commit comments

Comments
 (0)