From caec575a64333d7c01516e30f589f5b5050e8758 Mon Sep 17 00:00:00 2001 From: Arnab Gupta Date: Tue, 9 Jun 2020 21:51:16 -0400 Subject: [PATCH] Fix for bug that causes memory overrun and adds more rows than it should --- lib/writer.js | 12 +++++++++--- test/integration.js | 4 +--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/writer.js b/lib/writer.js index f1130e81..3c2828f5 100644 --- a/lib/writer.js +++ b/lib/writer.js @@ -94,8 +94,11 @@ class ParquetWriter { parquet_shredder.shredRecord(this.schema, row, this.rowBuffer); if (this.rowBuffer.rowCount >= this.rowGroupSize) { - await this.envelopeWriter.writeRowGroup(this.rowBuffer); + let to_write = this.rowBuffer; + let rowCount = this.rowBuffer.rowCount; this.rowBuffer = {}; + to_write.rowCount = rowCount; + await this.envelopeWriter.writeRowGroup(to_write); } } @@ -112,9 +115,12 @@ class ParquetWriter { this.closed = true; - if (this.rowBuffer.rowCount > 0 || this.rowBuffer.rowCount >= this.rowGroupSize) { - await this.envelopeWriter.writeRowGroup(this.rowBuffer); + if (this.rowBuffer.rowCount > 0) { + let to_write = this.rowBuffer; + let rowCount = this.rowBuffer.rowCount; this.rowBuffer = {}; + to_write.rowCount = rowCount; + await this.envelopeWriter.writeRowGroup(to_write); } await this.envelopeWriter.writeFooter(this.userMetadata); diff --git a/test/integration.js b/test/integration.js index 5cfb2089..5ed21abd 100644 --- a/test/integration.js +++ b/test/integration.js @@ -105,9 +105,7 @@ async function writeTestFile(opts) { let rows = mkTestRows(opts); - for (let row of rows) { - await writer.appendRow(row); - } + rows.forEach(async row => await writer.appendRow(row)); await writer.close(); }