Skip to content

Commit

Permalink
Sparse global order reader: defer tile deletion until end of merge. (#…
Browse files Browse the repository at this point in the history
…4014)

This fixes an issue in the sparse global order reader where an unused tile might be used after deletion. The problem arises when a fragment consolidated with timestamps has many duplicated cells. If the tile doesn't get used in the merge, it gets deleted when we process the last cell, but the problem is that there still might be other cells from the same tile in the tile queue as cells in the queue are not sorted depending on position.

The fix is to keep track of all the tiles to delete and delete them when the merge is done to prevent any use after free.

Note that this PR doesn't add unit tests for the issue as the randomness makes it very difficult to come up with any cases where this reproduces consistently.

---
TYPE: IMPROVEMENT
DESC: Sparse global order reader: defer tile deletion until end of merge.

(cherry picked from commit e86556f)
  • Loading branch information
KiterLuc authored and ihnorton committed Apr 4, 2023
1 parent bca9f82 commit 432d4c2
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 14 deletions.
38 changes: 26 additions & 12 deletions tiledb/sm/query/readers/sparse_global_order_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,8 @@ template <class CompType>
bool SparseGlobalOrderReader<BitmapType>::add_all_dups_to_queue(
GlobalOrderResultCoords<BitmapType>& rc,
std::vector<TileListIt>& result_tiles_it,
TileMinHeap<CompType>& tile_queue) {
TileMinHeap<CompType>& tile_queue,
std::vector<TileListIt>& to_delete) {
auto frag_idx = rc.tile_->frag_idx();
auto dups = array_schema_.allows_dups();
uint64_t last_cell_pos;
Expand Down Expand Up @@ -785,8 +786,7 @@ bool SparseGlobalOrderReader<BitmapType>::add_all_dups_to_queue(
if (!rc.tile_->used()) {
ignored_tiles_.emplace(
frag_idx, result_tiles_it[frag_idx]->tile_idx());
throw_if_not_ok(
remove_result_tile(frag_idx, result_tiles_it[frag_idx]));
to_delete.emplace_back(result_tiles_it[frag_idx]);
}

result_tiles_it[frag_idx] = next_tile;
Expand All @@ -804,7 +804,8 @@ template <class CompType>
bool SparseGlobalOrderReader<BitmapType>::add_next_cell_to_queue(
GlobalOrderResultCoords<BitmapType>& rc,
std::vector<TileListIt>& result_tiles_it,
TileMinHeap<CompType>& tile_queue) {
TileMinHeap<CompType>& tile_queue,
std::vector<TileListIt>& to_delete) {
auto frag_idx = rc.tile_->frag_idx();
auto dups = array_schema_.allows_dups();

Expand All @@ -818,13 +819,13 @@ bool SparseGlobalOrderReader<BitmapType>::add_next_cell_to_queue(
// Try the next cell in the same tile.
if (!rc.advance_to_next_cell()) {
// Save the potential tile to delete and increment the tile iterator.
auto to_delete = result_tiles_it[frag_idx];
auto to_delete_it = result_tiles_it[frag_idx];
result_tiles_it[frag_idx]++;

// Remove the tile from result tiles if it wasn't used at all.
if (!rc.tile_->used()) {
ignored_tiles_.emplace(frag_idx, to_delete->tile_idx());
throw_if_not_ok(remove_result_tile(frag_idx, to_delete));
ignored_tiles_.emplace(frag_idx, to_delete_it->tile_idx());
to_delete.push_back(to_delete_it);
}

// Try to find a new tile.
Expand Down Expand Up @@ -868,7 +869,7 @@ bool SparseGlobalOrderReader<BitmapType>::add_next_cell_to_queue(
// for purge deletes with no dups mode.
if (purge_deletes_no_dups_mode_ &&
fragment_metadata_[frag_idx]->has_timestamps()) {
if (add_all_dups_to_queue(rc, result_tiles_it, tile_queue)) {
if (add_all_dups_to_queue(rc, result_tiles_it, tile_queue, to_delete)) {
return true;
}
}
Expand Down Expand Up @@ -968,6 +969,7 @@ SparseGlobalOrderReader<BitmapType>::merge_result_cell_slabs(
std::vector<TileListIt> rt_it(result_tiles_.size());

// For all fragments, get the first tile in the sorting queue.
std::vector<TileListIt> to_delete;
auto status = parallel_for(
storage_manager_->compute_tp(), 0, result_tiles_.size(), [&](uint64_t f) {
if (result_tiles_[f].size() > 0) {
Expand All @@ -980,7 +982,7 @@ SparseGlobalOrderReader<BitmapType>::merge_result_cell_slabs(
read_state_.frag_idx_[f].cell_idx_ :
0;
GlobalOrderResultCoords rc(&*(rt_it[f]), cell_idx);
bool res = add_next_cell_to_queue(rc, rt_it, tile_queue);
bool res = add_next_cell_to_queue(rc, rt_it, tile_queue, to_delete);
{
std::unique_lock<std::mutex> ul(tile_queue_mutex_);
need_more_tiles |= res;
Expand Down Expand Up @@ -1051,12 +1053,14 @@ SparseGlobalOrderReader<BitmapType>::merge_result_cell_slabs(
tile_queue.pop();

// Put the next cell from the processed tile in the queue.
need_more_tiles = add_next_cell_to_queue(to_remove, rt_it, tile_queue);
need_more_tiles =
add_next_cell_to_queue(to_remove, rt_it, tile_queue, to_delete);
} else {
update_frag_idx(tile, to_process.pos_ + 1);

// Put the next cell from the processed tile in the queue.
need_more_tiles = add_next_cell_to_queue(to_process, rt_it, tile_queue);
need_more_tiles =
add_next_cell_to_queue(to_process, rt_it, tile_queue, to_delete);

to_process = tile_queue.top();
tile_queue.pop();
Expand Down Expand Up @@ -1135,7 +1139,8 @@ SparseGlobalOrderReader<BitmapType>::merge_result_cell_slabs(
}

// Put the next cell in the queue.
need_more_tiles = add_next_cell_to_queue(to_process, rt_it, tile_queue);
need_more_tiles =
add_next_cell_to_queue(to_process, rt_it, tile_queue, to_delete);
}

buffers_full_ = num_cells == 0;
Expand All @@ -1145,6 +1150,15 @@ SparseGlobalOrderReader<BitmapType>::merge_result_cell_slabs(
result_cell_slabs.size(),
buffers_full_);

// Delete tiles that were marked for deletion. Make one last check on the used
// variable as one duplicate cell might have been merged and changed the
// status.
for (auto& it : to_delete) {
if (!it->used()) {
throw_if_not_ok(remove_result_tile(it->frag_idx(), it));
}
}

return {Status::Ok(), std::move(result_cell_slabs)};
}; // namespace sm

Expand Down
8 changes: 6 additions & 2 deletions tiledb/sm/query/readers/sparse_global_order_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,14 +292,16 @@ class SparseGlobalOrderReader : public SparseIndexReaderBase,
* @param rc Current result coords for the fragment.
* @param result_tiles_it Iterator, per frag, in the list of retult tiles.
* @param tile_queue Queue of one result coords, per fragment, sorted.
* @param to_delete List of tiles to delete.
*
* @return If more tiles are needed.
*/
template <class CompType>
bool add_all_dups_to_queue(
GlobalOrderResultCoords<BitmapType>& rc,
std::vector<TileListIt>& result_tiles_it,
TileMinHeap<CompType>& tile_queue);
TileMinHeap<CompType>& tile_queue,
std::vector<TileListIt>& to_delete);

/**
* Add a cell (for a specific fragment) to the queue of cells currently being
Expand All @@ -308,14 +310,16 @@ class SparseGlobalOrderReader : public SparseIndexReaderBase,
* @param rc Current result coords for the fragment.
* @param result_tiles_it Iterator, per frag, in the list of retult tiles.
* @param tile_queue Queue of one result coords, per fragment, sorted.
* @param to_delete List of tiles to delete.
*
* @return If more tiles are needed.
*/
template <class CompType>
bool add_next_cell_to_queue(
GlobalOrderResultCoords<BitmapType>& rc,
std::vector<TileListIt>& result_tiles_it,
TileMinHeap<CompType>& tile_queue);
TileMinHeap<CompType>& tile_queue,
std::vector<TileListIt>& to_delete);

/**
* Computes a tile's Hilbert values for a tile.
Expand Down

0 comments on commit 432d4c2

Please sign in to comment.