-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathedge_buffer.cc
273 lines (256 loc) · 10.7 KB
/
edge_buffer.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
#include <vector>
#include <tuple>
#include <sstream>
#include <limits>
#include <algorithm>
#include <stdexcept>
#include "edge_buffer.hpp"
static const auto UMAX = std::numeric_limits<std::size_t>::max();
BirthData::BirthData(double l, double r, tsk_id_t c)
: left{l}, right{r}, child{c}, next{NULL_EDGE_BUFFER_INDEX}
{
if (r <= l)
{
throw std::invalid_argument("BirthData: right <= left");
}
}
EdgeBuffer::EdgeBuffer(std::size_t num_nodes) : first(num_nodes, NULL_EDGE_BUFFER_INDEX), births{}
{
}
ExistingEdges::ExistingEdges(tsk_id_t p, std::size_t start_, std::size_t stop_)
: parent{p}, start{start_}, stop{stop_}
{
}
EDGE_BUFFER_INDEX_TYPE
get_buffer_end(const edge_buffer_ptr& new_edges, std::size_t i)
{
if (i >= new_edges->first.size())
{
throw std::runtime_error("invalid parent index");
}
auto f = new_edges->first[i];
while (f != NULL_EDGE_BUFFER_INDEX && new_edges->births[f].next != NULL_EDGE_BUFFER_INDEX)
{
f = new_edges->births[f].next;
if (f != NULL_EDGE_BUFFER_INDEX && f >= new_edges->births.size())
{
throw std::runtime_error("invalid next value");
}
}
return f;
}
EDGE_BUFFER_INDEX_TYPE
buffer_new_edge(tsk_id_t parent, double left, double right, tsk_id_t child,
edge_buffer_ptr& new_edges)
{
if (parent == TSK_NULL || child == TSK_NULL)
{
throw std::runtime_error("bad node IDs passed to buffer_new_edge");
}
if (parent >= new_edges->first.size())
{
new_edges->first.resize(parent + 1, NULL_EDGE_BUFFER_INDEX);
}
new_edges->births.emplace_back(left, right, child);
if (new_edges->first[parent] == NULL_EDGE_BUFFER_INDEX)
{
new_edges->first[parent] = new_edges->births.size() - 1;
if (new_edges->births[new_edges->first[parent]].next != NULL_EDGE_BUFFER_INDEX)
{
std::ostringstream o;
o << "invalid next entry for first birth: " << parent << ' '
<< new_edges->first[parent] << ' '
<< new_edges->births[new_edges->first[parent]].next << ' '
<< new_edges->first.size() << ' ' << new_edges->births.size();
throw std::runtime_error(o.str());
}
}
else
{
auto l = get_buffer_end(new_edges, parent);
new_edges->births[l].next = new_edges->births.size() - 1;
}
return new_edges->births.size() - 1;
}
EDGE_BUFFER_INDEX_TYPE
buffer_new_edge_at(EDGE_BUFFER_INDEX_TYPE loc, double left, double right, tsk_id_t child,
edge_buffer_ptr& new_edges)
{
if(loc>= new_edges->births.size()) { throw std::runtime_error("bad location"); }
new_edges->births.emplace_back(left, right, child);
new_edges->births[loc].next = new_edges->births.size() - 1;
return new_edges->births.size() - 1;
}
std::vector<ExistingEdges>
find_pre_existing_edges(const table_collection_ptr& tables,
const std::vector<tsk_id_t>& alive_at_last_simplification,
const edge_buffer_ptr& new_edges)
// FIXME: the indexing step need go no farther than the time of the most
// recent node in alive_at_last_simplification.
{
std::vector<tsk_id_t> alive_with_new_edges;
for (auto a : alive_at_last_simplification)
{
if (new_edges->first[a] != NULL_EDGE_BUFFER_INDEX)
{
alive_with_new_edges.push_back(a);
}
}
if (alive_with_new_edges.empty()) // get out early
{
return {};
}
// index where each node already has edges.
std::vector<std::size_t> starts(tables->nodes.num_rows, UMAX),
stops(tables->nodes.num_rows, UMAX);
for (decltype(tables->edges.num_rows) i = 0; i < tables->edges.num_rows; ++i)
{
if (starts[tables->edges.parent[i]] == UMAX)
{
starts[tables->edges.parent[i]] = i;
stops[tables->edges.parent[i]]
= i; // FIXME: idiomatically, this should be i+1
}
else
{
stops[tables->edges.parent[i]]
= i; // FIXME: idiomatically, this should be i+1
}
}
std::vector<ExistingEdges> existing_edges;
for (auto a : alive_with_new_edges)
{
existing_edges.emplace_back(a, starts[a], stops[a]);
}
// Our only sort!!
std::sort(begin(existing_edges), end(existing_edges),
[&tables](const ExistingEdges& lhs, const ExistingEdges& rhs) {
// lexical comparison of tuple elements just like in Python
return std::tie(tables->nodes.time[lhs.parent], lhs.start, lhs.parent)
< std::tie(tables->nodes.time[rhs.parent], rhs.start,
rhs.parent);
});
for (std::size_t i = 1; i < existing_edges.size(); ++i)
{
auto t0 = tables->nodes.time[existing_edges[i - 1].parent];
auto t1 = tables->nodes.time[existing_edges[i].parent];
if (t0 > t1)
{
throw std::runtime_error(
"existing edges not properly sorted by time");
}
}
return existing_edges;
}
auto
handle_pre_existing_edges(const table_collection_ptr& tables,
const edge_buffer_ptr& new_edges,
const std::vector<ExistingEdges>& existing_edges,
temp_edges& edge_liftover) -> decltype(tables->edges.num_rows)
{
decltype(tables->edges.num_rows) offset = 0;
for (const auto& ex : existing_edges)
{
// FIXME: this while loop is repeated 2x just w/different
// ranges
while (offset < tables->edges.num_rows
&& tables->nodes.time[tables->edges.parent[offset]]
< tables->nodes.time[ex.parent])
{
edge_liftover.add_edge(
tables->edges.left[offset], tables->edges.right[offset],
tables->edges.parent[offset], tables->edges.child[offset]);
++offset;
}
if (ex.start != UMAX)
{
while (offset < ex.start
&& tables->nodes.time[tables->edges.parent[offset]]
<= tables->nodes.time[ex.parent])
{
edge_liftover.add_edge(tables->edges.left[offset],
tables->edges.right[offset],
tables->edges.parent[offset],
tables->edges.child[offset]);
++offset;
}
// FIXME: stop condition isn't idiomatic
for (decltype(ex.start) i = ex.start; i < ex.stop + 1; ++i)
{
edge_liftover.add_edge(
tables->edges.left[i], tables->edges.right[i],
tables->edges.parent[i], tables->edges.child[i]);
}
// NOTE: differs from python, so could be a source of error
offset = ex.stop + 1;
}
auto n = new_edges->first[ex.parent];
while (n != NULL_EDGE_BUFFER_INDEX)
{
edge_liftover.add_edge(new_edges->births[n].left,
new_edges->births[n].right, ex.parent,
new_edges->births[n].child);
n = new_edges->births[n].next;
}
}
return offset;
}
void
copy_births_since_last_simplification(const edge_buffer_ptr& new_edges,
const table_collection_ptr& tables,
double max_time, temp_edges& edge_liftover)
{
// TODO: should validate data in new_edges
edge_liftover.clear(); // Re-use this buffer b/c it'll get big.
// Go backwards through new births, and add them
// to our temporary edge table if they are newer
// than the last simplification time
for (auto b = new_edges->first.rbegin(); b < new_edges->first.rend(); ++b)
{
auto d = std::distance(new_edges->first.rbegin(), b);
auto parent = new_edges->first.size() - d - 1;
auto ptime = tables->nodes.time[parent];
if (*b != NULL_EDGE_BUFFER_INDEX && ptime < max_time)
{
auto n = *b;
while (n != NULL_EDGE_BUFFER_INDEX)
{
edge_liftover.add_edge(new_edges->births[n].left,
new_edges->births[n].right, parent,
new_edges->births[n].child);
n = new_edges->births[n].next;
}
}
else if (*b != NULL_EDGE_BUFFER_INDEX and ptime >= max_time)
{
break;
}
}
}
void
stitch_together_edges(const std::vector<tsk_id_t>& alive_at_last_simplification,
double max_time, edge_buffer_ptr& new_edges,
temp_edges& edge_liftover, table_collection_ptr& tables)
{
copy_births_since_last_simplification(new_edges, tables, max_time, edge_liftover);
auto existing_edges
= find_pre_existing_edges(tables, alive_at_last_simplification, new_edges);
auto offset
= handle_pre_existing_edges(tables, new_edges, existing_edges, edge_liftover);
for (; offset < tables->edges.num_rows; ++offset)
{
edge_liftover.add_edge(
tables->edges.left[offset], tables->edges.right[offset],
tables->edges.parent[offset], tables->edges.child[offset]);
}
int ret = tsk_edge_table_set_columns(
&tables->edges, edge_liftover.size(), edge_liftover.left.data(),
edge_liftover.right.data(), edge_liftover.parent.data(),
edge_liftover.child.data(), nullptr, 0);
// This resets sizes to 0, but keeps the memory allocated.
edge_liftover.clear();
// TODO: move this cleanup to function
new_edges->first.resize(tables->nodes.num_rows);
std::fill(begin(new_edges->first), end(new_edges->first), NULL_EDGE_BUFFER_INDEX);
new_edges->births.clear();
}