118 lines
5.2 KiB
C++
118 lines
5.2 KiB
C++
|
#include <libregexis024fa/graph_to_bytecode/core.h>
|
||
|
|
||
|
#include <assert.h>
|
||
|
#include <libregexis024fa/graph_to_bytecode/writing_commands.h>
|
||
|
|
||
|
#include <libregexis024fa/graph_to_bytecode/filter.h>
|
||
|
|
||
|
// Release-safe assertion for void functions that report failure through an
// in-scope `int error` variable: when `expr` evaluates to false, `error` is set
// to -1 and the enclosing function returns immediately.
// The body is wrapped in do { } while (0) so the expansion is exactly one
// statement; this keeps `if (c) nonthrowing_assert(x); else ...` well-formed
// and prevents the macro's inner `if` from capturing a following `else`.
#define nonthrowing_assert(expr) do { if (!(expr)) { error = -1; return; } } while (0)
|
||
|
|
||
|
void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_bookmarks& bookmark_manager,
|
||
|
size_t& first_read_ns, size_t& second_read_ns, size_t& fork_ss_ns, int& error)
|
||
|
{
|
||
|
bookmark_id_t node_start_bm_offset = bookmark_manager.new_range_of_bookmarks(fa.all.size());
|
||
|
std::vector<size_t> not_yet_dedicated_second_read_ns_ssids;
|
||
|
first_read_ns = 0;
|
||
|
second_read_ns = 0;
|
||
|
fork_ss_ns = 0;
|
||
|
assert(fa.start);
|
||
|
std::vector<FA_Node*> todo = {fa.start};
|
||
|
// std::vector<bool> promised(fa.all.size(), false);
|
||
|
// promised[fa.start->nodeId] = true;
|
||
|
|
||
|
auto nodesBookmark = [&](FA_Node* node) -> bookmark_id_t {
|
||
|
assert(node);
|
||
|
return node_start_bm_offset + node->nodeId;
|
||
|
};
|
||
|
|
||
|
auto addBranching = [&](FA_Node* node) {
|
||
|
todo.push_back(node);
|
||
|
};
|
||
|
|
||
|
auto reading_head = [&](bool is_in_second_ns) {
|
||
|
if (is_in_second_ns) {
|
||
|
cmd_READ_second_ns(result, not_yet_dedicated_second_read_ns_ssids);
|
||
|
second_read_ns++;
|
||
|
} else {
|
||
|
cmd_READ_first_ns(result, first_read_ns++);
|
||
|
}
|
||
|
};
|
||
|
|
||
|
while (!todo.empty()) {
|
||
|
FA_Node* node = todo.back(); todo.pop_back();
|
||
|
if (bookmark_manager.has_landed(nodesBookmark(node))) {
|
||
|
continue;
|
||
|
}
|
||
|
while (true) {
|
||
|
if (bookmark_manager.has_landed(nodesBookmark(node))) {
|
||
|
cmd_JUMP(result, bookmark_manager, nodesBookmark(node));
|
||
|
break;
|
||
|
}
|
||
|
bookmark_manager.land_bookmark(result, nodesBookmark(node));
|
||
|
if (node->type == match) {
|
||
|
cmd_MATCH(result);
|
||
|
cmd_DIE(result);
|
||
|
break;
|
||
|
} else if (node->type == one_char_read) {
|
||
|
FA_NodeOfOneCharRead* ocr = dynamic_cast<FA_NodeOfOneCharRead*>(node);
|
||
|
nonthrowing_assert(first_read_ns + second_read_ns < UINT32_MAX);
|
||
|
reading_head(ocr->second_ns);
|
||
|
write_filter(result, bookmark_manager, {ocr->filter},{nodesBookmark(ocr->nxt_node)});
|
||
|
node = ocr->nxt_node;
|
||
|
} else if (node->type == look_one_behind) {
|
||
|
FA_NodeOfLookOneBehind* lob = dynamic_cast<FA_NodeOfLookOneBehind*>(node);
|
||
|
write_filter(result, bookmark_manager, {lob->filter}, {nodesBookmark(lob->nxt_node)});
|
||
|
node = lob->nxt_node;
|
||
|
} else if (node->type == forking) {
|
||
|
FA_NodeOfForking* fn = dynamic_cast<FA_NodeOfForking*>(node);
|
||
|
std::vector<FA_Node*>& nxt_options = fn->nxt_options;
|
||
|
if (nxt_options.empty()) {
|
||
|
cmd_DIE(result);
|
||
|
break;
|
||
|
}
|
||
|
if (nxt_options.size() >= 2) {
|
||
|
nonthrowing_assert(fork_ss_ns < UINT32_MAX);
|
||
|
regex_sslot_id_t sslot = fork_ss_ns++;
|
||
|
for (size_t i = 0; i + 1 < nxt_options.size(); i++) {
|
||
|
cmd_FORK(result, bookmark_manager, sslot, nodesBookmark(nxt_options[i]));
|
||
|
addBranching(nxt_options[i]);
|
||
|
}
|
||
|
}
|
||
|
node = nxt_options.back();
|
||
|
} else if (node->type == track_array_mov_imm) {
|
||
|
FA_NodeOfTrackArrayMovImm* tami = dynamic_cast<FA_NodeOfTrackArrayMovImm*>(node);
|
||
|
write_byte(result, tami->operation);
|
||
|
write_tai(result, tami->key);
|
||
|
write_quadword(result, tami->imm_value);
|
||
|
node = tami->nxt_node;
|
||
|
} else if (node->type == track_array_mov_halfinvariant) {
|
||
|
FA_NodeOfTrackArrayMovHalfinvariant* tamh = dynamic_cast<FA_NodeOfTrackArrayMovHalfinvariant *>(node);
|
||
|
write_byte(result, tamh->operation);
|
||
|
write_tai(result, tamh->key);
|
||
|
node = tamh->nxt_node;
|
||
|
} else if (node->type == det_char_crossroads) {
|
||
|
FA_NodeOfDetCharCrossroads* dcc = dynamic_cast<FA_NodeOfDetCharCrossroads*>(node);
|
||
|
nonthrowing_assert(first_read_ns + second_read_ns < UINT32_MAX);
|
||
|
if (dcc->matching)
|
||
|
cmd_MATCH(result);
|
||
|
reading_head(dcc->second_ns);
|
||
|
std::vector<codeset_t> codesets;
|
||
|
std::vector<bookmark_id_t> branches;
|
||
|
for (const DFA_CrossroadPath& p: dcc->crossroads) {
|
||
|
codesets.push_back(p.input);
|
||
|
branches.push_back(nodesBookmark(p.nxt_node));
|
||
|
addBranching(p.nxt_node);
|
||
|
}
|
||
|
write_filter(result, bookmark_manager, codesets, branches);
|
||
|
if (dcc->crossroads.empty())
|
||
|
break;
|
||
|
node = dcc->crossroads[0].nxt_node;
|
||
|
} else
|
||
|
assert(false);
|
||
|
}
|
||
|
}
|
||
|
for (size_t j = 0; j < not_yet_dedicated_second_read_ns_ssids.size(); j++) {
|
||
|
belated_sslot_id(result, not_yet_dedicated_second_read_ns_ssids[j], j + first_read_ns);
|
||
|
}
|
||
|
}
|