119 lines
5.6 KiB
C++

#include <libregexis024fa/graph_to_bytecode/core.h>
#include <assert.h>
#include <libregexis024fa/graph_to_bytecode/writing_commands.h>
#include <libregexis024fa/graph_to_bytecode/filter.h>
namespace regexis024 {
/* Release-build-safe precondition check: when `expr` is false, stores -1 into a
 * variable named `error` and leaves the enclosing function with a bare `return;`.
 * It therefore only works where an assignable `error` is in scope and the enclosing
 * function (or lambda) returns void.
 * Wrapped in do { } while (0) so that `nonthrowing_assert(x);` is exactly one
 * statement and stays correct inside an unbraced if / else. */
#define nonthrowing_assert(expr) do { if (!(expr)) { error = -1; return; } } while (0)
/**
 * Walks the automaton starting at `fa.start` and appends the bytecode of every
 * reachable node to `result`.
 *
 * Every node gets one bookmark (`node_start_bm_offset + nodeId`); the bookmark is
 * landed at the position where the node's code is emitted. A node whose bookmark has
 * already landed is never emitted twice: a JUMP to it is written instead. Branch
 * targets that are not emitted right away are kept on a LIFO work list.
 *
 * @param result           byte buffer the commands are appended to.
 * @param fa               automaton to compile; `fa.start` must be non-null.
 *                         NOTE(review): node ids are assumed to be < fa.all.size(),
 *                         since that many bookmarks are reserved - confirm.
 * @param bookmark_manager source of bookmarks used for jump/fork/filter targets.
 * @param first_read_ns    reset to 0, then incremented for every READ emitted for a
 *                         node whose `second_ns` flag is false.
 * @param second_read_ns   reset to 0, then incremented for every READ emitted for a
 *                         node whose `second_ns` flag is true.
 * @param fork_ss_ns       reset to 0, then incremented for every forking node that
 *                         has two or more options.
 * @param error            set to -1 when a counter would reach UINT32_MAX, when
 *                         `fa.start` is null, or when a node has an unknown type or a
 *                         dynamic type that does not match its type tag. It is NOT
 *                         written on success - callers presumably initialise it
 *                         themselves (confirm against callers). On error `result` is
 *                         left partially written.
 */
void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_bookmarks& bookmark_manager,
                      size_t& first_read_ns, size_t& second_read_ns, size_t& fork_ss_ns, int& error)
{
    bookmark_id_t node_start_bm_offset = bookmark_manager.new_range_of_bookmarks(fa.all.size());
    // Filled by cmd_READ_second_ns and consumed by belated_sslot_id at the very end;
    // presumably the places in `result` whose slot id can only be fixed once the final
    // value of first_read_ns is known - confirm against writing_commands.h.
    std::vector<size_t> not_yet_dedicated_second_read_ns_ssids;
    first_read_ns = 0;
    second_read_ns = 0;
    fork_ss_ns = 0;

    assert(fa.start);
    // assert() vanishes under NDEBUG; report the problem instead of dereferencing null.
    nonthrowing_assert(fa.start != nullptr);

    // Work list of nodes whose code still may have to be emitted (used as a stack).
    std::vector<FA_Node*> todo = {fa.start};

    auto nodesBookmark = [&](FA_Node* node) -> bookmark_id_t {
        assert(node);
        return node_start_bm_offset + node->nodeId;
    };
    auto addBranching = [&](FA_Node* node) {
        todo.push_back(node);
    };
    // Emits the READ command for the requested namespace and bumps its counter.
    auto reading_head = [&](bool is_in_second_ns) {
        if (is_in_second_ns) {
            cmd_READ_second_ns(result, not_yet_dedicated_second_read_ns_ssids);
            second_read_ns++;
        } else {
            cmd_READ_first_ns(result, first_read_ns++);
        }
    };

    while (!todo.empty()) {
        FA_Node* node = todo.back();
        todo.pop_back();
        if (bookmark_manager.has_landed(nodesBookmark(node))) {
            // Already emitted through another path; nothing to fall through from here.
            continue;
        }
        // Emit a straight-line chain of nodes until it ends or rejoins emitted code.
        while (true) {
            if (bookmark_manager.has_landed(nodesBookmark(node))) {
                cmd_JUMP(result, bookmark_manager, nodesBookmark(node));
                break;
            }
            bookmark_manager.land_bookmark(result, nodesBookmark(node));

            if (node->type == match) {
                cmd_MATCH(result);
                cmd_DIE(result);
                break;
            } else if (node->type == one_char_read) {
                FA_NodeOfOneCharRead* ocr = dynamic_cast<FA_NodeOfOneCharRead*>(node);
                nonthrowing_assert(ocr != nullptr);
                nonthrowing_assert(first_read_ns + second_read_ns < UINT32_MAX);
                reading_head(ocr->second_ns);
                write_filter(result, bookmark_manager, {ocr->filter}, {nodesBookmark(ocr->nxt_node)});
                node = ocr->nxt_node;
            } else if (node->type == look_one_behind) {
                FA_NodeOfLookOneBehind* lob = dynamic_cast<FA_NodeOfLookOneBehind*>(node);
                nonthrowing_assert(lob != nullptr);
                write_filter(result, bookmark_manager, {lob->filter}, {nodesBookmark(lob->nxt_node)});
                node = lob->nxt_node;
            } else if (node->type == forking) {
                FA_NodeOfForking* fn = dynamic_cast<FA_NodeOfForking*>(node);
                nonthrowing_assert(fn != nullptr);
                std::vector<FA_Node*>& nxt_options = fn->nxt_options;
                if (nxt_options.empty()) {
                    cmd_DIE(result);
                    break;
                }
                if (nxt_options.size() >= 2) {
                    nonthrowing_assert(fork_ss_ns < UINT32_MAX);
                    // One slot per forking node, shared by all FORK commands it emits.
                    sslot_id_t sslot = fork_ss_ns++;
                    // Every option but the last is forked off and queued ...
                    for (size_t i = 0; i + 1 < nxt_options.size(); i++) {
                        cmd_FORK(result, bookmark_manager, sslot, nodesBookmark(nxt_options[i]));
                        addBranching(nxt_options[i]);
                    }
                }
                // ... while the last option is emitted directly after this node.
                node = nxt_options.back();
            } else if (node->type == track_array_mov_imm) {
                FA_NodeOfTrackArrayMovImm* tami = dynamic_cast<FA_NodeOfTrackArrayMovImm*>(node);
                nonthrowing_assert(tami != nullptr);
                write_byte(result, tami->operation);
                write_tai(result, tami->key);
                write_quadword(result, tami->imm_value);
                node = tami->nxt_node;
            } else if (node->type == track_array_mov_halfinvariant) {
                FA_NodeOfTrackArrayMovHalfinvariant* tamh = dynamic_cast<FA_NodeOfTrackArrayMovHalfinvariant*>(node);
                nonthrowing_assert(tamh != nullptr);
                write_byte(result, tamh->operation);
                write_tai(result, tamh->key);
                node = tamh->nxt_node;
            } else if (node->type == det_char_crossroads) {
                FA_NodeOfDetCharCrossroads* dcc = dynamic_cast<FA_NodeOfDetCharCrossroads*>(node);
                nonthrowing_assert(dcc != nullptr);
                nonthrowing_assert(first_read_ns + second_read_ns < UINT32_MAX);
                if (dcc->matching) {
                    cmd_MATCH(result);
                }
                reading_head(dcc->second_ns);
                std::vector<codeset_t> codesets;
                std::vector<bookmark_id_t> branches;
                for (const DFA_CrossroadPath& p : dcc->crossroads) {
                    codesets.push_back(p.input);
                    branches.push_back(nodesBookmark(p.nxt_node));
                    addBranching(p.nxt_node);
                }
                write_filter(result, bookmark_manager, codesets, branches);
                if (dcc->crossroads.empty()) {
                    break;
                }
                // The first path is emitted right here; its work-list entry will be
                // skipped later because its bookmark will have landed by then.
                node = dcc->crossroads[0].nxt_node;
            } else {
                // Unknown node kind. Without the explicit error a build with NDEBUG would
                // loop once more, see the bookmark just landed and emit a JUMP to itself.
                assert(false);
                error = -1;
                return;
            }
        }
    }

    // Second-namespace reads are numbered after all first-namespace reads.
    for (size_t j = 0; j < not_yet_dedicated_second_read_ns_ssids.size(); j++) {
        belated_sslot_id(result, not_yet_dedicated_second_read_ns_ssids[j], j + first_read_ns);
    }
}
}