Fixed VM bugs, refactored the entire project to use the regexis024 namespace, removed some junk, added tests
This commit is contained in:
parent
b11afa72ea
commit
76f3742521
@ -72,7 +72,6 @@ struct Libregexis024BuildSystem {
|
|||||||
"libregexis024fa/graph_to_bytecode/core.cpp",
|
"libregexis024fa/graph_to_bytecode/core.cpp",
|
||||||
|
|
||||||
"libregexis024sol/common_codesets.cpp",
|
"libregexis024sol/common_codesets.cpp",
|
||||||
"libregexis024sol/part_of_expr_that_tracks.cpp",
|
|
||||||
"libregexis024sol/expr_compiler.cpp",
|
"libregexis024sol/expr_compiler.cpp",
|
||||||
"libregexis024sol/square_bracket_expression.cpp",
|
"libregexis024sol/square_bracket_expression.cpp",
|
||||||
"libregexis024sol/sol_misc_base.cpp",
|
"libregexis024sol/sol_misc_base.cpp",
|
||||||
|
@ -10,19 +10,20 @@
|
|||||||
#include <libregexis024vm/vm_opcodes.h>
|
#include <libregexis024vm/vm_opcodes.h>
|
||||||
#include <libregexis024fa/tracking_fa_nodes.h>
|
#include <libregexis024fa/tracking_fa_nodes.h>
|
||||||
|
|
||||||
const char* one_char_read_color = "black";
|
namespace regexis024 {
|
||||||
const char* forking_color = "darkorchid1";
|
const char* one_char_read_color = "black";
|
||||||
const char* look_one_behind_color = "darkslateblue";
|
const char* forking_color = "darkorchid1";
|
||||||
const char* look_one_ahead_color = "coral1";
|
const char* look_one_behind_color = "darkslateblue";
|
||||||
const char* track_array_mov_imm_color = "lightblue2";
|
const char* look_one_ahead_color = "coral1";
|
||||||
const char* track_array_mov_halfinvariant_color = "lightseagreen";
|
const char* track_array_mov_imm_color = "lightblue2";
|
||||||
const char* match_pending_lob_color = "darkgoldenrod2";
|
const char* track_array_mov_halfinvariant_color = "lightseagreen";
|
||||||
const char* match_color = "gold";
|
const char* match_pending_lob_color = "darkgoldenrod2";
|
||||||
const char* det_char_crossroads_color = "navy";
|
const char* match_color = "gold";
|
||||||
const char* error_color = "crimson";
|
const char* det_char_crossroads_color = "navy";
|
||||||
const char* STAR = "★";
|
const char* error_color = "crimson";
|
||||||
|
const char* STAR = "★";
|
||||||
|
|
||||||
const char* get_associated_color(FA_Node* node){
|
const char* get_associated_color(FA_Node* node){
|
||||||
switch (node->type) {
|
switch (node->type) {
|
||||||
#define ccase(tn) case tn: return tn##_color;
|
#define ccase(tn) case tn: return tn##_color;
|
||||||
ccase(one_char_read)
|
ccase(one_char_read)
|
||||||
@ -38,20 +39,20 @@ const char* get_associated_color(FA_Node* node){
|
|||||||
return "black";
|
return "black";
|
||||||
#undef ccase
|
#undef ccase
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct NodesProblems{
|
struct NodesProblems{
|
||||||
size_t actual_refcount = 0;
|
size_t actual_refcount = 0;
|
||||||
bool refcount_problem = false;
|
bool refcount_problem = false;
|
||||||
size_t edges_point_to_null = 0;
|
size_t edges_point_to_null = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct EdgesProblems {
|
struct EdgesProblems {
|
||||||
bool points_to_null = false;
|
bool points_to_null = false;
|
||||||
explicit EdgesProblems(bool points_to_null): points_to_null(points_to_null) {}
|
explicit EdgesProblems(bool points_to_null): points_to_null(points_to_null) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string get_applied_edge_attributes(FA_Node* node, const NodesProblems& np, const EdgesProblems& ep){
|
std::string get_applied_edge_attributes(FA_Node* node, const NodesProblems& np, const EdgesProblems& ep){
|
||||||
std::string res = "color=";
|
std::string res = "color=";
|
||||||
if (ep.points_to_null) {
|
if (ep.points_to_null) {
|
||||||
res += error_color;
|
res += error_color;
|
||||||
@ -61,9 +62,9 @@ std::string get_applied_edge_attributes(FA_Node* node, const NodesProblems& np,
|
|||||||
res += " style=bold";
|
res += " style=bold";
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string get_applied_node_attributes(FA_Node* node, const NodesProblems& bd){
|
std::string get_applied_node_attributes(FA_Node* node, const NodesProblems& bd){
|
||||||
std::string res = "color=";
|
std::string res = "color=";
|
||||||
res += get_associated_color(node);
|
res += get_associated_color(node);
|
||||||
if (bd.refcount_problem)
|
if (bd.refcount_problem)
|
||||||
@ -72,9 +73,9 @@ std::string get_applied_node_attributes(FA_Node* node, const NodesProblems& bd){
|
|||||||
(node->type == det_char_crossroads && dynamic_cast<FA_NodeOfDetCharCrossroads*>(node)->matching))
|
(node->type == det_char_crossroads && dynamic_cast<FA_NodeOfDetCharCrossroads*>(node)->matching))
|
||||||
res += " shape=doublecircle";
|
res += " shape=doublecircle";
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
void append_reverse_hex(std::string& res, uint32_t num){
|
void append_reverse_hex(std::string& res, uint32_t num){
|
||||||
if (num == 0){
|
if (num == 0){
|
||||||
res += "0";
|
res += "0";
|
||||||
} else {
|
} else {
|
||||||
@ -84,9 +85,9 @@ void append_reverse_hex(std::string& res, uint32_t num){
|
|||||||
num >>= 4;
|
num >>= 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string stringify_codeset(const codeset_t& cs){
|
std::string stringify_codeset(const codeset_t& cs){
|
||||||
std::string res;
|
std::string res;
|
||||||
for (long i = static_cast<long>(cs.size()) - 1; i >= 0; i--) {
|
for (long i = static_cast<long>(cs.size()) - 1; i >= 0; i--) {
|
||||||
uint64_t start = cs[i].first, end = cs[i].second;
|
uint64_t start = cs[i].first, end = cs[i].second;
|
||||||
@ -102,9 +103,9 @@ std::string stringify_codeset(const codeset_t& cs){
|
|||||||
}
|
}
|
||||||
std::reverse(res.begin(), res.end()); /* ascii works wonders */
|
std::reverse(res.begin(), res.end()); /* ascii works wonders */
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string get_extended_node_lable(FA_Node* node){
|
std::string get_extended_node_lable(FA_Node* node){
|
||||||
if ((node->type == one_char_read && dynamic_cast<FA_NodeOfOneCharRead*>(node)->second_ns) ||
|
if ((node->type == one_char_read && dynamic_cast<FA_NodeOfOneCharRead*>(node)->second_ns) ||
|
||||||
(node->type == det_char_crossroads && dynamic_cast<FA_NodeOfDetCharCrossroads*>(node)->second_ns)) {
|
(node->type == det_char_crossroads && dynamic_cast<FA_NodeOfDetCharCrossroads*>(node)->second_ns)) {
|
||||||
return std::string(" ") + STAR;
|
return std::string(" ") + STAR;
|
||||||
@ -115,9 +116,9 @@ std::string get_extended_node_lable(FA_Node* node){
|
|||||||
return std::string(" pending loa ") + stringify_codeset(mn->pending_filter);
|
return std::string(" pending loa ") + stringify_codeset(mn->pending_filter);
|
||||||
}
|
}
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string get_node_lable(FA_Node* node, const NodesProblems& bd){
|
std::string get_node_lable(FA_Node* node, const NodesProblems& bd){
|
||||||
std::string res;
|
std::string res;
|
||||||
switch (node->type) {
|
switch (node->type) {
|
||||||
#define tcase(tn, str) case tn: res = str; break;
|
#define tcase(tn, str) case tn: res = str; break;
|
||||||
@ -135,9 +136,9 @@ std::string get_node_lable(FA_Node* node, const NodesProblems& bd){
|
|||||||
if (bd.refcount_problem)
|
if (bd.refcount_problem)
|
||||||
res += ("!refcount: " + std::to_string(node->refs) + "!");
|
res += ("!refcount: " + std::to_string(node->refs) + "!");
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_edge(FA_Node* start, const FA_Node* dest, const std::string& label, FILE* fd, NodesProblems& np){
|
void print_edge(FA_Node* start, const FA_Node* dest, const std::string& label, FILE* fd, NodesProblems& np){
|
||||||
if (!dest){
|
if (!dest){
|
||||||
fprintf(stderr, "NULL transition going from node %lu\n", start->nodeId);
|
fprintf(stderr, "NULL transition going from node %lu\n", start->nodeId);
|
||||||
fprintf(fd, "%lu->NULL_%lu_%lu [label=\"%s\" color=crimson]", start->nodeId,
|
fprintf(fd, "%lu->NULL_%lu_%lu [label=\"%s\" color=crimson]", start->nodeId,
|
||||||
@ -146,9 +147,9 @@ void print_edge(FA_Node* start, const FA_Node* dest, const std::string& label, F
|
|||||||
}
|
}
|
||||||
fprintf(fd, "%lu->%lu [label=\"%s\" %s]\n", start->nodeId, dest->nodeId, label.c_str(),
|
fprintf(fd, "%lu->%lu [label=\"%s\" %s]\n", start->nodeId, dest->nodeId, label.c_str(),
|
||||||
get_applied_edge_attributes(start, np, EdgesProblems(false)).c_str());
|
get_applied_edge_attributes(start, np, EdgesProblems(false)).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_fa(const FA_Container& fa, FILE* fd, const KnownTrackingTools& ktr,
|
void print_fa(const FA_Container& fa, FILE* fd, const KnownTrackingTools& ktr,
|
||||||
const RegexPriorityTable& priority_table){
|
const RegexPriorityTable& priority_table){
|
||||||
assert(fa.start);
|
assert(fa.start);
|
||||||
assert(fd);
|
assert(fd);
|
||||||
@ -184,7 +185,7 @@ void print_fa(const FA_Container& fa, FILE* fd, const KnownTrackingTools& ktr,
|
|||||||
|
|
||||||
/* Two Infoboxes */
|
/* Two Infoboxes */
|
||||||
|
|
||||||
auto stringifyTrackingVarType = [](tracking_var_type type) -> std::string {
|
auto stringifyTrackingVarType = [](tracking_var_type_t type) -> std::string {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case tracking_var_types::range:
|
case tracking_var_types::range:
|
||||||
return "range";
|
return "range";
|
||||||
@ -199,7 +200,7 @@ void print_fa(const FA_Container& fa, FILE* fd, const KnownTrackingTools& ktr,
|
|||||||
for (auto& p: ktr.track_names){
|
for (auto& p: ktr.track_names){
|
||||||
const SubtrackingNameInfo& tu = ktr.retrieval_info[p.second];
|
const SubtrackingNameInfo& tu = ktr.retrieval_info[p.second];
|
||||||
|
|
||||||
auto getRole = [](bool presence, tracking_var_type type, int first, int second,
|
auto getRole = [](bool presence, tracking_var_type_t type, int first, int second,
|
||||||
const std::string& ARR_NAME) -> std::string {
|
const std::string& ARR_NAME) -> std::string {
|
||||||
if (!presence) {
|
if (!presence) {
|
||||||
assert(first == -1 && second == -1);
|
assert(first == -1 && second == -1);
|
||||||
@ -264,7 +265,7 @@ void print_fa(const FA_Container& fa, FILE* fd, const KnownTrackingTools& ktr,
|
|||||||
if (!isImmMovOpcode(cn->operation))
|
if (!isImmMovOpcode(cn->operation))
|
||||||
fprintf(stderr, "bad operation in node %lu\n", node->nodeId);
|
fprintf(stderr, "bad operation in node %lu\n", node->nodeId);
|
||||||
snprintf(buf, 1024, "%s %hu %lu",
|
snprintf(buf, 1024, "%s %hu %lu",
|
||||||
regex024_opcode_tostr(cn->operation), cn->key, cn->imm_value);
|
opcode_to_str(cn->operation), cn->key, cn->imm_value);
|
||||||
print_edge(node, cn->nxt_node,std::string(buf), fd, bd);
|
print_edge(node, cn->nxt_node,std::string(buf), fd, bd);
|
||||||
} else if (node->type == track_array_mov_halfinvariant){
|
} else if (node->type == track_array_mov_halfinvariant){
|
||||||
FA_NodeOfTrackArrayMovHalfinvariant* cn = dynamic_cast<FA_NodeOfTrackArrayMovHalfinvariant *>(node);
|
FA_NodeOfTrackArrayMovHalfinvariant* cn = dynamic_cast<FA_NodeOfTrackArrayMovHalfinvariant *>(node);
|
||||||
@ -272,7 +273,7 @@ void print_fa(const FA_Container& fa, FILE* fd, const KnownTrackingTools& ktr,
|
|||||||
if (!isCurPosMovOpcode(cn->operation))
|
if (!isCurPosMovOpcode(cn->operation))
|
||||||
fprintf(stderr, "bad operation in node %lu\n", node->nodeId);
|
fprintf(stderr, "bad operation in node %lu\n", node->nodeId);
|
||||||
snprintf(buf, 1024, "%s %hu",
|
snprintf(buf, 1024, "%s %hu",
|
||||||
regex024_opcode_tostr(cn->operation), cn->key);
|
opcode_to_str(cn->operation), cn->key);
|
||||||
print_edge(node, cn->nxt_node,std::string(buf), fd, bd);
|
print_edge(node, cn->nxt_node,std::string(buf), fd, bd);
|
||||||
} else if (node->type == det_char_crossroads){
|
} else if (node->type == det_char_crossroads){
|
||||||
FA_NodeOfDetCharCrossroads* cn = dynamic_cast<FA_NodeOfDetCharCrossroads *>(node);
|
FA_NodeOfDetCharCrossroads* cn = dynamic_cast<FA_NodeOfDetCharCrossroads *>(node);
|
||||||
@ -284,9 +285,9 @@ void print_fa(const FA_Container& fa, FILE* fd, const KnownTrackingTools& ktr,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
fprintf(fd, "}\n");
|
fprintf(fd, "}\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
FILE* get_fd(const char* apath){
|
FILE* get_fd(const char* apath){
|
||||||
errno = 0;
|
errno = 0;
|
||||||
FILE *fd = fopen(apath, "w");
|
FILE *fd = fopen(apath, "w");
|
||||||
if (!fd)
|
if (!fd)
|
||||||
@ -297,9 +298,9 @@ FILE* get_fd(const char* apath){
|
|||||||
if (!fd)
|
if (!fd)
|
||||||
perror("fopen a");
|
perror("fopen a");
|
||||||
return fd;
|
return fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
void show_fa_with_sxiv_after_dot(const FA_Container& fa, const KnownTrackingTools& ktr,
|
void show_fa_with_sxiv_after_dot(const FA_Container& fa, const KnownTrackingTools& ktr,
|
||||||
const RegexPriorityTable& priority_table) {
|
const RegexPriorityTable& priority_table) {
|
||||||
const char* temp_gv = "FAGraph.gv";
|
const char* temp_gv = "FAGraph.gv";
|
||||||
const char* temp_png = "FAGraph.png";
|
const char* temp_png = "FAGraph.png";
|
||||||
@ -322,4 +323,5 @@ void show_fa_with_sxiv_after_dot(const FA_Container& fa, const KnownTrackingTool
|
|||||||
assert(chw >= 0);
|
assert(chw >= 0);
|
||||||
unlink(temp_gv);
|
unlink(temp_gv);
|
||||||
unlink(temp_png);
|
unlink(temp_png);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,8 +5,10 @@
|
|||||||
#include <libregexis024sol/part_of_expr_that_tracks.h>
|
#include <libregexis024sol/part_of_expr_that_tracks.h>
|
||||||
#include <libregexis024fa/selarr_priority_table.h>
|
#include <libregexis024fa/selarr_priority_table.h>
|
||||||
|
|
||||||
/* Uses temporary file FAGraph.gv,png, dot command and sxiv */
|
namespace regexis024 {
|
||||||
void show_fa_with_sxiv_after_dot(const FA_Container& fa, const KnownTrackingTools& ktr,
|
/* Uses temporary file FAGraph.gv,png, dot command and sxiv */
|
||||||
|
void show_fa_with_sxiv_after_dot(const FA_Container& fa, const KnownTrackingTools& ktr,
|
||||||
const RegexPriorityTable& priority_table);
|
const RegexPriorityTable& priority_table);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -2,40 +2,38 @@
|
|||||||
#include <functional>
|
#include <functional>
|
||||||
#include <libregexis024vm/utils.h>
|
#include <libregexis024vm/utils.h>
|
||||||
|
|
||||||
TreeWithStringsNode::TreeWithStringsNode(const std::string &val): val(val) {
|
namespace regexis024 {
|
||||||
}
|
static const char* ch_empty = " ";
|
||||||
|
static const char* ch_passing_by = "\u2502 ";
|
||||||
|
static const char* ch_connect_right_and_forward = "\u251c\u2500\u2500\u2500";
|
||||||
|
static const char* ch_connect_right_last = "\u2514\u2500\u2500\u2500";
|
||||||
|
|
||||||
static const char* ch_empty = " ";
|
static const char* ch_box_left_side = "\u2551";
|
||||||
static const char* ch_passing_by = "\u2502 ";
|
static const char* ch_box_right_side = "\u2551";
|
||||||
static const char* ch_connect_right_and_forward = "\u251c\u2500\u2500\u2500";
|
static const char* ch_box_top_side = "\u2550";
|
||||||
static const char* ch_connect_right_last = "\u2514\u2500\u2500\u2500";
|
static const char* ch_box_bottom_side = "\u2550";
|
||||||
|
static const char* ch_box_crn_top_left = "\u2554";
|
||||||
|
static const char* ch_box_crn_top_right = "\u2557";
|
||||||
|
static const char* ch_box_crn_bottom_left = "\u255A";
|
||||||
|
static const char* ch_box_crn_bottom_right = "\u255D";
|
||||||
|
|
||||||
static const char* ch_box_left_side = "\u2551";
|
size_t length_of_line(const std::string& str) {
|
||||||
static const char* ch_box_right_side = "\u2551";
|
|
||||||
static const char* ch_box_top_side = "\u2550";
|
|
||||||
static const char* ch_box_bottom_side = "\u2550";
|
|
||||||
static const char* ch_box_crn_top_left = "\u2554";
|
|
||||||
static const char* ch_box_crn_top_right = "\u2557";
|
|
||||||
static const char* ch_box_crn_bottom_left = "\u255A";
|
|
||||||
static const char* ch_box_crn_bottom_right = "\u255D";
|
|
||||||
|
|
||||||
size_t length_of_line(const std::string& str) {
|
|
||||||
size_t ch = 0;
|
size_t ch = 0;
|
||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
while (pos < str.size()) {
|
while (pos < str.size()) {
|
||||||
int32_t code;
|
int32_t code;
|
||||||
size_t adj;
|
size_t adj;
|
||||||
utf8_string_iterat(code, adj, pos, reinterpret_cast<const uint8_t*>(str.data()), str.size());
|
utf8_string_iterat(code, adj, pos, str.data(), str.size());
|
||||||
if (code < 0)
|
if (code < 0)
|
||||||
return ch;
|
return ch;
|
||||||
ch++;
|
ch++;
|
||||||
pos += adj;
|
pos += adj;
|
||||||
}
|
}
|
||||||
return ch;
|
return ch;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Warning: recursion used */
|
/* Warning: recursion used */
|
||||||
void toLines_dfs(const TreeWithStringsNode& node, lines& out, std::vector<bool>& prefix) {
|
void toLines_dfs(const TreeWithStringsNode& node, lines& out, std::vector<bool>& prefix) {
|
||||||
out.push_back("");
|
out.push_back("");
|
||||||
size_t n = prefix.size();
|
size_t n = prefix.size();
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
@ -54,21 +52,21 @@ void toLines_dfs(const TreeWithStringsNode& node, lines& out, std::vector<bool>&
|
|||||||
toLines_dfs(node.childeren[i], out, prefix);
|
toLines_dfs(node.childeren[i], out, prefix);
|
||||||
}
|
}
|
||||||
prefix.pop_back();
|
prefix.pop_back();
|
||||||
}
|
}
|
||||||
|
|
||||||
void TreeWithStringsNode::toLines(lines &out) const {
|
void TreeWithStringsNode::toLines(lines &out) const {
|
||||||
std::vector<bool> prefix;
|
std::vector<bool> prefix;
|
||||||
toLines_dfs(*this, out, prefix);
|
toLines_dfs(*this, out, prefix);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string strMul(size_t n, const char* str) {
|
std::string strMul(size_t n, const char* str) {
|
||||||
std::string res;
|
std::string res;
|
||||||
for (size_t i = 0; i < n; i++)
|
for (size_t i = 0; i < n; i++)
|
||||||
res += str;
|
res += str;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
lines wrapWithBox(const lines &in) {
|
lines wrapWithBox(const lines &in) {
|
||||||
lines out;
|
lines out;
|
||||||
size_t max_width = 0;
|
size_t max_width = 0;
|
||||||
for (auto& l: in)
|
for (auto& l: in)
|
||||||
@ -80,10 +78,10 @@ lines wrapWithBox(const lines &in) {
|
|||||||
}
|
}
|
||||||
out.push_back(ch_box_crn_bottom_left + strMul(max_width, ch_box_bottom_side) + ch_box_crn_bottom_right);
|
out.push_back(ch_box_crn_bottom_left + strMul(max_width, ch_box_bottom_side) + ch_box_crn_bottom_right);
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
void printLines(const lines &in) {
|
void printLines(const lines &in) {
|
||||||
for (auto& l: in)
|
for (auto& l: in)
|
||||||
printf("%s\n", l.c_str());
|
printf("%s\n", l.c_str());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,20 +6,19 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
typedef std::vector<std::string> lines;
|
namespace regexis024 {
|
||||||
|
typedef std::vector<std::string> lines;
|
||||||
|
|
||||||
struct TreeWithStringsNode {
|
struct TreeWithStringsNode {
|
||||||
std::string val;
|
std::string val;
|
||||||
std::vector<TreeWithStringsNode> childeren;
|
std::vector<TreeWithStringsNode> childeren;
|
||||||
|
|
||||||
explicit TreeWithStringsNode(const std::string &val);
|
|
||||||
TreeWithStringsNode() = default;
|
|
||||||
|
|
||||||
void toLines(lines& out) const;
|
void toLines(lines& out) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
lines wrapWithBox(const lines& in);
|
lines wrapWithBox(const lines& in);
|
||||||
|
|
||||||
void printLines(const lines& in);
|
void printLines(const lines& in);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -2,15 +2,16 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
std::string thread_to_str(const REGEX_IS024_Thread& thread){
|
namespace regexis024 {
|
||||||
|
std::string thread_to_str(const Thread& thread){
|
||||||
if (!(thread.slot_occupation_status & SLOT_OCCUPIED))
|
if (!(thread.slot_occupation_status & SLOT_OCCUPIED))
|
||||||
return "{ unoccupied }";
|
return "{ unoccupied }";
|
||||||
char buf[1024];
|
char buf[1024];
|
||||||
snprintf(buf, 1024, "{ IP = %lu }", thread.IP);
|
snprintf(buf, 1024, "{ IP = %lu }", thread.IP);
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string stack_to_str(const REGEX_IS024_Stack& stack){
|
std::string stack_to_str(const SSID_Stack& stack){
|
||||||
std::string res = "{ ";
|
std::string res = "{ ";
|
||||||
for (uint32_t i = 0; i < stack.sz; i++){
|
for (uint32_t i = 0; i < stack.sz; i++){
|
||||||
if (i != 0)
|
if (i != 0)
|
||||||
@ -19,9 +20,9 @@ std::string stack_to_str(const REGEX_IS024_Stack& stack){
|
|||||||
}
|
}
|
||||||
res += " }";
|
res += " }";
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string slots_to_str(const REGEX_IS024_CONTEXT& ctx){
|
std::string slots_to_str(const VMContext& ctx){
|
||||||
if (!ctx.initialized)
|
if (!ctx.initialized)
|
||||||
return "uninitialized";
|
return "uninitialized";
|
||||||
std::string READ_slots;
|
std::string READ_slots;
|
||||||
@ -41,18 +42,19 @@ std::string slots_to_str(const REGEX_IS024_CONTEXT& ctx){
|
|||||||
stack_to_str(ctx.READ_halted_stack_new_second).c_str(),
|
stack_to_str(ctx.READ_halted_stack_new_second).c_str(),
|
||||||
stack_to_str(ctx.READ_halted_stack_old).c_str(), stack_to_str(ctx.FORK_halted_stack).c_str());
|
stack_to_str(ctx.READ_halted_stack_old).c_str(), stack_to_str(ctx.FORK_halted_stack).c_str());
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
void debug_print_context(const REGEX_IS024_CONTEXT& ctx, const char* place) {
|
void debug_print_context(const VMContext& ctx, const char* place) {
|
||||||
printf("== DEBUG `%s` ==\n", place);
|
printf("== DEBUG `%s` ==\n", place);
|
||||||
|
|
||||||
printf("Active thread: %s, sifting_with: %s, match: %s\n%s\n",
|
printf("Active thread: %s, sifting_with: %s, match: %s\n%s\n",
|
||||||
thread_to_str(ctx.active_thread).c_str(),
|
thread_to_str(ctx.active_thread).c_str(),
|
||||||
ctx.sifting_with ? thread_to_str(*ctx.sifting_with).c_str() : "NO", thread_to_str(ctx.matched_thread).c_str(),
|
ctx.sifting_with ? thread_to_str(*ctx.sifting_with).c_str() : "NO", thread_to_str(ctx.matched_thread).c_str(),
|
||||||
slots_to_str(ctx).c_str());
|
slots_to_str(ctx).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void debug_print_thread(const REGEX_IS024_Thread& thr, const char *place) {
|
void debug_print_thread(const Thread& thr, const char *place) {
|
||||||
printf("== DEBUG `%s` ==\n", place);
|
printf("== DEBUG `%s` ==\n", place);
|
||||||
printf("This thread: %s\n", thread_to_str(thr).c_str());
|
printf("This thread: %s\n", thread_to_str(thr).c_str());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,8 +4,10 @@
|
|||||||
#include <libregexis024vm/libregexis024vm.h>
|
#include <libregexis024vm/libregexis024vm.h>
|
||||||
#include <libregexis024vm/instruction_implementation.h>
|
#include <libregexis024vm/instruction_implementation.h>
|
||||||
|
|
||||||
void debug_print_context(const REGEX_IS024_CONTEXT& ctx, const char* place);
|
namespace regexis024 {
|
||||||
|
void debug_print_context(const VMContext& ctx, const char* place);
|
||||||
|
|
||||||
void debug_print_thread(const REGEX_IS024_Thread& thr, const char *place);
|
void debug_print_thread(const Thread& thr, const char *place);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
#include <libregexis024fa/codeset.h>
|
#include <libregexis024fa/codeset.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
codeset_t invert_set(const codeset_t &X) {
|
namespace regexis024 {
|
||||||
|
codeset_t invert_set(const codeset_t &X) {
|
||||||
if (X.empty())
|
if (X.empty())
|
||||||
return {{0, UINT32_MAX}};
|
return {{0, UINT32_MAX}};
|
||||||
codeset_t res;
|
codeset_t res;
|
||||||
@ -13,7 +14,7 @@ codeset_t invert_set(const codeset_t &X) {
|
|||||||
if (X.back().second != UINT32_MAX)
|
if (X.back().second != UINT32_MAX)
|
||||||
res.emplace_back(X.back().second + 1, UINT32_MAX);
|
res.emplace_back(X.back().second + 1, UINT32_MAX);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define elA (A[i])
|
#define elA (A[i])
|
||||||
#define elB (B[j])
|
#define elB (B[j])
|
||||||
@ -23,7 +24,7 @@ codeset_t invert_set(const codeset_t &X) {
|
|||||||
#define Aended (i == An)
|
#define Aended (i == An)
|
||||||
#define Bended (j == Bn)
|
#define Bended (j == Bn)
|
||||||
|
|
||||||
codeset_t merge_sets(const codeset_t &A, const codeset_t &B) {
|
codeset_t merge_sets(const codeset_t &A, const codeset_t &B) {
|
||||||
codeset_t res;
|
codeset_t res;
|
||||||
prepare
|
prepare
|
||||||
std::pair<uint32_t, uint32_t> cur;
|
std::pair<uint32_t, uint32_t> cur;
|
||||||
@ -63,9 +64,9 @@ codeset_t merge_sets(const codeset_t &A, const codeset_t &B) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
codeset_t intersect_sets(const codeset_t &A, const codeset_t &B) {
|
codeset_t intersect_sets(const codeset_t &A, const codeset_t &B) {
|
||||||
codeset_t res;
|
codeset_t res;
|
||||||
prepare
|
prepare
|
||||||
while (true){
|
while (true){
|
||||||
@ -82,34 +83,34 @@ codeset_t intersect_sets(const codeset_t &A, const codeset_t &B) {
|
|||||||
Binc;
|
Binc;
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
codeset_t subtract_sets(const codeset_t &A, const codeset_t &B) {
|
codeset_t subtract_sets(const codeset_t &A, const codeset_t &B) {
|
||||||
return intersect_sets(A, invert_set(B));
|
return intersect_sets(A, invert_set(B));
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_inside(uint32_t start, uint32_t end, codeset_t &X) {
|
bool is_inside(uint32_t start, uint32_t end, codeset_t &X) {
|
||||||
for (auto& p: X){
|
for (auto& p: X){
|
||||||
if (p.first <= start && end <= p.second)
|
if (p.first <= start && end <= p.second)
|
||||||
return true;
|
return true;
|
||||||
assert(end < p.first || p.second < start);
|
assert(end < p.first || p.second < start);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
codeset_t set_add_char(const codeset_t& X, uint32_t cp) {
|
codeset_t set_add_char(const codeset_t& X, uint32_t cp) {
|
||||||
return merge_sets(X, {{cp, cp}});
|
return merge_sets(X, {{cp, cp}});
|
||||||
}
|
}
|
||||||
|
|
||||||
codeset_t set_add_range(const codeset_t& X, uint32_t start, uint32_t end) {
|
codeset_t set_add_range(const codeset_t& X, uint32_t start, uint32_t end) {
|
||||||
return merge_sets(X, {{start, end}});
|
return merge_sets(X, {{start, end}});
|
||||||
}
|
}
|
||||||
|
|
||||||
codeset_t codeset_of_one_char(uint32_t ch) {
|
codeset_t codeset_of_one_char(uint32_t ch) {
|
||||||
return codeset_t({{ch, ch}});
|
return codeset_t({{ch, ch}});
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string stringifyCodesetBase10(const codeset_t& CS) {
|
std::string stringifyCodesetBase10(const codeset_t& CS) {
|
||||||
std::string cs;
|
std::string cs;
|
||||||
for (auto p: CS) {
|
for (auto p: CS) {
|
||||||
if (!cs.empty())
|
if (!cs.empty())
|
||||||
@ -117,4 +118,5 @@ std::string stringifyCodesetBase10(const codeset_t& CS) {
|
|||||||
cs += std::to_string(p.first) + "-" + std::to_string(p.second);
|
cs += std::to_string(p.first) + "-" + std::to_string(p.second);
|
||||||
}
|
}
|
||||||
return cs;
|
return cs;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,22 +6,24 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
typedef std::vector<std::pair<uint32_t, uint32_t>> codeset_t;
|
namespace regexis024 {
|
||||||
|
typedef std::vector<std::pair<uint32_t, uint32_t>> codeset_t;
|
||||||
|
|
||||||
codeset_t invert_set(const codeset_t& X);
|
codeset_t invert_set(const codeset_t& X);
|
||||||
codeset_t merge_sets(const codeset_t& A, const codeset_t& B);
|
codeset_t merge_sets(const codeset_t& A, const codeset_t& B);
|
||||||
codeset_t intersect_sets(const codeset_t& A, const codeset_t& B);
|
codeset_t intersect_sets(const codeset_t& A, const codeset_t& B);
|
||||||
codeset_t subtract_sets(const codeset_t& A, const codeset_t& B);
|
codeset_t subtract_sets(const codeset_t& A, const codeset_t& B);
|
||||||
|
|
||||||
/* Aborts if segment in question hit the edge (unsafe function) */
|
/* Aborts if segment in question hit the edge (unsafe function) */
|
||||||
bool is_inside(uint32_t start, uint32_t end, codeset_t& X);
|
bool is_inside(uint32_t start, uint32_t end, codeset_t& X);
|
||||||
|
|
||||||
codeset_t set_add_char(const codeset_t& X, uint32_t cp);
|
codeset_t set_add_char(const codeset_t& X, uint32_t cp);
|
||||||
codeset_t set_add_range(const codeset_t& X, uint32_t start, uint32_t end);
|
codeset_t set_add_range(const codeset_t& X, uint32_t start, uint32_t end);
|
||||||
|
|
||||||
codeset_t codeset_of_one_char(uint32_t ch);
|
codeset_t codeset_of_one_char(uint32_t ch);
|
||||||
#define codeset_of_all codeset_t({{0, UINT32_MAX}})
|
#define codeset_of_all codeset_t({{0, UINT32_MAX}})
|
||||||
|
|
||||||
std::string stringifyCodesetBase10(const codeset_t& CS);
|
std::string stringifyCodesetBase10(const codeset_t& CS);
|
||||||
|
}
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_CODESET_H
|
#endif //LIBREGEXIS024_CODESET_H
|
@ -2,13 +2,14 @@
|
|||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
ColoredCodesetSegment::ColoredCodesetSegment(uint32_t color, uint32_t right_code): color(color), right_code(right_code) {}
|
namespace regexis024 {
|
||||||
|
ColoredCodesetSegment::ColoredCodesetSegment(uint32_t color, uint32_t right_code): color(color), right_code(right_code) {}
|
||||||
|
|
||||||
ColoredCodesetSegmentList::ColoredCodesetSegmentList() {
|
ColoredCodesetSegmentList::ColoredCodesetSegmentList() {
|
||||||
first = new ColoredCodesetSegment(0, UINT32_MAX);
|
first = new ColoredCodesetSegment(0, UINT32_MAX);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColoredCodesetSegmentList::replace_myself(const ColoredCodesetSegmentList &other) {
|
void ColoredCodesetSegmentList::replace_myself(const ColoredCodesetSegmentList &other) {
|
||||||
assert(other.first);
|
assert(other.first);
|
||||||
ColoredCodesetSegment** in_cur = &first;
|
ColoredCodesetSegment** in_cur = &first;
|
||||||
ColoredCodesetSegment* in_other = other.first;
|
ColoredCodesetSegment* in_other = other.first;
|
||||||
@ -17,36 +18,36 @@ void ColoredCodesetSegmentList::replace_myself(const ColoredCodesetSegmentList &
|
|||||||
in_cur = &((**in_cur).next);
|
in_cur = &((**in_cur).next);
|
||||||
in_other = in_other->next;
|
in_other = in_other->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ColoredCodesetSegmentList::ColoredCodesetSegmentList(const ColoredCodesetSegmentList &other) {
|
ColoredCodesetSegmentList::ColoredCodesetSegmentList(const ColoredCodesetSegmentList &other) {
|
||||||
replace_myself(other);
|
replace_myself(other);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColoredCodesetSegmentList::free_myself() {
|
void ColoredCodesetSegmentList::free_myself() {
|
||||||
ColoredCodesetSegment* cur = first;
|
ColoredCodesetSegment* cur = first;
|
||||||
while (cur) {
|
while (cur) {
|
||||||
ColoredCodesetSegment* nxt = cur->next;
|
ColoredCodesetSegment* nxt = cur->next;
|
||||||
delete cur;
|
delete cur;
|
||||||
cur = nxt;
|
cur = nxt;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ColoredCodesetSegmentList::~ColoredCodesetSegmentList() {
|
ColoredCodesetSegmentList::~ColoredCodesetSegmentList() {
|
||||||
free_myself();
|
free_myself();
|
||||||
}
|
}
|
||||||
|
|
||||||
ColoredCodesetSegmentList& ColoredCodesetSegmentList::operator=(const ColoredCodesetSegmentList &other) {
|
ColoredCodesetSegmentList& ColoredCodesetSegmentList::operator=(const ColoredCodesetSegmentList &other) {
|
||||||
free_myself();
|
free_myself();
|
||||||
replace_myself(other);
|
replace_myself(other);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
ColoredCodeset::ColoredCodeset(uint64_t dummy_n): DummyN(dummy_n) {
|
ColoredCodeset::ColoredCodeset(uint64_t dummy_n): DummyN(dummy_n) {
|
||||||
requests = {{}};
|
requests = {{}};
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColoredCodeset::split_phase(const codeset_t &X) {
|
void ColoredCodeset::split_phase(const codeset_t &X) {
|
||||||
|
|
||||||
uint32_t cA = 0;
|
uint32_t cA = 0;
|
||||||
ColoredCodesetSegment* cur_seg = list.first;
|
ColoredCodesetSegment* cur_seg = list.first;
|
||||||
@ -106,9 +107,9 @@ void ColoredCodeset::split_phase(const codeset_t &X) {
|
|||||||
advance_old();
|
advance_old();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColoredCodeset::apply_divisor(const codeset_t &X) {
|
void ColoredCodeset::apply_divisor(const codeset_t &X) {
|
||||||
split_phase(X);
|
split_phase(X);
|
||||||
size_t X_id = nxt_request_id++;
|
size_t X_id = nxt_request_id++;
|
||||||
size_t m = requests.size();
|
size_t m = requests.size();
|
||||||
@ -154,9 +155,9 @@ void ColoredCodeset::apply_divisor(const codeset_t &X) {
|
|||||||
cur = cur->next;
|
cur = cur->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ColoredCodeset::get_splits_of_non_dummy(std::vector<codeset_t> &res_input,
|
void ColoredCodeset::get_splits_of_non_dummy(std::vector<codeset_t> &res_input,
|
||||||
std::vector<std::vector<size_t>> &res_color_to_requests) {
|
std::vector<std::vector<size_t>> &res_color_to_requests) {
|
||||||
size_t n = requests.size();
|
size_t n = requests.size();
|
||||||
std::vector<ssize_t> nonclean_to_clean(n, -1);
|
std::vector<ssize_t> nonclean_to_clean(n, -1);
|
||||||
@ -180,4 +181,5 @@ void ColoredCodeset::get_splits_of_non_dummy(std::vector<codeset_t> &res_input,
|
|||||||
L = cur->right_code + 1;
|
L = cur->right_code + 1;
|
||||||
cur = cur->next;
|
cur = cur->next;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
@ -7,9 +7,9 @@
|
|||||||
|
|
||||||
#include <libregexis024fa/codeset.h>
|
#include <libregexis024fa/codeset.h>
|
||||||
|
|
||||||
/* Used for determinizer. Nowhere else */
|
namespace regexis024 {
|
||||||
|
/* Used for determinizer. Nowhere else */
|
||||||
struct ColoredCodesetSegment {
|
struct ColoredCodesetSegment {
|
||||||
uint32_t color;
|
uint32_t color;
|
||||||
uint32_t right_code;
|
uint32_t right_code;
|
||||||
ColoredCodesetSegment* next = NULL;
|
ColoredCodesetSegment* next = NULL;
|
||||||
@ -18,11 +18,11 @@ struct ColoredCodesetSegment {
|
|||||||
bool divisor_on_left = false;
|
bool divisor_on_left = false;
|
||||||
|
|
||||||
ColoredCodesetSegment(uint32_t color, uint32_t right_code);
|
ColoredCodesetSegment(uint32_t color, uint32_t right_code);
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Warning!!! This stupid class is OOM-unsafe!!!
|
/* Warning!!! This stupid class is OOM-unsafe!!!
|
||||||
* This is not an issue as long as you don't expose any of its instances to the user of libregexis024 */
|
* This is not an issue as long as you don't expose any of its instances to the user of libregexis024 */
|
||||||
struct ColoredCodesetSegmentList {
|
struct ColoredCodesetSegmentList {
|
||||||
ColoredCodesetSegment* first = NULL;
|
ColoredCodesetSegment* first = NULL;
|
||||||
|
|
||||||
ColoredCodesetSegmentList();
|
ColoredCodesetSegmentList();
|
||||||
@ -38,10 +38,10 @@ struct ColoredCodesetSegmentList {
|
|||||||
~ColoredCodesetSegmentList();
|
~ColoredCodesetSegmentList();
|
||||||
|
|
||||||
ColoredCodesetSegmentList& operator=(const ColoredCodesetSegmentList& other);
|
ColoredCodesetSegmentList& operator=(const ColoredCodesetSegmentList& other);
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Highly unoptimized algorithm on this data structure O(C^2) time*/
|
/* Highly unoptimized algorithm on this data structure O(C^2) time*/
|
||||||
class ColoredCodeset {
|
class ColoredCodeset {
|
||||||
ColoredCodesetSegmentList list;
|
ColoredCodesetSegmentList list;
|
||||||
/* Size of this vector is equal to the number of colors */
|
/* Size of this vector is equal to the number of colors */
|
||||||
std::vector<std::vector<size_t>> requests;
|
std::vector<std::vector<size_t>> requests;
|
||||||
@ -49,7 +49,7 @@ class ColoredCodeset {
|
|||||||
size_t nxt_request_id = 0;
|
size_t nxt_request_id = 0;
|
||||||
|
|
||||||
void split_phase(const codeset_t& X);
|
void split_phase(const codeset_t& X);
|
||||||
public:
|
public:
|
||||||
/* First dummy_n split requests will be viewed as 'dummy requests', when complete map of splits is requested,
|
/* First dummy_n split requests will be viewed as 'dummy requests', when complete map of splits is requested,
|
||||||
* colors that are registered inside only dummy requests won't be returned. */
|
* colors that are registered inside only dummy requests won't be returned. */
|
||||||
ColoredCodeset(uint64_t dummy_n);
|
ColoredCodeset(uint64_t dummy_n);
|
||||||
@ -60,7 +60,7 @@ public:
|
|||||||
/* Returned 'requests' mapping will feature request ids with DummyN subtracted from them */
|
/* Returned 'requests' mapping will feature request ids with DummyN subtracted from them */
|
||||||
void get_splits_of_non_dummy(std::vector<codeset_t>& res_input,
|
void get_splits_of_non_dummy(std::vector<codeset_t>& res_input,
|
||||||
std::vector<std::vector<size_t>>& res_color_to_requests);
|
std::vector<std::vector<size_t>>& res_color_to_requests);
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -7,7 +7,8 @@
|
|||||||
// #include <debugging_regexis024/debug_through_graphviz.h>
|
// #include <debugging_regexis024/debug_through_graphviz.h>
|
||||||
// #endif
|
// #endif
|
||||||
|
|
||||||
REGEX_IS024_FA_FirstStageFixInfo first_stage_fix_fa(FA_Container& sourceFa, FA_Container& resultFa) {
|
namespace regexis024 {
|
||||||
|
REGEX_IS024_FA_FirstStageFixInfo first_stage_fix_fa(FA_Container& sourceFa, FA_Container& resultFa) {
|
||||||
assert(sourceFa.start);
|
assert(sourceFa.start);
|
||||||
REGEX_IS024_FA_FirstStageFixInfo info;
|
REGEX_IS024_FA_FirstStageFixInfo info;
|
||||||
|
|
||||||
@ -140,19 +141,19 @@ REGEX_IS024_FA_FirstStageFixInfo first_stage_fix_fa(FA_Container& sourceFa, FA_C
|
|||||||
d->search_mark = -1;
|
d->search_mark = -1;
|
||||||
}
|
}
|
||||||
return info;
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Creates a two-node tail inside `fa`: a one-char-read node filtered by `restriction`
 * (constructed with its second argument set to true) followed by a match node.
 * Returns the reading node, i.e. the entry of the tail. */
FA_NodeOfOneCharRead* generate_alt_ending(const codeset_t& restriction, FA_Container& fa){
    FA_NodeOfOneCharRead* reader = fa.makeOneCharRead(restriction, true);
    FA_NodeOfMatch* terminal = fa.makeMatch();
    terminal->ext_filter_added = true; // Won't actually be used
    FA_Node** outgoing_edge = &(reader->nxt_node);
    reattach_fa_node_edge(outgoing_edge, terminal);
    return reader;
}
|
||||||
|
|
||||||
void regular_second_stage_fix(const FA_Container& sourceFa, FA_Container& resultFa,
|
void regular_second_stage_fix(const FA_Container& sourceFa, FA_Container& resultFa,
|
||||||
const REGEX_IS024_FA_FirstStageFixInfo &info1)
|
const REGEX_IS024_FA_FirstStageFixInfo &info1)
|
||||||
{
|
{
|
||||||
assert(resultFa.all.empty() && !resultFa.start);
|
assert(resultFa.all.empty() && !resultFa.start);
|
||||||
if (!sourceFa.start)
|
if (!sourceFa.start)
|
||||||
return;
|
return;
|
||||||
@ -192,4 +193,5 @@ void regular_second_stage_fix(const FA_Container& sourceFa, FA_Container& result
|
|||||||
FA_NodeOfOneCharRead* ns = resultFa.makeOneCharRead(codeset_of_all, true);
|
FA_NodeOfOneCharRead* ns = resultFa.makeOneCharRead(codeset_of_all, true);
|
||||||
yay_new_start(resultFa, ns);
|
yay_new_start(resultFa, ns);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,16 +3,18 @@
|
|||||||
|
|
||||||
#include "finite_automaton.h"
|
#include "finite_automaton.h"
|
||||||
|
|
||||||
struct REGEX_IS024_FA_FirstStageFixInfo{
|
namespace regexis024 {
|
||||||
|
/* Pair of flags describing the outcome of the first fixing stage. Both start out false. */
struct REGEX_IS024_FA_FirstStageFixInfo {
    bool fed_chars_extend_one_left{false};
    bool fed_chars_extend_one_right{false};
};
|
||||||
|
|
||||||
/* Will look for look_one_ahead nodes and apply their filter to reading filters ahead *
|
/* Will look for look_one_ahead nodes and apply their filter to reading filters ahead *
|
||||||
* sourceFa will be ruined. The output will be in resultFa */
|
* sourceFa will be ruined. The output will be in resultFa */
|
||||||
REGEX_IS024_FA_FirstStageFixInfo first_stage_fix_fa(FA_Container& sourceFa, FA_Container& resultFa);
|
REGEX_IS024_FA_FirstStageFixInfo first_stage_fix_fa(FA_Container& sourceFa, FA_Container& resultFa);
|
||||||
|
|
||||||
void regular_second_stage_fix(const FA_Container& sourceFa, FA_Container& resultFa,
|
void regular_second_stage_fix(const FA_Container& sourceFa, FA_Container& resultFa,
|
||||||
const REGEX_IS024_FA_FirstStageFixInfo &info1);
|
const REGEX_IS024_FA_FirstStageFixInfo &info1);
|
||||||
|
}
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_FA_FIRST_STAGE_FIX_H
|
#endif //LIBREGEXIS024_FA_FIRST_STAGE_FIX_H
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
#include <libregexis024fa/fa_make_deterministic.h>
|
#include <libregexis024fa/fa_make_deterministic.h>
|
||||||
#include <libregexis024fa/misc_fa_funcs.h>
|
#include <libregexis024fa/misc_fa_funcs.h>
|
||||||
#include <libregexis024vm/utils.h> /* to get exitf */
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <libregexis024fa/tracking_fa_nodes.h>
|
#include <libregexis024fa/tracking_fa_nodes.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -17,38 +16,38 @@
|
|||||||
#define PR_DEB
|
#define PR_DEB
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* debug nonsence */
|
namespace regexis024 {
|
||||||
void input_fa_assert(const FA_Container& fa){
|
/* debug nonsence */
|
||||||
|
void input_fa_assert(const FA_Container& fa){
|
||||||
assert(fa.start);
|
assert(fa.start);
|
||||||
for (FA_Node* node: fa.all){
|
for (FA_Node* node: fa.all){
|
||||||
if (node->type == one_char_read){
|
if (node->type == one_char_read){
|
||||||
assert(!dynamic_cast<FA_NodeOfOneCharRead*>(node)->second_ns);
|
assert(!dynamic_cast<FA_NodeOfOneCharRead*>(node)->second_ns);
|
||||||
} else if (node->type == look_one_ahead ||
|
} else if (node->type == look_one_ahead || node->type == det_char_crossroads) {
|
||||||
node->type == det_char_crossroads){
|
assert(false);
|
||||||
exitf("not allowed at this stage\n");
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
struct OperHistoryNodeTransition {
|
struct OperHistoryNodeTransition {
|
||||||
TrackingOperationInFa op;
|
TrackingOperationInFa op;
|
||||||
size_t u;
|
size_t u;
|
||||||
|
|
||||||
OperHistoryNodeTransition(const TrackingOperationInFa &op, size_t u): op(op), u(u) {}
|
OperHistoryNodeTransition(const TrackingOperationInFa &op, size_t u): op(op), u(u) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct OperHistoryNode {
|
struct OperHistoryNode {
|
||||||
std::vector<OperHistoryNodeTransition> next;
|
std::vector<OperHistoryNodeTransition> next;
|
||||||
/* When it is part of clean history, this */
|
/* When it is part of clean history, this */
|
||||||
std::vector<uint64_t> compressed_selarr;
|
std::vector<uint64_t> compressed_selarr;
|
||||||
std::vector<uint64_t> raisin;
|
std::vector<uint64_t> raisin;
|
||||||
|
|
||||||
OperHistoryNode() = default;
|
OperHistoryNode() = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* This object can describe an empty superstate (needed to describe clean history nodes without raisin)
|
/* This object can describe an empty superstate (needed to describe clean history nodes without raisin)
|
||||||
* If det_stops is empty, interpret it as empty superstate */
|
* If det_stops is empty, interpret it as empty superstate */
|
||||||
struct SuperState {
|
struct SuperState {
|
||||||
std::vector<uint64_t> sorted_raisin;
|
std::vector<uint64_t> sorted_raisin;
|
||||||
std::vector<uint64_t> double_compressed_selarr;
|
std::vector<uint64_t> double_compressed_selarr;
|
||||||
|
|
||||||
@ -74,33 +73,33 @@ struct SuperState {
|
|||||||
return "sorted_raisin: {" + f1_raisin + "}, double_comp_selarr: {" + f2_selarr + "}";
|
return "sorted_raisin: {" + f1_raisin + "}, double_comp_selarr: {" + f2_selarr + "}";
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CleanOperHistoryNode {
|
struct CleanOperHistoryNode {
|
||||||
std::vector<OperHistoryNodeTransition> next;
|
std::vector<OperHistoryNodeTransition> next;
|
||||||
SuperState exit;
|
SuperState exit;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct SelarrCompressionScheme {
|
struct SelarrCompressionScheme {
|
||||||
size_t SN1, SN2 = 0, SN3 = 0;
|
size_t SN1, SN2 = 0, SN3 = 0;
|
||||||
std::vector<int32_t> S1_to_S2;
|
std::vector<int32_t> S1_to_S2;
|
||||||
std::vector<regex_tai_t> S2_to_sifter;
|
std::vector<tai_t> S2_to_sifter;
|
||||||
std::vector<regex_tai_t> S3_to_sifter;
|
std::vector<tai_t> S3_to_sifter;
|
||||||
const RegexPriorityTable& sifter;
|
const RegexPriorityTable& sifter;
|
||||||
|
|
||||||
SelarrCompressionScheme(size_t sn1, const RegexPriorityTable &sifter) : SN1(sn1), sifter(sifter) {
|
SelarrCompressionScheme(size_t sn1, const RegexPriorityTable &sifter) : SN1(sn1), sifter(sifter) {
|
||||||
assert(sifter.size() <= UINT32_MAX);
|
assert(sifter.size() <= UINT32_MAX);
|
||||||
S1_to_S2.assign(SN1, -1);
|
S1_to_S2.assign(SN1, -1);
|
||||||
for (regex_tai_t i = 0; i < sifter.size(); i++) {
|
for (tai_t i = 0; i < sifter.size(); i++) {
|
||||||
auto& act = sifter[i].pos;
|
auto& act = sifter[i].pos;
|
||||||
regex_tai_t first_on_s2 = S2_to_sifter.size();
|
tai_t first_on_s2 = S2_to_sifter.size();
|
||||||
S2_to_sifter.push_back(i);
|
S2_to_sifter.push_back(i);
|
||||||
S1_to_S2[act.first] = first_on_s2;
|
S1_to_S2[act.first] = first_on_s2;
|
||||||
if (act.type != tracking_var_types::dot_cur_pos) {
|
if (act.type != tracking_var_types::dot_cur_pos) {
|
||||||
S3_to_sifter.push_back(i);
|
S3_to_sifter.push_back(i);
|
||||||
}
|
}
|
||||||
if (act.type == tracking_var_types::range) {
|
if (act.type == tracking_var_types::range) {
|
||||||
regex_tai_t second_on_s2 = S2_to_sifter.size();
|
tai_t second_on_s2 = S2_to_sifter.size();
|
||||||
S2_to_sifter.push_back(i);
|
S2_to_sifter.push_back(i);
|
||||||
S1_to_S2[act.second] = second_on_s2;
|
S1_to_S2[act.second] = second_on_s2;
|
||||||
}
|
}
|
||||||
@ -110,9 +109,9 @@ struct SelarrCompressionScheme {
|
|||||||
assert(SN3 <= SN2 && SN2 <= SN1 && SN1 <= UINT16_MAX);
|
assert(SN3 <= SN2 && SN2 <= SN1 && SN1 <= UINT16_MAX);
|
||||||
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<uint64_t> compress_compressed_selarr(const std::vector<uint64_t>& S2,
|
std::vector<uint64_t> compress_compressed_selarr(const std::vector<uint64_t>& S2,
|
||||||
const SelarrCompressionScheme& cmp) {
|
const SelarrCompressionScheme& cmp) {
|
||||||
std::vector<uint64_t> S3(cmp.SN3);
|
std::vector<uint64_t> S3(cmp.SN3);
|
||||||
for (size_t i = 0; i < cmp.SN3; i++) {
|
for (size_t i = 0; i < cmp.SN3; i++) {
|
||||||
@ -127,9 +126,9 @@ std::vector<uint64_t> compress_compressed_selarr(const std::vector<uint64_t>& S2
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return S3;
|
return S3;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool compressed_selarr_A_outranks_B(const std::vector<uint64_t>& A, const std::vector<uint64_t>& B,
|
bool compressed_selarr_A_outranks_B(const std::vector<uint64_t>& A, const std::vector<uint64_t>& B,
|
||||||
const SelarrCompressionScheme& cmp) {
|
const SelarrCompressionScheme& cmp) {
|
||||||
for (const RegexPriorityTableAction& act: cmp.sifter) {
|
for (const RegexPriorityTableAction& act: cmp.sifter) {
|
||||||
uint64_t valA = A[cmp.S1_to_S2[act.pos.first]];
|
uint64_t valA = A[cmp.S1_to_S2[act.pos.first]];
|
||||||
@ -145,11 +144,11 @@ bool compressed_selarr_A_outranks_B(const std::vector<uint64_t>& A, const std::v
|
|||||||
return (valA < valB) == act.minimize;
|
return (valA < valB) == act.minimize;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Because of the way wash_history_bush builds this structure, the root is the last node.
|
/* Because of the way wash_history_bush builds this structure, the root is the last node.
|
||||||
* rankdir is from left to right (guaranteed). Can be empty if original history contained no raisin */
|
* rankdir is from left to right (guaranteed). Can be empty if original history contained no raisin */
|
||||||
struct RaisinBush {
|
struct RaisinBush {
|
||||||
std::vector<CleanOperHistoryNode> clean_history;
|
std::vector<CleanOperHistoryNode> clean_history;
|
||||||
ssize_t start = -1;
|
ssize_t start = -1;
|
||||||
|
|
||||||
@ -201,9 +200,9 @@ struct RaisinBush {
|
|||||||
printLines(wrapWithBox(text));
|
printLines(wrapWithBox(text));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
void wash_history_bush(const std::vector<OperHistoryNode>& history, RaisinBush& answer,
|
void wash_history_bush(const std::vector<OperHistoryNode>& history, RaisinBush& answer,
|
||||||
const SelarrCompressionScheme& cmp) {
|
const SelarrCompressionScheme& cmp) {
|
||||||
assert(!history.empty());
|
assert(!history.empty());
|
||||||
std::vector<bool> has_raisin(history.size());
|
std::vector<bool> has_raisin(history.size());
|
||||||
@ -252,14 +251,14 @@ void wash_history_bush(const std::vector<OperHistoryNode>& history, RaisinBush&
|
|||||||
answer.start = dirty_to_clean[0];
|
answer.start = dirty_to_clean[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If is_it_after_read is false, unknown selarr range variable border and cur pos are evaluated to 0.
|
/* If is_it_after_read is false, unknown selarr range variable border and cur pos are evaluated to 0.
|
||||||
* Otherwise, cur pos is considered to be greater than previous values of selarr range variable boundaries */
|
* Otherwise, cur pos is considered to be greater than previous values of selarr range variable boundaries */
|
||||||
void building_detour(const SelarrCompressionScheme& cmp,
|
void building_detour(const SelarrCompressionScheme& cmp,
|
||||||
const std::vector<uint64_t>& outer_selarr, const std::vector<FA_Node*>& zeroeps, const codeset_t& I,
|
const std::vector<uint64_t>& outer_selarr, const std::vector<FA_Node*>& zeroeps, const codeset_t& I,
|
||||||
RaisinBush& answer, bool is_it_after_read)
|
RaisinBush& answer, bool is_it_after_read)
|
||||||
{
|
{
|
||||||
#ifdef PR_DEB
|
#ifdef PR_DEB
|
||||||
printf("Det Debug: build_detour started with zeroeps:{");
|
printf("Det Debug: build_detour started with zeroeps:{");
|
||||||
for (FA_Node* node: zeroeps)
|
for (FA_Node* node: zeroeps)
|
||||||
@ -368,7 +367,7 @@ void building_detour(const SelarrCompressionScheme& cmp,
|
|||||||
val2[key_s2] = tv->imm_value;
|
val2[key_s2] = tv->imm_value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
add_history_update(TrackingOperationInFa(tv->operation, tv->key, tv->imm_value), Hop, Hv);
|
add_history_update(TrackingOperationInFa{tv->operation, tv->key, tv->imm_value}, Hop, Hv);
|
||||||
} else if (v->type == track_array_mov_halfinvariant) {
|
} else if (v->type == track_array_mov_halfinvariant) {
|
||||||
FA_NodeOfTrackArrayMovHalfinvariant* tv = dynamic_cast<FA_NodeOfTrackArrayMovHalfinvariant*>(v);
|
FA_NodeOfTrackArrayMovHalfinvariant* tv = dynamic_cast<FA_NodeOfTrackArrayMovHalfinvariant*>(v);
|
||||||
if (isSelarrOpcode(tv->operation)) {
|
if (isSelarrOpcode(tv->operation)) {
|
||||||
@ -383,7 +382,7 @@ void building_detour(const SelarrCompressionScheme& cmp,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
add_history_update(TrackingOperationInFa(tv->operation, tv->key), Hop, Hv);
|
add_history_update(TrackingOperationInFa{tv->operation, tv->key}, Hop, Hv);
|
||||||
}
|
}
|
||||||
} else if (v->type == match || v->type == one_char_read) {
|
} else if (v->type == match || v->type == one_char_read) {
|
||||||
// Determinization stop
|
// Determinization stop
|
||||||
@ -414,22 +413,22 @@ void building_detour(const SelarrCompressionScheme& cmp,
|
|||||||
}
|
}
|
||||||
/* Packaging the answer (we do a little bit of dfs here) */
|
/* Packaging the answer (we do a little bit of dfs here) */
|
||||||
wash_history_bush(history, answer, cmp);
|
wash_history_bush(history, answer, cmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
void update_had_to_fork_status(const RaisinBush& bush, int& had_to_fork) {
|
void update_had_to_fork_status(const RaisinBush& bush, int& had_to_fork) {
|
||||||
for (const CleanOperHistoryNode& node: bush.clean_history) {
|
for (const CleanOperHistoryNode& node: bush.clean_history) {
|
||||||
if (node.next.size() > 1 || (!node.next.empty() && !node.exit.empty())) {
|
if (node.next.size() > 1 || (!node.next.empty() && !node.exit.empty())) {
|
||||||
had_to_fork = 1;
|
had_to_fork = 1;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef size_t superstate_id_t;
|
typedef size_t superstate_id_t;
|
||||||
|
|
||||||
typedef std::vector<std::pair<FA_Node**, superstate_id_t>> homework_t;
|
typedef std::vector<std::pair<FA_Node**, superstate_id_t>> homework_t;
|
||||||
|
|
||||||
struct LessSuperState {
|
struct LessSuperState {
|
||||||
bool operator()(const SuperState& A, const SuperState& B) const {
|
bool operator()(const SuperState& A, const SuperState& B) const {
|
||||||
std::less<std::vector<uint64_t>> f1L;
|
std::less<std::vector<uint64_t>> f1L;
|
||||||
if (f1L(A.sorted_raisin, B.sorted_raisin))
|
if (f1L(A.sorted_raisin, B.sorted_raisin))
|
||||||
@ -438,17 +437,17 @@ struct LessSuperState {
|
|||||||
return false;
|
return false;
|
||||||
return f1L(A.double_compressed_selarr, B.double_compressed_selarr);
|
return f1L(A.double_compressed_selarr, B.double_compressed_selarr);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct GlobalDetourProgress {
|
struct GlobalDetourProgress {
|
||||||
std::map<SuperState, superstate_id_t, LessSuperState> superstates;
|
std::map<SuperState, superstate_id_t, LessSuperState> superstates;
|
||||||
/* Each element is a root of some megabush in resFa */
|
/* Each element is a root of some megabush in resFa */
|
||||||
std::vector<FA_Node*> superstate_megabush_constructed;
|
std::vector<FA_Node*> superstate_megabush_constructed;
|
||||||
std::vector<SuperState> todo_superstaes;
|
std::vector<SuperState> todo_superstaes;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* If x was not previously achieved, it will also add it to t o d o list of global detour */
|
/* If x was not previously achieved, it will also add it to t o d o list of global detour */
|
||||||
superstate_id_t convertSuperstateToId(const SuperState& x, GlobalDetourProgress& gdp) {
|
superstate_id_t convertSuperstateToId(const SuperState& x, GlobalDetourProgress& gdp) {
|
||||||
if (gdp.superstates.count(x)) {
|
if (gdp.superstates.count(x)) {
|
||||||
return gdp.superstates[x];
|
return gdp.superstates[x];
|
||||||
}
|
}
|
||||||
@ -457,13 +456,13 @@ superstate_id_t convertSuperstateToId(const SuperState& x, GlobalDetourProgress&
|
|||||||
gdp.todo_superstaes.push_back(x);
|
gdp.todo_superstaes.push_back(x);
|
||||||
gdp.superstate_megabush_constructed.push_back(NULL);
|
gdp.superstate_megabush_constructed.push_back(NULL);
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* A dead end is a fresh forking node; no outgoing option is attached to it here. */
FA_Node* build_dead_end(FA_Container& resFa) {
    FA_Node* dead_end = resFa.makeForking();
    return dead_end;
}
|
||||||
|
|
||||||
void build_bush(const RaisinBush& alpha, FA_Node** sowing_location, FA_Container& resFa,
|
void build_bush(const RaisinBush& alpha, FA_Node** sowing_location, FA_Container& resFa,
|
||||||
homework_t& homework, GlobalDetourProgress& gdp) {
|
homework_t& homework, GlobalDetourProgress& gdp) {
|
||||||
size_t n = alpha.clean_history.size();
|
size_t n = alpha.clean_history.size();
|
||||||
if (n == 0) {
|
if (n == 0) {
|
||||||
@ -504,9 +503,9 @@ void build_bush(const RaisinBush& alpha, FA_Node** sowing_location, FA_Container
|
|||||||
reattach_fa_node_edge(sl, forker);
|
reattach_fa_node_edge(sl, forker);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ColoredCodeset get_pretreated_cc(FA_Container& sourceFa) {
|
ColoredCodeset get_pretreated_cc(FA_Container& sourceFa) {
|
||||||
std::set<codeset_t> little_insects;
|
std::set<codeset_t> little_insects;
|
||||||
for (FA_Node* v: sourceFa.all) {
|
for (FA_Node* v: sourceFa.all) {
|
||||||
if (v->type == look_one_behind) {
|
if (v->type == look_one_behind) {
|
||||||
@ -518,12 +517,12 @@ ColoredCodeset get_pretreated_cc(FA_Container& sourceFa) {
|
|||||||
pretreated_cc.apply_divisor(cs);
|
pretreated_cc.apply_divisor(cs);
|
||||||
}
|
}
|
||||||
return pretreated_cc;
|
return pretreated_cc;
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo add a check on size of dfa
|
// todo add a check on size of dfa
|
||||||
void try_determinize_fa(FA_Container &sourceFa, const RegexPriorityTable &sifter, regex_tai_t selarr_sz,
|
void try_determinize_fa(FA_Container &sourceFa, const RegexPriorityTable &sifter, tai_t selarr_sz,
|
||||||
const REGEX_IS024_FA_FirstStageFixInfo &info1, FA_Container &resFa, int &error, int& had_to_fork)
|
const REGEX_IS024_FA_FirstStageFixInfo &info1, FA_Container &resFa, int &error, int& had_to_fork)
|
||||||
{
|
{
|
||||||
/* During execution, I will create pointers to field res.start and store them (inside the scope of this function)
|
/* During execution, I will create pointers to field res.start and store them (inside the scope of this function)
|
||||||
* Luckily res argument is already immovable in this scope. */
|
* Luckily res argument is already immovable in this scope. */
|
||||||
error = 0;
|
error = 0;
|
||||||
@ -661,5 +660,5 @@ void try_determinize_fa(FA_Container &sourceFa, const RegexPriorityTable &sifter
|
|||||||
for (auto& p: homework) {
|
for (auto& p: homework) {
|
||||||
reattach_fa_node_edge(p.first, gdp.superstate_megabush_constructed[p.second]);
|
reattach_fa_node_edge(p.first, gdp.superstate_megabush_constructed[p.second]);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,7 +4,9 @@
|
|||||||
#include <libregexis024fa/fa_first_stage_fix.h>
|
#include <libregexis024fa/fa_first_stage_fix.h>
|
||||||
#include <libregexis024fa/selarr_priority_table.h>
|
#include <libregexis024fa/selarr_priority_table.h>
|
||||||
|
|
||||||
void try_determinize_fa(FA_Container &sourceFa, const RegexPriorityTable &sifter, regex_tai_t selarr_sz,
|
namespace regexis024 {
|
||||||
|
void try_determinize_fa(FA_Container &sourceFa, const RegexPriorityTable &sifter, tai_t selarr_sz,
|
||||||
const REGEX_IS024_FA_FirstStageFixInfo &info1, FA_Container &resFa, int &error, int& had_to_fork);
|
const REGEX_IS024_FA_FirstStageFixInfo &info1, FA_Container &resFa, int &error, int& had_to_fork);
|
||||||
|
}
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_FA_MAKE_DETERMINISTIC_H
|
#endif //LIBREGEXIS024_FA_MAKE_DETERMINISTIC_H
|
||||||
|
@ -2,140 +2,142 @@
|
|||||||
#include <libregexis024vm/utils.h>
|
#include <libregexis024vm/utils.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
bool FA_Node::empty() {
|
namespace regexis024 {
|
||||||
|
bool FA_Node::empty() {
|
||||||
return type != one_char_read && type != det_char_crossroads;
|
return type != one_char_read && type != det_char_crossroads;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Base implementation: the restriction is ignored. */
void FA_Node::apply_lookahead_restriction(const codeset_t & /*restriction*/) {
}
|
||||||
|
|
||||||
void FA_Node::reAdd_references() {
|
void FA_Node::reAdd_references() {
|
||||||
for (FA_Node** nxtPtr: get_all_transitions()){
|
for (FA_Node** nxtPtr: get_all_transitions()){
|
||||||
if (*nxtPtr)
|
if (*nxtPtr)
|
||||||
(**nxtPtr).refs++;
|
(**nxtPtr).refs++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Base implementation: a plain node has no transitions. */
std::vector<FA_Node **> FA_Node::get_all_transitions() {
    return std::vector<FA_Node **>();
}
|
||||||
|
|
||||||
/* Base implementation: always an empty list. */
std::vector<FA_Node **> FA_Node::get_all_empty_valid_transitions() {
    return std::vector<FA_Node **>();
}
|
||||||
|
|
||||||
std::vector<FA_Node **> FA_NodePathPart::get_all_transitions() {
|
std::vector<FA_Node **> FA_NodePathPart::get_all_transitions() {
|
||||||
return {&nxt_node};
|
return {&nxt_node};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<FA_Node **> FA_NodePathPart::get_all_empty_valid_transitions() {
|
std::vector<FA_Node **> FA_NodePathPart::get_all_empty_valid_transitions() {
|
||||||
if (nxt_node)
|
if (nxt_node)
|
||||||
return {&nxt_node};
|
return {&nxt_node};
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Tags the node as a match node. */
FA_NodeOfMatch::FA_NodeOfMatch() {
    this->type = match;
}
|
||||||
|
|
||||||
void FA_NodeOfMatch::apply_lookahead_restriction(const codeset_t &restriction) {
|
void FA_NodeOfMatch::apply_lookahead_restriction(const codeset_t &restriction) {
|
||||||
ext_filter_added = true;
|
ext_filter_added = true;
|
||||||
pending_filter = restriction;
|
pending_filter = restriction;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Stores the reading filter and the second-namespace flag, and tags the node as one_char_read. */
FA_NodeOfOneCharRead::FA_NodeOfOneCharRead(const codeset_t &filter, bool second_namespace)
    : filter(filter), second_ns(second_namespace)
{
    type = one_char_read;
}
|
||||||
|
|
||||||
void FA_NodeOfOneCharRead::apply_lookahead_restriction(const codeset_t &restriction) {
|
void FA_NodeOfOneCharRead::apply_lookahead_restriction(const codeset_t &restriction) {
|
||||||
filter = intersect_sets(filter, restriction);
|
filter = intersect_sets(filter, restriction);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Always an empty list for a reading node. */
std::vector<FA_Node **> FA_NodeOfOneCharRead::get_all_empty_valid_transitions() {
    return std::vector<FA_Node **>();
}
|
||||||
|
|
||||||
/* Tags the node as a forking node. */
FA_NodeOfForking::FA_NodeOfForking() {
    this->type = forking;
}
|
||||||
|
|
||||||
std::vector<FA_Node **> FA_NodeOfForking::get_all_empty_valid_transitions() {
|
std::vector<FA_Node **> FA_NodeOfForking::get_all_empty_valid_transitions() {
|
||||||
std::vector<FA_Node**> res;
|
std::vector<FA_Node**> res;
|
||||||
for (size_t i = 0; i < nxt_options.size(); i++)
|
for (size_t i = 0; i < nxt_options.size(); i++)
|
||||||
if (nxt_options[i])
|
if (nxt_options[i])
|
||||||
res.push_back(&nxt_options[i]);
|
res.push_back(&nxt_options[i]);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<FA_Node **> FA_NodeOfForking::get_all_transitions() {
|
std::vector<FA_Node **> FA_NodeOfForking::get_all_transitions() {
|
||||||
std::vector<FA_Node**> res;
|
std::vector<FA_Node**> res;
|
||||||
for (size_t i = 0; i < nxt_options.size(); i++)
|
for (size_t i = 0; i < nxt_options.size(); i++)
|
||||||
res.push_back(&nxt_options[i]);
|
res.push_back(&nxt_options[i]);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Stores the filter and tags the node as look_one_behind. */
FA_NodeOfLookOneBehind::FA_NodeOfLookOneBehind(const codeset_t &filter)
    : filter(filter)
{
    type = look_one_behind;
}
|
||||||
|
|
||||||
/* Stores the restriction and tags the node as look_one_ahead. */
FA_NodeOfLookOneAhead::FA_NodeOfLookOneAhead(const codeset_t &restriction)
    : restriction(restriction)
{
    this->type = look_one_ahead;
}
|
||||||
|
|
||||||
/* Stores opcode, key and immediate value, and tags the node as track_array_mov_imm. */
FA_NodeOfTrackArrayMovImm::FA_NodeOfTrackArrayMovImm(opcode_t operation, uint16_t key, uint64_t immValue)
    : operation(operation), key(key), imm_value(immValue)
{
    type = track_array_mov_imm;
}
|
||||||
//
|
//
|
||||||
|
|
||||||
FA_NodeOfTrackArrayMovHalfinvariant::FA_NodeOfTrackArrayMovHalfinvariant(regex024_opcode operation, uint16_t key):
|
FA_NodeOfTrackArrayMovHalfinvariant::FA_NodeOfTrackArrayMovHalfinvariant(opcode_t operation, uint16_t key):
|
||||||
operation(operation), key(key){type = track_array_mov_halfinvariant;}
|
operation(operation), key(key){type = track_array_mov_halfinvariant;}
|
||||||
//
|
//
|
||||||
|
|
||||||
void FA_NodeOfDetCharCrossroads::apply_lookahead_restriction(const codeset_t &restriction) {
|
void FA_NodeOfDetCharCrossroads::apply_lookahead_restriction(const codeset_t &restriction) {
|
||||||
exitf("What?? Oh, no, no. I am NOT doing it");
|
assert(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
FA_NodeOfDetCharCrossroads::FA_NodeOfDetCharCrossroads(const std::vector<DFA_CrossroadPath> &crossroads)
|
FA_NodeOfDetCharCrossroads::FA_NodeOfDetCharCrossroads(const std::vector<DFA_CrossroadPath> &crossroads)
|
||||||
: crossroads(crossroads) {type = det_char_crossroads;}
|
: crossroads(crossroads) {type = det_char_crossroads;}
|
||||||
|
|
||||||
std::vector<FA_Node **> FA_NodeOfDetCharCrossroads::get_all_empty_valid_transitions() {
|
std::vector<FA_Node **> FA_NodeOfDetCharCrossroads::get_all_empty_valid_transitions() {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<FA_Node **> FA_NodeOfDetCharCrossroads::get_all_transitions() {
|
std::vector<FA_Node **> FA_NodeOfDetCharCrossroads::get_all_transitions() {
|
||||||
std::vector<FA_Node**> res;
|
std::vector<FA_Node**> res;
|
||||||
for (auto& tr: crossroads)
|
for (auto& tr: crossroads)
|
||||||
res.push_back(&tr.nxt_node);
|
res.push_back(&tr.nxt_node);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If transferring ownership of node to container has failed, node is freed (which means it is invalidated)
|
/* If transferring ownership of node to container has failed, node is freed (which means it is invalidated)
|
||||||
* If this semi-ownership transfer succeeded (no std::bad_alloc), then node is still valid to use, and at the end
|
* If this semi-ownership transfer succeeded (no std::bad_alloc), then node is still valid to use, and at the end
|
||||||
* of FA_Container lifetime it is guaranteed to be deleted
|
* of FA_Container lifetime it is guaranteed to be deleted
|
||||||
*/
|
*/
|
||||||
void FA_Container::registerNew(FA_Node *node) {
|
void FA_Container::registerNew(FA_Node *node) {
|
||||||
try {
|
try {
|
||||||
node->nodeId = (int64_t)all.size();
|
node->nodeId = (int64_t)all.size();
|
||||||
all.push_back(node);
|
all.push_back(node);
|
||||||
} catch (const std::bad_alloc& ba) {
|
} catch (const std::exception& ba) {
|
||||||
delete node;
|
delete node;
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
DFA_CrossroadPath::DFA_CrossroadPath(const codeset_t &input, FA_Node *nxt_node): input(input),nxt_node(nxt_node) {}
|
DFA_CrossroadPath::DFA_CrossroadPath(const codeset_t &input, FA_Node *nxt_node): input(input),nxt_node(nxt_node) {}
|
||||||
//
|
//
|
||||||
|
|
||||||
FA_Container::~FA_Container() {
|
FA_Container::~FA_Container() {
|
||||||
for (FA_Node* n: all)
|
for (FA_Node* n: all)
|
||||||
delete n;
|
delete n;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define bs(name, args, params) \
|
#define bs(name, args, params) \
|
||||||
FA_NodeOf ## name *FA_Container::make ## name(args) { \
|
FA_NodeOf ## name *FA_Container::make ## name(args) { \
|
||||||
FA_NodeOf ## name *node = new FA_NodeOf ## name(params); \
|
FA_NodeOf ## name *node = new FA_NodeOf ## name(params); \
|
||||||
registerNew(node); \
|
registerNew(node); \
|
||||||
return node; \
|
return node; \
|
||||||
}
|
}
|
||||||
#define COMMA ,
|
#define COMMA ,
|
||||||
|
|
||||||
bs(Match, , )
|
bs(Match, , )
|
||||||
bs(OneCharRead, const codeset_t& filter COMMA bool second_namespace, filter COMMA second_namespace)
|
bs(OneCharRead, const codeset_t& filter COMMA bool second_namespace, filter COMMA second_namespace)
|
||||||
bs(Forking, , )
|
bs(Forking, , )
|
||||||
bs(LookOneBehind, const codeset_t& filter, filter)
|
bs(LookOneBehind, const codeset_t& filter, filter)
|
||||||
bs(LookOneAhead, const codeset_t& filter, filter)
|
bs(LookOneAhead, const codeset_t& filter, filter)
|
||||||
bs(TrackArrayMovImm, regex024_opcode operation COMMA uint16_t key COMMA uint64_t immValue,
|
bs(TrackArrayMovImm, opcode_t operation COMMA uint16_t key COMMA uint64_t immValue,
|
||||||
operation COMMA key COMMA immValue)
|
operation COMMA key COMMA immValue)
|
||||||
bs(TrackArrayMovHalfinvariant, regex024_opcode operation COMMA uint16_t key, operation COMMA key)
|
bs(TrackArrayMovHalfinvariant, opcode_t operation COMMA uint16_t key, operation COMMA key)
|
||||||
bs(DetCharCrossroads, ,{})
|
bs(DetCharCrossroads, ,{})
|
||||||
|
}
|
||||||
|
@ -6,7 +6,8 @@
|
|||||||
#include <libregexis024fa/codeset.h>
|
#include <libregexis024fa/codeset.h>
|
||||||
#include <libregexis024vm/vm_opcodes.h>
|
#include <libregexis024vm/vm_opcodes.h>
|
||||||
|
|
||||||
enum FA_Node_type: uint8_t {
|
namespace regexis024 {
|
||||||
|
enum FA_Node_type: uint8_t {
|
||||||
match,
|
match,
|
||||||
one_char_read,
|
one_char_read,
|
||||||
forking,
|
forking,
|
||||||
@ -16,9 +17,9 @@ enum FA_Node_type: uint8_t {
|
|||||||
track_array_mov_halfinvariant,
|
track_array_mov_halfinvariant,
|
||||||
/* Used for DFA */
|
/* Used for DFA */
|
||||||
det_char_crossroads,
|
det_char_crossroads,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FA_Node{
|
struct FA_Node{
|
||||||
size_t refs = 0;
|
size_t refs = 0;
|
||||||
/* If node is not in searched subset (at least yet), `search mark == -1`, otherwise
|
/* If node is not in searched subset (at least yet), `search mark == -1`, otherwise
|
||||||
* it is an index (for that particular node) in the vector that captures all nodes in
|
* it is an index (for that particular node) in the vector that captures all nodes in
|
||||||
@ -33,35 +34,35 @@ struct FA_Node{
|
|||||||
void reAdd_references();
|
void reAdd_references();
|
||||||
virtual ~FA_Node() = default;
|
virtual ~FA_Node() = default;
|
||||||
virtual std::vector<FA_Node**> get_all_transitions();
|
virtual std::vector<FA_Node**> get_all_transitions();
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FA_NodePathPart: public FA_Node{
|
struct FA_NodePathPart: public FA_Node{
|
||||||
FA_Node* nxt_node = NULL;
|
FA_Node* nxt_node = NULL;
|
||||||
|
|
||||||
std::vector<FA_Node **> get_all_empty_valid_transitions() override;
|
std::vector<FA_Node **> get_all_empty_valid_transitions() override;
|
||||||
std::vector<FA_Node **> get_all_transitions() override;
|
std::vector<FA_Node **> get_all_transitions() override;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FA_NodeOfMatch: public FA_Node{
|
struct FA_NodeOfMatch: public FA_Node{
|
||||||
bool ext_filter_added = false;
|
bool ext_filter_added = false;
|
||||||
codeset_t pending_filter;
|
codeset_t pending_filter;
|
||||||
|
|
||||||
explicit FA_NodeOfMatch();
|
explicit FA_NodeOfMatch();
|
||||||
void apply_lookahead_restriction(const codeset_t &restriction) override;
|
void apply_lookahead_restriction(const codeset_t &restriction) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* .type == one_char_read */
|
/* .type == one_char_read */
|
||||||
struct FA_NodeOfOneCharRead: public FA_NodePathPart{
|
struct FA_NodeOfOneCharRead: public FA_NodePathPart{
|
||||||
codeset_t filter;
|
codeset_t filter;
|
||||||
bool second_ns = false;
|
bool second_ns = false;
|
||||||
|
|
||||||
FA_NodeOfOneCharRead(const codeset_t &filter, bool second_namespace);
|
FA_NodeOfOneCharRead(const codeset_t &filter, bool second_namespace);
|
||||||
void apply_lookahead_restriction(const codeset_t &restriction) override;
|
void apply_lookahead_restriction(const codeset_t &restriction) override;
|
||||||
std::vector<FA_Node **> get_all_empty_valid_transitions() override;
|
std::vector<FA_Node **> get_all_empty_valid_transitions() override;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* .type == forking */
|
/* .type == forking */
|
||||||
struct FA_NodeOfForking: public FA_Node{
|
struct FA_NodeOfForking: public FA_Node{
|
||||||
/* Won't be modified after init (in regexp compilation into NFA) */
|
/* Won't be modified after init (in regexp compilation into NFA) */
|
||||||
std::vector<FA_Node*> nxt_options;
|
std::vector<FA_Node*> nxt_options;
|
||||||
int64_t stopId = -1;
|
int64_t stopId = -1;
|
||||||
@ -69,51 +70,51 @@ struct FA_NodeOfForking: public FA_Node{
|
|||||||
explicit FA_NodeOfForking();
|
explicit FA_NodeOfForking();
|
||||||
std::vector<FA_Node **> get_all_empty_valid_transitions() override;
|
std::vector<FA_Node **> get_all_empty_valid_transitions() override;
|
||||||
std::vector<FA_Node **> get_all_transitions() override;
|
std::vector<FA_Node **> get_all_transitions() override;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* .type == look_one_behind */
|
/* .type == look_one_behind */
|
||||||
struct FA_NodeOfLookOneBehind: public FA_NodePathPart{
|
struct FA_NodeOfLookOneBehind: public FA_NodePathPart{
|
||||||
/* [0; UINT32_MAX] is equivalent to no filter */
|
/* [0; UINT32_MAX] is equivalent to no filter */
|
||||||
codeset_t filter;
|
codeset_t filter;
|
||||||
|
|
||||||
explicit FA_NodeOfLookOneBehind(const codeset_t &filter);
|
explicit FA_NodeOfLookOneBehind(const codeset_t &filter);
|
||||||
};
|
};
|
||||||
|
|
||||||
/* .type == look_one_ahead */
|
/* .type == look_one_ahead */
|
||||||
struct FA_NodeOfLookOneAhead: public FA_NodePathPart{
|
struct FA_NodeOfLookOneAhead: public FA_NodePathPart{
|
||||||
/* [0; UINT32_MAX] is equivalent to no restriction */
|
/* [0; UINT32_MAX] is equivalent to no restriction */
|
||||||
codeset_t restriction;
|
codeset_t restriction;
|
||||||
|
|
||||||
explicit FA_NodeOfLookOneAhead(const codeset_t &restriction);
|
explicit FA_NodeOfLookOneAhead(const codeset_t &restriction);
|
||||||
};
|
};
|
||||||
|
|
||||||
/* .type == track_array_mov_imm */
|
/* .type == track_array_mov_imm */
|
||||||
struct FA_NodeOfTrackArrayMovImm: public FA_NodePathPart{
|
struct FA_NodeOfTrackArrayMovImm: public FA_NodePathPart{
|
||||||
regex024_opcode operation;
|
opcode_t operation;
|
||||||
uint16_t key;
|
uint16_t key;
|
||||||
uint64_t imm_value;
|
uint64_t imm_value;
|
||||||
|
|
||||||
FA_NodeOfTrackArrayMovImm(regex024_opcode operation, uint16_t key, uint64_t immValue);
|
FA_NodeOfTrackArrayMovImm(opcode_t operation, uint16_t key, uint64_t immValue);
|
||||||
};
|
};
|
||||||
|
|
||||||
/* .type == track_array_mov_halfinvariant */
|
/* .type == track_array_mov_halfinvariant */
|
||||||
struct FA_NodeOfTrackArrayMovHalfinvariant: public FA_NodePathPart{
|
struct FA_NodeOfTrackArrayMovHalfinvariant: public FA_NodePathPart{
|
||||||
regex024_opcode operation;
|
opcode_t operation;
|
||||||
uint16_t key;
|
uint16_t key;
|
||||||
|
|
||||||
FA_NodeOfTrackArrayMovHalfinvariant(regex024_opcode operation, uint16_t key);
|
FA_NodeOfTrackArrayMovHalfinvariant(opcode_t operation, uint16_t key);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct DFA_CrossroadPath{
|
struct DFA_CrossroadPath{
|
||||||
codeset_t input;
|
codeset_t input;
|
||||||
FA_Node* nxt_node = NULL;
|
FA_Node* nxt_node = NULL;
|
||||||
|
|
||||||
DFA_CrossroadPath(const codeset_t &input, FA_Node *nxt_node);
|
DFA_CrossroadPath(const codeset_t &input, FA_Node *nxt_node);
|
||||||
DFA_CrossroadPath() = default;
|
DFA_CrossroadPath() = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* .type == det_char_crossroads */
|
/* .type == det_char_crossroads */
|
||||||
struct FA_NodeOfDetCharCrossroads: public FA_Node{
|
struct FA_NodeOfDetCharCrossroads: public FA_Node{
|
||||||
std::vector<DFA_CrossroadPath> crossroads;
|
std::vector<DFA_CrossroadPath> crossroads;
|
||||||
bool matching = false;
|
bool matching = false;
|
||||||
bool second_ns = false;
|
bool second_ns = false;
|
||||||
@ -122,9 +123,9 @@ struct FA_NodeOfDetCharCrossroads: public FA_Node{
|
|||||||
void apply_lookahead_restriction(const codeset_t &restriction) override;
|
void apply_lookahead_restriction(const codeset_t &restriction) override;
|
||||||
std::vector<FA_Node **> get_all_empty_valid_transitions() override;
|
std::vector<FA_Node **> get_all_empty_valid_transitions() override;
|
||||||
std::vector<FA_Node **> get_all_transitions() override;
|
std::vector<FA_Node **> get_all_transitions() override;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FA_Container{
|
struct FA_Container{
|
||||||
FA_Container(const FA_Container&) = delete;
|
FA_Container(const FA_Container&) = delete;
|
||||||
FA_Container& operator=(const FA_Container&) = delete;
|
FA_Container& operator=(const FA_Container&) = delete;
|
||||||
FA_Container() = default;
|
FA_Container() = default;
|
||||||
@ -139,11 +140,12 @@ struct FA_Container{
|
|||||||
FA_NodeOfForking* makeForking();
|
FA_NodeOfForking* makeForking();
|
||||||
FA_NodeOfLookOneBehind* makeLookOneBehind(const codeset_t& filter);
|
FA_NodeOfLookOneBehind* makeLookOneBehind(const codeset_t& filter);
|
||||||
FA_NodeOfLookOneAhead* makeLookOneAhead(const codeset_t& filter);
|
FA_NodeOfLookOneAhead* makeLookOneAhead(const codeset_t& filter);
|
||||||
FA_NodeOfTrackArrayMovImm* makeTrackArrayMovImm(regex024_opcode operation, uint16_t key, uint64_t immValue);
|
FA_NodeOfTrackArrayMovImm* makeTrackArrayMovImm(opcode_t operation, uint16_t key, uint64_t immValue);
|
||||||
FA_NodeOfTrackArrayMovHalfinvariant* makeTrackArrayMovHalfinvariant(regex024_opcode operation, uint16_t key);
|
FA_NodeOfTrackArrayMovHalfinvariant* makeTrackArrayMovHalfinvariant(opcode_t operation, uint16_t key);
|
||||||
FA_NodeOfDetCharCrossroads* makeDetCharCrossroads();
|
FA_NodeOfDetCharCrossroads* makeDetCharCrossroads();
|
||||||
|
|
||||||
~FA_Container();
|
~FA_Container();
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_FINITE_AUTOMATON_H
|
#endif //LIBREGEXIS024_FINITE_AUTOMATON_H
|
||||||
|
@ -5,11 +5,11 @@
|
|||||||
|
|
||||||
#include <libregexis024fa/graph_to_bytecode/filter.h>
|
#include <libregexis024fa/graph_to_bytecode/filter.h>
|
||||||
|
|
||||||
|
namespace regexis024 {
|
||||||
#define nonthrowing_assert(expr) if (!(expr)) {error = -1; return; }
|
#define nonthrowing_assert(expr) if (!(expr)) {error = -1; return; }
|
||||||
|
void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_bookmarks& bookmark_manager,
|
||||||
void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_bookmarks& bookmark_manager,
|
|
||||||
size_t& first_read_ns, size_t& second_read_ns, size_t& fork_ss_ns, int& error)
|
size_t& first_read_ns, size_t& second_read_ns, size_t& fork_ss_ns, int& error)
|
||||||
{
|
{
|
||||||
bookmark_id_t node_start_bm_offset = bookmark_manager.new_range_of_bookmarks(fa.all.size());
|
bookmark_id_t node_start_bm_offset = bookmark_manager.new_range_of_bookmarks(fa.all.size());
|
||||||
std::vector<size_t> not_yet_dedicated_second_read_ns_ssids;
|
std::vector<size_t> not_yet_dedicated_second_read_ns_ssids;
|
||||||
first_read_ns = 0;
|
first_read_ns = 0;
|
||||||
@ -72,7 +72,7 @@ void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_b
|
|||||||
}
|
}
|
||||||
if (nxt_options.size() >= 2) {
|
if (nxt_options.size() >= 2) {
|
||||||
nonthrowing_assert(fork_ss_ns < UINT32_MAX);
|
nonthrowing_assert(fork_ss_ns < UINT32_MAX);
|
||||||
regex_sslot_id_t sslot = fork_ss_ns++;
|
sslot_id_t sslot = fork_ss_ns++;
|
||||||
for (size_t i = 0; i + 1 < nxt_options.size(); i++) {
|
for (size_t i = 0; i + 1 < nxt_options.size(); i++) {
|
||||||
cmd_FORK(result, bookmark_manager, sslot, nodesBookmark(nxt_options[i]));
|
cmd_FORK(result, bookmark_manager, sslot, nodesBookmark(nxt_options[i]));
|
||||||
addBranching(nxt_options[i]);
|
addBranching(nxt_options[i]);
|
||||||
@ -114,4 +114,5 @@ void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_b
|
|||||||
for (size_t j = 0; j < not_yet_dedicated_second_read_ns_ssids.size(); j++) {
|
for (size_t j = 0; j < not_yet_dedicated_second_read_ns_ssids.size(); j++) {
|
||||||
belated_sslot_id(result, not_yet_dedicated_second_read_ns_ssids[j], j + first_read_ns);
|
belated_sslot_id(result, not_yet_dedicated_second_read_ns_ssids[j], j + first_read_ns);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,9 @@
|
|||||||
#include <libregexis024fa/finite_automaton.h>
|
#include <libregexis024fa/finite_automaton.h>
|
||||||
#include <libregexis024fa/graph_to_bytecode/natural_compiler_utils.h>
|
#include <libregexis024fa/graph_to_bytecode/natural_compiler_utils.h>
|
||||||
|
|
||||||
void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_bookmarks& bookmark_manager,
|
namespace regexis024 {
|
||||||
|
void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_bookmarks& bookmark_manager,
|
||||||
size_t& first_read_ns, size_t& second_read_ns, size_t& fork_ss_ns, int& error);
|
size_t& first_read_ns, size_t& second_read_ns, size_t& fork_ss_ns, int& error);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -7,74 +7,75 @@
|
|||||||
|
|
||||||
#include <libregexis024fa/graph_to_bytecode/core.h>
|
#include <libregexis024fa/graph_to_bytecode/core.h>
|
||||||
|
|
||||||
void write_priority_table_actions(std::vector<uint8_t>& result, RegexPriorityTable &priority_table) {
|
namespace regexis024 {
|
||||||
|
void write_priority_table_actions(std::vector<uint8_t>& result, RegexPriorityTable &priority_table) {
|
||||||
for (RegexPriorityTableAction& act: priority_table) {
|
for (RegexPriorityTableAction& act: priority_table) {
|
||||||
if (act.pos.isForRange()) {
|
if (act.pos.isForRange()) {
|
||||||
write_byte(result, regex024_opcodes::DDIST_RABX_SELARR);
|
write_byte(result, opcodes::DDIST_RABX_SELARR);
|
||||||
write_tai(result, act.pos.first);
|
write_tai(result, act.pos.first);
|
||||||
write_tai(result, act.pos.second);
|
write_tai(result, act.pos.second);
|
||||||
} else {
|
} else {
|
||||||
write_byte(result, regex024_opcodes::DMOV_RABX_SELARR);
|
write_byte(result, opcodes::DMOV_RABX_SELARR);
|
||||||
write_tai(result, act.pos.first);
|
write_tai(result, act.pos.first);
|
||||||
}
|
}
|
||||||
write_byte(result, act.minimize ?
|
write_byte(result, act.minimize ?
|
||||||
regex024_opcodes::SIFTPRIOR_MIN_RABX :
|
opcodes::SIFTPRIOR_MIN_RABX :
|
||||||
regex024_opcodes::SIFTPRIOR_MAX_RABX);
|
opcodes::SIFTPRIOR_MAX_RABX);
|
||||||
|
}
|
||||||
|
write_byte(result, opcodes::SIFT_DONE);
|
||||||
}
|
}
|
||||||
write_byte(result, regex024_opcodes::SIFT_DONE);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct belate_initialization_parameters {
|
struct belate_initialization_parameters {
|
||||||
size_t todo_pos_read_ss_n;
|
size_t todo_pos_read_ss_n;
|
||||||
size_t todo_pos_fork_ss_n;
|
size_t todo_pos_fork_ss_n;
|
||||||
size_t todo_pos_second_ns_size;
|
size_t todo_pos_second_ns_size;
|
||||||
|
|
||||||
void complete_it(std::vector<uint8_t>& result,
|
void complete_it(std::vector<uint8_t>& result,
|
||||||
regex_sslot_id_t first_read_ns, regex_sslot_id_t second_read_ns, regex_sslot_id_t fork_ss_ns)
|
sslot_id_t first_read_ns, sslot_id_t second_read_ns, sslot_id_t fork_ss_ns)
|
||||||
{
|
{
|
||||||
assert((uint64_t)first_read_ns + (uint64_t)second_read_ns <= UINT32_MAX);
|
assert((uint64_t)first_read_ns + (uint64_t)second_read_ns <= UINT32_MAX);
|
||||||
belated_sslot_id(result, todo_pos_read_ss_n , first_read_ns + second_read_ns);
|
belated_sslot_id(result, todo_pos_read_ss_n , first_read_ns + second_read_ns);
|
||||||
belated_sslot_id(result, todo_pos_fork_ss_n, fork_ss_ns);
|
belated_sslot_id(result, todo_pos_fork_ss_n, fork_ss_ns);
|
||||||
belated_sslot_id(result, todo_pos_second_ns_size, second_read_ns);
|
belated_sslot_id(result, todo_pos_second_ns_size, second_read_ns);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/* When I compile the initialization part of the program, I don't yet know what to put in
|
/* When I compile the initialization part of the program, I don't yet know what to put in
|
||||||
* PARAM_READ_SS_NUMBER, PARAM_FORK_SS_NUMBER and MSG_FED_INPUT_EXTENDED (second namespace size).
|
* PARAM_READ_SS_NUMBER, PARAM_FORK_SS_NUMBER and MSG_FED_INPUT_EXTENDED (second namespace size).
|
||||||
* These values are filled in belatedly, once they are known. */
|
* These values are filled in belatedly, once they are known. */
|
||||||
belate_initialization_parameters write_some_normal_initialization(std::vector<uint8_t>& result,
|
belate_initialization_parameters write_some_normal_initialization(std::vector<uint8_t>& result,
|
||||||
size_t selarr_size, const REGEX_IS024_FA_FirstStageFixInfo& info1)
|
size_t selarr_size, const REGEX_IS024_FA_FirstStageFixInfo& info1)
|
||||||
{
|
{
|
||||||
belate_initialization_parameters todo;
|
belate_initialization_parameters todo;
|
||||||
|
|
||||||
write_byte(result, regex024_opcodes::PARAM_READ_SS_NUMBER);
|
write_byte(result, opcodes::PARAM_READ_SS_NUMBER);
|
||||||
todo.todo_pos_read_ss_n = result.size();
|
todo.todo_pos_read_ss_n = result.size();
|
||||||
write_sslot_id(result, 0); // Belate
|
write_sslot_id(result, 0); // Belate
|
||||||
|
|
||||||
write_byte(result, regex024_opcodes::PARAM_FORK_SS_NUMBER);
|
write_byte(result, opcodes::PARAM_FORK_SS_NUMBER);
|
||||||
todo.todo_pos_fork_ss_n = result.size();
|
todo.todo_pos_fork_ss_n = result.size();
|
||||||
write_sslot_id(result, 0); // Belate
|
write_sslot_id(result, 0); // Belate
|
||||||
|
|
||||||
write_byte(result, regex024_opcodes::PARAM_SELARR_LEN);
|
write_byte(result, opcodes::PARAM_SELARR_LEN);
|
||||||
write_tai(result, selarr_size);
|
write_tai(result, selarr_size);
|
||||||
|
|
||||||
write_byte(result, regex024_opcodes::MSG_MULTISTART_ALLOWED);
|
write_byte(result, opcodes::MSG_MULTISTART_ALLOWED);
|
||||||
write_byte(result, 1);
|
write_byte(result, 1);
|
||||||
|
|
||||||
write_byte(result, regex024_opcodes::MSG_FED_INPUT_EXTENDED);
|
write_byte(result, opcodes::MSG_FED_INPUT_EXTENDED);
|
||||||
write_byte(result, info1.fed_chars_extend_one_left ? 1 : 0);
|
write_byte(result, info1.fed_chars_extend_one_left ? 1 : 0);
|
||||||
write_byte(result, info1.fed_chars_extend_one_right ? 1 : 0);
|
write_byte(result, info1.fed_chars_extend_one_right ? 1 : 0);
|
||||||
todo.todo_pos_second_ns_size = result.size();
|
todo.todo_pos_second_ns_size = result.size();
|
||||||
write_sslot_id(result, 0); // Belate
|
write_sslot_id(result, 0); // Belate
|
||||||
|
|
||||||
write_byte(result, regex024_opcodes::INIT);
|
write_byte(result, opcodes::INIT);
|
||||||
return todo;
|
return todo;
|
||||||
}
|
}
|
||||||
|
|
||||||
void compile_fa_to_regexis024_bytecode(std::vector<uint8_t>& result,
|
void compile_fa_to_regexis024_bytecode(std::vector<uint8_t>& result,
|
||||||
FA_Container &fa, RegexPriorityTable &priority_table,
|
FA_Container &fa, RegexPriorityTable &priority_table,
|
||||||
size_t selarr_size, const REGEX_IS024_FA_FirstStageFixInfo& info1, int& error)
|
size_t selarr_size, const REGEX_IS024_FA_FirstStageFixInfo& info1, int& error)
|
||||||
{
|
{
|
||||||
error = 0;
|
error = 0;
|
||||||
explicit_bookmarks bookmark_manager;
|
explicit_bookmarks bookmark_manager;
|
||||||
|
|
||||||
@ -87,7 +88,7 @@ void compile_fa_to_regexis024_bytecode(std::vector<uint8_t>& result,
|
|||||||
write_priority_table_actions(result, priority_table);
|
write_priority_table_actions(result, priority_table);
|
||||||
bookmark_manager.land_bookmark(result, BM_after_sift);
|
bookmark_manager.land_bookmark(result, BM_after_sift);
|
||||||
|
|
||||||
write_byte(result, regex024_opcodes::PARAM_COLSIFTFUNC_SET);
|
write_byte(result, opcodes::PARAM_COLSIFTFUNC_SET);
|
||||||
bookmark_manager.write_unresolved_reference(result, BM_sift_function);
|
bookmark_manager.write_unresolved_reference(result, BM_sift_function);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -99,4 +100,5 @@ void compile_fa_to_regexis024_bytecode(std::vector<uint8_t>& result,
|
|||||||
return;
|
return;
|
||||||
init_param_todo.complete_it(result, first_read_ns, second_read_ns, fork_ss_ns);
|
init_param_todo.complete_it(result, first_read_ns, second_read_ns, fork_ss_ns);
|
||||||
bookmark_manager.finish(result);
|
bookmark_manager.finish(result);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -7,8 +7,10 @@
|
|||||||
#include <libregexis024fa/selarr_priority_table.h>
|
#include <libregexis024fa/selarr_priority_table.h>
|
||||||
#include <libregexis024fa/fa_first_stage_fix.h>
|
#include <libregexis024fa/fa_first_stage_fix.h>
|
||||||
|
|
||||||
void compile_fa_to_regexis024_bytecode(std::vector<uint8_t>& result, FA_Container& fa, RegexPriorityTable& priority_table,
|
namespace regexis024 {
|
||||||
|
void compile_fa_to_regexis024_bytecode(std::vector<uint8_t>& result, FA_Container& fa, RegexPriorityTable& priority_table,
|
||||||
size_t selarr_size, const REGEX_IS024_FA_FirstStageFixInfo& info1, int& error);
|
size_t selarr_size, const REGEX_IS024_FA_FirstStageFixInfo& info1, int& error);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -4,42 +4,43 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <libregexis024fa/graph_to_bytecode/writing_commands.h>
|
#include <libregexis024fa/graph_to_bytecode/writing_commands.h>
|
||||||
|
|
||||||
std::vector<FilterSegment> convert_to_compSeg(const std::vector<codeset_t>& crossroad_codesets)
|
namespace regexis024 {
|
||||||
{
|
std::vector<FilterSegment> convert_to_compSeg(const std::vector<codeset_t>& crossroad_codesets)
|
||||||
|
{
|
||||||
std::vector<FilterSegment> compSeg;
|
std::vector<FilterSegment> compSeg;
|
||||||
std::vector<FilterSegment> seg;
|
std::vector<FilterSegment> seg;
|
||||||
for (size_t i = 0; i < crossroad_codesets.size(); i++) {
|
for (size_t i = 0; i < crossroad_codesets.size(); i++) {
|
||||||
for (auto& p: crossroad_codesets[i]) {
|
for (auto& p: crossroad_codesets[i]) {
|
||||||
seg.emplace_back(i, p.first, p.second);
|
seg.push_back({(ssize_t)i, p.first, p.second});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
std::sort(seg.begin(), seg.end(),
|
std::sort(seg.begin(), seg.end(),
|
||||||
[](const FilterSegment& a, const FilterSegment& b)->bool{return a.L < b.L;});
|
[](const FilterSegment& a, const FilterSegment& b)->bool{return a.L < b.L;});
|
||||||
if (seg.empty()) {
|
if (seg.empty()) {
|
||||||
compSeg.emplace_back(-1, 0, UINT32_MAX);
|
compSeg.push_back({-1, 0, UINT32_MAX});
|
||||||
} else {
|
} else {
|
||||||
if (seg[0].L > 0)
|
if (seg[0].L > 0)
|
||||||
compSeg.emplace_back(-1, 0, seg[0].L - 1);
|
compSeg.push_back({-1, 0, seg[0].L - 1});
|
||||||
size_t N = seg.size();
|
size_t N = seg.size();
|
||||||
for (size_t i = 0; i + 1 < N; i++) {
|
for (size_t i = 0; i + 1 < N; i++) {
|
||||||
compSeg.push_back(seg[i]);
|
compSeg.push_back(seg[i]);
|
||||||
assert(seg[i].R < seg[i + 1].L);
|
assert(seg[i].R < seg[i + 1].L);
|
||||||
if (seg[i].R + 1 < seg[i + 1].L)
|
if (seg[i].R + 1 < seg[i + 1].L)
|
||||||
compSeg.emplace_back(-1, seg[i].R + 1, seg[i + 1].L - 1);
|
compSeg.push_back({-1, seg[i].R + 1, seg[i + 1].L - 1});
|
||||||
}
|
}
|
||||||
compSeg.push_back(seg.back());
|
compSeg.push_back(seg.back());
|
||||||
if (seg.back().R < UINT32_MAX)
|
if (seg.back().R < UINT32_MAX)
|
||||||
compSeg.emplace_back(-1, seg[N - 1].R + 1, UINT32_MAX);
|
compSeg.push_back({-1, seg[N - 1].R + 1, UINT32_MAX});
|
||||||
}
|
}
|
||||||
assert(!compSeg.empty());
|
assert(!compSeg.empty());
|
||||||
return compSeg;
|
return compSeg;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Sets relies_on_proper_ending to true if the resulting bytecode relies on me placing [0]'th node at the end */
|
/* Sets relies_on_proper_ending to true if the resulting bytecode relies on me placing [0]'th node at the end */
|
||||||
void write_filter_exit(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager,
|
void write_filter_exit(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager,
|
||||||
const std::vector<bookmark_id_t>& crossroad_marks,
|
const std::vector<bookmark_id_t>& crossroad_marks,
|
||||||
ssize_t color, bool at_the_end, bool& relies_on_proper_ending)
|
ssize_t color, bool at_the_end, bool& relies_on_proper_ending)
|
||||||
{
|
{
|
||||||
if (color < 0) {
|
if (color < 0) {
|
||||||
cmd_DIE(result);
|
cmd_DIE(result);
|
||||||
} else if (color != 0 || !at_the_end) {
|
} else if (color != 0 || !at_the_end) {
|
||||||
@ -47,12 +48,12 @@ void write_filter_exit(std::vector<uint8_t>& result, explicit_bookmarks& bookmar
|
|||||||
} else {
|
} else {
|
||||||
relies_on_proper_ending = true;
|
relies_on_proper_ending = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo: use return value of this function
|
// todo: use return value of this function
|
||||||
bool write_filter(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager,
|
bool write_filter(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager,
|
||||||
const std::vector<codeset_t>& crossroad_codesets, const std::vector<bookmark_id_t>& crossroad_marks)
|
const std::vector<codeset_t>& crossroad_codesets, const std::vector<bookmark_id_t>& crossroad_marks)
|
||||||
{
|
{
|
||||||
bool relies_on_proper_ending = false;
|
bool relies_on_proper_ending = false;
|
||||||
|
|
||||||
std::vector<FilterSegment> compSeg = convert_to_compSeg(crossroad_codesets);
|
std::vector<FilterSegment> compSeg = convert_to_compSeg(crossroad_codesets);
|
||||||
@ -114,7 +115,5 @@ bool write_filter(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_man
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return relies_on_proper_ending;
|
return relies_on_proper_ending;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
FilterSegment::FilterSegment(ssize_t color, uint32_t l, uint32_t r): color(color), L(l), R(r) {}
|
|
||||||
//
|
|
||||||
|
@ -6,16 +6,17 @@
|
|||||||
#include <libregexis024fa/codeset.h>
|
#include <libregexis024fa/codeset.h>
|
||||||
#include <libregexis024fa/graph_to_bytecode/natural_compiler_utils.h>
|
#include <libregexis024fa/graph_to_bytecode/natural_compiler_utils.h>
|
||||||
|
|
||||||
struct FilterSegment {
|
namespace regexis024 {
|
||||||
|
struct FilterSegment {
|
||||||
ssize_t color;
|
ssize_t color;
|
||||||
uint32_t L, R;
|
uint32_t L;
|
||||||
|
uint32_t R;
|
||||||
|
};
|
||||||
|
|
||||||
FilterSegment(ssize_t color, uint32_t l, uint32_t r);
|
/* Return whether user of function must place [0]'th option after the filter
|
||||||
};
|
|
||||||
|
|
||||||
/* Return whether user of function must place [0]'th option after the filter
|
|
||||||
* The filter can end up being written in such a way that the end will never be reached */
|
* The filter can end up being written in such a way that the end will never be reached */
|
||||||
bool write_filter(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager,
|
bool write_filter(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager,
|
||||||
const std::vector<codeset_t>& crossroad_codesets, const std::vector<bookmark_id_t>& crossroad_marks);
|
const std::vector<codeset_t>& crossroad_codesets, const std::vector<bookmark_id_t>& crossroad_marks);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -2,93 +2,93 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <libregexis024vm/vm_opcodes.h>
|
#include <libregexis024vm/vm_opcodes.h>
|
||||||
|
|
||||||
|
namespace regexis024 {
|
||||||
#define push_to_res_least_signif result.push_back(x & 0xffLU); x >>= 8
|
#define push_to_res_least_signif result.push_back(x & 0xffLU); x >>= 8
|
||||||
|
void write_byte(std::vector<uint8_t>& result, uint8_t x) {
|
||||||
void write_byte(std::vector<uint8_t>& result, uint8_t x) {
|
|
||||||
result.push_back(x);
|
result.push_back(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_word(std::vector<uint8_t>& result, uint16_t x) {
|
void write_word(std::vector<uint8_t>& result, uint16_t x) {
|
||||||
push_to_res_least_signif; push_to_res_least_signif;
|
push_to_res_least_signif; push_to_res_least_signif;
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_doubleword(std::vector<uint8_t>& result, uint32_t x) {
|
void write_doubleword(std::vector<uint8_t>& result, uint32_t x) {
|
||||||
push_to_res_least_signif; push_to_res_least_signif; push_to_res_least_signif; push_to_res_least_signif;
|
push_to_res_least_signif; push_to_res_least_signif; push_to_res_least_signif; push_to_res_least_signif;
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_quadword(std::vector<uint8_t>& result, uint64_t x) {
|
void write_quadword(std::vector<uint8_t>& result, uint64_t x) {
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 0; i < 8; i++) {
|
||||||
push_to_res_least_signif;
|
push_to_res_least_signif;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#undef push_to_res_least_signif
|
#undef push_to_res_least_signif
|
||||||
|
|
||||||
#define put_belated_to_res assert(result[pos] == 0); result[pos++] = value & 0xffLU; value >>= 8
|
#define put_belated_to_res assert(result[pos] == 0); result[pos++] = value & 0xffLU; value >>= 8
|
||||||
void belated_byte(std::vector<uint8_t>& result, size_t pos, uint8_t value) {
|
void belated_byte(std::vector<uint8_t>& result, size_t pos, uint8_t value) {
|
||||||
assert(pos < result.size());
|
assert(pos < result.size());
|
||||||
result[pos] = value;
|
result[pos] = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
void belated_word(std::vector<uint8_t>& result, size_t pos, uint16_t value) {
|
void belated_word(std::vector<uint8_t>& result, size_t pos, uint16_t value) {
|
||||||
assert(pos + 2 <= result.size());
|
assert(pos + 2 <= result.size());
|
||||||
put_belated_to_res; put_belated_to_res;
|
put_belated_to_res; put_belated_to_res;
|
||||||
}
|
}
|
||||||
|
|
||||||
void belated_doubleword(std::vector<uint8_t>& result, size_t pos, uint32_t value) {
|
void belated_doubleword(std::vector<uint8_t>& result, size_t pos, uint32_t value) {
|
||||||
assert(pos + 4 <= result.size());
|
assert(pos + 4 <= result.size());
|
||||||
put_belated_to_res; put_belated_to_res; put_belated_to_res; put_belated_to_res;
|
put_belated_to_res; put_belated_to_res; put_belated_to_res; put_belated_to_res;
|
||||||
}
|
}
|
||||||
|
|
||||||
void belated_quadword(std::vector<uint8_t>& result, size_t pos, uint64_t value) {
|
void belated_quadword(std::vector<uint8_t>& result, size_t pos, uint64_t value) {
|
||||||
assert(pos + 8 <= result.size());
|
assert(pos + 8 <= result.size());
|
||||||
for (int i = 0; i < 8; i++) {
|
for (int i = 0; i < 8; i++) {
|
||||||
put_belated_to_res;
|
put_belated_to_res;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#undef put_belated_to_res
|
#undef put_belated_to_res
|
||||||
|
|
||||||
void write_sslot_id(std::vector<uint8_t>& result, regex_sslot_id_t x) {
|
void write_sslot_id(std::vector<uint8_t>& result, sslot_id_t x) {
|
||||||
write_doubleword(result, x);
|
write_doubleword(result, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_tai(std::vector<uint8_t>& result, regex_tai_t x) {
|
void write_tai(std::vector<uint8_t>& result, tai_t x) {
|
||||||
write_word(result, x);
|
write_word(result, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_near_ptr(std::vector<uint8_t>& result, regex_near_ptr_t x) {
|
void write_near_ptr(std::vector<uint8_t>& result, near_ptr_t x) {
|
||||||
write_quadword(result, x);
|
write_quadword(result, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
void belated_sslot_id(std::vector<uint8_t>& result, size_t pos, regex_sslot_id_t value) {
|
void belated_sslot_id(std::vector<uint8_t>& result, size_t pos, sslot_id_t value) {
|
||||||
belated_doubleword(result, pos, value);
|
belated_doubleword(result, pos, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void belated_tai(std::vector<uint8_t>& result, size_t pos, regex_tai_t value) {
|
void belated_tai(std::vector<uint8_t>& result, size_t pos, tai_t value) {
|
||||||
belated_word(result, pos, value);
|
belated_word(result, pos, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
void belated_near_ptr(std::vector<uint8_t>& result, size_t pos, regex_near_ptr_t value) {
|
void belated_near_ptr(std::vector<uint8_t>& result, size_t pos, near_ptr_t value) {
|
||||||
belated_quadword(result, pos, value);
|
belated_quadword(result, pos, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
bookmark_id_t explicit_bookmarks::new_bookmark() {
|
bookmark_id_t explicit_bookmarks::new_bookmark() {
|
||||||
pile.emplace_back();
|
pile.emplace_back();
|
||||||
return free_bid++;
|
return free_bid++;
|
||||||
}
|
}
|
||||||
|
|
||||||
void explicit_bookmarks::write_unresolved_reference(std::vector<uint8_t> &result, bookmark_id_t bm) {
|
void explicit_bookmarks::write_unresolved_reference(std::vector<uint8_t> &result, bookmark_id_t bm) {
|
||||||
size_t where_to_fill_later = result.size();
|
size_t where_to_fill_later = result.size();
|
||||||
write_near_ptr(result, 0);
|
write_near_ptr(result, 0);
|
||||||
pile[bm].positions_of_belated_refs.push_back(where_to_fill_later);
|
pile[bm].positions_of_belated_refs.push_back(where_to_fill_later);
|
||||||
}
|
}
|
||||||
|
|
||||||
void explicit_bookmarks::land_bookmark(std::vector<uint8_t> &result, bookmark_id_t bm) {
|
void explicit_bookmarks::land_bookmark(std::vector<uint8_t> &result, bookmark_id_t bm) {
|
||||||
assert(!pile[bm].placed_somewhere);
|
assert(!pile[bm].placed_somewhere);
|
||||||
pile[bm].placed_somewhere = true;
|
pile[bm].placed_somewhere = true;
|
||||||
pile[bm].actual_position = result.size();
|
pile[bm].actual_position = result.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
void explicit_bookmarks::finish(std::vector<uint8_t> &result) {
|
void explicit_bookmarks::finish(std::vector<uint8_t> &result) {
|
||||||
for (explicit_bookmark_info& bmi: pile) {
|
for (explicit_bookmark_info& bmi: pile) {
|
||||||
assert(bmi.positions_of_belated_refs.empty() || bmi.placed_somewhere);
|
assert(bmi.positions_of_belated_refs.empty() || bmi.placed_somewhere);
|
||||||
if (bmi.placed_somewhere) {
|
if (bmi.placed_somewhere) {
|
||||||
@ -97,19 +97,20 @@ void explicit_bookmarks::finish(std::vector<uint8_t> &result) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bookmark_id_t explicit_bookmarks::new_range_of_bookmarks(size_t n) {
|
bookmark_id_t explicit_bookmarks::new_range_of_bookmarks(size_t n) {
|
||||||
bookmark_id_t offset = free_bid;
|
bookmark_id_t offset = free_bid;
|
||||||
free_bid += n;
|
free_bid += n;
|
||||||
for (size_t i = 0; i < n; i++) {
|
for (size_t i = 0; i < n; i++) {
|
||||||
pile.emplace_back();
|
pile.emplace_back();
|
||||||
}
|
}
|
||||||
return offset;
|
return offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool explicit_bookmarks::has_landed(bookmark_id_t bm) {
|
bool explicit_bookmarks::has_landed(bookmark_id_t bm) {
|
||||||
return pile[bm].placed_somewhere;
|
return pile[bm].placed_somewhere;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef put_belated_to_res
|
#undef put_belated_to_res
|
||||||
|
@ -4,40 +4,40 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <libregexis024vm/vm_opcodes_types.h>
|
#include <libregexis024vm/vm_opcodes_types.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
namespace regexis024 {
|
||||||
|
void write_byte(std::vector<uint8_t>& result, uint8_t x);
|
||||||
|
void write_word(std::vector<uint8_t>& result, uint16_t x);
|
||||||
|
void write_doubleword(std::vector<uint8_t>& result, uint32_t x);
|
||||||
|
void write_quadword(std::vector<uint8_t>& result, uint64_t x);
|
||||||
|
|
||||||
void write_byte(std::vector<uint8_t>& result, uint8_t x);
|
void belated_byte(std::vector<uint8_t>& result, size_t pos, uint8_t value);
|
||||||
void write_word(std::vector<uint8_t>& result, uint16_t x);
|
void belated_word(std::vector<uint8_t>& result, size_t pos, uint16_t value);
|
||||||
void write_doubleword(std::vector<uint8_t>& result, uint32_t x);
|
void belated_doubleword(std::vector<uint8_t>& result, size_t pos, uint32_t value);
|
||||||
void write_quadword(std::vector<uint8_t>& result, uint64_t x);
|
void belated_quadword(std::vector<uint8_t>& result, size_t pos, uint64_t value);
|
||||||
|
|
||||||
void belated_byte(std::vector<uint8_t>& result, size_t pos, uint8_t value);
|
|
||||||
void belated_word(std::vector<uint8_t>& result, size_t pos, uint16_t value);
|
|
||||||
void belated_doubleword(std::vector<uint8_t>& result, size_t pos, uint32_t value);
|
|
||||||
void belated_quadword(std::vector<uint8_t>& result, size_t pos, uint64_t value);
|
|
||||||
|
|
||||||
|
|
||||||
void write_sslot_id(std::vector<uint8_t>& result, regex_sslot_id_t x);
|
void write_sslot_id(std::vector<uint8_t>& result, sslot_id_t x);
|
||||||
void write_tai(std::vector<uint8_t>& result, regex_tai_t x);
|
void write_tai(std::vector<uint8_t>& result, tai_t x);
|
||||||
void write_near_ptr(std::vector<uint8_t>& result, regex_near_ptr_t x);
|
void write_near_ptr(std::vector<uint8_t>& result, near_ptr_t x);
|
||||||
|
|
||||||
void belated_sslot_id(std::vector<uint8_t>& result, size_t pos, regex_sslot_id_t value);
|
void belated_sslot_id(std::vector<uint8_t>& result, size_t pos, sslot_id_t value);
|
||||||
void belated_tai(std::vector<uint8_t>& result, size_t pos, regex_tai_t value);
|
void belated_tai(std::vector<uint8_t>& result, size_t pos, tai_t value);
|
||||||
void belated_near_ptr(std::vector<uint8_t>& result, size_t pos, regex_near_ptr_t value);
|
void belated_near_ptr(std::vector<uint8_t>& result, size_t pos, near_ptr_t value);
|
||||||
|
|
||||||
// constexpr uint64_t INSTRUCTION_SZ = REGEX024_BYTECODE_INSTRUCTION_SZ;
|
// constexpr uint64_t INSTRUCTION_SZ = REGEX024_BYTECODE_INSTRUCTION_SZ;
|
||||||
// constexpr uint64_t SSLOT_ID_SZ = REGEX024_BYTECODE_SSLOT_ID_SZ;
|
// constexpr uint64_t SSLOT_ID_SZ = REGEX024_BYTECODE_SSLOT_ID_SZ;
|
||||||
// constexpr uint64_t TRACK_ARRAY_INDEX_ID_SZ = REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ;
|
// constexpr uint64_t TRACK_ARRAY_INDEX_ID_SZ = REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ;
|
||||||
// constexpr uint64_t NEAR_POINTER_SZ = REGEX024_BYTECODE_NEAR_POINTER_SZ;
|
// constexpr uint64_t NEAR_POINTER_SZ = REGEX024_BYTECODE_NEAR_POINTER_SZ;
|
||||||
|
|
||||||
typedef size_t bookmark_id_t;
|
typedef size_t bookmark_id_t;
|
||||||
|
|
||||||
struct explicit_bookmark_info {
|
struct explicit_bookmark_info {
|
||||||
std::vector<size_t> positions_of_belated_refs;
|
std::vector<size_t> positions_of_belated_refs;
|
||||||
bool placed_somewhere = false;
|
bool placed_somewhere = false;
|
||||||
size_t actual_position;
|
size_t actual_position;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct explicit_bookmarks {
|
struct explicit_bookmarks {
|
||||||
bookmark_id_t free_bid = 0;
|
bookmark_id_t free_bid = 0;
|
||||||
/* For each named explicit bookmark there is an element in PILE */
|
/* For each named explicit bookmark there is an element in PILE */
|
||||||
std::vector<explicit_bookmark_info> pile;
|
std::vector<explicit_bookmark_info> pile;
|
||||||
@ -57,7 +57,7 @@ struct explicit_bookmarks {
|
|||||||
bookmark_id_t new_range_of_bookmarks(size_t n);
|
bookmark_id_t new_range_of_bookmarks(size_t n);
|
||||||
|
|
||||||
bool has_landed(bookmark_id_t bm);
|
bool has_landed(bookmark_id_t bm);
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -2,22 +2,23 @@
|
|||||||
#include <libregexis024vm/vm_opcodes.h>
|
#include <libregexis024vm/vm_opcodes.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
void cmd_JUMP(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, bookmark_id_t dest) {
|
namespace regexis024 {
|
||||||
write_byte(result, regex024_opcodes::JUMP);
|
void cmd_JUMP(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, bookmark_id_t dest) {
|
||||||
|
write_byte(result, opcodes::JUMP);
|
||||||
bookmark_manager.write_unresolved_reference(result, dest);
|
bookmark_manager.write_unresolved_reference(result, dest);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr regex024_opcode cmp_EQUAL[4] = {regex024_opcodes::JCEQUAL_B, regex024_opcodes::JCEQUAL_W,
|
constexpr opcode_t cmp_EQUAL[4] = {opcodes::JCEQUAL_B, opcodes::JCEQUAL_W,
|
||||||
regex024_opcodes::JCEQUAL_DW, regex024_opcodes::JCEQUAL_QW};
|
opcodes::JCEQUAL_DW, opcodes::JCEQUAL_QW};
|
||||||
constexpr regex024_opcode cmp_LESS[4] = {regex024_opcodes::JCLESS_B, regex024_opcodes::JCLESS_W,
|
constexpr opcode_t cmp_LESS[4] = {opcodes::JCLESS_B, opcodes::JCLESS_W,
|
||||||
regex024_opcodes::JCLESS_DW, regex024_opcodes::JCLESS_QW};
|
opcodes::JCLESS_DW, opcodes::JCLESS_QW};
|
||||||
constexpr regex024_opcode cmp_GRTR[4] = {regex024_opcodes::JCGRTR_B, regex024_opcodes::JCGRTR_W,
|
constexpr opcode_t cmp_GRTR[4] = {opcodes::JCGRTR_B, opcodes::JCGRTR_W,
|
||||||
regex024_opcodes::JCGRTR_DW, regex024_opcodes::JCGRTR_QW};
|
opcodes::JCGRTR_DW, opcodes::JCGRTR_QW};
|
||||||
|
|
||||||
|
|
||||||
void cmd_JC(const regex024_opcode cmpT[4],
|
void cmd_JC(const opcode_t cmpT[4],
|
||||||
std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest)
|
std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest)
|
||||||
{
|
{
|
||||||
if (val <= UINT8_MAX) {
|
if (val <= UINT8_MAX) {
|
||||||
write_byte(result, cmpT[0]);
|
write_byte(result, cmpT[0]);
|
||||||
write_byte(result, static_cast<uint8_t>(val));
|
write_byte(result, static_cast<uint8_t>(val));
|
||||||
@ -32,44 +33,45 @@ void cmd_JC(const regex024_opcode cmpT[4],
|
|||||||
write_quadword(result, val);
|
write_quadword(result, val);
|
||||||
}
|
}
|
||||||
bookmark_manager.write_unresolved_reference(result, dest);
|
bookmark_manager.write_unresolved_reference(result, dest);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cmd_JCEQUAL(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest) {
|
void cmd_JCEQUAL(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest) {
|
||||||
cmd_JC(cmp_EQUAL, result, bookmark_manager, val, dest);
|
cmd_JC(cmp_EQUAL, result, bookmark_manager, val, dest);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cmd_JCLESS(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest) {
|
void cmd_JCLESS(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest) {
|
||||||
cmd_JC(cmp_LESS, result, bookmark_manager, val, dest);
|
cmd_JC(cmp_LESS, result, bookmark_manager, val, dest);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cmd_JCGRTR(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest) {
|
void cmd_JCGRTR(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest) {
|
||||||
cmd_JC(cmp_GRTR, result, bookmark_manager, val, dest);
|
cmd_JC(cmp_GRTR, result, bookmark_manager, val, dest);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cmd_DIE(std::vector<uint8_t> &result) {
|
void cmd_DIE(std::vector<uint8_t> &result) {
|
||||||
write_byte(result, regex024_opcodes::DIE);
|
write_byte(result, opcodes::DIE);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cmd_MATCH(std::vector<uint8_t> &result) {
|
void cmd_MATCH(std::vector<uint8_t> &result) {
|
||||||
write_byte(result, regex024_opcodes::MATCH);
|
write_byte(result, opcodes::MATCH);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cmd_READ_first_ns(std::vector<uint8_t>& result, size_t slot) {
|
void cmd_READ_first_ns(std::vector<uint8_t>& result, size_t slot) {
|
||||||
assert(slot <= UINT32_MAX);
|
assert(slot <= UINT32_MAX);
|
||||||
write_byte(result, regex024_opcodes::READ);
|
write_byte(result, opcodes::READ);
|
||||||
write_sslot_id(result, slot);
|
write_sslot_id(result, slot);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cmd_FORK(std::vector<uint8_t> &result, explicit_bookmarks& bookmark_manager, size_t slot, bookmark_id_t dest) {
|
void cmd_FORK(std::vector<uint8_t> &result, explicit_bookmarks& bookmark_manager, size_t slot, bookmark_id_t dest) {
|
||||||
assert(slot <= UINT32_MAX);
|
assert(slot <= UINT32_MAX);
|
||||||
write_byte(result, regex024_opcodes::FORK);
|
write_byte(result, opcodes::FORK);
|
||||||
write_sslot_id(result, slot);
|
write_sslot_id(result, slot);
|
||||||
bookmark_manager.write_unresolved_reference(result, dest);
|
bookmark_manager.write_unresolved_reference(result, dest);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cmd_READ_second_ns(std::vector<uint8_t>& result, std::vector<size_t>& belate_second_read_ns_slot_args) {
|
void cmd_READ_second_ns(std::vector<uint8_t>& result, std::vector<size_t>& belate_second_read_ns_slot_args) {
|
||||||
write_byte(result, regex024_opcodes::READ);
|
write_byte(result, opcodes::READ);
|
||||||
belate_second_read_ns_slot_args.push_back(result.size());
|
belate_second_read_ns_slot_args.push_back(result.size());
|
||||||
write_sslot_id(result, 0);
|
write_sslot_id(result, 0);
|
||||||
|
}
|
||||||
}
|
}
|
@ -4,17 +4,19 @@
|
|||||||
#include <libregexis024fa/graph_to_bytecode/natural_compiler_utils.h>
|
#include <libregexis024fa/graph_to_bytecode/natural_compiler_utils.h>
|
||||||
#include <libregexis024vm/vm_opcodes.h>
|
#include <libregexis024vm/vm_opcodes.h>
|
||||||
|
|
||||||
void cmd_JUMP(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, bookmark_id_t dest);
|
namespace regexis024 {
|
||||||
|
void cmd_JUMP(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, bookmark_id_t dest);
|
||||||
|
|
||||||
void cmd_JCEQUAL(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest);
|
void cmd_JCEQUAL(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest);
|
||||||
void cmd_JCLESS(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest);
|
void cmd_JCLESS(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest);
|
||||||
void cmd_JCGRTR(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest);
|
void cmd_JCGRTR(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest);
|
||||||
|
|
||||||
void cmd_DIE(std::vector<uint8_t>& result);
|
void cmd_DIE(std::vector<uint8_t>& result);
|
||||||
void cmd_MATCH(std::vector<uint8_t>& result);
|
void cmd_MATCH(std::vector<uint8_t>& result);
|
||||||
|
|
||||||
void cmd_READ_first_ns(std::vector<uint8_t>& result, size_t slot);
|
void cmd_READ_first_ns(std::vector<uint8_t>& result, size_t slot);
|
||||||
void cmd_READ_second_ns(std::vector<uint8_t>& result, std::vector<size_t>& belate_second_read_ns_slot_args);
|
void cmd_READ_second_ns(std::vector<uint8_t>& result, std::vector<size_t>& belate_second_read_ns_slot_args);
|
||||||
void cmd_FORK(std::vector<uint8_t> &result, explicit_bookmarks& bookmark_manager, size_t slot, bookmark_id_t dest);
|
void cmd_FORK(std::vector<uint8_t> &result, explicit_bookmarks& bookmark_manager, size_t slot, bookmark_id_t dest);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -3,7 +3,8 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <libregexis024vm/utils.h>
|
#include <libregexis024vm/utils.h>
|
||||||
|
|
||||||
void reattach_fa_node_edge(FA_Node **old_node_ptr, FA_Node *new_node) {
|
namespace regexis024 {
|
||||||
|
void reattach_fa_node_edge(FA_Node **old_node_ptr, FA_Node *new_node) {
|
||||||
assert(old_node_ptr);
|
assert(old_node_ptr);
|
||||||
if (*old_node_ptr){
|
if (*old_node_ptr){
|
||||||
assert((**old_node_ptr).refs);
|
assert((**old_node_ptr).refs);
|
||||||
@ -12,28 +13,27 @@ void reattach_fa_node_edge(FA_Node **old_node_ptr, FA_Node *new_node) {
|
|||||||
if (new_node)
|
if (new_node)
|
||||||
new_node->refs++;
|
new_node->refs++;
|
||||||
*old_node_ptr = new_node;
|
*old_node_ptr = new_node;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We basically reattch fa.start to node */
|
/* We basically reattch fa.start to node */
|
||||||
void yay_new_start(FA_Container &fa, FA_NodePathPart *node) {
|
void yay_new_start(FA_Container &fa, FA_NodePathPart *node) {
|
||||||
assert(node);
|
assert(node);
|
||||||
node->refs++;
|
node->refs++;
|
||||||
node->nxt_node = fa.start;
|
node->nxt_node = fa.start;
|
||||||
fa.start = node;
|
fa.start = node;
|
||||||
}
|
}
|
||||||
|
|
||||||
void add_option_to_fork_node(FA_NodeOfForking *fnode, FA_Node *transition_dest) {
|
void add_option_to_fork_node(FA_NodeOfForking *fnode, FA_Node *transition_dest) {
|
||||||
fnode->nxt_options.push_back(transition_dest);
|
fnode->nxt_options.push_back(transition_dest);
|
||||||
if(transition_dest)
|
if(transition_dest)
|
||||||
transition_dest->refs++;
|
transition_dest->refs++;
|
||||||
}
|
}
|
||||||
|
|
||||||
void reattach_nxt_node(FA_NodePathPart *node, FA_Node *dest) {
|
void reattach_nxt_node(FA_NodePathPart *node, FA_Node *dest) {
|
||||||
reattach_fa_node_edge(&(node->nxt_node), dest);
|
reattach_fa_node_edge(&(node->nxt_node), dest);
|
||||||
}
|
}
|
||||||
|
|
||||||
// todo: get rid of exitf in the whole project
|
FA_Node* copy_node_no_container_adjustments(FA_Node& node){
|
||||||
FA_Node* copy_node_no_container_adjustments(FA_Node& node){
|
|
||||||
FA_Node* res;
|
FA_Node* res;
|
||||||
/* Using implicitly defined copy constructors */
|
/* Using implicitly defined copy constructors */
|
||||||
#define typeCase(etype, ctype) case etype: res = new ctype((ctype&)node); break;
|
#define typeCase(etype, ctype) case etype: res = new ctype((ctype&)node); break;
|
||||||
@ -53,19 +53,20 @@ FA_Node* copy_node_no_container_adjustments(FA_Node& node){
|
|||||||
res->refs = 0;
|
res->refs = 0;
|
||||||
res->search_mark = -1;
|
res->search_mark = -1;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* In case when transferring the ownership of this new raw pointer has failed, node is destroyed, exception is thrown */
|
/* In case when transferring the ownership of this new raw pointer has failed, node is destroyed, exception is thrown */
|
||||||
FA_Node *copy_fa_node(FA_Node& node, FA_Container &fa) {
|
FA_Node *copy_fa_node(FA_Node& node, FA_Container &fa) {
|
||||||
FA_Node* res = copy_node_no_container_adjustments(node);
|
FA_Node* res = copy_node_no_container_adjustments(node);
|
||||||
/* Can invalidate ponter res (in which case it also throws exeption, so none of this matters in the end) */
|
/* Can invalidate ponter res (in which case it also throws exeption, so none of this matters in the end) */
|
||||||
fa.registerNew(res);
|
fa.registerNew(res);
|
||||||
res->reAdd_references();
|
res->reAdd_references();
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
FA_Node *copy_fa_node_to_another_fa(FA_Node& node, FA_Container &resultFa) {
|
FA_Node *copy_fa_node_to_another_fa(FA_Node& node, FA_Container &resultFa) {
|
||||||
FA_Node* res = copy_node_no_container_adjustments(node);
|
FA_Node* res = copy_node_no_container_adjustments(node);
|
||||||
resultFa.registerNew(res);
|
resultFa.registerNew(res);
|
||||||
return res;
|
return res;
|
||||||
|
}
|
||||||
}
|
}
|
@ -4,14 +4,16 @@
|
|||||||
#include "finite_automaton.h"
|
#include "finite_automaton.h"
|
||||||
#include "fa_first_stage_fix.h"
|
#include "fa_first_stage_fix.h"
|
||||||
|
|
||||||
FA_Node* copy_fa_node(FA_Node& node, FA_Container& fa);
|
namespace regexis024 {
|
||||||
void yay_new_start(FA_Container& fa, FA_NodePathPart* node);
|
FA_Node* copy_fa_node(FA_Node& node, FA_Container& fa);
|
||||||
void reattach_fa_node_edge(FA_Node** old_node_ptr, FA_Node* new_node);
|
void yay_new_start(FA_Container& fa, FA_NodePathPart* node);
|
||||||
void add_option_to_fork_node(FA_NodeOfForking* fnode, FA_Node* transition_dest);
|
void reattach_fa_node_edge(FA_Node** old_node_ptr, FA_Node* new_node);
|
||||||
void reattach_nxt_node(FA_NodePathPart* node, FA_Node* dest);
|
void add_option_to_fork_node(FA_NodeOfForking* fnode, FA_Node* transition_dest);
|
||||||
|
void reattach_nxt_node(FA_NodePathPart* node, FA_Node* dest);
|
||||||
|
|
||||||
/* This is a one weird operation. New node in resultFa will still point to nodes in sourceFa,
|
/* This is a one weird operation. New node in resultFa will still point to nodes in sourceFa,
|
||||||
* without increasing refcount of those nodes. YOU HAVE TO FIX IT ASAP */
|
* without increasing refcount of those nodes. YOU HAVE TO FIX IT ASAP */
|
||||||
FA_Node* copy_fa_node_to_another_fa(FA_Node& node, FA_Container& resultFa);
|
FA_Node* copy_fa_node_to_another_fa(FA_Node& node, FA_Container& resultFa);
|
||||||
|
}
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_MISC_FA_FUNCS_H
|
#endif //LIBREGEXIS024_MISC_FA_FUNCS_H
|
||||||
|
@ -1,15 +1,16 @@
|
|||||||
#include <libregexis024fa/selarr_priority_table.h>
|
#include <libregexis024fa/selarr_priority_table.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
|
namespace regexis024 {
|
||||||
bool RegexPriorityTableAction_Pos::isForRange() const {
|
bool RegexPriorityTableAction_Pos::isForRange() const {
|
||||||
return second >= 0;
|
return second >= 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
RegexPriorityTableAction_Pos::RegexPriorityTableAction_Pos(int first, int second, tracking_var_type type):
|
RegexPriorityTableAction_Pos::RegexPriorityTableAction_Pos(int first, int second, tracking_var_type_t type):
|
||||||
first(first),second(second), type(type) {}
|
first(first),second(second), type(type) {}
|
||||||
//
|
//
|
||||||
|
|
||||||
RegexPriorityTableAction::RegexPriorityTableAction(bool minimize, int first, int second, tracking_var_type type):
|
RegexPriorityTableAction::RegexPriorityTableAction(bool minimize, int first, int second, tracking_var_type_t type):
|
||||||
minimize(minimize), pos(first, second, type) {}
|
minimize(minimize), pos(first, second, type) {}
|
||||||
//
|
//
|
||||||
|
}
|
||||||
|
@ -5,22 +5,24 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <libregexis024fa/tracking_variables.h>
|
#include <libregexis024fa/tracking_variables.h>
|
||||||
|
|
||||||
struct RegexPriorityTableAction_Pos{
|
namespace regexis024 {
|
||||||
|
struct RegexPriorityTableAction_Pos{
|
||||||
/* first and second are indexes in selarr (but second can be -1 if it is unused) */
|
/* first and second are indexes in selarr (but second can be -1 if it is unused) */
|
||||||
int first;
|
int first;
|
||||||
int second;
|
int second;
|
||||||
tracking_var_type type;
|
tracking_var_type_t type;
|
||||||
bool isForRange() const;
|
bool isForRange() const;
|
||||||
|
|
||||||
RegexPriorityTableAction_Pos(int first, int second, tracking_var_type type);
|
RegexPriorityTableAction_Pos(int first, int second, tracking_var_type_t type);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct RegexPriorityTableAction{
|
struct RegexPriorityTableAction{
|
||||||
bool minimize;
|
bool minimize;
|
||||||
RegexPriorityTableAction_Pos pos;
|
RegexPriorityTableAction_Pos pos;
|
||||||
RegexPriorityTableAction(bool minimize, int first, int second, tracking_var_type type);
|
RegexPriorityTableAction(bool minimize, int first, int second, tracking_var_type_t type);
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef std::vector<RegexPriorityTableAction> RegexPriorityTable;
|
typedef std::vector<RegexPriorityTableAction> RegexPriorityTable;
|
||||||
|
}
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024FA_SELARR_PRIORITY_TABLE_H
|
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024FA_SELARR_PRIORITY_TABLE_H
|
||||||
|
@ -1,53 +1,48 @@
|
|||||||
#include <libregexis024fa/tracking_fa_nodes.h>
|
#include <libregexis024fa/tracking_fa_nodes.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
bool isImmMovOpcode(regex024_opcode inst) {
|
namespace regexis024 {
|
||||||
return inst == regex024_opcodes::MOV_COLARR_IMM || inst == regex024_opcodes::MOV_SELARR_IMM;
|
bool isImmMovOpcode(opcode_t inst) {
|
||||||
}
|
return inst == opcodes::MOV_COLARR_IMM || inst == opcodes::MOV_SELARR_IMM;
|
||||||
|
}
|
||||||
|
|
||||||
bool isCurPosMovOpcode(regex024_opcode inst) {
|
bool isCurPosMovOpcode(opcode_t inst) {
|
||||||
return inst == regex024_opcodes::MOV_COLARR_BTPOS || inst == regex024_opcodes::MOV_SELARR_CHPOS;
|
return inst == opcodes::MOV_COLARR_BTPOS || inst == opcodes::MOV_SELARR_CHPOS;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isColarrOpcode(regex024_opcode inst) {
|
bool isColarrOpcode(opcode_t inst) {
|
||||||
return inst == regex024_opcodes::MOV_COLARR_IMM || inst == regex024_opcodes::MOV_COLARR_BTPOS;
|
return inst == opcodes::MOV_COLARR_IMM || inst == opcodes::MOV_COLARR_BTPOS;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isSelarrOpcode(regex024_opcode inst) {
|
bool isSelarrOpcode(opcode_t inst) {
|
||||||
return inst == regex024_opcodes::MOV_SELARR_IMM || inst == regex024_opcodes::MOV_SELARR_CHPOS;
|
return inst == opcodes::MOV_SELARR_IMM || inst == opcodes::MOV_SELARR_CHPOS;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isTrackingFaNode(const FA_Node *n) {
|
bool isTrackingFaNode(const FA_Node *n) {
|
||||||
return n->type == track_array_mov_imm || n->type == track_array_mov_halfinvariant;
|
return n->type == track_array_mov_imm || n->type == track_array_mov_halfinvariant;
|
||||||
}
|
}
|
||||||
|
|
||||||
TrackingOperationInFa::TrackingOperationInFa(regex024_opcode opcode, regex_tai_t key, uint64_t imm_value)
|
std::string TrackingOperationInFa::toString() const {
|
||||||
: opcode(opcode), key(key), immValue(imm_value) {}
|
|
||||||
|
|
||||||
TrackingOperationInFa::TrackingOperationInFa(regex024_opcode opcode, regex_tai_t key)
|
|
||||||
: opcode(opcode), key(key) {}
|
|
||||||
|
|
||||||
std::string TrackingOperationInFa::toString() const {
|
|
||||||
switch (opcode){
|
switch (opcode){
|
||||||
case regex024_opcodes::MOV_COLARR_IMM:
|
case opcodes::MOV_COLARR_IMM:
|
||||||
return "colarr[" + std::to_string(key) + "] := " + std::to_string(immValue);
|
return "colarr[" + std::to_string(key) + "] := " + std::to_string(immValue);
|
||||||
case regex024_opcodes::MOV_SELARR_IMM:
|
case opcodes::MOV_SELARR_IMM:
|
||||||
return "selarr[" + std::to_string(key) + "] := " + std::to_string(immValue);
|
return "selarr[" + std::to_string(key) + "] := " + std::to_string(immValue);
|
||||||
case regex024_opcodes::MOV_COLARR_BTPOS:
|
case opcodes::MOV_COLARR_BTPOS:
|
||||||
return "colarr[" + std::to_string(key) + "] := cur byte position";
|
return "colarr[" + std::to_string(key) + "] := cur byte position";
|
||||||
case regex024_opcodes::MOV_SELARR_CHPOS:
|
case opcodes::MOV_SELARR_CHPOS:
|
||||||
return "selarr[" + std::to_string(key) + "] := cur char position";
|
return "selarr[" + std::to_string(key) + "] := cur char position";
|
||||||
default:
|
default:
|
||||||
return "wrong collection operation";
|
return "wrong collection operation";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
FA_NodePathPart* convert_to_node(const TrackingOperationInFa& op, FA_Container& fa) {
|
FA_NodePathPart* convert_to_node(const TrackingOperationInFa& op, FA_Container& fa) {
|
||||||
if (isImmMovOpcode(op.opcode)) {
|
if (isImmMovOpcode(op.opcode)) {
|
||||||
return fa.makeTrackArrayMovImm(op.opcode, op.key, op.immValue);
|
return fa.makeTrackArrayMovImm(op.opcode, op.key, op.immValue);
|
||||||
}
|
}
|
||||||
assert(isCurPosMovOpcode(op.opcode));
|
assert(isCurPosMovOpcode(op.opcode));
|
||||||
return fa.makeTrackArrayMovHalfinvariant(op.opcode, op.key);
|
return fa.makeTrackArrayMovHalfinvariant(op.opcode, op.key);
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,27 +5,24 @@
|
|||||||
#include <libregexis024fa/finite_automaton.h>
|
#include <libregexis024fa/finite_automaton.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
bool isImmMovOpcode(regex024_opcode inst);
|
namespace regexis024 {
|
||||||
bool isCurPosMovOpcode(regex024_opcode inst);
|
bool isImmMovOpcode(opcode_t inst);
|
||||||
bool isColarrOpcode(regex024_opcode inst);
|
bool isCurPosMovOpcode(opcode_t inst);
|
||||||
bool isSelarrOpcode(regex024_opcode inst);
|
bool isColarrOpcode(opcode_t inst);
|
||||||
|
bool isSelarrOpcode(opcode_t inst);
|
||||||
|
|
||||||
bool isTrackingFaNode(const FA_Node* n);
|
bool isTrackingFaNode(const FA_Node* n);
|
||||||
|
|
||||||
struct TrackingOperationInFa {
|
struct TrackingOperationInFa {
|
||||||
regex024_opcode opcode;
|
opcode_t opcode;
|
||||||
regex_tai_t key;
|
tai_t key;
|
||||||
/* Not needed for halfinvariant operations */
|
/* Not needed for halfinvariant operations */
|
||||||
uint64_t immValue;
|
uint64_t immValue;
|
||||||
|
|
||||||
TrackingOperationInFa(regex024_opcode opcode, regex_tai_t key, uint64_t imm_value);
|
|
||||||
|
|
||||||
TrackingOperationInFa(regex024_opcode opcode, regex_tai_t key);
|
|
||||||
|
|
||||||
std::string toString() const;
|
std::string toString() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
FA_NodePathPart* convert_to_node(const TrackingOperationInFa& op, FA_Container& fa);
|
|
||||||
|
|
||||||
|
FA_NodePathPart* convert_to_node(const TrackingOperationInFa& op, FA_Container& fa);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,14 +1,16 @@
|
|||||||
#ifndef LIBREGEXIS024_SRC_LIBREGEXIS024FA_TRACKING_VARIABLES_H
|
#ifndef LIBREGEXIS024_SRC_LIBREGEXIS024FA_TRACKING_VARIABLES_H
|
||||||
#define LIBREGEXIS024_SRC_LIBREGEXIS024FA_TRACKING_VARIABLES_H
|
#define LIBREGEXIS024_SRC_LIBREGEXIS024FA_TRACKING_VARIABLES_H
|
||||||
|
|
||||||
namespace tracking_var_types {
|
namespace regexis024 {
|
||||||
|
/* Kinds of tracking variables. The enumerators are kept in a dedicated namespace so that
 * they are spelled tracking_var_types::range, tracking_var_types::dot_cur_pos, etc. */
namespace tracking_var_types {
    enum tracking_var_type_I {
        range,
        dot_cur_pos,
        dot_immediate
    };
}

/* Short name for the enumeration type itself */
using tracking_var_type_t = tracking_var_types::tracking_var_type_I;
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef tracking_var_types::tracking_var_type_I tracking_var_type;
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -2,7 +2,8 @@
|
|||||||
#include <libregexis024sol/sol_misc_base.h>
|
#include <libregexis024sol/sol_misc_base.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
uint32_t read_hex(REGEX_IS024_MeaningContext& ctx, int sz){
|
namespace regexis024 {
|
||||||
|
uint32_t read_hex(REGEX_IS024_MeaningContext& ctx, int sz){
|
||||||
uint32_t res = 0;
|
uint32_t res = 0;
|
||||||
for (int i = 0; i < sz; i++){
|
for (int i = 0; i < sz; i++){
|
||||||
int32_t ch = peep(ctx);
|
int32_t ch = peep(ctx);
|
||||||
@ -19,19 +20,19 @@ uint32_t read_hex(REGEX_IS024_MeaningContext& ctx, int sz){
|
|||||||
readChar(ctx);
|
readChar(ctx);
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
void unicode_in_bs_case(REGEX_IS024_MeaningContext &ctx, bool &ret_is_multicode, codeset_t &ret_set, int sz){
|
/* Handles a backslash escape that spells a single code point in hexadecimal.
 * Consumes one character from ctx (presumably the escape letter -- confirm with the caller),
 * then lets read_hex() parse `sz` characters and stores a one-character codeset in ret_set.
 * ret_is_multicode is always set to false. */
void unicode_in_bs_case(REGEX_IS024_MeaningContext &ctx, bool &ret_is_multicode, codeset_t &ret_set, int sz){
    ret_is_multicode = false;
    readChar(ctx);
    /* read_hex might report an error into ctx; the result is stored regardless */
    const uint32_t code_point = read_hex(ctx, sz);
    ret_set = codeset_of_one_char(code_point);
}
|
||||||
|
|
||||||
void
|
void
|
||||||
backslash_expression_parsing_try_regular(REGEX_IS024_MeaningContext &ctx, const CommonCodesets& cc,
|
backslash_expression_parsing_try_regular(REGEX_IS024_MeaningContext &ctx, const CommonCodesets& cc,
|
||||||
bool &ret_is_multicode, codeset_t &ret_set)
|
bool &ret_is_multicode, codeset_t &ret_set)
|
||||||
{
|
{
|
||||||
int32_t leader = peep(ctx);
|
int32_t leader = peep(ctx);
|
||||||
if (ctx.error)
|
if (ctx.error)
|
||||||
return;
|
return;
|
||||||
@ -59,4 +60,5 @@ backslash_expression_parsing_try_regular(REGEX_IS024_MeaningContext &ctx, const
|
|||||||
report(ctx, "backslash in the wrong place");
|
report(ctx, "backslash in the wrong place");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
@ -5,32 +5,33 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
struct ParseCall{
|
namespace regexis024 {
|
||||||
|
struct ParseCall{
|
||||||
virtual ~ParseCall() = default;
|
virtual ~ParseCall() = default;
|
||||||
virtual std::unique_ptr<ParseCall> afterReceive(REGEX_IS024_MeaningContext& ctx) { assert(false); }
|
virtual std::unique_ptr<ParseCall> afterReceive(REGEX_IS024_MeaningContext& ctx) { assert(false); }
|
||||||
virtual std::unique_ptr<ParseCall> firstTime(REGEX_IS024_MeaningContext& ctx) { assert(false); }
|
virtual std::unique_ptr<ParseCall> firstTime(REGEX_IS024_MeaningContext& ctx) { assert(false); }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Top_ParseCall: public ParseCall{
|
struct Top_ParseCall: public ParseCall{
|
||||||
Command& res;
|
Command& res;
|
||||||
explicit Top_ParseCall(Command &res) : res(res) {}
|
explicit Top_ParseCall(Command &res) : res(res) {}
|
||||||
std::unique_ptr<ParseCall> firstTime(REGEX_IS024_MeaningContext &ctx) override;
|
std::unique_ptr<ParseCall> firstTime(REGEX_IS024_MeaningContext &ctx) override;
|
||||||
std::unique_ptr<ParseCall> afterReceive(REGEX_IS024_MeaningContext &ctx) override;
|
std::unique_ptr<ParseCall> afterReceive(REGEX_IS024_MeaningContext &ctx) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Bracker_ParseCall: public ParseCall{
|
struct Bracker_ParseCall: public ParseCall{
|
||||||
std::vector<CommandArgument>& res;
|
std::vector<CommandArgument>& res;
|
||||||
bool closingBraceEnded = false;
|
bool closingBraceEnded = false;
|
||||||
explicit Bracker_ParseCall(std::vector<CommandArgument> &res) : res(res) {}
|
explicit Bracker_ParseCall(std::vector<CommandArgument> &res) : res(res) {}
|
||||||
std::unique_ptr<ParseCall> argReadProc(REGEX_IS024_MeaningContext& ctx);
|
std::unique_ptr<ParseCall> argReadProc(REGEX_IS024_MeaningContext& ctx);
|
||||||
std::unique_ptr<ParseCall> firstTime(REGEX_IS024_MeaningContext &ctx) override;
|
std::unique_ptr<ParseCall> firstTime(REGEX_IS024_MeaningContext &ctx) override;
|
||||||
std::unique_ptr<ParseCall> afterReceive(REGEX_IS024_MeaningContext &ctx) override;
|
std::unique_ptr<ParseCall> afterReceive(REGEX_IS024_MeaningContext &ctx) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define call_ERROR_CHECK do { if (ctx.error) { return NULL; } } while (0)
|
#define call_ERROR_CHECK do { if (ctx.error) { return NULL; } } while (0)
|
||||||
#define call_THROW(str) do { report(ctx, "command expression: " str); return NULL; } while (0)
|
#define call_THROW(str) do { report(ctx, "command expression: " str); return NULL; } while (0)
|
||||||
|
|
||||||
std::unique_ptr<ParseCall> Top_ParseCall::firstTime(REGEX_IS024_MeaningContext &ctx) {
|
std::unique_ptr<ParseCall> Top_ParseCall::firstTime(REGEX_IS024_MeaningContext &ctx) {
|
||||||
assert(readChar(ctx) == U'!');
|
assert(readChar(ctx) == U'!');
|
||||||
int32_t ch = peep(ctx); call_ERROR_CHECK;
|
int32_t ch = peep(ctx); call_ERROR_CHECK;
|
||||||
if (ch == U'~'){
|
if (ch == U'~'){
|
||||||
@ -50,23 +51,23 @@ std::unique_ptr<ParseCall> Top_ParseCall::firstTime(REGEX_IS024_MeaningContext &
|
|||||||
return std::make_unique<Bracker_ParseCall>(res.arguments);
|
return std::make_unique<Bracker_ParseCall>(res.arguments);
|
||||||
}
|
}
|
||||||
call_THROW("top lvl: command call should be ended with ';' or '{...}'");
|
call_THROW("top lvl: command call should be ended with ';' or '{...}'");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<ParseCall> Top_ParseCall::afterReceive(REGEX_IS024_MeaningContext &ctx) {
|
/* Nothing is left to do once the nested call has finished: always yields a null call. */
std::unique_ptr<ParseCall> Top_ParseCall::afterReceive(REGEX_IS024_MeaningContext &ctx) {
    return nullptr;
}
|
||||||
|
|
||||||
std::unique_ptr<ParseCall> Bracker_ParseCall::firstTime(REGEX_IS024_MeaningContext &ctx) {
|
/* Consumes the opening '{' of the argument list and starts reading arguments.
 * The read is done outside of assert(): an assert argument is not evaluated when NDEBUG
 * is defined, so keeping readChar() inside it would leave the brace unconsumed in
 * release builds. */
std::unique_ptr<ParseCall> Bracker_ParseCall::firstTime(REGEX_IS024_MeaningContext &ctx) {
    const auto opening = readChar(ctx);
    assert(opening == U'{');
    (void)opening; /* silences the unused-variable warning when assert is compiled out */
    return argReadProc(ctx);
}
|
||||||
|
|
||||||
std::unique_ptr<ParseCall> Bracker_ParseCall::afterReceive(REGEX_IS024_MeaningContext &ctx) {
|
/* Called after a nested call has completed: records that fact in closingBraceEnded
 * and resumes the shared argument-reading routine. */
std::unique_ptr<ParseCall> Bracker_ParseCall::afterReceive(REGEX_IS024_MeaningContext &ctx) {
    this->closingBraceEnded = true;
    std::unique_ptr<ParseCall> next_call = argReadProc(ctx);
    return next_call;
}
|
||||||
|
|
||||||
std::unique_ptr<ParseCall> Bracker_ParseCall::argReadProc(REGEX_IS024_MeaningContext &ctx) {
|
std::unique_ptr<ParseCall> Bracker_ParseCall::argReadProc(REGEX_IS024_MeaningContext &ctx) {
|
||||||
repeat:
|
repeat:
|
||||||
int32_t ch = peep(ctx); call_ERROR_CHECK;
|
int32_t ch = peep(ctx); call_ERROR_CHECK;
|
||||||
if (ch == U';'){
|
if (ch == U';'){
|
||||||
@ -98,9 +99,9 @@ std::unique_ptr<ParseCall> Bracker_ParseCall::argReadProc(REGEX_IS024_MeaningCon
|
|||||||
call_THROW("brace lvl: argument ends with ';' or {...}");
|
call_THROW("brace lvl: argument ends with ';' or {...}");
|
||||||
}
|
}
|
||||||
call_THROW("brace lvl: argument starts with ';' or it's name");
|
call_THROW("brace lvl: argument starts with ';' or it's name");
|
||||||
}
|
}
|
||||||
|
|
||||||
Command command_expr_parse(REGEX_IS024_MeaningContext &ctx) {
|
Command command_expr_parse(REGEX_IS024_MeaningContext &ctx) {
|
||||||
std::vector<std::unique_ptr<ParseCall>> callStack;
|
std::vector<std::unique_ptr<ParseCall>> callStack;
|
||||||
Command res;
|
Command res;
|
||||||
callStack.push_back(std::make_unique<Top_ParseCall>(res));
|
callStack.push_back(std::make_unique<Top_ParseCall>(res));
|
||||||
@ -118,16 +119,16 @@ Command command_expr_parse(REGEX_IS024_MeaningContext &ctx) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* commands_for_codesets[] = {"word", "space", "digit", "variable", "any", "A", NULL};
|
const char* commands_for_codesets[] = {"word", "space", "digit", "variable", "any", "A", NULL};
|
||||||
|
|
||||||
bool is_command_for_charset(const Command &cmd) {
|
bool is_command_for_charset(const Command &cmd) {
|
||||||
return !cmd.tilda && cmd.arguments.empty() && is_string_in_stringset(cmd.name.c_str(), commands_for_codesets);
|
return !cmd.tilda && cmd.arguments.empty() && is_string_in_stringset(cmd.name.c_str(), commands_for_codesets);
|
||||||
}
|
}
|
||||||
|
|
||||||
void interpret_command_as_charset_giving(const CommonCodesets& cc, const Command &cmd, codeset_t& ret)
|
void interpret_command_as_charset_giving(const CommonCodesets& cc, const Command &cmd, codeset_t& ret)
|
||||||
{
|
{
|
||||||
if (cmd.name == "word")
|
if (cmd.name == "word")
|
||||||
ret = cc.word_constituents;
|
ret = cc.word_constituents;
|
||||||
else if (cmd.name == "space")
|
else if (cmd.name == "space")
|
||||||
@ -140,4 +141,5 @@ void interpret_command_as_charset_giving(const CommonCodesets& cc, const Command
|
|||||||
ret = codeset_of_all;
|
ret = codeset_of_all;
|
||||||
else
|
else
|
||||||
assert(false);
|
assert(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#include <libregexis024sol/common_codesets.h>
|
#include <libregexis024sol/common_codesets.h>
|
||||||
|
|
||||||
CommonCodesets::CommonCodesets() {
|
namespace regexis024 {
|
||||||
|
CommonCodesets::CommonCodesets() {
|
||||||
spaces = set_add_char(spaces, U'\n');
|
spaces = set_add_char(spaces, U'\n');
|
||||||
spaces = set_add_char(spaces, U' ');
|
spaces = set_add_char(spaces, U' ');
|
||||||
spaces = set_add_char(spaces, U'\t');
|
spaces = set_add_char(spaces, U'\t');
|
||||||
@ -10,4 +11,5 @@ CommonCodesets::CommonCodesets() {
|
|||||||
digits = codeset_t({{'0', '9'}});
|
digits = codeset_t({{'0', '9'}});
|
||||||
variable_constituents = set_add_char(word_constituents, U'-');
|
variable_constituents = set_add_char(word_constituents, U'-');
|
||||||
variable_constituents = merge_sets(variable_constituents, digits);
|
variable_constituents = merge_sets(variable_constituents, digits);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,12 +3,14 @@
|
|||||||
|
|
||||||
#include <libregexis024fa/codeset.h>
|
#include <libregexis024fa/codeset.h>
|
||||||
|
|
||||||
struct CommonCodesets {
|
namespace regexis024 {
|
||||||
|
struct CommonCodesets {
|
||||||
codeset_t spaces;
|
codeset_t spaces;
|
||||||
codeset_t word_constituents;
|
codeset_t word_constituents;
|
||||||
codeset_t digits;
|
codeset_t digits;
|
||||||
codeset_t variable_constituents;
|
codeset_t variable_constituents;
|
||||||
CommonCodesets();
|
CommonCodesets();
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -23,12 +23,13 @@
|
|||||||
#define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0)
|
#define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0)
|
||||||
#define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0)
|
#define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0)
|
||||||
|
|
||||||
/* ****************************** Top */
|
namespace regexis024 {
|
||||||
|
/* ****************************** Top */
|
||||||
|
|
||||||
const char* dfa_arg_aliases_condone[] = {"forgive", "condone", "okay", "optional", "nonimportant", "ifpossible", NULL};
|
const char* dfa_arg_aliases_condone[] = {"forgive", "condone", "okay", "optional", "nonimportant", "ifpossible", NULL};
|
||||||
const char* dfa_arg_aliases_acerbic[] = {"acerbic", "angry", "pedantic", "nofork", "pure", "important", "fierce", NULL};
|
const char* dfa_arg_aliases_acerbic[] = {"acerbic", "angry", "pedantic", "nofork", "pure", "important", "fierce", NULL};
|
||||||
|
|
||||||
void dfa_command_processing(REGEX_IS024_MeaningContext &ctx, ParsingContext& pctx, const Command& cmdBuf){
|
void dfa_command_processing(REGEX_IS024_MeaningContext &ctx, ParsingContext& pctx, const Command& cmdBuf){
|
||||||
if (pctx.dfa_cmd_activated){
|
if (pctx.dfa_cmd_activated){
|
||||||
report(ctx, "repeating !dfa command");
|
report(ctx, "repeating !dfa command");
|
||||||
return;
|
return;
|
||||||
@ -48,9 +49,9 @@ void dfa_command_processing(REGEX_IS024_MeaningContext &ctx, ParsingContext& pct
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
report(ctx, "wrong arguments in !dfa command");
|
report(ctx, "wrong arguments in !dfa command");
|
||||||
}
|
}
|
||||||
|
|
||||||
void select_command_processing(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, const Command& cmdBuf){
|
void select_command_processing(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, const Command& cmdBuf){
|
||||||
if (pctx.select_cmd_encountered)
|
if (pctx.select_cmd_encountered)
|
||||||
aux_THROW("repeating !select command");
|
aux_THROW("repeating !select command");
|
||||||
pctx.select_cmd_encountered = true;
|
pctx.select_cmd_encountered = true;
|
||||||
@ -89,9 +90,9 @@ void select_command_processing(REGEX_IS024_MeaningContext& ctx, ParsingContext&
|
|||||||
pctx.is_inside_of_these_sa_subexpressions.assign(ctx.ktr.retrieval_info.size(), false);
|
pctx.is_inside_of_these_sa_subexpressions.assign(ctx.ktr.retrieval_info.size(), false);
|
||||||
/* Other info will be filled once a tracking-unit with such name will be actually found in regex */
|
/* Other info will be filled once a tracking-unit with such name will be actually found in regex */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void jump_into_madness(ctx_t& ctx, ParsingContext& pctx, FA_Container &fa, int hn){
|
void jump_into_madness(ctx_t& ctx, ParsingContext& pctx, FA_Container &fa, int hn){
|
||||||
while (true){
|
while (true){
|
||||||
int32_t pch = peep(ctx); aux_ERROR_CHECK;
|
int32_t pch = peep(ctx); aux_ERROR_CHECK;
|
||||||
if (pch != U'!'){
|
if (pch != U'!'){
|
||||||
@ -115,26 +116,26 @@ void jump_into_madness(ctx_t& ctx, ParsingContext& pctx, FA_Container &fa, int h
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
chekushka TopLvl_ParseCall::firstTime(ctx_t &ctx, ParsingContext &pctx, FA_Container &fa) {
|
/* Entry point of the top level: processes leading '!' commands (jump_into_madness with
 * hn = 1) and then, unless the input ended in a comment tail, hands control to a
 * fork-level call that fills `result`. */
chekushka TopLvl_ParseCall::firstTime(ctx_t &ctx, ParsingContext &pctx, FA_Container &fa) {
    result.assertDefault();
    jump_into_madness(ctx, pctx, fa, 1);
    if (!ctx.have_comment_tail)
        return std::make_unique<ForkLvl_ParseCall>(result);
    /* Comment tail: there is no expression body to parse */
    return nullptr;
}
|
||||||
|
|
||||||
chekushka TopLvl_ParseCall::afterReceive(ctx_t &ctx, ParsingContext &pctx, FA_Container &fa) {
|
/* Runs after the fork-level call has finished: processes trailing '!' commands
 * (jump_into_madness with hn = 2) and requires that the whole input has been consumed. */
chekushka TopLvl_ParseCall::afterReceive(ctx_t &ctx, ParsingContext &pctx, FA_Container &fa) {
    jump_into_madness(ctx, pctx, fa, 2);
    const bool input_exhausted = isEnd(ctx);
    if (!input_exhausted) {
        call_THROW("top lvl: EOF expected");
    }
    return nullptr;
}
|
||||||
|
|
||||||
/* ********************************* Bracket */
|
/* ********************************* Bracket */
|
||||||
|
|
||||||
chekushka BracketLvl_ParseCall::firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) {
|
chekushka BracketLvl_ParseCall::firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) {
|
||||||
result.assertDefault();
|
result.assertDefault();
|
||||||
assert(readChar(ctx) == U'(');
|
assert(readChar(ctx) == U'(');
|
||||||
/* sequence lvl already took care about resolving name and configuring SubtrackingNameInfo */
|
/* sequence lvl already took care about resolving name and configuring SubtrackingNameInfo */
|
||||||
@ -148,9 +149,9 @@ chekushka BracketLvl_ParseCall::firstTime(REGEX_IS024_MeaningContext& ctx, Parsi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return std::make_unique<ForkLvl_ParseCall>(tmp_ret_buff);
|
return std::make_unique<ForkLvl_ParseCall>(tmp_ret_buff);
|
||||||
}
|
}
|
||||||
|
|
||||||
chekushka BracketLvl_ParseCall::afterReceive(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) {
|
chekushka BracketLvl_ParseCall::afterReceive(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) {
|
||||||
if (peep(ctx) != U')')
|
if (peep(ctx) != U')')
|
||||||
call_THROW("missing ')'");
|
call_THROW("missing ')'");
|
||||||
readChar(ctx);
|
readChar(ctx);
|
||||||
@ -161,32 +162,32 @@ chekushka BracketLvl_ParseCall::afterReceive(REGEX_IS024_MeaningContext& ctx, Pa
|
|||||||
assert(tai_slots.colarr_first >= 0 && tai_slots.colarr_first < UINT16_MAX);
|
assert(tai_slots.colarr_first >= 0 && tai_slots.colarr_first < UINT16_MAX);
|
||||||
assert(tai_slots.colarr_second >= 0 && tai_slots.colarr_second < UINT16_MAX);
|
assert(tai_slots.colarr_second >= 0 && tai_slots.colarr_second < UINT16_MAX);
|
||||||
result = join(subexpression_from_path(fa.makeTrackArrayMovHalfinvariant(
|
result = join(subexpression_from_path(fa.makeTrackArrayMovHalfinvariant(
|
||||||
regex024_opcodes::MOV_COLARR_BTPOS, tai_slots.colarr_first)), result);
|
opcodes::MOV_COLARR_BTPOS, tai_slots.colarr_first)), result);
|
||||||
result = join(result, subexpression_from_path(fa.makeTrackArrayMovHalfinvariant(
|
result = join(result, subexpression_from_path(fa.makeTrackArrayMovHalfinvariant(
|
||||||
regex024_opcodes::MOV_COLARR_BTPOS, tai_slots.colarr_second)));
|
opcodes::MOV_COLARR_BTPOS, tai_slots.colarr_second)));
|
||||||
}
|
}
|
||||||
if (tai_slots.stored_in_sa){
|
if (tai_slots.stored_in_sa){
|
||||||
assert(tai_slots.selarr_first >= 0 && tai_slots.selarr_first < UINT16_MAX);
|
assert(tai_slots.selarr_first >= 0 && tai_slots.selarr_first < UINT16_MAX);
|
||||||
assert(tai_slots.selarr_second >= 0 && tai_slots.selarr_second < UINT16_MAX);
|
assert(tai_slots.selarr_second >= 0 && tai_slots.selarr_second < UINT16_MAX);
|
||||||
result = join(subexpression_from_path(fa.makeTrackArrayMovHalfinvariant(
|
result = join(subexpression_from_path(fa.makeTrackArrayMovHalfinvariant(
|
||||||
regex024_opcodes::MOV_SELARR_CHPOS, tai_slots.selarr_first)), result);
|
opcodes::MOV_SELARR_CHPOS, tai_slots.selarr_first)), result);
|
||||||
result = join(result, subexpression_from_path(fa.makeTrackArrayMovHalfinvariant(
|
result = join(result, subexpression_from_path(fa.makeTrackArrayMovHalfinvariant(
|
||||||
regex024_opcodes::MOV_SELARR_CHPOS, tai_slots.selarr_second)));
|
opcodes::MOV_SELARR_CHPOS, tai_slots.selarr_second)));
|
||||||
pctx.is_inside_of_these_sa_subexpressions[namedSubexpressionId] = false;
|
pctx.is_inside_of_these_sa_subexpressions[namedSubexpressionId] = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ******************************* Fork */
|
/* ******************************* Fork */
|
||||||
|
|
||||||
chekushka ForkLvl_ParseCall::firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) {
|
/* Starts parsing the first alternative of a fork: appends an empty option and returns a
 * sequence-level call that will overwrite it. */
chekushka ForkLvl_ParseCall::firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) {
    result.assertDefault();
    /* The default-constructed option contains nothing; the sequence call fills it in */
    options.emplace_back();
    auto& first_option = options.back();
    return std::make_unique<Sequence_ParseCall>(first_option);
}
|
||||||
|
|
||||||
chekushka ForkLvl_ParseCall::afterReceive(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) {
|
chekushka ForkLvl_ParseCall::afterReceive(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) {
|
||||||
int32_t end_reason = peep(ctx); call_ERROR_CHECK;
|
int32_t end_reason = peep(ctx); call_ERROR_CHECK;
|
||||||
if (end_reason == U'|'){
|
if (end_reason == U'|'){
|
||||||
readChar(ctx);
|
readChar(ctx);
|
||||||
@ -194,9 +195,9 @@ chekushka ForkLvl_ParseCall::afterReceive(REGEX_IS024_MeaningContext& ctx, Parsi
|
|||||||
}
|
}
|
||||||
result = forkify(options, fa);
|
result = forkify(options, fa);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void parseBody(REGEX_IS024_MeaningContext& ctx, FA_Container& fa, SubExprCompiled& result, ParsingContext& pctx){
|
void parseBody(REGEX_IS024_MeaningContext& ctx, FA_Container& fa, SubExprCompiled& result, ParsingContext& pctx){
|
||||||
std::vector<std::shared_ptr<ParseCall>> callStack;
|
std::vector<std::shared_ptr<ParseCall>> callStack;
|
||||||
callStack.push_back(std::make_unique<TopLvl_ParseCall>(result));
|
callStack.push_back(std::make_unique<TopLvl_ParseCall>(result));
|
||||||
bool first_time = true;
|
bool first_time = true;
|
||||||
@ -222,10 +223,10 @@ void parseBody(REGEX_IS024_MeaningContext& ctx, FA_Container& fa, SubExprCompile
|
|||||||
pctx.priority_table.emplace_back(sni.minimizing, sni.selarr_first, sni.selarr_second, sni.type);
|
pctx.priority_table.emplace_back(sni.minimizing, sni.selarr_first, sni.selarr_second, sni.type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
REGEX_IS024_MeaningContext::REGEX_IS024_MeaningContext(size_t inputSize, const char *input) : input_size(inputSize),
|
REGEX_IS024_MeaningContext::REGEX_IS024_MeaningContext(size_t inputSize, const char *input) : input_size(inputSize),
|
||||||
input(reinterpret_cast<const uint8_t *>(input)) {
|
input(input) {
|
||||||
CommonCodesets codeset_collection;
|
CommonCodesets codeset_collection;
|
||||||
FA_Container fa;
|
FA_Container fa;
|
||||||
FA_Container fa_1f;
|
FA_Container fa_1f;
|
||||||
@ -277,4 +278,5 @@ REGEX_IS024_MeaningContext::REGEX_IS024_MeaningContext(size_t inputSize, const c
|
|||||||
report(*this, "Failed to compile graph representation to bytecode representation");
|
report(*this, "Failed to compile graph representation to bytecode representation");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,14 +5,11 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
|
||||||
// todo: SUPER HIGHT PRIORITY: MOVE all this spaces digits variable_constituents junk out of this class
|
|
||||||
// todo: also PLEEEASE, write static before literally nearly every single one little stupid function in this library
|
|
||||||
#include <libregexis024sol/part_of_expr_that_tracks.h>
|
#include <libregexis024sol/part_of_expr_that_tracks.h>
|
||||||
|
namespace regexis024 {
|
||||||
struct REGEX_IS024_MeaningContext{
|
struct REGEX_IS024_MeaningContext{
|
||||||
size_t input_size;
|
size_t input_size;
|
||||||
const uint8_t* input;
|
const char* input;
|
||||||
|
|
||||||
bool error = false;
|
bool error = false;
|
||||||
std::string error_msg;
|
std::string error_msg;
|
||||||
@ -29,6 +26,6 @@ struct REGEX_IS024_MeaningContext{
|
|||||||
uint16_t free_colarr_tai = 0;
|
uint16_t free_colarr_tai = 0;
|
||||||
|
|
||||||
REGEX_IS024_MeaningContext(size_t inputSize, const char *input);
|
REGEX_IS024_MeaningContext(size_t inputSize, const char *input);
|
||||||
};
|
};
|
||||||
|
}
|
||||||
#endif //LIBREGEXIS024_EXPR_COMPILER_H
|
#endif //LIBREGEXIS024_EXPR_COMPILER_H
|
||||||
|
@ -6,23 +6,24 @@
|
|||||||
#define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0)
|
#define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0)
|
||||||
#define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0)
|
#define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0)
|
||||||
|
|
||||||
const char* header_command_dfa_names[] = {"dfa", "determinize", NULL};
|
namespace regexis024 {
|
||||||
|
const char* header_command_dfa_names[] = {"dfa", "determinize", NULL};
|
||||||
|
|
||||||
const char* header_command_select_names[] = {"s", "select", "selarr", "selectional", NULL};
|
const char* header_command_select_names[] = {"s", "select", "selarr", "selectional", NULL};
|
||||||
|
|
||||||
bool is_header_cmd(const Command &cmd) {
|
bool is_header_cmd(const Command &cmd) {
|
||||||
return cmd.tilda || is_header_dfa_cmd(cmd), is_header_dfa_cmd(cmd);
|
return cmd.tilda || is_header_dfa_cmd(cmd), is_header_dfa_cmd(cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_header_dfa_cmd(const Command &cmd) {
|
bool is_header_dfa_cmd(const Command &cmd) {
|
||||||
return is_string_in_stringset(cmd.name.c_str(), header_command_dfa_names);
|
return is_string_in_stringset(cmd.name.c_str(), header_command_dfa_names);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_header_select_cmd(const Command &cmd) {
|
bool is_header_select_cmd(const Command &cmd) {
|
||||||
return is_string_in_stringset(cmd.name.c_str(), header_command_select_names);
|
return is_string_in_stringset(cmd.name.c_str(), header_command_select_names);
|
||||||
}
|
}
|
||||||
|
|
||||||
void int_parse_with_limit_concern(const std::string &str, REGEX_IS024_MeaningContext &ctx, size_t &res, int lim) {
|
void int_parse_with_limit_concern(const std::string &str, REGEX_IS024_MeaningContext &ctx, size_t &res, int lim) {
|
||||||
res = 0;
|
res = 0;
|
||||||
for (char ch: str){
|
for (char ch: str){
|
||||||
if (!('0' <= ch && ch <= '9'))
|
if (!('0' <= ch && ch <= '9'))
|
||||||
@ -31,4 +32,5 @@ void int_parse_with_limit_concern(const std::string &str, REGEX_IS024_MeaningCon
|
|||||||
if (res > (size_t)lim)
|
if (res > (size_t)lim)
|
||||||
aux_THROW("integer is too big");
|
aux_THROW("integer is too big");
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,10 +4,11 @@
|
|||||||
|
|
||||||
#include <libregexis024sol/special_terminals.h>
|
#include <libregexis024sol/special_terminals.h>
|
||||||
|
|
||||||
bool is_header_cmd(const Command& cmd);
|
namespace regexis024 {
|
||||||
bool is_header_dfa_cmd(const Command& cmd);
|
bool is_header_cmd(const Command& cmd);
|
||||||
bool is_header_select_cmd(const Command& cmd);
|
bool is_header_dfa_cmd(const Command& cmd);
|
||||||
void int_parse_with_limit_concern(const std::string& str, REGEX_IS024_MeaningContext &ctx, size_t& res, int lim);
|
bool is_header_select_cmd(const Command& cmd);
|
||||||
|
void int_parse_with_limit_concern(const std::string& str, REGEX_IS024_MeaningContext &ctx, size_t& res, int lim);
|
||||||
|
}
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_EXPR_PARSE_FUNCTIONS_COMMAND_RECOGNITION_H
|
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_EXPR_PARSE_FUNCTIONS_COMMAND_RECOGNITION_H
|
||||||
|
@ -14,9 +14,10 @@
|
|||||||
#define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0)
|
#define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0)
|
||||||
#define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0)
|
#define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0)
|
||||||
|
|
||||||
/* **************************** Sequence */
|
namespace regexis024 {
|
||||||
|
/* **************************** Sequence */
|
||||||
|
|
||||||
void in_case_of_backslash(REGEX_IS024_MeaningContext &ctx, const CommonCodesets& cc, FA_Container &fa, SubExprCompiled& backPart) {
|
void in_case_of_backslash(REGEX_IS024_MeaningContext &ctx, const CommonCodesets& cc, FA_Container &fa, SubExprCompiled& backPart) {
|
||||||
assert(readChar(ctx) == U'\\');
|
assert(readChar(ctx) == U'\\');
|
||||||
int32_t leader = peep(ctx); aux_ERROR_CHECK;
|
int32_t leader = peep(ctx); aux_ERROR_CHECK;
|
||||||
if (leader == U'b'){
|
if (leader == U'b'){
|
||||||
@ -62,9 +63,9 @@ void in_case_of_backslash(REGEX_IS024_MeaningContext &ctx, const CommonCodesets&
|
|||||||
return; // To avoid reading leader again (it gets read in the end)
|
return; // To avoid reading leader again (it gets read in the end)
|
||||||
}
|
}
|
||||||
readChar(ctx);
|
readChar(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
void repeat_stuff_with_check(REGEX_IS024_MeaningContext& ctx,
|
void repeat_stuff_with_check(REGEX_IS024_MeaningContext& ctx,
|
||||||
SubExprCompiled &patient, FA_Container& fa, size_t min_allowed, size_t max_allowed){
|
SubExprCompiled &patient, FA_Container& fa, size_t min_allowed, size_t max_allowed){
|
||||||
if (min_allowed > max_allowed)
|
if (min_allowed > max_allowed)
|
||||||
aux_THROW("repeat operation: min > max");
|
aux_THROW("repeat operation: min > max");
|
||||||
@ -75,9 +76,9 @@ void repeat_stuff_with_check(REGEX_IS024_MeaningContext& ctx,
|
|||||||
"выражение корректно и не вызвает бесконечного цикла, напишите об этом в жалобную книгу: "
|
"выражение корректно и не вызвает бесконечного цикла, напишите об этом в жалобную книгу: "
|
||||||
"По ссылке: file:///dev/null Ваши предложения по улучшению libregexis024 обязательно будут рассмотрены.");
|
"По ссылке: file:///dev/null Ваши предложения по улучшению libregexis024 обязательно будут рассмотрены.");
|
||||||
apply_repeat_to_subexpression(patient, fa, min_allowed, max_allowed);
|
apply_repeat_to_subexpression(patient, fa, min_allowed, max_allowed);
|
||||||
}
|
}
|
||||||
|
|
||||||
void repeat_command_processing(REGEX_IS024_MeaningContext &ctx, FA_Container &fa, std::vector<SubExprCompiled>& parts,
|
void repeat_command_processing(REGEX_IS024_MeaningContext &ctx, FA_Container &fa, std::vector<SubExprCompiled>& parts,
|
||||||
const Command& cmd){
|
const Command& cmd){
|
||||||
if (parts.empty())
|
if (parts.empty())
|
||||||
aux_THROW("no subexpression before !repeat command");
|
aux_THROW("no subexpression before !repeat command");
|
||||||
@ -107,10 +108,10 @@ void repeat_command_processing(REGEX_IS024_MeaningContext &ctx, FA_Container &fa
|
|||||||
aux_THROW("!repeat: min > max");
|
aux_THROW("!repeat: min > max");
|
||||||
repeat_stuff_with_check(ctx, parts.back(), fa, min_allowed, max_allowed); aux_ERROR_CHECK;
|
repeat_stuff_with_check(ctx, parts.back(), fa, min_allowed, max_allowed); aux_ERROR_CHECK;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
chekushka Sequence_ParseCall::firstTime(REGEX_IS024_MeaningContext &ctx, ParsingContext &pctx, FA_Container &fa) {
|
chekushka Sequence_ParseCall::firstTime(REGEX_IS024_MeaningContext &ctx, ParsingContext &pctx, FA_Container &fa) {
|
||||||
while (true) {
|
while (true) {
|
||||||
int32_t fst = peep(ctx);
|
int32_t fst = peep(ctx);
|
||||||
call_ERROR_CHECK;
|
call_ERROR_CHECK;
|
||||||
@ -179,22 +180,22 @@ chekushka Sequence_ParseCall::firstTime(REGEX_IS024_MeaningContext &ctx, Parsing
|
|||||||
readChar(ctx);
|
readChar(ctx);
|
||||||
if (ctx.ktr.retrieval_info[id].stored_in_sa)
|
if (ctx.ktr.retrieval_info[id].stored_in_sa)
|
||||||
parts.emplace_back(subexpression_from_path(
|
parts.emplace_back(subexpression_from_path(
|
||||||
fa.makeTrackArrayMovImm(regex024_opcodes::MOV_SELARR_IMM,
|
fa.makeTrackArrayMovImm(opcodes::MOV_SELARR_IMM,
|
||||||
ctx.ktr.retrieval_info[id].selarr_first, value)));
|
ctx.ktr.retrieval_info[id].selarr_first, value)));
|
||||||
if (ctx.ktr.retrieval_info[id].stored_in_ca)
|
if (ctx.ktr.retrieval_info[id].stored_in_ca)
|
||||||
parts.emplace_back(subexpression_from_path(
|
parts.emplace_back(subexpression_from_path(
|
||||||
fa.makeTrackArrayMovImm(regex024_opcodes::MOV_COLARR_IMM,
|
fa.makeTrackArrayMovImm(opcodes::MOV_COLARR_IMM,
|
||||||
ctx.ktr.retrieval_info[id].colarr_first, value)));
|
ctx.ktr.retrieval_info[id].colarr_first, value)));
|
||||||
} else if (typeDet == U';'){
|
} else if (typeDet == U';'){
|
||||||
ensure_space_for_track_unit(ctx, name, tracking_var_types::dot_cur_pos); call_ERROR_CHECK;
|
ensure_space_for_track_unit(ctx, name, tracking_var_types::dot_cur_pos); call_ERROR_CHECK;
|
||||||
readChar(ctx);
|
readChar(ctx);
|
||||||
if (ctx.ktr.retrieval_info[id].stored_in_sa)
|
if (ctx.ktr.retrieval_info[id].stored_in_sa)
|
||||||
parts.emplace_back(subexpression_from_path(
|
parts.emplace_back(subexpression_from_path(
|
||||||
fa.makeTrackArrayMovHalfinvariant(regex024_opcodes::MOV_SELARR_CHPOS,
|
fa.makeTrackArrayMovHalfinvariant(opcodes::MOV_SELARR_CHPOS,
|
||||||
ctx.ktr.retrieval_info[id].selarr_first)));
|
ctx.ktr.retrieval_info[id].selarr_first)));
|
||||||
if (ctx.ktr.retrieval_info[id].stored_in_ca)
|
if (ctx.ktr.retrieval_info[id].stored_in_ca)
|
||||||
parts.emplace_back(subexpression_from_path(
|
parts.emplace_back(subexpression_from_path(
|
||||||
fa.makeTrackArrayMovHalfinvariant(regex024_opcodes::MOV_COLARR_BTPOS,
|
fa.makeTrackArrayMovHalfinvariant(opcodes::MOV_COLARR_BTPOS,
|
||||||
ctx.ktr.retrieval_info[id].colarr_first)));
|
ctx.ktr.retrieval_info[id].colarr_first)));
|
||||||
} else
|
} else
|
||||||
call_THROW("Missing ; or ( in the beginning of tracking unit");
|
call_THROW("Missing ; or ( in the beginning of tracking unit");
|
||||||
@ -214,9 +215,10 @@ chekushka Sequence_ParseCall::firstTime(REGEX_IS024_MeaningContext &ctx, Parsing
|
|||||||
for (SubExprCompiled& part: parts)
|
for (SubExprCompiled& part: parts)
|
||||||
result = join(result, part);
|
result = join(result, part);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
chekushka Sequence_ParseCall::afterReceive(REGEX_IS024_MeaningContext &ctx, ParsingContext &pctx, FA_Container &fa) {
|
chekushka Sequence_ParseCall::afterReceive(REGEX_IS024_MeaningContext &ctx, ParsingContext &pctx, FA_Container &fa) {
|
||||||
// This is possible only if I received a bracket expression
|
// This is possible only if I received a bracket expression
|
||||||
return firstTime(ctx, pctx, fa);
|
return firstTime(ctx, pctx, fa);
|
||||||
|
}
|
||||||
}
|
}
|
@ -10,7 +10,8 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <libregexis024fa/selarr_priority_table.h>
|
#include <libregexis024fa/selarr_priority_table.h>
|
||||||
|
|
||||||
struct ParsingContext{
|
namespace regexis024 {
|
||||||
|
struct ParsingContext{
|
||||||
/* Those subexpressions, that are tracket by s`a are forbidden from nesting inside themselves */
|
/* Those subexpressions, that are tracket by s`a are forbidden from nesting inside themselves */
|
||||||
std::vector<bool> is_inside_of_these_sa_subexpressions;
|
std::vector<bool> is_inside_of_these_sa_subexpressions;
|
||||||
bool select_cmd_encountered = false;
|
bool select_cmd_encountered = false;
|
||||||
@ -27,24 +28,24 @@ struct ParsingContext{
|
|||||||
explicit ParsingContext(const CommonCodesets& cc_): cc(cc_){}
|
explicit ParsingContext(const CommonCodesets& cc_): cc(cc_){}
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef REGEX_IS024_MeaningContext ctx_t;
|
typedef REGEX_IS024_MeaningContext ctx_t;
|
||||||
struct ParseCall;
|
struct ParseCall;
|
||||||
typedef std::unique_ptr<ParseCall> chekushka;
|
typedef std::unique_ptr<ParseCall> chekushka;
|
||||||
struct ParseCall{
|
struct ParseCall{
|
||||||
SubExprCompiled& result;
|
SubExprCompiled& result;
|
||||||
explicit ParseCall(SubExprCompiled &result) : result(result) {}
|
explicit ParseCall(SubExprCompiled &result) : result(result) {}
|
||||||
virtual ~ParseCall() = default;
|
virtual ~ParseCall() = default;
|
||||||
virtual chekushka afterReceive(ctx_t& ctx, ParsingContext& pctx, FA_Container& fa) { assert(false); }
|
virtual chekushka afterReceive(ctx_t& ctx, ParsingContext& pctx, FA_Container& fa) { assert(false); }
|
||||||
virtual chekushka firstTime(ctx_t& ctx, ParsingContext& pctx, FA_Container& fa) { assert(false); }
|
virtual chekushka firstTime(ctx_t& ctx, ParsingContext& pctx, FA_Container& fa) { assert(false); }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TopLvl_ParseCall: public ParseCall{
|
struct TopLvl_ParseCall: public ParseCall{
|
||||||
explicit TopLvl_ParseCall(SubExprCompiled &result) : ParseCall(result) {}
|
explicit TopLvl_ParseCall(SubExprCompiled &result) : ParseCall(result) {}
|
||||||
chekushka afterReceive(ctx_t &ctx, ParsingContext &pctx, FA_Container &fa) override;
|
chekushka afterReceive(ctx_t &ctx, ParsingContext &pctx, FA_Container &fa) override;
|
||||||
chekushka firstTime(ctx_t &ctx, ParsingContext &pctx, FA_Container &fa) override;
|
chekushka firstTime(ctx_t &ctx, ParsingContext &pctx, FA_Container &fa) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BracketLvl_ParseCall: public ParseCall{
|
struct BracketLvl_ParseCall: public ParseCall{
|
||||||
/* -1 if this is a normal bracket expression. Otherwise, it is an index in ctx.retrieval_info vector */
|
/* -1 if this is a normal bracket expression. Otherwise, it is an index in ctx.retrieval_info vector */
|
||||||
int64_t namedSubexpressionId;
|
int64_t namedSubexpressionId;
|
||||||
SubExprCompiled tmp_ret_buff;
|
SubExprCompiled tmp_ret_buff;
|
||||||
@ -52,23 +53,21 @@ struct BracketLvl_ParseCall: public ParseCall{
|
|||||||
ParseCall(result), namedSubexpressionId(namedSubexpressionId) {}
|
ParseCall(result), namedSubexpressionId(namedSubexpressionId) {}
|
||||||
chekushka afterReceive(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) override;
|
chekushka afterReceive(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) override;
|
||||||
chekushka firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) override;
|
chekushka firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ForkLvl_ParseCall: public ParseCall{
|
struct ForkLvl_ParseCall: public ParseCall{
|
||||||
std::vector<SubExprCompiled> options;
|
std::vector<SubExprCompiled> options;
|
||||||
explicit ForkLvl_ParseCall(SubExprCompiled &result) : ParseCall(result) {}
|
explicit ForkLvl_ParseCall(SubExprCompiled &result) : ParseCall(result) {}
|
||||||
chekushka afterReceive(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa);
|
chekushka afterReceive(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa);
|
||||||
chekushka firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa);
|
chekushka firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Sequence_ParseCall: public ParseCall{
|
struct Sequence_ParseCall: public ParseCall{
|
||||||
std::vector<SubExprCompiled> parts;
|
std::vector<SubExprCompiled> parts;
|
||||||
explicit Sequence_ParseCall(SubExprCompiled &result) :ParseCall(result) {}
|
explicit Sequence_ParseCall(SubExprCompiled &result) :ParseCall(result) {}
|
||||||
chekushka afterReceive(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa);
|
chekushka afterReceive(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa);
|
||||||
chekushka firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa);
|
chekushka firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa);
|
||||||
};
|
};
|
||||||
|
}
|
||||||
/* Some auxilary functions */
|
|
||||||
|
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_EXPR_PARSE_FUNCTIONS_EPF_H
|
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_EXPR_PARSE_FUNCTIONS_EPF_H
|
||||||
|
@ -4,20 +4,20 @@
|
|||||||
#define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0)
|
#define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0)
|
||||||
#define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0)
|
#define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0)
|
||||||
|
|
||||||
|
namespace regexis024 {
|
||||||
void for_one_type(REGEX_IS024_MeaningContext &ctx, uint16_t& free_ARR_tai, int& ARR_first, int& ARR_second,
|
void for_one_type(REGEX_IS024_MeaningContext &ctx, uint16_t& free_ARR_tai, int& ARR_first, int& ARR_second,
|
||||||
const std::string& ARR_NAME, tracking_var_type type){
|
const std::string& ARR_NAME, tracking_var_type_t type){
|
||||||
#define check_is_available() if (free_ARR_tai == UINT16_MAX) { \
|
#define check_is_available() if (free_ARR_tai == UINT16_MAX) { \
|
||||||
report(ctx, ("regex: " + ARR_NAME + ": key namespace overflow").c_str()); return;}
|
report(ctx, ("regex: " + ARR_NAME + ": key namespace overflow").c_str()); return;}
|
||||||
check_is_available()
|
check_is_available()
|
||||||
ARR_first = free_ARR_tai++;
|
ARR_first = free_ARR_tai++;
|
||||||
if (type == tracking_var_types::range){
|
if (type == tracking_var_types::range){
|
||||||
check_is_available()
|
check_is_available()
|
||||||
ARR_second = free_ARR_tai++;
|
ARR_second = free_ARR_tai++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ensure_space_for_track_unit(REGEX_IS024_MeaningContext &ctx, const std::string& name, tracking_var_type type) {
|
void ensure_space_for_track_unit(REGEX_IS024_MeaningContext &ctx, const std::string& name, tracking_var_type_t type) {
|
||||||
size_t id = ctx.ktr.track_names[name];
|
size_t id = ctx.ktr.track_names[name];
|
||||||
/* Size of this verctor won't be changed. THis is a safe reference */
|
/* Size of this verctor won't be changed. THis is a safe reference */
|
||||||
SubtrackingNameInfo& info = ctx.ktr.retrieval_info[id];
|
SubtrackingNameInfo& info = ctx.ktr.retrieval_info[id];
|
||||||
@ -35,4 +35,5 @@ void ensure_space_for_track_unit(REGEX_IS024_MeaningContext &ctx, const std::str
|
|||||||
} else if (info.type != type){
|
} else if (info.type != type){
|
||||||
aux_THROW("tracking tool unit type mismatch");
|
aux_THROW("tracking tool unit type mismatch");
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
@ -4,7 +4,8 @@
|
|||||||
|
|
||||||
#include <libregexis024sol/expr_compiler.h>
|
#include <libregexis024sol/expr_compiler.h>
|
||||||
|
|
||||||
void ensure_space_for_track_unit(REGEX_IS024_MeaningContext &ctx, const std::string& name, tracking_var_type type);
|
namespace regexis024 {
|
||||||
|
void ensure_space_for_track_unit(REGEX_IS024_MeaningContext &ctx, const std::string& name, tracking_var_type_t type);
|
||||||
|
}
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_TRACKING_UNITS_H
|
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_TRACKING_UNITS_H
|
||||||
|
@ -1,2 +0,0 @@
|
|||||||
// #include <libregexis024sol/part_of_expr_that_tracks.h>
|
|
||||||
|
|
@ -6,12 +6,13 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <libregexis024fa/tracking_variables.h>
|
#include <libregexis024fa/tracking_variables.h>
|
||||||
|
|
||||||
struct SubtrackingNameInfo{
|
namespace regexis024 {
|
||||||
|
struct SubtrackingNameInfo{
|
||||||
bool stored_in_ca = true;
|
bool stored_in_ca = true;
|
||||||
bool stored_in_sa = false;
|
bool stored_in_sa = false;
|
||||||
|
|
||||||
bool discovered = false;
|
bool discovered = false;
|
||||||
tracking_var_type type;
|
tracking_var_type_t type;
|
||||||
/* These fields will be -1 if unused */
|
/* These fields will be -1 if unused */
|
||||||
int colarr_first = -1;
|
int colarr_first = -1;
|
||||||
int colarr_second = -1;
|
int colarr_second = -1;
|
||||||
@ -20,12 +21,12 @@ struct SubtrackingNameInfo{
|
|||||||
bool minimizing = false;
|
bool minimizing = false;
|
||||||
int selarr_first = -1;
|
int selarr_first = -1;
|
||||||
int selarr_second = -1;
|
int selarr_second = -1;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct KnownTrackingTools {
|
struct KnownTrackingTools {
|
||||||
std::map<std::string, int64_t> track_names;
|
std::map<std::string, int64_t> track_names;
|
||||||
std::vector<SubtrackingNameInfo> retrieval_info;
|
std::vector<SubtrackingNameInfo> retrieval_info;
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
#endif //PART_OF_EXPR_THAT_TRACKS_H
|
#endif //PART_OF_EXPR_THAT_TRACKS_H
|
||||||
|
@ -1,19 +1,20 @@
|
|||||||
#include <libregexis024sol/sol_misc_base.h>
|
#include <libregexis024sol/sol_misc_base.h>
|
||||||
#include <libregexis024vm/utils.h>
|
#include <libregexis024vm/utils.h>
|
||||||
|
|
||||||
void report(REGEX_IS024_MeaningContext &ctx, const char *error) {
|
namespace regexis024 {
|
||||||
|
void report(REGEX_IS024_MeaningContext &ctx, const char *error) {
|
||||||
if (!ctx.error){
|
if (!ctx.error){
|
||||||
ctx.error = true;
|
ctx.error = true;
|
||||||
ctx.error_msg = error;
|
ctx.error_msg = error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isEnd(REGEX_IS024_MeaningContext &ctx) {
|
bool isEnd(REGEX_IS024_MeaningContext &ctx) {
|
||||||
return ctx.pos == ctx.input_size;
|
return ctx.pos == ctx.input_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t peep(REGEX_IS024_MeaningContext &ctx) {
|
int32_t peep(REGEX_IS024_MeaningContext &ctx) {
|
||||||
// printf("pos = %lu\n", ctx.pos);
|
// printf("pos = %lu\n", ctx.pos);
|
||||||
if (isEnd(ctx))
|
if (isEnd(ctx))
|
||||||
return -1; // This is probably the only place where getting negative return does not generate error
|
return -1; // This is probably the only place where getting negative return does not generate error
|
||||||
int32_t cp; size_t sz;
|
int32_t cp; size_t sz;
|
||||||
@ -21,10 +22,10 @@ int32_t peep(REGEX_IS024_MeaningContext &ctx) {
|
|||||||
if (cp < 0)
|
if (cp < 0)
|
||||||
report(ctx, "encoding error");
|
report(ctx, "encoding error");
|
||||||
return cp;
|
return cp;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t readChar(REGEX_IS024_MeaningContext &ctx) {
|
int32_t readChar(REGEX_IS024_MeaningContext &ctx) {
|
||||||
// printf("READ pos = %lu\n", ctx.pos);
|
// printf("READ pos = %lu\n", ctx.pos);
|
||||||
int32_t cp; size_t sz;
|
int32_t cp; size_t sz;
|
||||||
utf8_string_iterat(cp, sz, ctx.pos, ctx.input, ctx.input_size);
|
utf8_string_iterat(cp, sz, ctx.pos, ctx.input, ctx.input_size);
|
||||||
if (cp >= 0)
|
if (cp >= 0)
|
||||||
@ -32,13 +33,13 @@ int32_t readChar(REGEX_IS024_MeaningContext &ctx) {
|
|||||||
else
|
else
|
||||||
report(ctx, "bruh what?? How this even happened");
|
report(ctx, "bruh what?? How this even happened");
|
||||||
return cp;
|
return cp;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_REGEX024_nameConstituent(int32_t ch) {
|
bool is_REGEX024_nameConstituent(int32_t ch) {
|
||||||
return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
|
return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z');
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string tryRead_REGEX024_name(REGEX_IS024_MeaningContext &ctx) {
|
std::string tryRead_REGEX024_name(REGEX_IS024_MeaningContext &ctx) {
|
||||||
std::string res;
|
std::string res;
|
||||||
while (true){
|
while (true){
|
||||||
int32_t ch = peep(ctx);
|
int32_t ch = peep(ctx);
|
||||||
@ -50,6 +51,5 @@ std::string tryRead_REGEX024_name(REGEX_IS024_MeaningContext &ctx) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,16 +5,17 @@
|
|||||||
#include <libregexis024sol/expr_compiler.h>
|
#include <libregexis024sol/expr_compiler.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
void report(REGEX_IS024_MeaningContext& ctx, const char* error);
|
namespace regexis024 {
|
||||||
|
void report(REGEX_IS024_MeaningContext& ctx, const char* error);
|
||||||
|
|
||||||
bool isEnd(REGEX_IS024_MeaningContext& ctx);
|
bool isEnd(REGEX_IS024_MeaningContext& ctx);
|
||||||
int32_t peep(REGEX_IS024_MeaningContext& ctx);
|
int32_t peep(REGEX_IS024_MeaningContext& ctx);
|
||||||
int32_t readChar(REGEX_IS024_MeaningContext& ctx);
|
int32_t readChar(REGEX_IS024_MeaningContext& ctx);
|
||||||
|
|
||||||
|
|
||||||
bool is_REGEX024_nameConstituent(int32_t ch);
|
bool is_REGEX024_nameConstituent(int32_t ch);
|
||||||
/* Name in my library consists of [0-9a-zA-Z]. If the first peeped letter is not name constituent,
|
/* Name in my library consists of [0-9a-zA-Z]. If the first peeped letter is not name constituent,
|
||||||
* empty string is returned */
|
* empty string is returned */
|
||||||
std::string tryRead_REGEX024_name(REGEX_IS024_MeaningContext& ctx);
|
std::string tryRead_REGEX024_name(REGEX_IS024_MeaningContext& ctx);
|
||||||
|
}
|
||||||
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SOL_MISC_BASE_H
|
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SOL_MISC_BASE_H
|
||||||
|
@ -5,32 +5,34 @@
|
|||||||
#include <libregexis024sol/expr_compiler.h>
|
#include <libregexis024sol/expr_compiler.h>
|
||||||
#include <libregexis024sol/common_codesets.h>
|
#include <libregexis024sol/common_codesets.h>
|
||||||
|
|
||||||
/* This option of backslash usage should be checked last.
|
namespace regexis024 {
|
||||||
|
/* This option of backslash usage should be checked last.
|
||||||
* Function can generate error. Always check the error first */
|
* Function can generate error. Always check the error first */
|
||||||
void
|
void
|
||||||
backslash_expression_parsing_try_regular(REGEX_IS024_MeaningContext& ctx, const CommonCodesets& cc,
|
backslash_expression_parsing_try_regular(REGEX_IS024_MeaningContext& ctx, const CommonCodesets& cc,
|
||||||
bool& ret_is_multicode, codeset_t& ret_set);
|
bool& ret_is_multicode, codeset_t& ret_set);
|
||||||
|
|
||||||
struct CommandEntity;
|
struct CommandEntity;
|
||||||
struct Command;
|
struct Command;
|
||||||
struct CommandArgument;
|
struct CommandArgument;
|
||||||
|
|
||||||
struct CommandEntity{
|
struct CommandEntity{
|
||||||
std::string name;
|
std::string name;
|
||||||
std::vector<CommandArgument> arguments;
|
std::vector<CommandArgument> arguments;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CommandArgument: CommandEntity{
|
struct CommandArgument: CommandEntity{
|
||||||
bool is_empty = true;
|
bool is_empty = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Command: CommandEntity{
|
struct Command: CommandEntity{
|
||||||
bool tilda = false;
|
bool tilda = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Zlaya sobaka. Kidaet oshibki v context */
|
/* Zlaya sobaka. Kidaet oshibki v context */
|
||||||
Command command_expr_parse(REGEX_IS024_MeaningContext& ctx);
|
Command command_expr_parse(REGEX_IS024_MeaningContext& ctx);
|
||||||
bool is_command_for_charset(const Command& cmd);
|
bool is_command_for_charset(const Command& cmd);
|
||||||
void interpret_command_as_charset_giving(const CommonCodesets& cc, const Command& cmd, codeset_t& ret);
|
void interpret_command_as_charset_giving(const CommonCodesets& cc, const Command& cmd, codeset_t& ret);
|
||||||
|
}
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SPECIAL_TERMINALS_H
|
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SPECIAL_TERMINALS_H
|
||||||
|
@ -6,82 +6,83 @@
|
|||||||
#include <memory>
|
#include <memory>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
/* Can allow backslash (later should check that backslash expression is not multicharar or empty */
|
namespace regexis024 {
|
||||||
bool soundsLikeCharOrRangeStart(int32_t peeped) {
|
/* Can allow backslash (later should check that backslash expression is not multicharar or empty */
|
||||||
|
bool soundsLikeCharOrRangeStart(int32_t peeped) {
|
||||||
return peeped >= 0 && (peeped != U'[' && peeped != U']' && peeped != U'!' && \
|
return peeped >= 0 && (peeped != U'[' && peeped != U']' && peeped != U'!' && \
|
||||||
peeped != '^' && peeped != '&' && peeped != '-');
|
peeped != '^' && peeped != '&' && peeped != '-');
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef REGEX_IS024_MeaningContext ctx_t;
|
typedef REGEX_IS024_MeaningContext ctx_t;
|
||||||
|
|
||||||
struct ParseCall;
|
struct ParseCall;
|
||||||
typedef std::shared_ptr<ParseCall> chekushka;
|
typedef std::shared_ptr<ParseCall> chekushka;
|
||||||
|
|
||||||
struct ParseCall{
|
struct ParseCall{
|
||||||
codeset_t& result;
|
codeset_t& result;
|
||||||
|
|
||||||
explicit ParseCall(codeset_t &result) : result(result) {}
|
explicit ParseCall(codeset_t &result) : result(result) {}
|
||||||
virtual ~ParseCall() = default;
|
virtual ~ParseCall() = default;
|
||||||
virtual chekushka afterReceive(ctx_t& ctx, const CommonCodesets& cc) { assert(false); }
|
virtual chekushka afterReceive(ctx_t& ctx, const CommonCodesets& cc) { assert(false); }
|
||||||
virtual chekushka firstTime(ctx_t& ctx, const CommonCodesets& cc) { assert(false); }
|
virtual chekushka firstTime(ctx_t& ctx, const CommonCodesets& cc) { assert(false); }
|
||||||
};
|
};
|
||||||
|
|
||||||
#define call_ERROR_CHECK do { if (ctx.error) { return NULL; } } while (0)
|
#define call_ERROR_CHECK do { if (ctx.error) { return NULL; } } while (0)
|
||||||
#define call_THROW(str) do { report(ctx, "square bracket expression: " str); return NULL; } while (0)
|
#define call_THROW(str) do { report(ctx, "square bracket expression: " str); return NULL; } while (0)
|
||||||
|
|
||||||
/* [...] */
|
/* [...] */
|
||||||
struct ZeroLvl_ParseCall: public ParseCall{
|
struct ZeroLvl_ParseCall: public ParseCall{
|
||||||
explicit ZeroLvl_ParseCall(codeset_t &result) : ParseCall(result) {}
|
explicit ZeroLvl_ParseCall(codeset_t &result) : ParseCall(result) {}
|
||||||
chekushka afterReceive(ctx_t &ctx, const CommonCodesets& cc) override;
|
chekushka afterReceive(ctx_t &ctx, const CommonCodesets& cc) override;
|
||||||
chekushka firstTime(ctx_t &ctx, const CommonCodesets& cc) override;
|
chekushka firstTime(ctx_t &ctx, const CommonCodesets& cc) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* ...&...&... */
|
/* ...&...&... */
|
||||||
struct FirstLvl_ParseCall: public ParseCall{
|
struct FirstLvl_ParseCall: public ParseCall{
|
||||||
codeset_t ret_buf_for_new;
|
codeset_t ret_buf_for_new;
|
||||||
bool got_one = false;
|
bool got_one = false;
|
||||||
explicit FirstLvl_ParseCall(codeset_t& result) : ParseCall(result) {}
|
explicit FirstLvl_ParseCall(codeset_t& result) : ParseCall(result) {}
|
||||||
chekushka afterReceive(ctx_t &ctx, const CommonCodesets& cc) override;
|
chekushka afterReceive(ctx_t &ctx, const CommonCodesets& cc) override;
|
||||||
chekushka firstTime(ctx_t &ctx, const CommonCodesets& cc) override;
|
chekushka firstTime(ctx_t &ctx, const CommonCodesets& cc) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* ab[]vgd[]eyo[]zhz */
|
/* ab[]vgd[]eyo[]zhz */
|
||||||
struct SecondLvl_ParseCall: public ParseCall{
|
struct SecondLvl_ParseCall: public ParseCall{
|
||||||
codeset_t ret_buf_for_new;
|
codeset_t ret_buf_for_new;
|
||||||
explicit SecondLvl_ParseCall(codeset_t& result) : ParseCall(result) {}
|
explicit SecondLvl_ParseCall(codeset_t& result) : ParseCall(result) {}
|
||||||
chekushka afterReceive(ctx_t &ctx, const CommonCodesets& cc) override;
|
chekushka afterReceive(ctx_t &ctx, const CommonCodesets& cc) override;
|
||||||
chekushka firstTime(ctx_t &ctx, const CommonCodesets& cc) override;
|
chekushka firstTime(ctx_t &ctx, const CommonCodesets& cc) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* ^... */
|
/* ^... */
|
||||||
struct CircumflexLvl_ParseCall: public ParseCall{
|
struct CircumflexLvl_ParseCall: public ParseCall{
|
||||||
codeset_t ret_buf_for_new;
|
codeset_t ret_buf_for_new;
|
||||||
explicit CircumflexLvl_ParseCall(codeset_t& result) : ParseCall(result) {}
|
explicit CircumflexLvl_ParseCall(codeset_t& result) : ParseCall(result) {}
|
||||||
chekushka afterReceive(ctx_t &ctx, const CommonCodesets& cc) override;
|
chekushka afterReceive(ctx_t &ctx, const CommonCodesets& cc) override;
|
||||||
chekushka firstTime(ctx_t &ctx, const CommonCodesets& cc) override;
|
chekushka firstTime(ctx_t &ctx, const CommonCodesets& cc) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* ********* ZeroLvl_ParseCall ********** */
|
/* ********* ZeroLvl_ParseCall ********** */
|
||||||
|
|
||||||
chekushka ZeroLvl_ParseCall::firstTime(ctx_t &ctx, const CommonCodesets& cc) {
|
chekushka ZeroLvl_ParseCall::firstTime(ctx_t &ctx, const CommonCodesets& cc) {
|
||||||
assert(readChar(ctx) == U'[');
|
assert(readChar(ctx) == U'[');
|
||||||
return std::make_shared<FirstLvl_ParseCall>(result);
|
return std::make_shared<FirstLvl_ParseCall>(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
chekushka ZeroLvl_ParseCall::afterReceive(ctx_t &ctx, const CommonCodesets& cc) {
|
chekushka ZeroLvl_ParseCall::afterReceive(ctx_t &ctx, const CommonCodesets& cc) {
|
||||||
if (peep(ctx) != U']')
|
if (peep(ctx) != U']')
|
||||||
call_THROW("lvl 0: missing ]");
|
call_THROW("lvl 0: missing ]");
|
||||||
readChar(ctx);
|
readChar(ctx);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ********* FirstLvl_ParseCall ********** */
|
/* ********* FirstLvl_ParseCall ********** */
|
||||||
|
|
||||||
chekushka FirstLvl_ParseCall::firstTime(ctx_t &ctx, const CommonCodesets& cc) {
|
chekushka FirstLvl_ParseCall::firstTime(ctx_t &ctx, const CommonCodesets& cc) {
|
||||||
return std::make_shared<SecondLvl_ParseCall>(result);
|
return std::make_shared<SecondLvl_ParseCall>(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
chekushka FirstLvl_ParseCall::afterReceive(ctx_t &ctx, const CommonCodesets& cc) {
|
chekushka FirstLvl_ParseCall::afterReceive(ctx_t &ctx, const CommonCodesets& cc) {
|
||||||
if (got_one)
|
if (got_one)
|
||||||
result = intersect_sets(result, ret_buf_for_new);
|
result = intersect_sets(result, ret_buf_for_new);
|
||||||
else
|
else
|
||||||
@ -91,11 +92,11 @@ chekushka FirstLvl_ParseCall::afterReceive(ctx_t &ctx, const CommonCodesets& cc)
|
|||||||
return std::make_shared<SecondLvl_ParseCall>(ret_buf_for_new);
|
return std::make_shared<SecondLvl_ParseCall>(ret_buf_for_new);
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ********* SecondLvl_ParseCall ********** */
|
/* ********* SecondLvl_ParseCall ********** */
|
||||||
|
|
||||||
chekushka SecondLvl_ParseCall::firstTime(ctx_t &ctx, const CommonCodesets& cc) {
|
chekushka SecondLvl_ParseCall::firstTime(ctx_t &ctx, const CommonCodesets& cc) {
|
||||||
repeat:
|
repeat:
|
||||||
int32_t ch = peep(ctx); call_ERROR_CHECK;
|
int32_t ch = peep(ctx); call_ERROR_CHECK;
|
||||||
if (ch == U'^'){
|
if (ch == U'^'){
|
||||||
@ -147,28 +148,28 @@ chekushka SecondLvl_ParseCall::firstTime(ctx_t &ctx, const CommonCodesets& cc) {
|
|||||||
goto repeat;
|
goto repeat;
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
chekushka SecondLvl_ParseCall::afterReceive(ctx_t &ctx, const CommonCodesets& cc) {
|
chekushka SecondLvl_ParseCall::afterReceive(ctx_t &ctx, const CommonCodesets& cc) {
|
||||||
result = merge_sets(result, ret_buf_for_new);
|
result = merge_sets(result, ret_buf_for_new);
|
||||||
return firstTime(ctx, cc);
|
return firstTime(ctx, cc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ********* CircumflexLvl_ParseCall ********* */
|
/* ********* CircumflexLvl_ParseCall ********* */
|
||||||
|
|
||||||
chekushka CircumflexLvl_ParseCall::firstTime(ctx_t &ctx, const CommonCodesets& cc) {
|
chekushka CircumflexLvl_ParseCall::firstTime(ctx_t &ctx, const CommonCodesets& cc) {
|
||||||
assert(readChar(ctx) == U'^');
|
assert(readChar(ctx) == U'^');
|
||||||
return std::make_shared<FirstLvl_ParseCall>(ret_buf_for_new);
|
return std::make_shared<FirstLvl_ParseCall>(ret_buf_for_new);
|
||||||
}
|
}
|
||||||
|
|
||||||
chekushka CircumflexLvl_ParseCall::afterReceive(ctx_t &ctx, const CommonCodesets& cc) {
|
chekushka CircumflexLvl_ParseCall::afterReceive(ctx_t &ctx, const CommonCodesets& cc) {
|
||||||
result = invert_set(ret_buf_for_new);
|
result = invert_set(ret_buf_for_new);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Aaaaaaaaand... The function we have all been waiting for so long! */
|
/* Aaaaaaaaand... The function we have all been waiting for so long! */
|
||||||
codeset_t sq_bracket_expr_parse(REGEX_IS024_MeaningContext &ctx, const CommonCodesets& cc) {
|
codeset_t sq_bracket_expr_parse(REGEX_IS024_MeaningContext &ctx, const CommonCodesets& cc) {
|
||||||
std::vector<std::shared_ptr<ParseCall>> callStack;
|
std::vector<std::shared_ptr<ParseCall>> callStack;
|
||||||
codeset_t res;
|
codeset_t res;
|
||||||
callStack.push_back(std::make_shared<ZeroLvl_ParseCall>(res));
|
callStack.push_back(std::make_shared<ZeroLvl_ParseCall>(res));
|
||||||
@ -186,4 +187,5 @@ codeset_t sq_bracket_expr_parse(REGEX_IS024_MeaningContext &ctx, const CommonCod
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#include <libregexis024sol/expr_compiler.h>
|
#include <libregexis024sol/expr_compiler.h>
|
||||||
#include <libregexis024sol/common_codesets.h>
|
#include <libregexis024sol/common_codesets.h>
|
||||||
|
|
||||||
codeset_t sq_bracket_expr_parse(REGEX_IS024_MeaningContext& ctx, const CommonCodesets& cc);
|
namespace regexis024 {
|
||||||
|
codeset_t sq_bracket_expr_parse(REGEX_IS024_MeaningContext& ctx, const CommonCodesets& cc);
|
||||||
|
}
|
||||||
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SQUARE_BRACKET_EXPRESSION_H
|
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SQUARE_BRACKET_EXPRESSION_H
|
||||||
|
@ -3,11 +3,12 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
SubExprCompiled subexpr_charset_reading_filter(const codeset_t &codeset, FA_Container &fa) {
|
namespace regexis024 {
|
||||||
|
SubExprCompiled subexpr_charset_reading_filter(const codeset_t &codeset, FA_Container &fa) {
|
||||||
return subexpression_from_path(fa.makeOneCharRead(codeset, false));
|
return subexpression_from_path(fa.makeOneCharRead(codeset, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
SubExprCompiled join(const SubExprCompiled &A, const SubExprCompiled &B) {
|
SubExprCompiled join(const SubExprCompiled &A, const SubExprCompiled &B) {
|
||||||
if (!A.start)
|
if (!A.start)
|
||||||
return B;
|
return B;
|
||||||
if (!B.start)
|
if (!B.start)
|
||||||
@ -19,18 +20,18 @@ SubExprCompiled join(const SubExprCompiled &A, const SubExprCompiled &B) {
|
|||||||
res.ends = B.ends;
|
res.ends = B.ends;
|
||||||
res.can_be_empty = A.can_be_empty && B.can_be_empty;
|
res.can_be_empty = A.can_be_empty && B.can_be_empty;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
SubExprCompiled subexpression_from_path(FA_NodePathPart *node) {
|
SubExprCompiled subexpression_from_path(FA_NodePathPart *node) {
|
||||||
SubExprCompiled res;
|
SubExprCompiled res;
|
||||||
res.start = node;
|
res.start = node;
|
||||||
res.ends.push_back(&(node->nxt_node));
|
res.ends.push_back(&(node->nxt_node));
|
||||||
/* There is only one char reading path node type */
|
/* There is only one char reading path node type */
|
||||||
res.can_be_empty = (node->type != one_char_read);
|
res.can_be_empty = (node->type != one_char_read);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
SubExprCompiled RobertAngier(const SubExprCompiled& source, FA_Container& fa) {
|
SubExprCompiled RobertAngier(const SubExprCompiled& source, FA_Container& fa) {
|
||||||
SubExprCompiled res;
|
SubExprCompiled res;
|
||||||
if (!source.start)
|
if (!source.start)
|
||||||
return res;
|
return res;
|
||||||
@ -68,9 +69,9 @@ SubExprCompiled RobertAngier(const SubExprCompiled& source, FA_Container& fa) {
|
|||||||
for (Marked& mrkd: searched)
|
for (Marked& mrkd: searched)
|
||||||
mrkd.original->search_mark = -1;
|
mrkd.original->search_mark = -1;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
void reattach_all_ends_to_one_node(SubExprCompiled& patient, FA_Node* node){
|
void reattach_all_ends_to_one_node(SubExprCompiled& patient, FA_Node* node){
|
||||||
assert(node);
|
assert(node);
|
||||||
assert(patient.start);
|
assert(patient.start);
|
||||||
for (FA_Node** end: patient.ends){
|
for (FA_Node** end: patient.ends){
|
||||||
@ -78,9 +79,9 @@ void reattach_all_ends_to_one_node(SubExprCompiled& patient, FA_Node* node){
|
|||||||
printf("DEBUG %lu->->->->->%lu\n", patient.start->nodeId, node->nodeId);
|
printf("DEBUG %lu->->->->->%lu\n", patient.start->nodeId, node->nodeId);
|
||||||
reattach_fa_node_edge(end, node);
|
reattach_fa_node_edge(end, node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void apply_repeat_to_subexpression(SubExprCompiled &patient, FA_Container& fa, size_t min_allowed, size_t max_allowed) {
|
void apply_repeat_to_subexpression(SubExprCompiled &patient, FA_Container& fa, size_t min_allowed, size_t max_allowed) {
|
||||||
assert(min_allowed <= max_allowed && min_allowed <= REGEXIS024_MAX_REPEAT);
|
assert(min_allowed <= max_allowed && min_allowed <= REGEXIS024_MAX_REPEAT);
|
||||||
if (!patient.start)
|
if (!patient.start)
|
||||||
return;
|
return;
|
||||||
@ -144,9 +145,9 @@ void apply_repeat_to_subexpression(SubExprCompiled &patient, FA_Container& fa, s
|
|||||||
}
|
}
|
||||||
if (min_allowed == 0)
|
if (min_allowed == 0)
|
||||||
patient.can_be_empty = true;
|
patient.can_be_empty = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
SubExprCompiled forkify(const std::vector<SubExprCompiled> &options, FA_Container& fa){
|
SubExprCompiled forkify(const std::vector<SubExprCompiled> &options, FA_Container& fa){
|
||||||
SubExprCompiled result;
|
SubExprCompiled result;
|
||||||
size_t non_empty = 0;
|
size_t non_empty = 0;
|
||||||
result.can_be_empty = false;
|
result.can_be_empty = false;
|
||||||
@ -177,8 +178,9 @@ SubExprCompiled forkify(const std::vector<SubExprCompiled> &options, FA_Containe
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SubExprCompiled::assertDefault() {
|
void SubExprCompiled::assertDefault() {
|
||||||
assert(!start && ends.empty() && can_be_empty);
|
assert(!start && ends.empty() && can_be_empty);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,30 +3,31 @@
|
|||||||
|
|
||||||
#include <libregexis024fa/finite_automaton.h>
|
#include <libregexis024fa/finite_automaton.h>
|
||||||
|
|
||||||
struct SubExprCompiled{
|
namespace regexis024 {
|
||||||
|
struct SubExprCompiled{
|
||||||
FA_Node* start = NULL;
|
FA_Node* start = NULL;
|
||||||
/* After putting there values from neighbour vectors in nodes, these vectors must not change size */
|
/* After putting there values from neighbour vectors in nodes, these vectors must not change size */
|
||||||
std::vector<FA_Node**> ends;
|
std::vector<FA_Node**> ends;
|
||||||
bool can_be_empty = true;
|
bool can_be_empty = true;
|
||||||
|
|
||||||
void assertDefault();
|
void assertDefault();
|
||||||
};
|
};
|
||||||
|
|
||||||
SubExprCompiled subexpr_charset_reading_filter(const codeset_t& codeset, FA_Container& fa);
|
SubExprCompiled subexpr_charset_reading_filter(const codeset_t& codeset, FA_Container& fa);
|
||||||
|
|
||||||
SubExprCompiled join(const SubExprCompiled& A, const SubExprCompiled& B);
|
SubExprCompiled join(const SubExprCompiled& A, const SubExprCompiled& B);
|
||||||
|
|
||||||
SubExprCompiled forkify(const std::vector<SubExprCompiled>& options, FA_Container& fa);
|
SubExprCompiled forkify(const std::vector<SubExprCompiled>& options, FA_Container& fa);
|
||||||
|
|
||||||
SubExprCompiled subexpression_from_path(FA_NodePathPart* node);
|
SubExprCompiled subexpression_from_path(FA_NodePathPart* node);
|
||||||
|
|
||||||
/* And then Robert Angier said `It's prestige time` and prestiged all over the place.
|
/* And then Robert Angier said `It's prestige time` and prestiged all over the place.
|
||||||
* If you still don't get it, this function copies section of NFA of regexp */
|
* If you still don't get it, this function copies section of NFA of regexp */
|
||||||
SubExprCompiled RobertAngier(const SubExprCompiled& source, FA_Container& fa);
|
SubExprCompiled RobertAngier(const SubExprCompiled& source, FA_Container& fa);
|
||||||
|
|
||||||
#define REGEXIS024_MAX_REPEAT 64
|
#define REGEXIS024_MAX_REPEAT 64
|
||||||
|
|
||||||
/* pass REGEXIS024_MAX_REPEAT + 1 as max_allowed to allow infinite repeat */
|
/* pass REGEXIS024_MAX_REPEAT + 1 as max_allowed to allow infinite repeat */
|
||||||
void apply_repeat_to_subexpression(SubExprCompiled& patient, FA_Container& fa, size_t min_allowed, size_t max_allowed);
|
void apply_repeat_to_subexpression(SubExprCompiled& patient, FA_Container& fa, size_t min_allowed, size_t max_allowed);
|
||||||
|
}
|
||||||
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SUBEXPR_FA_TRANSFORMED_H
|
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SUBEXPR_FA_TRANSFORMED_H
|
||||||
|
@ -11,15 +11,17 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
using namespace regexis024;
|
||||||
|
|
||||||
struct assembler_context_bookmark{
|
struct assembler_context_bookmark{
|
||||||
regex_near_ptr_t pos_in_r024program;
|
near_ptr_t pos_in_r024program;
|
||||||
int LINE;
|
int LINE;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct pending_bookmark{
|
struct pending_bookmark{
|
||||||
/* Must fill this byte with pos of pos_in_r024program in assembler_context_bookmark
|
/* Must fill this byte with pos of pos_in_r024program in assembler_context_bookmark
|
||||||
* In a sense, this is a pointer to a NULL pointer that is yet to become normal kinda pointer */
|
* In a sense, this is a pointer to a NULL pointer that is yet to become normal kinda pointer */
|
||||||
regex_near_ptr_t pos_in_r024program;
|
near_ptr_t pos_in_r024program;
|
||||||
const char* name;
|
const char* name;
|
||||||
/* LINE of the reference is needed in case of error */
|
/* LINE of the reference is needed in case of error */
|
||||||
int LINE;
|
int LINE;
|
||||||
@ -46,7 +48,7 @@ struct assembler_context{
|
|||||||
}
|
}
|
||||||
/* pending bookmerk requests should be added only with beg_for_bookmark method,
|
/* pending bookmerk requests should be added only with beg_for_bookmark method,
|
||||||
* or else SEGFAULT will be your frequent guest */
|
* or else SEGFAULT will be your frequent guest */
|
||||||
*reinterpret_cast<regex_near_ptr_t *>(&result[br.pos_in_r024program]) = bookmarks[br.name].pos_in_r024program;
|
*reinterpret_cast<near_ptr_t *>(&result[br.pos_in_r024program]) = bookmarks[br.name].pos_in_r024program;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,8 +11,9 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
// TODO: apply here my new change in near pointer size
|
using namespace regexis024;
|
||||||
|
|
||||||
struct landing_place_resolvance{
|
struct landing_place_resolvance{
|
||||||
size_t name_id;
|
size_t name_id;
|
||||||
@ -34,12 +35,14 @@ void print_disassembly(size_t prgSize, uint8_t* prg){
|
|||||||
};
|
};
|
||||||
uint64_t used_names = 0;
|
uint64_t used_names = 0;
|
||||||
/* From program position -> to names[ind] & */
|
/* From program position -> to names[ind] & */
|
||||||
std::map<regex_near_ptr_t, landing_place_resolvance> bookmarks;
|
std::map<near_ptr_t, landing_place_resolvance> bookmarks;
|
||||||
regex_near_ptr_t IP = 0;
|
near_ptr_t IP = 0;
|
||||||
|
|
||||||
auto check_inboundness = [&](int region){
|
auto check_inboundness = [&](int region){
|
||||||
if (!vmprog_check_inboundness(prgSize, IP, region))
|
if (!vmprog_check_inboundness(prgSize, IP, region)) {
|
||||||
exitf("This program can't be decomposed into commands in a trivial way");
|
fprintf(stderr, "This program can't be decomposed into commands in a trivial way");
|
||||||
|
std::terminate();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
auto extract_b = [&]() -> uint8_t{
|
auto extract_b = [&]() -> uint8_t{
|
||||||
check_inboundness(1);
|
check_inboundness(1);
|
||||||
@ -60,19 +63,19 @@ void print_disassembly(size_t prgSize, uint8_t* prg){
|
|||||||
auto extract_instruction = [&]() -> uint8_t{
|
auto extract_instruction = [&]() -> uint8_t{
|
||||||
return extract_b();
|
return extract_b();
|
||||||
};
|
};
|
||||||
auto extract_sslot_id = [&]() -> regex_sslot_id_t{
|
auto extract_sslot_id = [&]() -> sslot_id_t{
|
||||||
return extract_dw();
|
return extract_dw();
|
||||||
};
|
};
|
||||||
auto extract_near_pointer = [&]() -> regex_near_ptr_t{
|
auto extract_near_pointer = [&]() -> near_ptr_t{
|
||||||
return extract_qw();
|
return extract_qw();
|
||||||
};
|
};
|
||||||
auto extract_track_array_index = [&]() -> regex_tai_t{
|
auto extract_track_array_index = [&]() -> tai_t{
|
||||||
return extract_w();
|
return extract_w();
|
||||||
};
|
};
|
||||||
|
|
||||||
bool second_phase = false;
|
bool second_phase = false;
|
||||||
|
|
||||||
auto fph_register_landing = [&](regex_near_ptr_t pos){
|
auto fph_register_landing = [&](near_ptr_t pos){
|
||||||
if (!second_phase){
|
if (!second_phase){
|
||||||
if (bookmarks.count(pos) == 0){
|
if (bookmarks.count(pos) == 0){
|
||||||
if (used_names == names.size())
|
if (used_names == names.size())
|
||||||
@ -83,15 +86,17 @@ void print_disassembly(size_t prgSize, uint8_t* prg){
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
auto get_bookmark_in_2phase = [&](regex_near_ptr_t pos) -> std::string {
|
auto get_bookmark_in_2phase = [&](near_ptr_t pos) -> std::string {
|
||||||
if (bookmarks.count(pos) == 0)
|
if (bookmarks.count(pos) == 0) {
|
||||||
exitf("bruh");
|
fprintf(stderr, "Bruh\n");
|
||||||
|
std::terminate();
|
||||||
|
}
|
||||||
return names[bookmarks[pos].name_id];
|
return names[bookmarks[pos].name_id];
|
||||||
};
|
};
|
||||||
|
|
||||||
auto one_reading = [&](){
|
auto one_reading = [&](){
|
||||||
while (IP < prgSize) {
|
while (IP < prgSize) {
|
||||||
regex_near_ptr_t start_pos = IP;
|
near_ptr_t start_pos = IP;
|
||||||
if (second_phase){
|
if (second_phase){
|
||||||
if (bookmarks.count(IP) != 0){
|
if (bookmarks.count(IP) != 0){
|
||||||
printf("%s:\n", get_bookmark_in_2phase(IP).c_str());
|
printf("%s:\n", get_bookmark_in_2phase(IP).c_str());
|
||||||
@ -102,11 +107,11 @@ void print_disassembly(size_t prgSize, uint8_t* prg){
|
|||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
#define secPrint(fmt, ...) if (second_phase) {printf("% 3lu) " fmt, start_pos, __VA_ARGS__);} } break;
|
#define secPrint(fmt, ...) if (second_phase) {printf("% 3lu) " fmt, start_pos, __VA_ARGS__);} } break;
|
||||||
#define secPrintNoArg(str) if (second_phase) {printf("% 3lu) " str, start_pos);} } break;
|
#define secPrintNoArg(str) if (second_phase) {printf("% 3lu) " str, start_pos);} } break;
|
||||||
#define instCase(oper_code) case regex024_opcodes::oper_code: {
|
#define instCase(oper_code) case opcodes::oper_code: {
|
||||||
#define jcMess(cond, sz_uppercase, x_t, extract_method, printf_sign) \
|
#define jcMess(cond, sz_uppercase, x_t, extract_method, printf_sign) \
|
||||||
instCase(JC ## cond ## _ ## sz_uppercase) \
|
instCase(JC ## cond ## _ ## sz_uppercase) \
|
||||||
x_t x = extract_method(); \
|
x_t x = extract_method(); \
|
||||||
regex_near_ptr_t dest = extract_near_pointer(); \
|
near_ptr_t dest = extract_near_pointer(); \
|
||||||
fph_register_landing(dest); \
|
fph_register_landing(dest); \
|
||||||
secPrint("JC" #cond "_" #sz_uppercase " %" printf_sign " $%s\n", x, get_bookmark_in_2phase(dest).c_str())
|
secPrint("JC" #cond "_" #sz_uppercase " %" printf_sign " $%s\n", x, get_bookmark_in_2phase(dest).c_str())
|
||||||
#define jcCacaphony(cond) \
|
#define jcCacaphony(cond) \
|
||||||
@ -131,22 +136,22 @@ void print_disassembly(size_t prgSize, uint8_t* prg){
|
|||||||
|
|
||||||
instCase(FORK)
|
instCase(FORK)
|
||||||
uint32_t ssid = extract_sslot_id();
|
uint32_t ssid = extract_sslot_id();
|
||||||
regex_near_ptr_t dest = extract_near_pointer();
|
near_ptr_t dest = extract_near_pointer();
|
||||||
fph_register_landing(dest);
|
fph_register_landing(dest);
|
||||||
secPrint("FORK %u $%s\n", ssid, get_bookmark_in_2phase(dest).c_str())
|
secPrint("FORK %u $%s\n", ssid, get_bookmark_in_2phase(dest).c_str())
|
||||||
simpleDimple(MATCH)
|
simpleDimple(MATCH)
|
||||||
simpleDimple(DIE)
|
simpleDimple(DIE)
|
||||||
instCase(PARAM_READ_SS_NUMBER)
|
instCase(PARAM_READ_SS_NUMBER)
|
||||||
regex_sslot_id_t ssid_max_plus_one = extract_sslot_id();
|
sslot_id_t ssid_max_plus_one = extract_sslot_id();
|
||||||
secPrint("PARAM_READ_SS_NUMBER %u\n", ssid_max_plus_one)
|
secPrint("PARAM_READ_SS_NUMBER %u\n", ssid_max_plus_one)
|
||||||
instCase(PARAM_FORK_SS_NUMBER)
|
instCase(PARAM_FORK_SS_NUMBER)
|
||||||
regex_sslot_id_t ssid_max_plus_one = extract_sslot_id();
|
sslot_id_t ssid_max_plus_one = extract_sslot_id();
|
||||||
secPrint("PARAM_FORK_SS_NUMBER %u\n", ssid_max_plus_one)
|
secPrint("PARAM_FORK_SS_NUMBER %u\n", ssid_max_plus_one)
|
||||||
instCase(PARAM_SELARR_LEN)
|
instCase(PARAM_SELARR_LEN)
|
||||||
regex_tai_t tai_max_plus_one = extract_track_array_index();
|
tai_t tai_max_plus_one = extract_track_array_index();
|
||||||
secPrint("PARAM_SELARR_LEN %hu\n", tai_max_plus_one)
|
secPrint("PARAM_SELARR_LEN %hu\n", tai_max_plus_one)
|
||||||
instCase(PARAM_COLSIFTFUNC_SET)
|
instCase(PARAM_COLSIFTFUNC_SET)
|
||||||
regex_near_ptr_t entry = extract_near_pointer();
|
near_ptr_t entry = extract_near_pointer();
|
||||||
fph_register_landing(entry);
|
fph_register_landing(entry);
|
||||||
secPrint("PARAM_COLSIFTFUNC_SET $%s\n", get_bookmark_in_2phase(entry).c_str())
|
secPrint("PARAM_COLSIFTFUNC_SET $%s\n", get_bookmark_in_2phase(entry).c_str())
|
||||||
simpleDimple(PARAM_COLSIFTFUNC_WIPE)
|
simpleDimple(PARAM_COLSIFTFUNC_WIPE)
|
||||||
@ -156,36 +161,37 @@ void print_disassembly(size_t prgSize, uint8_t* prg){
|
|||||||
instCase(MSG_FED_INPUT_EXTENDED)
|
instCase(MSG_FED_INPUT_EXTENDED)
|
||||||
uint8_t left = extract_b();
|
uint8_t left = extract_b();
|
||||||
uint8_t right = extract_b();
|
uint8_t right = extract_b();
|
||||||
regex_sslot_id_t part = extract_sslot_id();
|
sslot_id_t part = extract_sslot_id();
|
||||||
secPrint("MSG_FED_INPUT_EXTENDED %hhu %hhu %u\n", left, right, part)
|
secPrint("MSG_FED_INPUT_EXTENDED %hhu %hhu %u\n", left, right, part)
|
||||||
instCase(DMOV_RABX_SELARR)
|
instCase(DMOV_RABX_SELARR)
|
||||||
regex_tai_t i = extract_track_array_index();
|
tai_t i = extract_track_array_index();
|
||||||
secPrint("DMOV_RABX_SELARR %hu\n", i)
|
secPrint("DMOV_RABX_SELARR %hu\n", i)
|
||||||
instCase(DDIST_RABX_SELARR)
|
instCase(DDIST_RABX_SELARR)
|
||||||
regex_tai_t s = extract_track_array_index();
|
tai_t s = extract_track_array_index();
|
||||||
regex_tai_t e = extract_track_array_index();
|
tai_t e = extract_track_array_index();
|
||||||
secPrint("DDIST_RABX_SELARR %hu %hu\n", s, e);
|
secPrint("DDIST_RABX_SELARR %hu %hu\n", s, e);
|
||||||
simpleDimple(SIFTPRIOR_MIN_RABX)
|
simpleDimple(SIFTPRIOR_MIN_RABX)
|
||||||
simpleDimple(SIFTPRIOR_MAX_RABX)
|
simpleDimple(SIFTPRIOR_MAX_RABX)
|
||||||
simpleDimple(SIFT_DONE)
|
simpleDimple(SIFT_DONE)
|
||||||
instCase(MOV_COLARR_IMM)
|
instCase(MOV_COLARR_IMM)
|
||||||
regex_tai_t tai = extract_track_array_index();
|
tai_t tai = extract_track_array_index();
|
||||||
uint64_t imm = extract_qw();
|
uint64_t imm = extract_qw();
|
||||||
secPrint("MOV_COLARR_IMM %hu %lu\n", tai, imm);
|
secPrint("MOV_COLARR_IMM %hu %lu\n", tai, imm);
|
||||||
instCase(MOV_COLARR_BTPOS)
|
instCase(MOV_COLARR_BTPOS)
|
||||||
regex_tai_t tai = extract_track_array_index();
|
tai_t tai = extract_track_array_index();
|
||||||
secPrint("MOV_COLARR_BTPOS %hu\n", tai);
|
secPrint("MOV_COLARR_BTPOS %hu\n", tai);
|
||||||
instCase(MOV_SELARR_IMM)
|
instCase(MOV_SELARR_IMM)
|
||||||
regex_tai_t tai = extract_track_array_index();
|
tai_t tai = extract_track_array_index();
|
||||||
uint64_t imm = extract_qw();
|
uint64_t imm = extract_qw();
|
||||||
secPrint("MOV_SELARR_IMM %hu %lu\n", tai, imm);
|
secPrint("MOV_SELARR_IMM %hu %lu\n", tai, imm);
|
||||||
instCase(MOV_SELARR_CHPOS)
|
instCase(MOV_SELARR_CHPOS)
|
||||||
regex_tai_t tai = extract_track_array_index();
|
tai_t tai = extract_track_array_index();
|
||||||
secPrint("MOV_SELARR_CHPOS %hu\n", tai);
|
secPrint("MOV_SELARR_CHPOS %hu\n", tai);
|
||||||
simpleDimple(INIT)
|
simpleDimple(INIT)
|
||||||
simpleDimple(THROW)
|
simpleDimple(THROW)
|
||||||
default:
|
default:
|
||||||
exitf("Bad opcode\n");
|
fprintf(stderr, "Bad opcode\n");
|
||||||
|
std::terminate();
|
||||||
#undef secPrint
|
#undef secPrint
|
||||||
#undef secPrintNoArg
|
#undef secPrintNoArg
|
||||||
#undef instCase
|
#undef instCase
|
||||||
|
@ -2,12 +2,16 @@
|
|||||||
#include <libregexis024vm/utils.h>
|
#include <libregexis024vm/utils.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
using namespace regexis024;
|
||||||
|
|
||||||
void test_ccs_fnc(const codeset_t &got, const codeset_t &expected){
|
void test_ccs_fnc(const codeset_t &got, const codeset_t &expected){
|
||||||
static int id = 1;
|
static int id = 1;
|
||||||
if (got == expected)
|
if (got == expected) {
|
||||||
printf("Test %d passed\n", id++);
|
printf("Test %d passed\n", id++);
|
||||||
else
|
} else {
|
||||||
exitf("Test %d failed\n", id);
|
printf("Test %d failed\n", id);
|
||||||
|
std::terminate();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void invert_test(const codeset_t& A, const codeset_t& C){
|
void invert_test(const codeset_t& A, const codeset_t& C){
|
||||||
|
@ -10,8 +10,8 @@ static int test_id = 0;
|
|||||||
|
|
||||||
void do_test(const std::vector<uint8_t>& prg, const std::string& str, const std::vector<bool>& prefix_matching){
|
void do_test(const std::vector<uint8_t>& prg, const std::string& str, const std::vector<bool>& prefix_matching){
|
||||||
assert(str.size() + 1 == prefix_matching.size());
|
assert(str.size() + 1 == prefix_matching.size());
|
||||||
REGEX_IS024_CONTEXT ctx{prg.size(), prg.data(), 0, 0, 1000, 1000, 1000000};
|
VMContext ctx{prg.size(), prg.data(), 0, 0, 1000, 1000, 1000000};
|
||||||
regex024_error_code ret;
|
error_code_t ret;
|
||||||
// todo
|
// todo
|
||||||
printf("TEST %d passed\n", test_id);
|
printf("TEST %d passed\n", test_id);
|
||||||
test_id++;
|
test_id++;
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
#include <libregexis024sol/expr_compiler.h>
|
#include <libregexis024sol/expr_compiler.h>
|
||||||
#include <libregexis024test/byte_code_disassembler.h>
|
#include <libregexis024test/byte_code_disassembler.h>
|
||||||
|
|
||||||
|
using namespace regexis024;
|
||||||
|
|
||||||
int main(){
|
int main(){
|
||||||
std::string regular_expression = "\\>1*";
|
std::string regular_expression = "\\>1*";
|
||||||
REGEX_IS024_MeaningContext regex(regular_expression.size(), regular_expression.c_str());
|
REGEX_IS024_MeaningContext regex(regular_expression.size(), regular_expression.c_str());
|
||||||
|
@ -7,6 +7,8 @@
|
|||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <random>
|
#include <random>
|
||||||
|
|
||||||
|
using namespace regexis024;
|
||||||
|
|
||||||
struct test_id_t {
|
struct test_id_t {
|
||||||
int test_id;
|
int test_id;
|
||||||
int subtest_id;
|
int subtest_id;
|
||||||
|
@ -21,19 +21,54 @@ void test(const string& input, const string& pattern, const MatchInfo& right_ans
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main() {
|
int main() {
|
||||||
|
test("11aa", "^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo({}, {}));
|
||||||
|
test("aa11", "^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo({}, {}));
|
||||||
|
test("a111", "^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo());
|
||||||
|
test("aa11", "^!A;\\B!A;\\B!any;\\B!any;$", MatchInfo());
|
||||||
|
test("1a11", "^!A;\\B!A;\\B!any;\\B!any;$", MatchInfo());
|
||||||
|
test("11aa", "!dfa;^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo({}, {}));
|
||||||
|
test("aa11", "!dfa;^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo({}, {}));
|
||||||
|
test("a111", "!dfa;^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo());
|
||||||
|
test("aa11", "!dfa;^!A;\\B!A;\\B!any;\\B!any;$", MatchInfo());
|
||||||
|
test("1a11", "!dfa;^!A;\\B!A;\\B!any;\\B!any;$", MatchInfo());
|
||||||
|
test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n",
|
||||||
|
"!dfa;!select{fieldname{ca}fieldbody{ca}}^^^LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n$$$",
|
||||||
|
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {20, 25, 26, 28}));
|
||||||
|
test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n",
|
||||||
|
"!dfa;!select{fieldname{ca}fieldbody{ca}}^LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+\\>):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
|
||||||
|
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {20, 25, 26, 28}));
|
||||||
|
test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n",
|
||||||
|
"!dfa;!select{fieldname{ca}fieldbody{ca}}^LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
|
||||||
|
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {20, 25, 26, 28}));
|
||||||
|
test("LINE\r\nFirst:Second\r\n\r\n",
|
||||||
|
"!select{fieldname{ca}}LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
|
||||||
|
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}}, {6, 11}));
|
||||||
|
test("LINE\r\nFirst:Second\r\n\r\n",
|
||||||
|
"!select{fieldname}LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
|
||||||
|
MatchInfo({{0, 12}, {1, 18}}, {6, 11}));
|
||||||
|
test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n",
|
||||||
|
"!select{fieldname{ca}fieldbody{ca}}LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
|
||||||
|
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {20, 25, 26, 28}));
|
||||||
|
test("абвгд", "абвгд", MatchInfo({}, {}));
|
||||||
|
test("абвввввввгд", "абв*г+д", MatchInfo({}, {}));
|
||||||
|
test("абвввввввд", "абв*г+д", MatchInfo());
|
||||||
|
test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n",
|
||||||
|
"!dfa;^LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
|
||||||
|
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {}));
|
||||||
|
test("LINE\r\nFirst:Second\r\n\r\n",
|
||||||
|
"LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
|
||||||
|
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}}, {}));
|
||||||
test("C111111111111", "C\\>1*", MatchInfo({}, {}));
|
test("C111111111111", "C\\>1*", MatchInfo({}, {}));
|
||||||
// return 0;
|
test("GET / HTTP/1.1\r\nHost: example.com\r\nAAAAA: a\rfaafafdf\r\n\r\n",
|
||||||
test("GET / HTTP/1.1\r\nHost: bibura sosat\r\nLos-es-raus: a\rfaafafdf\r\n\r\n",
|
|
||||||
"!dfa;(GET|POST) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n",
|
"!dfa;(GET|POST) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n",
|
||||||
MatchInfo());
|
MatchInfo());
|
||||||
test("\r24234\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo());
|
test("\r24234\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo());
|
||||||
test("\n3432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo());
|
test("\n3432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo());
|
||||||
test("3:::;;432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {}));
|
test("3:::;;432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {}));
|
||||||
test("3:::;;432 \r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {}));
|
test("3:::;;432 \r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {}));
|
||||||
test("GET / HTTP/0.9\r\nHost: bibura sosat\r\nLos-es-raus: afaafafdf\r\n\r\n",
|
test("GET / HTTP/0.9\r\nHost: bibur at\r\nContent-type: html\r\n\r\n",
|
||||||
"^(GET|POST\\>) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n",
|
"^(GET|POST\\>) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n",
|
||||||
MatchInfo({}, {}));
|
MatchInfo({}, {}));
|
||||||
// return 0;
|
|
||||||
test("b", "#boba(b)", MatchInfo({{0, 0}, {1, 1}}, {}));
|
test("b", "#boba(b)", MatchInfo({{0, 0}, {1, 1}}, {}));
|
||||||
test("abc", "!selarr{boba{ca}}^a#boba(b)c$", MatchInfo({{0, 1}, {1, 2}}, {1, 2}));
|
test("abc", "!selarr{boba{ca}}^a#boba(b)c$", MatchInfo({{0, 1}, {1, 2}}, {1, 2}));
|
||||||
for (int i = 0; i < 64; i++) {
|
for (int i = 0; i < 64; i++) {
|
||||||
|
@ -7,7 +7,8 @@
|
|||||||
|
|
||||||
// using namespace regexis024;
|
// using namespace regexis024;
|
||||||
|
|
||||||
void convert(regexis024::TrackingVariableInfo& to, const SubtrackingNameInfo& from) {
|
namespace regexis024 {
|
||||||
|
void convert(TrackingVariableInfo& to, const SubtrackingNameInfo& from) {
|
||||||
#define plagiat(field) to.field = from.field;
|
#define plagiat(field) to.field = from.field;
|
||||||
plagiat(type);
|
plagiat(type);
|
||||||
plagiat(colarr_first);
|
plagiat(colarr_first);
|
||||||
@ -17,11 +18,11 @@ void convert(regexis024::TrackingVariableInfo& to, const SubtrackingNameInfo& fr
|
|||||||
plagiat(selarr_second);
|
plagiat(selarr_second);
|
||||||
plagiat(stored_in_sa);
|
plagiat(stored_in_sa);
|
||||||
#undef plagiat
|
#undef plagiat
|
||||||
}
|
}
|
||||||
|
|
||||||
int regexis024::matchStrToRegexp(const std::string& input, const std::string& pattern,
|
int matchStrToRegexp(const std::string& input, const std::string& pattern,
|
||||||
MatchInfo& retMatchInfo, track_var_list& retTrackVarList, std::string& retStatus)
|
MatchInfo& retMatchInfo, track_var_list& retTrackVarList, std::string& retStatus)
|
||||||
{
|
{
|
||||||
retTrackVarList = {};
|
retTrackVarList = {};
|
||||||
retMatchInfo = MatchInfo();
|
retMatchInfo = MatchInfo();
|
||||||
retStatus = "";
|
retStatus = "";
|
||||||
@ -34,14 +35,14 @@ int regexis024::matchStrToRegexp(const std::string& input, const std::string& pa
|
|||||||
for (auto& iip: regexp.ktr.track_names) {
|
for (auto& iip: regexp.ktr.track_names) {
|
||||||
convert(retTrackVarList[iip.first], regexp.ktr.retrieval_info[iip.second]);
|
convert(retTrackVarList[iip.first], regexp.ktr.retrieval_info[iip.second]);
|
||||||
}
|
}
|
||||||
REGEX_IS024_VirtualMachine vm(regexp.compiled_program.size(), regexp.compiled_program.data(),
|
VirtualMachine vm(regexp.compiled_program.size(), regexp.compiled_program.data(),
|
||||||
UINT64_MAX, UINT16_MAX,
|
UINT64_MAX, UINT16_MAX,
|
||||||
UINT32_MAX, UINT32_MAX, UINT64_MAX);
|
UINT32_MAX, UINT32_MAX, UINT64_MAX);
|
||||||
auto getVMErrString = [&]() -> std::string {
|
auto getVMErrString = [&]() -> std::string {
|
||||||
return std::string(regex024_error_code_tostr(vm.getErrno()));
|
return std::string(error_code_to_str(vm.getErrno()));
|
||||||
};
|
};
|
||||||
|
|
||||||
if (vm.initialize() != regex024_error_codes::stable) {
|
if (vm.initialize() != error_codes::stable) {
|
||||||
retStatus = "Virtual machine initialization. " + getVMErrString();
|
retStatus = "Virtual machine initialization. " + getVMErrString();
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -51,11 +52,11 @@ int regexis024::matchStrToRegexp(const std::string& input, const std::string& pa
|
|||||||
retStatus = "Unnatural extended input request.";
|
retStatus = "Unnatural extended input request.";
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (vm.addNewMatchingThread() != regex024_error_codes::stable) {
|
if (vm.addNewMatchingThread() != error_codes::stable) {
|
||||||
retStatus = "Virtual machine first kick. " + getVMErrString();
|
retStatus = "Virtual machine first kick. " + getVMErrString();
|
||||||
}
|
}
|
||||||
if (left_ext_feed) {
|
if (left_ext_feed) {
|
||||||
if (vm.extendedFeedCharacter('\n') != regex024_error_codes::stable) {
|
if (vm.extendedFeedCharacter('\n') != error_codes::stable) {
|
||||||
retStatus = "VM left extended input. " + getVMErrString();
|
retStatus = "VM left extended input. " + getVMErrString();
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -63,19 +64,19 @@ int regexis024::matchStrToRegexp(const std::string& input, const std::string& pa
|
|||||||
for (size_t cur_text_pos = 0;cur_text_pos < input.size();) {
|
for (size_t cur_text_pos = 0;cur_text_pos < input.size();) {
|
||||||
int32_t inp_code;
|
int32_t inp_code;
|
||||||
size_t adj;
|
size_t adj;
|
||||||
utf8_string_iterat(inp_code, adj, cur_text_pos, reinterpret_cast<const uint8_t*>(input.data()), input.size());
|
utf8_string_iterat(inp_code, adj, cur_text_pos, input.data(), input.size());
|
||||||
if (inp_code < 0) {
|
if (inp_code < 0) {
|
||||||
retStatus = "Input string encoding error.";
|
retStatus = "Input string encoding error.";
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
if (vm.feedCharacter(static_cast<uint64_t>(inp_code), adj) != regex024_error_codes::stable) {
|
if (vm.feedCharacter(static_cast<uint64_t>(inp_code), adj) != error_codes::stable) {
|
||||||
retStatus = "VM input. " + getVMErrString();
|
retStatus = "VM input. " + getVMErrString();
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
cur_text_pos += adj;
|
cur_text_pos += adj;
|
||||||
}
|
}
|
||||||
if (right_ext_feed) {
|
if (right_ext_feed) {
|
||||||
if (vm.extendedFeedCharacter('\n') != regex024_error_codes::stable) {
|
if (vm.extendedFeedCharacter('\n') != error_codes::stable) {
|
||||||
retStatus = "VM right extended input. " + getVMErrString();
|
retStatus = "VM right extended input. " + getVMErrString();
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -91,18 +92,19 @@ int regexis024::matchStrToRegexp(const std::string& input, const std::string& pa
|
|||||||
std::reverse(retMatchInfo.ca_history.begin(), retMatchInfo.ca_history.end());
|
std::reverse(retMatchInfo.ca_history.begin(), retMatchInfo.ca_history.end());
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool regexis024::MatchInfo::operator==(const MatchInfo &other) const {
|
bool MatchInfo::operator==(const MatchInfo &other) const {
|
||||||
if (!have_match && !other.have_match)
|
if (!have_match && !other.have_match)
|
||||||
return true;
|
return true;
|
||||||
return (have_match == other.have_match) && (sa == other.sa) && (ca_history == other.ca_history);
|
return (have_match == other.have_match) && (sa == other.sa) && (ca_history == other.ca_history);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool regexis024::MatchInfo::operator!=(const MatchInfo &other) const {
|
bool MatchInfo::operator!=(const MatchInfo &other) const {
|
||||||
return !(*this == other);
|
return !(*this == other);
|
||||||
}
|
}
|
||||||
|
|
||||||
regexis024::MatchInfo::MatchInfo(const std::vector<REGEX_IS024_CAEvent> &ca_history, const std::vector<uint64_t> &sa):
|
MatchInfo::MatchInfo(const std::vector<CAEvent> &ca_history, const std::vector<uint64_t> &sa):
|
||||||
ca_history(ca_history), sa(sa), have_match(true) {
|
ca_history(ca_history), sa(sa), have_match(true) {
|
||||||
|
}
|
||||||
}
|
}
|
@ -11,7 +11,7 @@ namespace regexis024 {
|
|||||||
bool stored_in_ca = true;
|
bool stored_in_ca = true;
|
||||||
bool stored_in_sa = false;
|
bool stored_in_sa = false;
|
||||||
|
|
||||||
tracking_var_type type;
|
tracking_var_type_t type;
|
||||||
/* These fields will be -1 if unused */
|
/* These fields will be -1 if unused */
|
||||||
int colarr_first = -1;
|
int colarr_first = -1;
|
||||||
int colarr_second = -1;
|
int colarr_second = -1;
|
||||||
@ -24,7 +24,7 @@ namespace regexis024 {
|
|||||||
|
|
||||||
struct MatchInfo {
|
struct MatchInfo {
|
||||||
bool have_match = false;
|
bool have_match = false;
|
||||||
std::vector<REGEX_IS024_CAEvent> ca_history;
|
std::vector<CAEvent> ca_history;
|
||||||
std::vector<uint64_t> sa;
|
std::vector<uint64_t> sa;
|
||||||
|
|
||||||
bool operator==(const MatchInfo& other) const ;
|
bool operator==(const MatchInfo& other) const ;
|
||||||
@ -32,7 +32,7 @@ namespace regexis024 {
|
|||||||
|
|
||||||
MatchInfo() = default;
|
MatchInfo() = default;
|
||||||
|
|
||||||
MatchInfo(const std::vector<REGEX_IS024_CAEvent> &ca_history, const std::vector<uint64_t> &sa);
|
MatchInfo(const std::vector<CAEvent> &ca_history, const std::vector<uint64_t> &sa);
|
||||||
};
|
};
|
||||||
|
|
||||||
int matchStrToRegexp(const std::string& input, const std::string& pattern,
|
int matchStrToRegexp(const std::string& input, const std::string& pattern,
|
||||||
|
@ -1,21 +1,22 @@
|
|||||||
#include <libregexis024vm/instruction_implementation.h>
|
#include <libregexis024vm/instruction_implementation.h>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
void swap_old_settled_and_new_active(REGEX_IS024_CONTEXT &ctx, REGEX_IS024_Thread& old_settled){
|
namespace regexis024 {
|
||||||
|
void swap_old_settled_and_new_active(VMContext &ctx, Thread& old_settled){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
assert(old_settled.slot_occupation_status == SLOT_OCCUPIED_val);
|
assert(old_settled.slot_occupation_status == SLOT_OCCUPIED_val);
|
||||||
REGEX_IS024_Thread temp = old_settled;
|
Thread temp = old_settled;
|
||||||
old_settled = ctx.active_thread;
|
old_settled = ctx.active_thread;
|
||||||
old_settled.slot_occupation_status = SLOT_NEW_val;
|
old_settled.slot_occupation_status = SLOT_NEW_val;
|
||||||
ctx.active_thread = temp;
|
ctx.active_thread = temp;
|
||||||
// slot_occupation_status & SLOT_OCCUPIED of actie thread is true, because it was retrieved from old_settled
|
// slot_occupation_status & SLOT_OCCUPIED of active thread is true, because it was retrieved from old_settled
|
||||||
}
|
}
|
||||||
|
|
||||||
void start_noncloning_conflict(REGEX_IS024_CONTEXT& ctx, REGEX_IS024_Thread& other){
|
void start_noncloning_conflict(VMContext& ctx, Thread& other){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
if (ctx.have_sift_function){
|
if (ctx.have_sift_function){
|
||||||
ctx.sifting_with = &other;
|
ctx.sifting_with = &other;
|
||||||
ctx.who_started_sift = regex024_opcode::READ;
|
ctx.who_started_sift = opcode_t::READ;
|
||||||
ctx.intruder_IP = ctx.active_thread.IP;
|
ctx.intruder_IP = ctx.active_thread.IP;
|
||||||
ctx.active_thread.IP = ctx.sift_function;
|
ctx.active_thread.IP = ctx.sift_function;
|
||||||
ctx.RAX = ctx.RBX = 0;
|
ctx.RAX = ctx.RBX = 0;
|
||||||
@ -23,14 +24,14 @@ void start_noncloning_conflict(REGEX_IS024_CONTEXT& ctx, REGEX_IS024_Thread& oth
|
|||||||
ctx.active_thread.delete_thread();
|
ctx.active_thread.delete_thread();
|
||||||
ctx.try_to_continue_scheduled();
|
ctx.try_to_continue_scheduled();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The one that drops as an intruder here is current active.thread.IP */
|
/* The one that drops as an intruder here is current active.thread.IP */
|
||||||
void start_cloning_conflict(REGEX_IS024_CONTEXT& ctx, REGEX_IS024_Thread& other, regex_near_ptr_t clone_IP){
|
void start_cloning_conflict(VMContext& ctx, Thread& other, near_ptr_t clone_IP){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
if (ctx.have_sift_function){
|
if (ctx.have_sift_function){
|
||||||
ctx.sifting_with = &other;
|
ctx.sifting_with = &other;
|
||||||
ctx.who_started_sift = regex024_opcode::FORK;
|
ctx.who_started_sift = opcode_t::FORK;
|
||||||
ctx.intruder_IP = ctx.active_thread.IP;
|
ctx.intruder_IP = ctx.active_thread.IP;
|
||||||
ctx.child_ret_IP = clone_IP;
|
ctx.child_ret_IP = clone_IP;
|
||||||
ctx.active_thread.IP = ctx.sift_function;
|
ctx.active_thread.IP = ctx.sift_function;
|
||||||
@ -38,32 +39,32 @@ void start_cloning_conflict(REGEX_IS024_CONTEXT& ctx, REGEX_IS024_Thread& other,
|
|||||||
} else {
|
} else {
|
||||||
ctx.active_thread.IP = clone_IP;
|
ctx.active_thread.IP = clone_IP;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define initialization_phase_check() if (ctx.initialized){ \
|
#define initialization_phase_check() if (ctx.initialized){ \
|
||||||
ctx.error = regex024_error_codes::too_late; return; }
|
ctx.error = error_codes::too_late; return; }
|
||||||
#define general_matching_mode_check() if (!ctx.initialized){ \
|
#define general_matching_mode_check() if (!ctx.initialized){ \
|
||||||
ctx.error = regex024_error_codes::too_early; return; } if(ctx.sifting_with){ \
|
ctx.error = error_codes::too_early; return; } if(ctx.sifting_with){ \
|
||||||
ctx.error = regex024_error_codes::instruction_not_for_collision_thread; return; }
|
ctx.error = error_codes::instruction_not_for_collision_thread; return; }
|
||||||
#define sift_mode_check() if (!ctx.sifting_with){ \
|
#define sift_mode_check() if (!ctx.sifting_with){ \
|
||||||
ctx.error = regex024_error_codes::instruction_not_for_collision_thread; return; }
|
ctx.error = error_codes::instruction_not_for_collision_thread; return; }
|
||||||
|
|
||||||
/* Can append to both read_halted+new stacks of context */
|
/* Can append to both read_halted+new stacks of context */
|
||||||
void read_halted_new_type_stacks_append(REGEX_IS024_CONTEXT &ctx, regex_sslot_id_t ssid){
|
void read_halted_new_type_stacks_append(VMContext &ctx, sslot_id_t ssid){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
if (ssid < ctx.portion_of_FIRST_read_halt_ns){
|
if (ssid < ctx.portion_of_FIRST_read_halt_ns){
|
||||||
ctx.READ_halted_stack_new_first.append(ssid);
|
ctx.READ_halted_stack_new_first.append(ssid);
|
||||||
} else {
|
} else {
|
||||||
ctx.READ_halted_stack_new_second.append(ssid);
|
ctx.READ_halted_stack_new_second.append(ssid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void do_i_read(REGEX_IS024_CONTEXT &ctx, regex_sslot_id_t ssid) {
|
void do_i_read(VMContext &ctx, sslot_id_t ssid) {
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
general_matching_mode_check()
|
general_matching_mode_check()
|
||||||
if (ssid >= ctx.read_slots_number)
|
if (ssid >= ctx.read_slots_number)
|
||||||
smitsya(read_sslot_out_of_range);
|
smitsya(read_sslot_out_of_range);
|
||||||
REGEX_IS024_Thread& other = ctx.READ_halted_slots[ssid];
|
Thread& other = ctx.READ_halted_slots[ssid];
|
||||||
if (other.slot_occupation_status & SLOT_OCCUPIED){
|
if (other.slot_occupation_status & SLOT_OCCUPIED){
|
||||||
if (other.slot_occupation_status & SLOT_NEW){
|
if (other.slot_occupation_status & SLOT_NEW){
|
||||||
start_noncloning_conflict(ctx, other);
|
start_noncloning_conflict(ctx, other);
|
||||||
@ -79,60 +80,60 @@ void do_i_read(REGEX_IS024_CONTEXT &ctx, regex_sslot_id_t ssid) {
|
|||||||
read_halted_new_type_stacks_append(ctx, ssid);
|
read_halted_new_type_stacks_append(ctx, ssid);
|
||||||
ctx.try_to_continue_scheduled();
|
ctx.try_to_continue_scheduled();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_READ(REGEX_IS024_CONTEXT &ctx) {
|
void i_READ(VMContext &ctx) {
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
check_available_prg(REGEX024_BYTECODE_SSLOT_ID_SZ)
|
check_available_prg(BYTECODE_SSLOT_ID_SZ)
|
||||||
regex_sslot_id_t ssid = ctx.extract_sslot_id();
|
sslot_id_t ssid = ctx.extract_sslot_id();
|
||||||
do_i_read(ctx, ssid);
|
do_i_read(ctx, ssid);
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_READZ(REGEX_IS024_CONTEXT &ctx) {
|
void i_READZ(VMContext &ctx) {
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
do_i_read(ctx, 0);
|
do_i_read(ctx, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_JUMP(REGEX_IS024_CONTEXT& ctx){
|
void i_JUMP(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
check_available_prg(REGEX024_BYTECODE_NEAR_POINTER_SZ)
|
check_available_prg(BYTECODE_NEAR_POINTER_SZ)
|
||||||
ctx.active_thread.IP = ctx.extract_near_pointer();
|
ctx.active_thread.IP = ctx.extract_near_pointer();
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename conditionT, typename immArgSzT>
|
template<typename conditionT, typename immArgSzT>
|
||||||
void i_JC(REGEX_IS024_CONTEXT& ctx)
|
void i_JC(VMContext& ctx)
|
||||||
{
|
{
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
check_available_prg(immArgSzT::byte_sz + REGEX024_BYTECODE_NEAR_POINTER_SZ);
|
check_available_prg(immArgSzT::byte_sz + BYTECODE_NEAR_POINTER_SZ);
|
||||||
uint64_t imm_val_B = immArgSzT::extract(ctx);
|
uint64_t imm_val_B = immArgSzT::extract(ctx);
|
||||||
regex_near_ptr_t dest = ctx.extract_near_pointer();
|
near_ptr_t dest = ctx.extract_near_pointer();
|
||||||
uint64_t imm_val_A = ctx.INP;
|
uint64_t imm_val_A = ctx.INP;
|
||||||
if (conditionT::call(imm_val_A, imm_val_B))
|
if (conditionT::call(imm_val_A, imm_val_B))
|
||||||
ctx.active_thread.IP = dest;
|
ctx.active_thread.IP = dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct condEqual{static bool call(uint64_t A, uint64_t B){return A == B;}};
|
struct condEqual{static bool call(uint64_t A, uint64_t B){return A == B;}};
|
||||||
struct condLess{static bool call(uint64_t A, uint64_t B){return A < B;}};
|
struct condLess{static bool call(uint64_t A, uint64_t B){return A < B;}};
|
||||||
struct condGrtr{static bool call(uint64_t A, uint64_t B){return A > B;}};
|
struct condGrtr{static bool call(uint64_t A, uint64_t B){return A > B;}};
|
||||||
|
|
||||||
struct immArgByte{
|
struct immArgByte{
|
||||||
static constexpr int byte_sz = 1;
|
static constexpr int byte_sz = 1;
|
||||||
static uint64_t extract(REGEX_IS024_CONTEXT& ctx){return ctx.extract_b();}
|
static uint64_t extract(VMContext& ctx){return ctx.extract_b();}
|
||||||
};
|
};
|
||||||
struct immArgWord{
|
struct immArgWord{
|
||||||
static constexpr int byte_sz = 2;
|
static constexpr int byte_sz = 2;
|
||||||
static uint64_t extract(REGEX_IS024_CONTEXT& ctx){return ctx.extract_w();}
|
static uint64_t extract(VMContext& ctx){return ctx.extract_w();}
|
||||||
};
|
};
|
||||||
struct immArgDoubleWord{
|
struct immArgDoubleWord{
|
||||||
static constexpr int byte_sz = 4;
|
static constexpr int byte_sz = 4;
|
||||||
static uint64_t extract(REGEX_IS024_CONTEXT& ctx){return ctx.extract_dw();}
|
static uint64_t extract(VMContext& ctx){return ctx.extract_dw();}
|
||||||
};
|
};
|
||||||
struct immArgQuadWord{
|
struct immArgQuadWord{
|
||||||
static constexpr int byte_sz = 8;
|
static constexpr int byte_sz = 8;
|
||||||
static uint64_t extract(REGEX_IS024_CONTEXT& ctx){return ctx.extract_qw();}
|
static uint64_t extract(VMContext& ctx){return ctx.extract_qw();}
|
||||||
};
|
};
|
||||||
|
|
||||||
void clone_thread_into_slot(REGEX_IS024_Thread& source, REGEX_IS024_Thread& vessel){
|
void clone_thread_into_slot(Thread& source, Thread& vessel){
|
||||||
thread_print_debug(source);
|
thread_print_debug(source);
|
||||||
my_assert(!(vessel.slot_occupation_status & SLOT_OCCUPIED));
|
my_assert(!(vessel.slot_occupation_status & SLOT_OCCUPIED));
|
||||||
my_assert((source.slot_occupation_status & SLOT_OCCUPIED));
|
my_assert((source.slot_occupation_status & SLOT_OCCUPIED));
|
||||||
@ -143,18 +144,18 @@ void clone_thread_into_slot(REGEX_IS024_Thread& source, REGEX_IS024_Thread& vess
|
|||||||
if (vessel.SAptr){
|
if (vessel.SAptr){
|
||||||
vessel.SAptr[0]++;
|
vessel.SAptr[0]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* One FORK-slot governs the one single unique position in program: the next one after the fork */
|
/* One FORK-slot governs the one single unique position in program: the next one after the fork */
|
||||||
void i_FORK(REGEX_IS024_CONTEXT& ctx){
|
void i_FORK(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
general_matching_mode_check()
|
general_matching_mode_check()
|
||||||
check_available_prg(REGEX024_BYTECODE_SSLOT_ID_SZ + REGEX024_BYTECODE_NEAR_POINTER_SZ);
|
check_available_prg(BYTECODE_SSLOT_ID_SZ + BYTECODE_NEAR_POINTER_SZ);
|
||||||
regex_sslot_id_t ssid = ctx.extract_sslot_id();
|
sslot_id_t ssid = ctx.extract_sslot_id();
|
||||||
regex_near_ptr_t dest = ctx.extract_near_pointer();
|
near_ptr_t dest = ctx.extract_near_pointer();
|
||||||
if (ssid >= ctx.fork_slots_number)
|
if (ssid >= ctx.fork_slots_number)
|
||||||
smitsya(fork_sslot_out_of_range);
|
smitsya(fork_sslot_out_of_range);
|
||||||
REGEX_IS024_Thread& other = ctx.FORK_halted_slots[ssid];
|
Thread& other = ctx.FORK_halted_slots[ssid];
|
||||||
if (other.slot_occupation_status & SLOT_OCCUPIED){
|
if (other.slot_occupation_status & SLOT_OCCUPIED){
|
||||||
start_cloning_conflict(ctx, other, dest);
|
start_cloning_conflict(ctx, other, dest);
|
||||||
} else {
|
} else {
|
||||||
@ -162,9 +163,9 @@ void i_FORK(REGEX_IS024_CONTEXT& ctx){
|
|||||||
ctx.active_thread.IP = dest;
|
ctx.active_thread.IP = dest;
|
||||||
ctx.FORK_halted_stack.append(ssid);
|
ctx.FORK_halted_stack.append(ssid);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_MATCH(REGEX_IS024_CONTEXT& ctx){
|
void i_MATCH(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
general_matching_mode_check()
|
general_matching_mode_check()
|
||||||
if (ctx.matched_thread.slot_occupation_status & SLOT_OCCUPIED){
|
if (ctx.matched_thread.slot_occupation_status & SLOT_OCCUPIED){
|
||||||
@ -172,108 +173,108 @@ void i_MATCH(REGEX_IS024_CONTEXT& ctx){
|
|||||||
} else {
|
} else {
|
||||||
clone_thread_into_slot(ctx.active_thread, ctx.matched_thread);
|
clone_thread_into_slot(ctx.active_thread, ctx.matched_thread);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_DIE(REGEX_IS024_CONTEXT& ctx){
|
void i_DIE(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
general_matching_mode_check()
|
general_matching_mode_check()
|
||||||
ctx.active_thread.delete_thread();
|
ctx.active_thread.delete_thread();
|
||||||
ctx.try_to_continue_scheduled();
|
ctx.try_to_continue_scheduled();
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_PARAM_READ_SS_NUMBER(REGEX_IS024_CONTEXT& ctx){
|
void i_PARAM_READ_SS_NUMBER(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
initialization_phase_check()
|
initialization_phase_check()
|
||||||
check_available_prg(REGEX024_BYTECODE_SSLOT_ID_SZ)
|
check_available_prg(BYTECODE_SSLOT_ID_SZ)
|
||||||
regex_sslot_id_t read_slots_number = ctx.extract_sslot_id();
|
sslot_id_t read_slots_number = ctx.extract_sslot_id();
|
||||||
ctx.read_slots_number = read_slots_number;
|
ctx.read_slots_number = read_slots_number;
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_PARAM_FORK_SS_NUMBER(REGEX_IS024_CONTEXT& ctx){
|
void i_PARAM_FORK_SS_NUMBER(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
initialization_phase_check()
|
initialization_phase_check()
|
||||||
check_available_prg(REGEX024_BYTECODE_SSLOT_ID_SZ)
|
check_available_prg(BYTECODE_SSLOT_ID_SZ)
|
||||||
regex_sslot_id_t fork_slots_number = ctx.extract_sslot_id();
|
sslot_id_t fork_slots_number = ctx.extract_sslot_id();
|
||||||
ctx.fork_slots_number = fork_slots_number;
|
ctx.fork_slots_number = fork_slots_number;
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_PARAM_SELARR_LEN(REGEX_IS024_CONTEXT& ctx){
|
void i_PARAM_SELARR_LEN(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
initialization_phase_check()
|
initialization_phase_check()
|
||||||
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ)
|
check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ)
|
||||||
regex_tai_t selection_array_len = ctx.extract_track_array_index();
|
tai_t selection_array_len = ctx.extract_track_array_index();
|
||||||
ctx.selection_array_len = selection_array_len;
|
ctx.selection_array_len = selection_array_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_PARAM_COLSIFTFUNC_SET(REGEX_IS024_CONTEXT& ctx){
|
void i_PARAM_COLSIFTFUNC_SET(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
initialization_phase_check()
|
initialization_phase_check()
|
||||||
check_available_prg(REGEX024_BYTECODE_NEAR_POINTER_SZ)
|
check_available_prg(BYTECODE_NEAR_POINTER_SZ)
|
||||||
regex_near_ptr_t sift_function = ctx.extract_near_pointer();
|
near_ptr_t sift_function = ctx.extract_near_pointer();
|
||||||
ctx.have_sift_function = true;
|
ctx.have_sift_function = true;
|
||||||
ctx.sift_function = sift_function;
|
ctx.sift_function = sift_function;
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_PARAM_COLSIFTFUNC_WIPE(REGEX_IS024_CONTEXT& ctx){
|
void i_PARAM_COLSIFTFUNC_WIPE(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
initialization_phase_check()
|
initialization_phase_check()
|
||||||
ctx.have_sift_function = false;
|
ctx.have_sift_function = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_MSG_MULTISTART_ALLOWED(REGEX_IS024_CONTEXT& ctx){
|
void i_MSG_MULTISTART_ALLOWED(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
initialization_phase_check()
|
initialization_phase_check()
|
||||||
check_available_prg(1)
|
check_available_prg(1)
|
||||||
ctx.allows_multistart = (bool)ctx.extract_b();
|
ctx.allows_multistart = (bool)ctx.extract_b();
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_MSG_FED_INPUT_EXTENDED(REGEX_IS024_CONTEXT& ctx){
|
void i_MSG_FED_INPUT_EXTENDED(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
initialization_phase_check()
|
initialization_phase_check()
|
||||||
check_available_prg(1 + 1 + REGEX024_BYTECODE_SSLOT_ID_SZ)
|
check_available_prg(1 + 1 + BYTECODE_SSLOT_ID_SZ)
|
||||||
ctx.fed_input_extends_left = ctx.extract_b();
|
ctx.fed_input_extends_left = ctx.extract_b();
|
||||||
ctx.fed_input_extends_right = ctx.extract_b();
|
ctx.fed_input_extends_right = ctx.extract_b();
|
||||||
ctx.portion_of_second_read_halt_ns = ctx.extract_sslot_id();
|
ctx.portion_of_second_read_halt_ns = ctx.extract_sslot_id();
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t get_el_from_selarr(uint64_t* sa, regex_near_ptr_t ind){
|
uint64_t get_el_from_selarr(uint64_t* sa, near_ptr_t ind){
|
||||||
return sa ? sa[1UL + ind] : 0;
|
return sa ? sa[1UL + ind] : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_DMOV_RABX_SELARR(REGEX_IS024_CONTEXT& ctx){
|
void i_DMOV_RABX_SELARR(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
sift_mode_check()
|
sift_mode_check()
|
||||||
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ)
|
check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ)
|
||||||
regex_tai_t i1 = ctx.extract_track_array_index();
|
tai_t i1 = ctx.extract_track_array_index();
|
||||||
if (i1 >= ctx.selection_array_len)
|
if (i1 >= ctx.selection_array_len)
|
||||||
smitsya(selection_arr_out_of_range);
|
smitsya(selection_arr_out_of_range);
|
||||||
ctx.RAX = get_el_from_selarr(ctx.active_thread.SAptr, i1);
|
ctx.RAX = get_el_from_selarr(ctx.active_thread.SAptr, i1);
|
||||||
ctx.RBX = get_el_from_selarr(ctx.sifting_with->SAptr, i1);
|
ctx.RBX = get_el_from_selarr(ctx.sifting_with->SAptr, i1);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t get_selarr_el_dist(uint64_t* sa, uint16_t start, uint16_t end){
|
uint64_t get_selarr_el_dist(uint64_t* sa, uint16_t start, uint16_t end){
|
||||||
uint64_t v_start = get_el_from_selarr(sa, start);
|
uint64_t v_start = get_el_from_selarr(sa, start);
|
||||||
uint64_t v_end = get_el_from_selarr(sa, end);
|
uint64_t v_end = get_el_from_selarr(sa, end);
|
||||||
return v_end > v_start ? v_end - v_start : 0;
|
return v_end > v_start ? v_end - v_start : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_DDIST_RABX_SELARR(REGEX_IS024_CONTEXT& ctx){
|
void i_DDIST_RABX_SELARR(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
sift_mode_check()
|
sift_mode_check()
|
||||||
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ * 2)
|
check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ * 2)
|
||||||
regex_tai_t i_start = ctx.extract_track_array_index();
|
tai_t i_start = ctx.extract_track_array_index();
|
||||||
if (i_start >= ctx.selection_array_len)
|
if (i_start >= ctx.selection_array_len)
|
||||||
smitsya(selection_arr_out_of_range);
|
smitsya(selection_arr_out_of_range);
|
||||||
regex_tai_t i_end = ctx.extract_track_array_index();
|
tai_t i_end = ctx.extract_track_array_index();
|
||||||
if (i_end >= ctx.selection_array_len)
|
if (i_end >= ctx.selection_array_len)
|
||||||
smitsya(selection_arr_out_of_range);
|
smitsya(selection_arr_out_of_range);
|
||||||
ctx.RAX = get_selarr_el_dist(ctx.active_thread.SAptr, i_start, i_end);
|
ctx.RAX = get_selarr_el_dist(ctx.active_thread.SAptr, i_start, i_end);
|
||||||
ctx.RBX = get_selarr_el_dist(ctx.sifting_with->SAptr, i_start, i_end);
|
ctx.RBX = get_selarr_el_dist(ctx.sifting_with->SAptr, i_start, i_end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void finish_conflict_homesteader_wins(REGEX_IS024_CONTEXT& ctx){
|
void finish_conflict_homesteader_wins(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
if (ctx.who_started_sift == regex024_opcodes::READ){
|
if (ctx.who_started_sift == opcodes::READ){
|
||||||
ctx.active_thread.delete_thread();
|
ctx.active_thread.delete_thread();
|
||||||
ctx.try_to_continue_scheduled();
|
ctx.try_to_continue_scheduled();
|
||||||
} else {
|
} else {
|
||||||
@ -282,13 +283,13 @@ void finish_conflict_homesteader_wins(REGEX_IS024_CONTEXT& ctx){
|
|||||||
ctx.active_thread.IP = ctx.child_ret_IP;
|
ctx.active_thread.IP = ctx.child_ret_IP;
|
||||||
}
|
}
|
||||||
ctx.sifting_with = NULL;
|
ctx.sifting_with = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void finish_conflict_intruder_wins(REGEX_IS024_CONTEXT& ctx){
|
void finish_conflict_intruder_wins(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
ctx.sifting_with->delete_thread();
|
ctx.sifting_with->delete_thread();
|
||||||
ctx.active_thread.IP = ctx.intruder_IP;
|
ctx.active_thread.IP = ctx.intruder_IP;
|
||||||
if (ctx.who_started_sift == regex024_opcodes::READ){
|
if (ctx.who_started_sift == opcodes::READ){
|
||||||
/* noncloning conflict won by intruder+ */
|
/* noncloning conflict won by intruder+ */
|
||||||
*ctx.sifting_with = ctx.active_thread;
|
*ctx.sifting_with = ctx.active_thread;
|
||||||
ctx.active_thread.slot_occupation_status = SLOT_EMPTY_val;
|
ctx.active_thread.slot_occupation_status = SLOT_EMPTY_val;
|
||||||
@ -299,9 +300,9 @@ void finish_conflict_intruder_wins(REGEX_IS024_CONTEXT& ctx){
|
|||||||
ctx.active_thread.IP = ctx.child_ret_IP;
|
ctx.active_thread.IP = ctx.child_ret_IP;
|
||||||
}
|
}
|
||||||
ctx.sifting_with = NULL;
|
ctx.sifting_with = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_SIFTPRIOR_MIN_RABX(REGEX_IS024_CONTEXT& ctx){
|
void i_SIFTPRIOR_MIN_RABX(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
sift_mode_check()
|
sift_mode_check()
|
||||||
if (ctx.RAX < ctx.RBX){
|
if (ctx.RAX < ctx.RBX){
|
||||||
@ -309,9 +310,9 @@ void i_SIFTPRIOR_MIN_RABX(REGEX_IS024_CONTEXT& ctx){
|
|||||||
} else if (ctx.RAX > ctx.RBX){
|
} else if (ctx.RAX > ctx.RBX){
|
||||||
finish_conflict_homesteader_wins(ctx);
|
finish_conflict_homesteader_wins(ctx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_SIFTPRIOR_MAX_RABX(REGEX_IS024_CONTEXT& ctx){
|
void i_SIFTPRIOR_MAX_RABX(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
sift_mode_check()
|
sift_mode_check()
|
||||||
if (ctx.RAX > ctx.RBX){
|
if (ctx.RAX > ctx.RBX){
|
||||||
@ -319,45 +320,45 @@ void i_SIFTPRIOR_MAX_RABX(REGEX_IS024_CONTEXT& ctx){
|
|||||||
} else if (ctx.RAX < ctx.RBX){
|
} else if (ctx.RAX < ctx.RBX){
|
||||||
finish_conflict_homesteader_wins(ctx);
|
finish_conflict_homesteader_wins(ctx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_SIFT_DONE(REGEX_IS024_CONTEXT& ctx){
|
void i_SIFT_DONE(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
sift_mode_check()
|
sift_mode_check()
|
||||||
finish_conflict_homesteader_wins(ctx);
|
finish_conflict_homesteader_wins(ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Can give errors */
|
/* Can give errors */
|
||||||
void ca_branch_new_node(REGEX_IS024_CONTEXT& ctx, regex_tai_t key, uint64_t val){
|
void ca_branch_new_node(VMContext& ctx, tai_t key, uint64_t val){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
if (ctx.CAN_total >= ctx.CA_TREE_LIMIT)
|
if (ctx.CAN_total >= ctx.CA_TREE_LIMIT)
|
||||||
smitsya(ca_tree_limit_violation);
|
smitsya(ca_tree_limit_violation);
|
||||||
REGEX024_CollectionArrayNode* node = new REGEX024_CollectionArrayNode{key, val, ctx.active_thread.CAHptr, 1};
|
CollectionArrayNode* node = new CollectionArrayNode{key, val, ctx.active_thread.CAHptr, 1};
|
||||||
// if (ctx.active_thread.CAHptr)
|
// if (ctx.active_thread.CAHptr)
|
||||||
// (ctx.active_thread.CAHptr->refs)++;
|
// (ctx.active_thread.CAHptr->refs)++;
|
||||||
ctx.active_thread.CAHptr = node;
|
ctx.active_thread.CAHptr = node;
|
||||||
ctx.CAN_total++;
|
ctx.CAN_total++;
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_MOV_COLARR_IMM(REGEX_IS024_CONTEXT& ctx){
|
void i_MOV_COLARR_IMM(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
general_matching_mode_check()
|
general_matching_mode_check()
|
||||||
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ + 8)
|
check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ + 8)
|
||||||
regex_tai_t ca_ind = ctx.extract_track_array_index();
|
tai_t ca_ind = ctx.extract_track_array_index();
|
||||||
uint64_t imm = ctx.extract_qw();
|
uint64_t imm = ctx.extract_qw();
|
||||||
ca_branch_new_node(ctx, ca_ind, imm);
|
ca_branch_new_node(ctx, ca_ind, imm);
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_MOV_COLARR_BTPOS(REGEX_IS024_CONTEXT& ctx){
|
void i_MOV_COLARR_BTPOS(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
general_matching_mode_check()
|
general_matching_mode_check()
|
||||||
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ)
|
check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ)
|
||||||
regex_tai_t ca_ind = ctx.extract_track_array_index();
|
tai_t ca_ind = ctx.extract_track_array_index();
|
||||||
ca_branch_new_node(ctx, ca_ind, ctx.passed_bytes);
|
ca_branch_new_node(ctx, ca_ind, ctx.passed_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Can throw error, should be placed at the end. Call ONLY in general matching mode */
|
/* Can throw error, should be placed at the end. Call ONLY in general matching mode */
|
||||||
void edit_selection_array(REGEX_IS024_CONTEXT& ctx, uint64_t key, uint64_t val){
|
void edit_selection_array(VMContext& ctx, uint64_t key, uint64_t val){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
uint64_t N = ctx.selection_array_len;
|
uint64_t N = ctx.selection_array_len;
|
||||||
if (key >= N)
|
if (key >= N)
|
||||||
@ -382,41 +383,42 @@ void edit_selection_array(REGEX_IS024_CONTEXT& ctx, uint64_t key, uint64_t val){
|
|||||||
ctx.active_thread.SAptr[0]--;
|
ctx.active_thread.SAptr[0]--;
|
||||||
ctx.active_thread.SAptr = sa_instance;
|
ctx.active_thread.SAptr = sa_instance;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_MOV_SELARR_IMM(REGEX_IS024_CONTEXT& ctx){
|
void i_MOV_SELARR_IMM(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
general_matching_mode_check()
|
general_matching_mode_check()
|
||||||
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ + 8)
|
check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ + 8)
|
||||||
regex_tai_t sa_ind = ctx.extract_track_array_index();
|
tai_t sa_ind = ctx.extract_track_array_index();
|
||||||
uint64_t imm = ctx.extract_qw();
|
uint64_t imm = ctx.extract_qw();
|
||||||
edit_selection_array(ctx, sa_ind, imm);
|
edit_selection_array(ctx, sa_ind, imm);
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_MOV_SELARR_CHPOS(REGEX_IS024_CONTEXT& ctx){
|
void i_MOV_SELARR_CHPOS(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
general_matching_mode_check()
|
general_matching_mode_check()
|
||||||
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ)
|
check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ)
|
||||||
regex_tai_t sa_ind = ctx.extract_track_array_index();
|
tai_t sa_ind = ctx.extract_track_array_index();
|
||||||
edit_selection_array(ctx, sa_ind, ctx.passed_chars);
|
edit_selection_array(ctx, sa_ind, ctx.passed_chars);
|
||||||
}
|
}
|
||||||
|
|
||||||
void calloc_stack_slots(REGEX_IS024_Stack& stack, regex_sslot_id_t nmemb) {
|
void calloc_stack_slots(SSID_Stack& stack, sslot_id_t nmemb) {
|
||||||
assert(stack.sz == 0 && !stack.slots);
|
assert(stack.max_size == 0 && stack.sz == 0 && !stack.slots);
|
||||||
regex_sslot_id_t* storage = static_cast<regex_sslot_id_t *>(calloc(nmemb, sizeof(regex_sslot_id_t)));
|
sslot_id_t* storage = static_cast<sslot_id_t *>(calloc(nmemb, sizeof(sslot_id_t)));
|
||||||
if (!storage)
|
if (!storage)
|
||||||
throw std::bad_alloc();
|
throw std::bad_alloc();
|
||||||
stack.slots = storage;
|
stack.slots = storage;
|
||||||
}
|
stack.max_size = nmemb;
|
||||||
|
}
|
||||||
|
|
||||||
REGEX_IS024_Thread* calloc_slots_array(regex_sslot_id_t nmemb) {
|
Thread* calloc_slots_array(sslot_id_t nmemb) {
|
||||||
REGEX_IS024_Thread* ptr = static_cast<REGEX_IS024_Thread *>(calloc(nmemb, sizeof(REGEX_IS024_Thread)));
|
Thread* ptr = static_cast<Thread *>(calloc(nmemb, sizeof(Thread)));
|
||||||
if (!ptr)
|
if (!ptr)
|
||||||
throw std::bad_alloc();
|
throw std::bad_alloc();
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_INIT(REGEX_IS024_CONTEXT& ctx){
|
void i_INIT(VMContext& ctx){
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
initialization_phase_check()
|
initialization_phase_check()
|
||||||
if (ctx.selection_array_len > ctx.SA_LEN_LIMIT)
|
if (ctx.selection_array_len > ctx.SA_LEN_LIMIT)
|
||||||
@ -440,21 +442,21 @@ void i_INIT(REGEX_IS024_CONTEXT& ctx){
|
|||||||
ctx.initialized = true;
|
ctx.initialized = true;
|
||||||
ctx.unnatural_started_thread_IP = ctx.active_thread.IP;
|
ctx.unnatural_started_thread_IP = ctx.active_thread.IP;
|
||||||
ctx.active_thread.delete_thread();
|
ctx.active_thread.delete_thread();
|
||||||
}
|
}
|
||||||
|
|
||||||
void i_THROW(REGEX_IS024_CONTEXT& ctx){
|
void i_THROW(VMContext& ctx){
|
||||||
ctx.error = regex024_error_codes::program_throw;
|
ctx.error = error_codes::program_throw;
|
||||||
}
|
}
|
||||||
|
|
||||||
void instruction_table(REGEX_IS024_CONTEXT &ctx) {
|
void instruction_table(VMContext &ctx) {
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
uint8_t opcode = ctx.extract_instruction();
|
uint8_t opcode = ctx.extract_instruction();
|
||||||
|
|
||||||
#define rcase(inst) case regex024_opcodes::inst: return i_ ## inst (ctx);
|
#define rcase(inst) case opcodes::inst: return i_ ## inst (ctx);
|
||||||
#define jumpC(UN, st) case regex024_opcodes::JC ## UN ## _B: return i_JC<st, immArgByte>(ctx); \
|
#define jumpC(UN, st) case opcodes::JC ## UN ## _B: return i_JC<st, immArgByte>(ctx); \
|
||||||
case regex024_opcodes::JC ## UN ## _W: return i_JC<st, immArgWord>(ctx); \
|
case opcodes::JC ## UN ## _W: return i_JC<st, immArgWord>(ctx); \
|
||||||
case regex024_opcodes::JC ## UN ## _DW: return i_JC<st, immArgDoubleWord>(ctx); \
|
case opcodes::JC ## UN ## _DW: return i_JC<st, immArgDoubleWord>(ctx); \
|
||||||
case regex024_opcodes::JC ## UN ## _QW: return i_JC<st, immArgQuadWord>(ctx);
|
case opcodes::JC ## UN ## _QW: return i_JC<st, immArgQuadWord>(ctx);
|
||||||
switch (opcode) {
|
switch (opcode) {
|
||||||
rcase(READ)
|
rcase(READ)
|
||||||
rcase(READZ)
|
rcase(READZ)
|
||||||
@ -486,6 +488,7 @@ void instruction_table(REGEX_IS024_CONTEXT &ctx) {
|
|||||||
rcase(INIT)
|
rcase(INIT)
|
||||||
rcase(THROW)
|
rcase(THROW)
|
||||||
default:
|
default:
|
||||||
ctx.error = regex024_error_codes::invalid_opcode;
|
ctx.error = error_codes::invalid_opcode;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -7,7 +7,7 @@
|
|||||||
#include <libregexis024vm/vm_opcodes.h>
|
#include <libregexis024vm/vm_opcodes.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
#define smitsya(error_type) do {ctx.error = regex024_error_codes::error_type; return; } while (0)
|
#define smitsya(error_type) do {ctx.error = error_codes::error_type; return; } while (0)
|
||||||
|
|
||||||
#define SLOT_EMPTY_val 0
|
#define SLOT_EMPTY_val 0
|
||||||
#define SLOT_OCCUPIED 1
|
#define SLOT_OCCUPIED 1
|
||||||
@ -16,7 +16,7 @@
|
|||||||
#define SLOT_NEW_val (SLOT_OCCUPIED | SLOT_NEW)
|
#define SLOT_NEW_val (SLOT_OCCUPIED | SLOT_NEW)
|
||||||
|
|
||||||
#define check_available_prg(regionSz) if (!ctx.check_inboundness(regionSz)){ \
|
#define check_available_prg(regionSz) if (!ctx.check_inboundness(regionSz)){ \
|
||||||
ctx.error = regex024_error_codes::improper_finish; return; }
|
ctx.error = error_codes::improper_finish; return; }
|
||||||
|
|
||||||
|
|
||||||
#if defined(LIBREGEXIS024_DEBUG) && defined(LIBREGEXIS024_ALLOW_LOUD)
|
#if defined(LIBREGEXIS024_DEBUG) && defined(LIBREGEXIS024_ALLOW_LOUD)
|
||||||
@ -30,6 +30,8 @@
|
|||||||
#define thread_print_debug(thread)
|
#define thread_print_debug(thread)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void instruction_table(REGEX_IS024_CONTEXT& ctx);
|
namespace regexis024 {
|
||||||
|
void instruction_table(VMContext& ctx);
|
||||||
|
}
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_INSTRUCTION_IMPLEMENTATION_H
|
#endif //LIBREGEXIS024_INSTRUCTION_IMPLEMENTATION_H
|
@ -1,10 +1,10 @@
|
|||||||
#include <libregexis024vm/vm_opcodes.h>
|
#include <libregexis024vm/vm_opcodes.h>
|
||||||
#include <libregexis024vm/utils.h>
|
#include <libregexis024vm/utils.h>
|
||||||
|
|
||||||
#define rcase(name) case regex024_opcodes::name: return #name;
|
namespace regexis024 {
|
||||||
|
const char *opcode_to_str(opcode_t x) {
|
||||||
const char *regex024_opcode_tostr(regex024_opcode x) {
|
|
||||||
switch (x) {
|
switch (x) {
|
||||||
|
#define rcase(name) case opcodes::name: return #name;
|
||||||
rcase(READ)
|
rcase(READ)
|
||||||
rcase(READZ)
|
rcase(READZ)
|
||||||
rcase(JUMP)
|
rcase(JUMP)
|
||||||
@ -44,4 +44,5 @@ const char *regex024_opcode_tostr(regex024_opcode x) {
|
|||||||
default:
|
default:
|
||||||
return "Invalid opcode";
|
return "Invalid opcode";
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,13 +2,13 @@
|
|||||||
#define LIBREGEXIS024_LIBREGEXIS024VM_H
|
#define LIBREGEXIS024_LIBREGEXIS024VM_H
|
||||||
|
|
||||||
/* This thing is bloated. And slow (Because I designed it imperfectly and because it is bloated).
|
/* This thing is bloated. And slow (Because I designed it imperfectly and because it is bloated).
|
||||||
* I could have halven the amount of bloat, but that would require me writing code in headers.
|
* I could have halven the amount of bloat, but that would require me writing code in headers.
|
||||||
* I am gonna use it for KM, even more bloated project. So I thought that this design is on the spot.
|
* I am gonna use it for KM, even more bloated project. So I thought that this design is on the spot.
|
||||||
* C++ is such a funny language. Code is divided into .cpp and .h files. But it only makes problems.
|
* C++ is such a funny language. Code is divided into .cpp and .h files. But it only makes problems.
|
||||||
* All of my work on this C++ project was not serious from the beginning. It's all funny stuff. */
|
* All of my work on this C++ project was not serious from the beginning. It's all funny stuff. */
|
||||||
|
|
||||||
/* Also, please, consider using libregexis024vm/libregexis024vm_interface.h
|
/* Also, please, consider using libregexis024vm/libregexis024vm_interface.h
|
||||||
* Naming in this project is super inconsistent. I don't want it to trash your namespace */
|
* Naming in this project is super inconsistent. I don't want it to trash your namespace */
|
||||||
|
|
||||||
#include <libregexis024vm/vm_errno.h>
|
#include <libregexis024vm/vm_errno.h>
|
||||||
#include <libregexis024vm/utils.h>
|
#include <libregexis024vm/utils.h>
|
||||||
@ -16,37 +16,38 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
struct REGEX_IS024_Stack{
|
namespace regexis024 {
|
||||||
regex_sslot_id_t* slots = NULL;
|
struct SSID_Stack{
|
||||||
regex_sslot_id_t sz = 0;
|
sslot_id_t* slots = NULL;
|
||||||
|
sslot_id_t max_size = 0;
|
||||||
|
sslot_id_t sz = 0;
|
||||||
|
|
||||||
regex_sslot_id_t pop();
|
sslot_id_t pop();
|
||||||
void append(regex_sslot_id_t x);
|
void append(sslot_id_t x);
|
||||||
bool empty() const;
|
bool empty() const;
|
||||||
bool non_empty() const;
|
|
||||||
|
|
||||||
REGEX_IS024_Stack(const REGEX_IS024_Stack&) = delete;
|
SSID_Stack(const SSID_Stack&) = delete;
|
||||||
REGEX_IS024_Stack& operator=(const REGEX_IS024_Stack&) = delete;
|
SSID_Stack& operator=(const SSID_Stack&) = delete;
|
||||||
REGEX_IS024_Stack() = default;
|
SSID_Stack() = default;
|
||||||
|
|
||||||
~REGEX_IS024_Stack();
|
~SSID_Stack();
|
||||||
};
|
};
|
||||||
|
|
||||||
struct REGEX024_CollectionArrayNode{
|
struct CollectionArrayNode{
|
||||||
/* Key is small for historical reasons I do not rememeber. Who cares anyway */
|
/* Key is small for historical reasons I do not rememeber. Who cares anyway */
|
||||||
regex_tai_t key;
|
tai_t key;
|
||||||
uint64_t value;
|
uint64_t value;
|
||||||
/* NULL at the beginning */
|
/* NULL at the beginning */
|
||||||
REGEX024_CollectionArrayNode* prev;
|
CollectionArrayNode* prev;
|
||||||
/* Reference counting */
|
/* Reference counting */
|
||||||
uint64_t refs = 0;
|
uint64_t refs = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct REGEX_IS024_Thread{
|
struct Thread{
|
||||||
/* First byte field is used only when thread is located in slot */
|
/* First byte field is used only when thread is located in slot */
|
||||||
uint8_t slot_occupation_status = 0;
|
uint8_t slot_occupation_status = 0;
|
||||||
regex_near_ptr_t IP = 0;
|
near_ptr_t IP = 0;
|
||||||
REGEX024_CollectionArrayNode* CAHptr = NULL;
|
CollectionArrayNode* CAHptr = NULL;
|
||||||
/* Pointer to the seletion array. SA's are reference counted. Because of that every SA
|
/* Pointer to the seletion array. SA's are reference counted. Because of that every SA
|
||||||
* is elongated by one meta element in the beginning - reference counter. So the actual elements
|
* is elongated by one meta element in the beginning - reference counter. So the actual elements
|
||||||
* are enumerated starting from one. */
|
* are enumerated starting from one. */
|
||||||
@ -54,20 +55,20 @@ struct REGEX_IS024_Thread{
|
|||||||
|
|
||||||
void delete_thread() noexcept;
|
void delete_thread() noexcept;
|
||||||
void debug_print(const char* place);
|
void debug_print(const char* place);
|
||||||
};
|
};
|
||||||
|
|
||||||
struct REGEX_IS024_CONTEXT{
|
struct VMContext{
|
||||||
REGEX_IS024_CONTEXT(size_t programSize, const uint8_t *data, uint64_t caTreeLimit, regex_tai_t saLenLimit,
|
VMContext(size_t programSize, const uint8_t *data, uint64_t caTreeLimit, tai_t saLenLimit,
|
||||||
regex_sslot_id_t readSsLimit, regex_sslot_id_t forkSsLimit, uint64_t timeTickLimit);
|
sslot_id_t readSsLimit, sslot_id_t forkSsLimit, uint64_t timeTickLimit);
|
||||||
|
|
||||||
regex024_error_code feedSOF();
|
error_code_t feedSOF();
|
||||||
/* You can safely pile up calls to this command, nothing bad will happen */
|
/* You can safely pile up calls to this command, nothing bad will happen */
|
||||||
regex024_error_code startThread();
|
error_code_t startThread();
|
||||||
regex024_error_code extendedFeedCharacter(uint64_t input);
|
error_code_t extendedFeedCharacter(uint64_t input);
|
||||||
regex024_error_code feedCharacter(uint64_t INP, uint64_t corresponding_byte_amount);
|
error_code_t feedCharacter(uint64_t INP, uint64_t corresponding_byte_amount);
|
||||||
|
|
||||||
|
|
||||||
~REGEX_IS024_CONTEXT();
|
~VMContext();
|
||||||
|
|
||||||
/* Program size larger than 2^62 is forbidden */
|
/* Program size larger than 2^62 is forbidden */
|
||||||
size_t program_size = 0;
|
size_t program_size = 0;
|
||||||
@ -78,9 +79,9 @@ struct REGEX_IS024_CONTEXT{
|
|||||||
/* CA = Collecton array. */
|
/* CA = Collecton array. */
|
||||||
uint64_t CA_TREE_LIMIT;
|
uint64_t CA_TREE_LIMIT;
|
||||||
/* SA = Selection array */
|
/* SA = Selection array */
|
||||||
regex_tai_t SA_LEN_LIMIT;
|
tai_t SA_LEN_LIMIT;
|
||||||
regex_sslot_id_t READ_SS_LIMIT;
|
sslot_id_t READ_SS_LIMIT;
|
||||||
regex_sslot_id_t FORK_SS_LIMIT;
|
sslot_id_t FORK_SS_LIMIT;
|
||||||
|
|
||||||
/* If time_tick_limit is non-zero, regex virtual machine will stop with error
|
/* If time_tick_limit is non-zero, regex virtual machine will stop with error
|
||||||
* after this many ticks. This parameter set's the timeout.*/
|
* after this many ticks. This parameter set's the timeout.*/
|
||||||
@ -93,38 +94,38 @@ struct REGEX_IS024_CONTEXT{
|
|||||||
uint64_t CAN_total = 0;
|
uint64_t CAN_total = 0;
|
||||||
|
|
||||||
/* Program selects it */
|
/* Program selects it */
|
||||||
regex_tai_t selection_array_len = 0;
|
tai_t selection_array_len = 0;
|
||||||
regex_sslot_id_t read_slots_number = 0;
|
sslot_id_t read_slots_number = 0;
|
||||||
regex_sslot_id_t fork_slots_number = 0;
|
sslot_id_t fork_slots_number = 0;
|
||||||
|
|
||||||
bool have_sift_function = false;
|
bool have_sift_function = false;
|
||||||
regex_near_ptr_t sift_function;
|
near_ptr_t sift_function;
|
||||||
|
|
||||||
bool allows_multistart = false;
|
bool allows_multistart = false;
|
||||||
uint8_t fed_input_extends_left = 0, fed_input_extends_right = 0;
|
uint8_t fed_input_extends_left = 0, fed_input_extends_right = 0;
|
||||||
regex_sslot_id_t portion_of_second_read_halt_ns = 0, portion_of_FIRST_read_halt_ns = 0;
|
sslot_id_t portion_of_second_read_halt_ns = 0, portion_of_FIRST_read_halt_ns = 0;
|
||||||
|
|
||||||
bool initialized = false;
|
bool initialized = false;
|
||||||
regex_near_ptr_t unnatural_started_thread_IP = 1337;
|
near_ptr_t unnatural_started_thread_IP = 1337;
|
||||||
regex024_error_code error = regex024_error_codes::stable;
|
error_code_t error = error_codes::stable;
|
||||||
|
|
||||||
REGEX_IS024_Thread* READ_halted_slots;
|
Thread* READ_halted_slots;
|
||||||
REGEX_IS024_Stack READ_halted_stack_old;
|
SSID_Stack READ_halted_stack_old;
|
||||||
REGEX_IS024_Stack READ_halted_stack_new_first;
|
SSID_Stack READ_halted_stack_new_first;
|
||||||
REGEX_IS024_Stack READ_halted_stack_new_second;
|
SSID_Stack READ_halted_stack_new_second;
|
||||||
REGEX_IS024_Thread* FORK_halted_slots;
|
Thread* FORK_halted_slots;
|
||||||
REGEX_IS024_Stack FORK_halted_stack;
|
SSID_Stack FORK_halted_stack;
|
||||||
|
|
||||||
REGEX_IS024_Thread active_thread;
|
Thread active_thread;
|
||||||
|
|
||||||
/* Environment for sifting stuff */
|
/* Environment for sifting stuff */
|
||||||
REGEX_IS024_Thread* sifting_with = NULL;
|
Thread* sifting_with = NULL;
|
||||||
/* specifies the type of operation vm should do after shift (there are only two distinct options) */
|
/* specifies the type of operation vm should do after shift (there are only two distinct options) */
|
||||||
uint8_t who_started_sift;
|
uint8_t who_started_sift;
|
||||||
/* Sifting process uses IP field of active thread. Other data of thread is not modified or used during collision
|
/* Sifting process uses IP field of active thread. Other data of thread is not modified or used during collision
|
||||||
* procudure. Old IP is stored there, if needed */
|
* procudure. Old IP is stored there, if needed */
|
||||||
regex_near_ptr_t child_ret_IP;
|
near_ptr_t child_ret_IP;
|
||||||
regex_near_ptr_t intruder_IP;
|
near_ptr_t intruder_IP;
|
||||||
/* RAX corresponds to intruder. Its data is stored in active thread field*/
|
/* RAX corresponds to intruder. Its data is stored in active thread field*/
|
||||||
uint64_t RAX;
|
uint64_t RAX;
|
||||||
/* RBX corresponds to homesteader. Its data is accessible by `REGEX_IS024_Thread* sifting_with` pointer*/
|
/* RBX corresponds to homesteader. Its data is accessible by `REGEX_IS024_Thread* sifting_with` pointer*/
|
||||||
@ -132,7 +133,7 @@ struct REGEX_IS024_CONTEXT{
|
|||||||
|
|
||||||
/* Will be unoccupied if no threads matched. After each feed of character this field will be wiped
|
/* Will be unoccupied if no threads matched. After each feed of character this field will be wiped
|
||||||
* User should take care of intermediate success himself */
|
* User should take care of intermediate success himself */
|
||||||
REGEX_IS024_Thread matched_thread;
|
Thread matched_thread;
|
||||||
|
|
||||||
uint64_t INP = 0;
|
uint64_t INP = 0;
|
||||||
uint64_t passed_chars = 0;
|
uint64_t passed_chars = 0;
|
||||||
@ -148,11 +149,11 @@ struct REGEX_IS024_CONTEXT{
|
|||||||
uint64_t extract_qw();
|
uint64_t extract_qw();
|
||||||
|
|
||||||
uint8_t extract_instruction();
|
uint8_t extract_instruction();
|
||||||
regex_sslot_id_t extract_sslot_id();
|
sslot_id_t extract_sslot_id();
|
||||||
regex_near_ptr_t extract_near_pointer();
|
near_ptr_t extract_near_pointer();
|
||||||
regex_tai_t extract_track_array_index();
|
tai_t extract_track_array_index();
|
||||||
|
|
||||||
void debug_print(const char* place);
|
void debug_print(const char* place);
|
||||||
};
|
};
|
||||||
|
}
|
||||||
#endif //LIBREGEXIS024_LIBREGEXIS024VM_H
|
#endif //LIBREGEXIS024_LIBREGEXIS024VM_H
|
||||||
|
@ -1,54 +1,54 @@
|
|||||||
|
#include <stdexcept>
|
||||||
#include <libregexis024vm/libregexis024vm.h>
|
#include <libregexis024vm/libregexis024vm.h>
|
||||||
#include <libregexis024vm/instruction_implementation.h>
|
#include <libregexis024vm/instruction_implementation.h>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
regex_sslot_id_t REGEX_IS024_Stack::pop() {
|
namespace regexis024 {
|
||||||
|
sslot_id_t SSID_Stack::pop() {
|
||||||
assert(sz != 0);
|
assert(sz != 0);
|
||||||
return slots[--sz];
|
return slots[--sz];
|
||||||
}
|
}
|
||||||
|
|
||||||
void REGEX_IS024_Stack::append(regex_sslot_id_t x) {
|
void SSID_Stack::append(sslot_id_t x) {
|
||||||
|
assert(max_size > 0);
|
||||||
assert(slots);
|
assert(slots);
|
||||||
|
assert(sz < max_size);
|
||||||
slots[sz] = x;
|
slots[sz] = x;
|
||||||
sz++;
|
sz++;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool REGEX_IS024_Stack::empty() const {
|
bool SSID_Stack::empty() const {
|
||||||
return !non_empty();
|
return sz == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool REGEX_IS024_Stack::non_empty() const {
|
SSID_Stack::~SSID_Stack() {
|
||||||
return sz;
|
|
||||||
}
|
|
||||||
|
|
||||||
REGEX_IS024_Stack::~REGEX_IS024_Stack() {
|
|
||||||
assert(empty());
|
assert(empty());
|
||||||
free(slots);
|
free(slots);
|
||||||
}
|
}
|
||||||
|
|
||||||
REGEX_IS024_CONTEXT::REGEX_IS024_CONTEXT(size_t programSize, const uint8_t *data,
|
VMContext::VMContext(size_t programSize, const uint8_t *data,
|
||||||
uint64_t caTreeLimit, regex_tai_t saLenLimit,
|
uint64_t caTreeLimit, tai_t saLenLimit,
|
||||||
regex_sslot_id_t readSsLimit, regex_sslot_id_t forkSsLimit,
|
sslot_id_t readSsLimit, sslot_id_t forkSsLimit,
|
||||||
uint64_t timeTickLimit) :
|
uint64_t timeTickLimit) :
|
||||||
program_size(programSize), prg(data), CA_TREE_LIMIT(caTreeLimit), SA_LEN_LIMIT(saLenLimit),
|
program_size(programSize), prg(data), CA_TREE_LIMIT(caTreeLimit), SA_LEN_LIMIT(saLenLimit),
|
||||||
READ_SS_LIMIT(readSsLimit), FORK_SS_LIMIT(forkSsLimit), time_tick_limit(timeTickLimit)
|
READ_SS_LIMIT(readSsLimit), FORK_SS_LIMIT(forkSsLimit), time_tick_limit(timeTickLimit)
|
||||||
{
|
{
|
||||||
if (program_size > (1UL << 62))
|
if (program_size > (1UL << 62))
|
||||||
exitf("Program is too huge\n");
|
throw std::runtime_error("Program is too big");
|
||||||
active_thread.slot_occupation_status = SLOT_OCCUPIED;
|
active_thread.slot_occupation_status = SLOT_OCCUPIED;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* No only will it launch a wave of deallocation in CA tree, but as a nice bonus it's
|
/* No only will it launch a wave of deallocation in CA tree, but as a nice bonus it's
|
||||||
* gonna deoccupy slot_occupation_status*/
|
* gonna deoccupy slot_occupation_status*/
|
||||||
void REGEX_IS024_Thread::delete_thread() noexcept {
|
void Thread::delete_thread() noexcept {
|
||||||
thread_print_debug(*this);
|
thread_print_debug(*this);
|
||||||
my_assert(slot_occupation_status & SLOT_OCCUPIED);
|
my_assert(slot_occupation_status & SLOT_OCCUPIED);
|
||||||
slot_occupation_status = SLOT_EMPTY_val;
|
slot_occupation_status = SLOT_EMPTY_val;
|
||||||
REGEX024_CollectionArrayNode* cur_CAptr = CAHptr;
|
CollectionArrayNode* cur_CAptr = CAHptr;
|
||||||
while (cur_CAptr){
|
while (cur_CAptr){
|
||||||
assert(cur_CAptr->refs > 0);
|
assert(cur_CAptr->refs > 0);
|
||||||
if (--(cur_CAptr->refs) == 0){
|
if (--(cur_CAptr->refs) == 0){
|
||||||
REGEX024_CollectionArrayNode* next_CAptr = cur_CAptr->prev;
|
CollectionArrayNode* next_CAptr = cur_CAptr->prev;
|
||||||
delete cur_CAptr;
|
delete cur_CAptr;
|
||||||
cur_CAptr = next_CAptr;
|
cur_CAptr = next_CAptr;
|
||||||
} else
|
} else
|
||||||
@ -58,33 +58,33 @@ void REGEX_IS024_Thread::delete_thread() noexcept {
|
|||||||
if (--(SAptr[0]) == 0)
|
if (--(SAptr[0]) == 0)
|
||||||
free(SAptr);
|
free(SAptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void emptify_one_of_new_read_halted_stacks(REGEX_IS024_CONTEXT& ctx, REGEX_IS024_Stack& type_new_stack){
|
void emptify_one_of_new_read_halted_stacks(VMContext& ctx, SSID_Stack& type_new_stack){
|
||||||
while (type_new_stack.non_empty()){
|
while (!type_new_stack.empty()){
|
||||||
REGEX_IS024_Thread& thread = ctx.READ_halted_slots[type_new_stack.pop()];
|
Thread& thread = ctx.READ_halted_slots[type_new_stack.pop()];
|
||||||
assert(thread.slot_occupation_status & SLOT_OCCUPIED);
|
assert(thread.slot_occupation_status & SLOT_OCCUPIED);
|
||||||
thread.delete_thread();
|
thread.delete_thread();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* First it will try to pop pending thread from FORK_halted_stack
|
/* First it will try to pop pending thread from FORK_halted_stack
|
||||||
* Then it will try popping thread from READ_halted_stack_old (checking if top
|
* Then it will try popping thread from READ_halted_stack_old (checking if top
|
||||||
* thread here is not actually SLOT_NEW). If something succeded, corresponding slot will be deoccupied, and
|
* thread here is not actually SLOT_NEW). If something succeded, corresponding slot will be deoccupied, and
|
||||||
* active slot will be occupied with it.
|
* active slot will be occupied with it.
|
||||||
*
|
*
|
||||||
* try_to_continue_scheduled() assumes that active thread is unoccupied.*/
|
* try_to_continue_scheduled() assumes that active thread is unoccupied.*/
|
||||||
void REGEX_IS024_CONTEXT::try_to_continue_scheduled(){
|
void VMContext::try_to_continue_scheduled(){
|
||||||
ctx_print_debug(*this);
|
ctx_print_debug(*this);
|
||||||
my_assert(!(active_thread.slot_occupation_status & SLOT_OCCUPIED));
|
my_assert(!(active_thread.slot_occupation_status & SLOT_OCCUPIED));
|
||||||
if (FORK_halted_stack.sz){
|
if (FORK_halted_stack.sz){
|
||||||
regex_sslot_id_t ssid = FORK_halted_stack.pop();
|
sslot_id_t ssid = FORK_halted_stack.pop();
|
||||||
active_thread = FORK_halted_slots[ssid];
|
active_thread = FORK_halted_slots[ssid];
|
||||||
FORK_halted_slots[ssid].slot_occupation_status = SLOT_EMPTY_val;
|
FORK_halted_slots[ssid].slot_occupation_status = SLOT_EMPTY_val;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
while (READ_halted_stack_old.sz){
|
while (READ_halted_stack_old.sz){
|
||||||
regex_sslot_id_t ssid = READ_halted_stack_old.pop();
|
sslot_id_t ssid = READ_halted_stack_old.pop();
|
||||||
if (READ_halted_slots[ssid].slot_occupation_status & SLOT_NEW){
|
if (READ_halted_slots[ssid].slot_occupation_status & SLOT_NEW){
|
||||||
/* This is the case when old thread was silently replaced by settled new thread */
|
/* This is the case when old thread was silently replaced by settled new thread */
|
||||||
continue;
|
continue;
|
||||||
@ -94,30 +94,30 @@ void REGEX_IS024_CONTEXT::try_to_continue_scheduled(){
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
/* Failure here will be detected. We started with unoccupied active thread. iterator inside kick will see it */
|
/* Failure here will be detected. We started with unoccupied active thread. iterator inside kick will see it */
|
||||||
}
|
}
|
||||||
|
|
||||||
void kick(REGEX_IS024_CONTEXT& ctx) {
|
void kick(VMContext& ctx) {
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
while ((ctx.active_thread.slot_occupation_status & SLOT_OCCUPIED)
|
while ((ctx.active_thread.slot_occupation_status & SLOT_OCCUPIED)
|
||||||
&& ctx.error == regex024_error_codes::stable){
|
&& ctx.error == error_codes::stable){
|
||||||
if (ctx.timer >= ctx.time_tick_limit)
|
if (ctx.timer >= ctx.time_tick_limit)
|
||||||
smitsya(timeout);
|
smitsya(timeout);
|
||||||
ctx.timer++;
|
ctx.timer++;
|
||||||
|
|
||||||
check_available_prg(REGEX024_BYTECODE_INSTRUCTION_SZ) // May return from kick(ctx)
|
check_available_prg(BYTECODE_INSTRUCTION_SZ) // May return from kick(ctx)
|
||||||
// smivanie from those instructions will be immediately detected. Everything is OK
|
// smivanie from those instructions will be immediately detected. Everything is OK
|
||||||
instruction_table(ctx);
|
instruction_table(ctx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
regex024_error_code REGEX_IS024_CONTEXT::feedSOF() {
|
error_code_t VMContext::feedSOF() {
|
||||||
ctx_print_debug(*this);
|
ctx_print_debug(*this);
|
||||||
kick(*this);
|
kick(*this);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
regex024_error_code REGEX_IS024_CONTEXT::startThread() {
|
error_code_t VMContext::startThread() {
|
||||||
ctx_print_debug(*this);
|
ctx_print_debug(*this);
|
||||||
active_thread.slot_occupation_status = SLOT_OCCUPIED;
|
active_thread.slot_occupation_status = SLOT_OCCUPIED;
|
||||||
active_thread.IP = unnatural_started_thread_IP;
|
active_thread.IP = unnatural_started_thread_IP;
|
||||||
@ -125,31 +125,22 @@ regex024_error_code REGEX_IS024_CONTEXT::startThread() {
|
|||||||
active_thread.CAHptr = NULL;
|
active_thread.CAHptr = NULL;
|
||||||
kick(*this);
|
kick(*this);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* I hate C++ (aka antichrist), won't use move sementic (aka drink cornsyrup) */
|
void fill_empty_old_read_halted_stack(VMContext& ctx, SSID_Stack& read_halted_stack_new){
|
||||||
void swap_stacks(REGEX_IS024_Stack& A, REGEX_IS024_Stack& B) {
|
|
||||||
std::swap(A.sz, B.sz);
|
|
||||||
std::swap(A.slots, B.slots);
|
|
||||||
}
|
|
||||||
|
|
||||||
void fill_empty_old_read_halted_stack(REGEX_IS024_CONTEXT& ctx, REGEX_IS024_Stack& read_halted_stack_new){
|
|
||||||
ctx_print_debug(ctx);
|
ctx_print_debug(ctx);
|
||||||
my_assert(!ctx.READ_halted_stack_old.non_empty());
|
|
||||||
|
|
||||||
// Actually, READ_halted_stack_old is always empty in this case
|
// Actually, READ_halted_stack_old is always empty in this case
|
||||||
assert(ctx.READ_halted_stack_old.empty());
|
assert(ctx.READ_halted_stack_old.empty());
|
||||||
swap_stacks(ctx.READ_halted_stack_old, read_halted_stack_new);
|
while (!read_halted_stack_new.empty()) {
|
||||||
for (uint32_t i = 0; i < ctx.READ_halted_stack_old.sz; i++){
|
sslot_id_t sr = read_halted_stack_new.pop();
|
||||||
REGEX_IS024_Thread& slot = ctx.READ_halted_slots[ctx.READ_halted_stack_old.slots[i]];
|
Thread& slot = ctx.READ_halted_slots[sr];
|
||||||
/* Should get rid of 'NEW' qualifier */
|
assert(slot.slot_occupation_status & SLOT_NEW_val);
|
||||||
assert(slot.slot_occupation_status & SLOT_OCCUPIED);
|
slot.slot_occupation_status = SLOT_OCCUPIED_val;
|
||||||
if (slot.slot_occupation_status & SLOT_OCCUPIED)
|
ctx.READ_halted_stack_old.append(sr);
|
||||||
slot.slot_occupation_status = SLOT_OCCUPIED;
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
regex024_error_code REGEX_IS024_CONTEXT::feedCharacter(uint64_t input, uint64_t corresponding_byte_amount) {
|
error_code_t VMContext::feedCharacter(uint64_t input, uint64_t corresponding_byte_amount) {
|
||||||
ctx_print_debug(*this);
|
ctx_print_debug(*this);
|
||||||
if (matched_thread.slot_occupation_status & SLOT_OCCUPIED)
|
if (matched_thread.slot_occupation_status & SLOT_OCCUPIED)
|
||||||
matched_thread.delete_thread();
|
matched_thread.delete_thread();
|
||||||
@ -161,9 +152,9 @@ regex024_error_code REGEX_IS024_CONTEXT::feedCharacter(uint64_t input, uint64_t
|
|||||||
try_to_continue_scheduled();
|
try_to_continue_scheduled();
|
||||||
kick(*this);
|
kick(*this);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
regex024_error_code REGEX_IS024_CONTEXT::extendedFeedCharacter(uint64_t input) {
|
error_code_t VMContext::extendedFeedCharacter(uint64_t input) {
|
||||||
ctx_print_debug(*this);
|
ctx_print_debug(*this);
|
||||||
if (matched_thread.slot_occupation_status & SLOT_OCCUPIED)
|
if (matched_thread.slot_occupation_status & SLOT_OCCUPIED)
|
||||||
matched_thread.delete_thread();
|
matched_thread.delete_thread();
|
||||||
@ -172,21 +163,21 @@ regex024_error_code REGEX_IS024_CONTEXT::extendedFeedCharacter(uint64_t input) {
|
|||||||
try_to_continue_scheduled();
|
try_to_continue_scheduled();
|
||||||
kick(*this);
|
kick(*this);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
REGEX_IS024_CONTEXT::~REGEX_IS024_CONTEXT() {
|
VMContext::~VMContext() {
|
||||||
ctx_print_debug(*this);
|
ctx_print_debug(*this);
|
||||||
if (initialized){
|
if (initialized){
|
||||||
emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new_first);
|
emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new_first);
|
||||||
emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new_second);
|
emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new_second);
|
||||||
while (READ_halted_stack_old.non_empty()){
|
while (!READ_halted_stack_old.empty()){
|
||||||
REGEX_IS024_Thread& thread = READ_halted_slots[READ_halted_stack_old.pop()];
|
Thread& thread = READ_halted_slots[READ_halted_stack_old.pop()];
|
||||||
assert(thread.slot_occupation_status & SLOT_OCCUPIED);
|
assert(thread.slot_occupation_status & SLOT_OCCUPIED);
|
||||||
if (!(thread.slot_occupation_status & SLOT_NEW))
|
if (!(thread.slot_occupation_status & SLOT_NEW))
|
||||||
thread.delete_thread();
|
thread.delete_thread();
|
||||||
}
|
}
|
||||||
free(READ_halted_slots);
|
free(READ_halted_slots);
|
||||||
while (FORK_halted_stack.non_empty())
|
while (!FORK_halted_stack.empty())
|
||||||
FORK_halted_slots[FORK_halted_stack.pop()].delete_thread();
|
FORK_halted_slots[FORK_halted_stack.pop()].delete_thread();
|
||||||
free(FORK_halted_slots);
|
free(FORK_halted_slots);
|
||||||
|
|
||||||
@ -194,4 +185,5 @@ REGEX_IS024_CONTEXT::~REGEX_IS024_CONTEXT() {
|
|||||||
matched_thread.delete_thread();
|
matched_thread.delete_thread();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,38 +1,40 @@
|
|||||||
#include <libregexis024vm/libregexis024vm.h>
|
#include <libregexis024vm/libregexis024vm.h>
|
||||||
#include <libregexis024vm/vm_opcodes.h>
|
#include <libregexis024vm/vm_opcodes.h>
|
||||||
|
|
||||||
bool REGEX_IS024_CONTEXT::check_inboundness(int region){
|
namespace regexis024 {
|
||||||
|
bool VMContext::check_inboundness(int region){
|
||||||
return vmprog_check_inboundness(program_size, active_thread.IP, region);
|
return vmprog_check_inboundness(program_size, active_thread.IP, region);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t REGEX_IS024_CONTEXT::extract_b() {
|
uint8_t VMContext::extract_b() {
|
||||||
return vmprog_extract_b(&active_thread.IP, prg);
|
return vmprog_extract_b(&active_thread.IP, prg);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16_t REGEX_IS024_CONTEXT::extract_w() {
|
uint16_t VMContext::extract_w() {
|
||||||
return vmprog_extract_w(&active_thread.IP, prg);
|
return vmprog_extract_w(&active_thread.IP, prg);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t REGEX_IS024_CONTEXT::extract_dw() {
|
uint32_t VMContext::extract_dw() {
|
||||||
return vmprog_extract_dw(&active_thread.IP, prg);
|
return vmprog_extract_dw(&active_thread.IP, prg);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t REGEX_IS024_CONTEXT::extract_qw() {
|
uint64_t VMContext::extract_qw() {
|
||||||
return vmprog_extract_qw(&active_thread.IP, prg);
|
return vmprog_extract_qw(&active_thread.IP, prg);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t REGEX_IS024_CONTEXT::extract_instruction() {
|
uint8_t VMContext::extract_instruction() {
|
||||||
return extract_b();
|
return extract_b();
|
||||||
}
|
}
|
||||||
|
|
||||||
regex_sslot_id_t REGEX_IS024_CONTEXT::extract_sslot_id() {
|
sslot_id_t VMContext::extract_sslot_id() {
|
||||||
return extract_dw();
|
return extract_dw();
|
||||||
}
|
}
|
||||||
|
|
||||||
regex_near_ptr_t REGEX_IS024_CONTEXT::extract_near_pointer() {
|
near_ptr_t VMContext::extract_near_pointer() {
|
||||||
return extract_qw();
|
return extract_qw();
|
||||||
}
|
}
|
||||||
|
|
||||||
regex_tai_t REGEX_IS024_CONTEXT::extract_track_array_index() {
|
tai_t VMContext::extract_track_array_index() {
|
||||||
return extract_w();
|
return extract_w();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,105 +1,106 @@
|
|||||||
|
#include <stdexcept>
|
||||||
#include <libregexis024vm/libregexis024vm_interface.h>
|
#include <libregexis024vm/libregexis024vm_interface.h>
|
||||||
#include <libregexis024vm/libregexis024vm.h>
|
#include <libregexis024vm/libregexis024vm.h>
|
||||||
#include <libregexis024vm/instruction_implementation.h>
|
#include <libregexis024vm/instruction_implementation.h>
|
||||||
|
|
||||||
bool REGEX_IS024_CAEvent::operator==(const REGEX_IS024_CAEvent &other) const {
|
namespace regexis024 {
|
||||||
|
bool CAEvent::operator==(const CAEvent &other) const {
|
||||||
return (key == other.key) && (value == other.value);
|
return (key == other.key) && (value == other.value);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define reveal ((REGEX_IS024_CONTEXT*)opaque)
|
#define reveal ((VMContext*)opaque)
|
||||||
|
|
||||||
REGEX_IS024_VirtualMachine::REGEX_IS024_VirtualMachine(size_t programSize, const uint8_t *data,
|
VirtualMachine::VirtualMachine(size_t programSize, const uint8_t *data,
|
||||||
uint64_t caTreeLimit, regex_tai_t saLenLimit,
|
uint64_t caTreeLimit, tai_t saLenLimit,
|
||||||
regex_sslot_id_t readSsLimit, regex_sslot_id_t forkSsLimit,
|
sslot_id_t readSsLimit, sslot_id_t forkSsLimit,
|
||||||
uint64_t timeTickLimit) {
|
uint64_t timeTickLimit) {
|
||||||
opaque = new REGEX_IS024_CONTEXT(programSize, data, caTreeLimit, saLenLimit,
|
opaque = new VMContext(programSize, data, caTreeLimit, saLenLimit,
|
||||||
readSsLimit, forkSsLimit, timeTickLimit);
|
readSsLimit, forkSsLimit, timeTickLimit);
|
||||||
}
|
}
|
||||||
|
|
||||||
regex024_error_code REGEX_IS024_VirtualMachine::initialize() {
|
error_code_t VirtualMachine::initialize() {
|
||||||
if (gave_SOF)
|
if (gave_SOF)
|
||||||
exitf("double feedSOF\n");
|
throw std::runtime_error("double feedSOF\n");
|
||||||
gave_SOF = true;
|
gave_SOF = true;
|
||||||
return reveal->feedSOF();
|
return reveal->feedSOF();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool REGEX_IS024_VirtualMachine::isInitialized() {
|
bool VirtualMachine::isInitialized() {
|
||||||
return reveal->initialized;
|
return reveal->initialized;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool REGEX_IS024_VirtualMachine::isUsable() {
|
bool VirtualMachine::isUsable() {
|
||||||
return isInitialized() && reveal->error == regex024_error_codes::stable;
|
return isInitialized() && reveal->error == error_codes::stable;
|
||||||
}
|
}
|
||||||
|
|
||||||
REGEX_IS024_VirtualMachine::~REGEX_IS024_VirtualMachine() {
|
VirtualMachine::~VirtualMachine() {
|
||||||
delete reveal;
|
delete reveal;
|
||||||
}
|
}
|
||||||
|
|
||||||
regex_tai_t REGEX_IS024_VirtualMachine::getSelectionArrayLength() {
|
tai_t VirtualMachine::getSelectionArrayLength() {
|
||||||
return isUsable() ? reveal->selection_array_len : 0;
|
return isUsable() ? reveal->selection_array_len : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool REGEX_IS024_VirtualMachine::isAllowMultistart() {
|
bool VirtualMachine::isAllowMultistart() {
|
||||||
return isUsable() ? reveal->allows_multistart : false;
|
return isUsable() ? reveal->allows_multistart : false;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t REGEX_IS024_VirtualMachine::getInputLeftExtensionSize() {
|
uint8_t VirtualMachine::getInputLeftExtensionSize() {
|
||||||
return isUsable() ? reveal->fed_input_extends_left : 0;
|
return isUsable() ? reveal->fed_input_extends_left : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t REGEX_IS024_VirtualMachine::getInputRightExtensionSize() {
|
uint8_t VirtualMachine::getInputRightExtensionSize() {
|
||||||
return isUsable() ? reveal->fed_input_extends_right : 0;
|
return isUsable() ? reveal->fed_input_extends_right : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
regex024_error_code REGEX_IS024_VirtualMachine::getErrno() {
|
error_code_t VirtualMachine::getErrno() {
|
||||||
return reveal->error;
|
return reveal->error;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Stupid kinda function. Checks if somebody is ready to continue reading the actual string */
|
/* Stupid kinda function. Checks if somebody is ready to continue reading the actual string or extended l-r input */
|
||||||
bool REGEX_IS024_VirtualMachine::haveSurvivors() {
|
bool VirtualMachine::haveSurvivors() {
|
||||||
return isUsable() && (reveal->READ_halted_stack_new_first.non_empty());
|
return isUsable() && (!reveal->READ_halted_stack_new_first.empty() || !reveal->READ_halted_stack_new_second.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool REGEX_IS024_VirtualMachine::isMatched() {
|
bool VirtualMachine::isMatched() {
|
||||||
return isUsable() && static_cast<bool>((reveal->matched_thread.slot_occupation_status & SLOT_OCCUPIED));
|
return isUsable() && static_cast<bool>((reveal->matched_thread.slot_occupation_status & SLOT_OCCUPIED));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<REGEX_IS024_CAEvent> REGEX_IS024_VirtualMachine::getMatchedThreadCABranchReverse() {
|
std::vector<CAEvent> VirtualMachine::getMatchedThreadCABranchReverse() {
|
||||||
if (!isMatched())
|
if (!isMatched())
|
||||||
return {};
|
return {};
|
||||||
std::vector<REGEX_IS024_CAEvent> res;
|
std::vector<CAEvent> res;
|
||||||
REGEX024_CollectionArrayNode* cur = reveal->matched_thread.CAHptr;
|
CollectionArrayNode* cur = reveal->matched_thread.CAHptr;
|
||||||
while (cur != NULL){
|
while (cur != NULL){
|
||||||
res.push_back({cur->key, cur->value});
|
res.push_back({cur->key, cur->value});
|
||||||
cur = cur->prev;
|
cur = cur->prev;
|
||||||
}
|
}
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t REGEX_IS024_VirtualMachine::getMatchedThreadSAValue(uint16_t key) {
|
uint64_t VirtualMachine::getMatchedThreadSAValue(uint16_t key) {
|
||||||
if (key >= getSelectionArrayLength())
|
if (key >= getSelectionArrayLength())
|
||||||
return 0;
|
return 0;
|
||||||
if (!isMatched())
|
if (!isMatched())
|
||||||
return 0;
|
return 0;
|
||||||
return reveal->matched_thread.SAptr ? reveal->matched_thread.SAptr[key + 1] : 0;
|
return reveal->matched_thread.SAptr ? reveal->matched_thread.SAptr[key + 1] : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
regex024_error_code REGEX_IS024_VirtualMachine::addNewMatchingThread() {
|
error_code_t VirtualMachine::addNewMatchingThread() {
|
||||||
if (!isUsable())
|
if (!isUsable())
|
||||||
exitf("unusable\n");
|
throw std::runtime_error("unusable");
|
||||||
// if (started_first_thread && !isAllowMultistart())
|
|
||||||
// exitf("Multistart is forbidden, bad usage of program\n");
|
|
||||||
return reveal->startThread();
|
return reveal->startThread();
|
||||||
}
|
}
|
||||||
|
|
||||||
regex024_error_code REGEX_IS024_VirtualMachine::extendedFeedCharacter(uint64_t input) {
|
error_code_t VirtualMachine::extendedFeedCharacter(uint64_t input) {
|
||||||
if (!isUsable())
|
if (!isUsable())
|
||||||
exitf("unusable\n");
|
throw std::runtime_error("unusable\n");
|
||||||
return reveal->extendedFeedCharacter(input);
|
return reveal->extendedFeedCharacter(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
regex024_error_code REGEX_IS024_VirtualMachine::feedCharacter(uint64_t input, uint64_t bytesResembled) {
|
error_code_t VirtualMachine::feedCharacter(uint64_t input, uint64_t bytesResembled) {
|
||||||
if (!isUsable())
|
if (!isUsable())
|
||||||
exitf("unusable\n");
|
throw std::runtime_error("unusable\n");
|
||||||
return reveal->feedCharacter(input, bytesResembled);
|
return reveal->feedCharacter(input, bytesResembled);
|
||||||
|
}
|
||||||
}
|
}
|
@ -6,41 +6,42 @@
|
|||||||
#include <libregexis024vm/vm_errno.h>
|
#include <libregexis024vm/vm_errno.h>
|
||||||
#include <libregexis024vm/vm_opcodes_types.h>
|
#include <libregexis024vm/vm_opcodes_types.h>
|
||||||
|
|
||||||
struct REGEX_IS024_CAEvent{
|
namespace regexis024 {
|
||||||
regex_tai_t key;
|
struct CAEvent{
|
||||||
|
tai_t key;
|
||||||
uint64_t value;
|
uint64_t value;
|
||||||
bool operator==(const REGEX_IS024_CAEvent& other) const;
|
bool operator==(const CAEvent& other) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
class REGEX_IS024_VirtualMachine{
|
struct VirtualMachine{
|
||||||
public:
|
VirtualMachine(size_t programSize, const uint8_t *data, uint64_t caTreeLimit, uint16_t saLenLimit,
|
||||||
REGEX_IS024_VirtualMachine(size_t programSize, const uint8_t *data, uint64_t caTreeLimit, uint16_t saLenLimit,
|
|
||||||
uint32_t readSsLimit, uint32_t forkSsLimit, uint64_t timeTickLimit);
|
uint32_t readSsLimit, uint32_t forkSsLimit, uint64_t timeTickLimit);
|
||||||
|
|
||||||
REGEX_IS024_VirtualMachine(const REGEX_IS024_VirtualMachine& ) = delete;
|
VirtualMachine(const VirtualMachine& ) = delete;
|
||||||
REGEX_IS024_VirtualMachine& operator=(const REGEX_IS024_VirtualMachine&) = delete;
|
VirtualMachine& operator=(const VirtualMachine&) = delete;
|
||||||
|
|
||||||
regex024_error_code initialize();
|
error_code_t initialize();
|
||||||
bool isInitialized();
|
bool isInitialized();
|
||||||
bool isUsable();
|
bool isUsable();
|
||||||
virtual ~REGEX_IS024_VirtualMachine();
|
virtual ~VirtualMachine();
|
||||||
regex_tai_t getSelectionArrayLength();
|
tai_t getSelectionArrayLength();
|
||||||
bool isAllowMultistart();
|
bool isAllowMultistart();
|
||||||
uint8_t getInputLeftExtensionSize();
|
uint8_t getInputLeftExtensionSize();
|
||||||
uint8_t getInputRightExtensionSize();
|
uint8_t getInputRightExtensionSize();
|
||||||
regex024_error_code getErrno();
|
error_code_t getErrno();
|
||||||
bool haveSurvivors();
|
bool haveSurvivors();
|
||||||
bool isMatched();
|
bool isMatched();
|
||||||
std::vector<REGEX_IS024_CAEvent> getMatchedThreadCABranchReverse();
|
std::vector<CAEvent> getMatchedThreadCABranchReverse();
|
||||||
uint64_t getMatchedThreadSAValue(uint16_t key);
|
uint64_t getMatchedThreadSAValue(uint16_t key);
|
||||||
|
|
||||||
regex024_error_code addNewMatchingThread();
|
error_code_t addNewMatchingThread();
|
||||||
regex024_error_code extendedFeedCharacter(uint64_t input);
|
error_code_t extendedFeedCharacter(uint64_t input);
|
||||||
regex024_error_code feedCharacter(uint64_t input, uint64_t bytesResembled);
|
error_code_t feedCharacter(uint64_t input, uint64_t bytesResembled);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool gave_SOF = false;
|
bool gave_SOF = false;
|
||||||
void* opaque;
|
void* opaque;
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_LIBREGEXIS024VM_INTERFACE_H
|
#endif //LIBREGEXIS024_LIBREGEXIS024VM_INTERFACE_H
|
||||||
|
@ -10,35 +10,28 @@
|
|||||||
#error "Big endian is currently unsupported"
|
#error "Big endian is currently unsupported"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void exitf(const char *fmt, ...) {
|
namespace regexis024 {
|
||||||
va_list va;
|
int utf8_retrieve_size(char firstByte) {
|
||||||
va_start(va, fmt);
|
if (!((uint8_t)firstByte & 0b10000000))
|
||||||
vfprintf(stderr, fmt, va);
|
|
||||||
va_end(va);
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
int utf8_retrieve_size(uint8_t firstByte) {
|
|
||||||
if (!(firstByte & 0b10000000))
|
|
||||||
return 1;
|
return 1;
|
||||||
uint8_t a = 0b11000000;
|
uint8_t a = 0b11000000;
|
||||||
uint8_t b = 0b00100000;
|
uint8_t b = 0b00100000;
|
||||||
for (int i = 2; i <= 4; i++){
|
for (int i = 2; i <= 4; i++){
|
||||||
if ((firstByte & (a | b)) == a)
|
if (((uint8_t)firstByte & (a | b)) == a)
|
||||||
return i;
|
return i;
|
||||||
a |= b;
|
a |= b;
|
||||||
b >>= 1;
|
b >>= 1;
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t utf8_retrieve_character(int sz, size_t pos, const uint8_t *string) {
|
int32_t utf8_retrieve_character(int sz, size_t pos, const char *string) {
|
||||||
if (sz == 1)
|
if (sz == 1)
|
||||||
return string[pos];
|
return (uint8_t)string[pos];
|
||||||
uint32_t v = string[pos] & (0b01111111 >> sz);
|
uint32_t v = (uint8_t)string[pos] & (0b01111111 >> sz);
|
||||||
pos++;
|
pos++;
|
||||||
for (int i = 1; i < sz; i++){
|
for (int i = 1; i < sz; i++){
|
||||||
uint32_t th = string[pos];
|
uint32_t th = (uint8_t)string[pos];
|
||||||
if ((th & 0b11000000) != 0b10000000)
|
if ((th & 0b11000000) != 0b10000000)
|
||||||
return -1;
|
return -1;
|
||||||
v <<= 6;
|
v <<= 6;
|
||||||
@ -47,18 +40,16 @@ int32_t utf8_retrieve_character(int sz, size_t pos, const uint8_t *string) {
|
|||||||
}
|
}
|
||||||
assert(v <= INT32_MAX);
|
assert(v <= INT32_MAX);
|
||||||
return static_cast<int32_t>(v);
|
return static_cast<int32_t>(v);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define AAAAAA {cp = -1; return;}
|
void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size) {
|
||||||
|
if (pos >= string_size) {cp = -1; return;}
|
||||||
void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const uint8_t *string, size_t string_size) {
|
|
||||||
if (pos >= string_size) AAAAAA
|
|
||||||
adj = utf8_retrieve_size(string[pos]);
|
adj = utf8_retrieve_size(string[pos]);
|
||||||
if (adj < 0 || pos + adj > string_size) AAAAAA
|
if (adj < 0 || pos + adj > string_size) {cp = -1; return;}
|
||||||
if ((cp = utf8_retrieve_character(adj, pos, string)) < 0) AAAAAA
|
if ((cp = utf8_retrieve_character(adj, pos, string)) < 0) {cp = -1;}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool is_string_in_stringset(const char *strSample, const char **strSet) {
|
bool is_string_in_stringset(const char *strSample, const char **strSet) {
|
||||||
const char** cmpSubject = strSet;
|
const char** cmpSubject = strSet;
|
||||||
while ((*cmpSubject) != NULL){
|
while ((*cmpSubject) != NULL){
|
||||||
if (strcmp(strSample, *cmpSubject) == 0)
|
if (strcmp(strSample, *cmpSubject) == 0)
|
||||||
@ -66,4 +57,5 @@ bool is_string_in_stringset(const char *strSample, const char **strSet) {
|
|||||||
cmpSubject++; // += 8 bytes
|
cmpSubject++; // += 8 bytes
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
@ -4,18 +4,19 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
void exitf(const char* fmt, ...);
|
// todo: move this file out from my eyes.
|
||||||
|
namespace regexis024 {
|
||||||
|
/* 1, 2, 3, 4 on success; -1 on error */
|
||||||
|
int utf8_retrieve_size(char firstByte);
|
||||||
|
|
||||||
/* 1, 2, 3, 4 on success; -1 on error */
|
/* sz is a positive value returned by utf8_retrieve_size. Returns negative on error */
|
||||||
int utf8_retrieve_size(uint8_t firstByte);
|
int32_t utf8_retrieve_character(int sz, size_t pos, const char* string);
|
||||||
|
|
||||||
/* sz is a positive value returned by utf8_retrieve_size. Returns negative on error */
|
/* cp is negative on error. adj is the size of letter in bytes. Can be used to adjust pos.
|
||||||
int32_t utf8_retrieve_character(int sz, size_t pos, const uint8_t* string);
|
|
||||||
|
|
||||||
/* cp is negative on error. adj is the size of letter in bytes. Can be used to adjust pos.
|
|
||||||
* All safety checks will be performed */
|
* All safety checks will be performed */
|
||||||
void utf8_string_iterat(int32_t& cp, size_t& adj, size_t pos, const uint8_t* string, size_t string_size);
|
void utf8_string_iterat(int32_t& cp, size_t& adj, size_t pos, const char* string, size_t string_size);
|
||||||
|
|
||||||
bool is_string_in_stringset(const char* strSample, const char* strSet[]);
|
bool is_string_in_stringset(const char* strSample, const char* strSet[]);
|
||||||
|
}
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_UTILS_H
|
#endif //LIBREGEXIS024_UTILS_H
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
#include <libregexis024vm/vm_errno.h>
|
#include <libregexis024vm/vm_errno.h>
|
||||||
|
|
||||||
const char *regex024_error_code_tostr(regex024_error_code x) {
|
namespace regexis024 {
|
||||||
#define rcase(name) case regex024_error_codes::name: return #name;
|
const char *error_code_to_str(error_code_t x) {
|
||||||
|
#define rcase(name) case error_codes::name: return #name;
|
||||||
switch (x) {
|
switch (x) {
|
||||||
rcase(stable)
|
rcase(stable)
|
||||||
rcase(ca_tree_limit_violation)
|
rcase(ca_tree_limit_violation)
|
||||||
@ -23,4 +24,5 @@ const char *regex024_error_code_tostr(regex024_error_code x) {
|
|||||||
default:
|
default:
|
||||||
return "unknown_error_code";
|
return "unknown_error_code";
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
@ -3,7 +3,8 @@
|
|||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
namespace regex024_error_codes {
|
namespace regexis024 {
|
||||||
|
namespace error_codes {
|
||||||
enum regex024_error_code_I: int {
|
enum regex024_error_code_I: int {
|
||||||
stable = 0,
|
stable = 0,
|
||||||
ca_tree_limit_violation = -1,
|
ca_tree_limit_violation = -1,
|
||||||
@ -36,10 +37,11 @@ namespace regex024_error_codes {
|
|||||||
/* O_o */
|
/* O_o */
|
||||||
bad_alloc = -17,
|
bad_alloc = -17,
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef error_codes::regex024_error_code_I error_code_t;
|
||||||
|
|
||||||
|
const char* error_code_to_str(error_code_t x);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef regex024_error_codes::regex024_error_code_I regex024_error_code;
|
|
||||||
|
|
||||||
const char* regex024_error_code_tostr(regex024_error_code x);
|
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_VM_ERRNO_H
|
#endif //LIBREGEXIS024_VM_ERRNO_H
|
||||||
|
@ -3,7 +3,8 @@
|
|||||||
|
|
||||||
#include <libregexis024vm/vm_opcodes_types.h>
|
#include <libregexis024vm/vm_opcodes_types.h>
|
||||||
|
|
||||||
namespace regex024_opcodes {
|
namespace regexis024 {
|
||||||
|
namespace opcodes {
|
||||||
enum regex024_opcode_I: uint8_t{
|
enum regex024_opcode_I: uint8_t{
|
||||||
/* READ <Settlement ID> */
|
/* READ <Settlement ID> */
|
||||||
READ = 0,
|
READ = 0,
|
||||||
@ -72,28 +73,27 @@ namespace regex024_opcodes {
|
|||||||
THROW = 35,
|
THROW = 35,
|
||||||
regex024_opcode_greaterMax = 36
|
regex024_opcode_greaterMax = 36
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef opcodes::regex024_opcode_I opcode_t;
|
||||||
|
|
||||||
|
const char* opcode_to_str(opcode_t x);
|
||||||
|
|
||||||
|
constexpr uint64_t BYTECODE_INSTRUCTION_SZ = 1;
|
||||||
|
constexpr uint64_t BYTECODE_SSLOT_ID_SZ = 4;
|
||||||
|
constexpr uint64_t BYTECODE_TRACK_ARRAY_INDEX_ID_SZ = 2;
|
||||||
|
constexpr uint64_t BYTECODE_NEAR_POINTER_SZ = 8;
|
||||||
|
|
||||||
|
bool vmprog_check_inboundness(near_ptr_t prgSize, near_ptr_t IP, near_ptr_t region);
|
||||||
|
|
||||||
|
uint8_t vmprog_extract_b(near_ptr_t* IPptr, const uint8_t* prg);
|
||||||
|
uint16_t vmprog_extract_w(near_ptr_t* IPptr, const uint8_t* prg);
|
||||||
|
uint32_t vmprog_extract_dw(near_ptr_t* IPptr, const uint8_t* prg);
|
||||||
|
uint64_t vmprog_extract_qw(near_ptr_t* IPptr, const uint8_t* prg);
|
||||||
|
|
||||||
|
uint8_t vmprog_extract_instruction(near_ptr_t* IPptr, const uint8_t* prg);
|
||||||
|
sslot_id_t vmprog_extract_sslot_id(near_ptr_t* IPptr, const uint8_t* prg);
|
||||||
|
near_ptr_t vmprog_extract_near_pointer(near_ptr_t* IPptr, const uint8_t* prg);
|
||||||
|
tai_t vmprog_extrack_track_array_index(near_ptr_t* IPptr, const uint8_t* prg);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef regex024_opcodes::regex024_opcode_I regex024_opcode;
|
|
||||||
|
|
||||||
const char* regex024_opcode_tostr(regex024_opcode x);
|
|
||||||
|
|
||||||
|
|
||||||
constexpr uint64_t REGEX024_BYTECODE_INSTRUCTION_SZ = 1;
|
|
||||||
constexpr uint64_t REGEX024_BYTECODE_SSLOT_ID_SZ = 4;
|
|
||||||
constexpr uint64_t REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ = 2;
|
|
||||||
constexpr uint64_t REGEX024_BYTECODE_NEAR_POINTER_SZ = 8;
|
|
||||||
|
|
||||||
bool vmprog_check_inboundness(regex_near_ptr_t prgSize, regex_near_ptr_t IP, regex_near_ptr_t region);
|
|
||||||
|
|
||||||
uint8_t vmprog_extract_b(regex_near_ptr_t* IPptr, const uint8_t* prg);
|
|
||||||
uint16_t vmprog_extract_w(regex_near_ptr_t* IPptr, const uint8_t* prg);
|
|
||||||
uint32_t vmprog_extract_dw(regex_near_ptr_t* IPptr, const uint8_t* prg);
|
|
||||||
uint64_t vmprog_extract_qw(regex_near_ptr_t* IPptr, const uint8_t* prg);
|
|
||||||
|
|
||||||
uint8_t vmprog_extract_instruction(regex_near_ptr_t* IPptr, const uint8_t* prg);
|
|
||||||
regex_sslot_id_t vmprog_extract_sslot_id(regex_near_ptr_t* IPptr, const uint8_t* prg);
|
|
||||||
regex_near_ptr_t vmprog_extract_near_pointer(regex_near_ptr_t* IPptr, const uint8_t* prg);
|
|
||||||
regex_tai_t vmprog_extrack_track_array_index(regex_near_ptr_t* IPptr, const uint8_t* prg);
|
|
||||||
|
|
||||||
#endif //LIBREGEXIS024_VM_OPCODES_H
|
#endif //LIBREGEXIS024_VM_OPCODES_H
|
||||||
|
@ -1,47 +1,54 @@
|
|||||||
#include <libregexis024vm/vm_opcodes.h>
|
#include <libregexis024vm/vm_opcodes.h>
|
||||||
|
|
||||||
#ifndef __ORDER_LITTLE_ENDIAN__
|
namespace regexis024 {
|
||||||
#error "Big endian is currently unsupported"
|
bool vmprog_check_inboundness(near_ptr_t prgSz, near_ptr_t IP, near_ptr_t region) {
|
||||||
#endif
|
|
||||||
|
|
||||||
bool vmprog_check_inboundness(regex_near_ptr_t prgSz, regex_near_ptr_t IP, regex_near_ptr_t region) {
|
|
||||||
return IP + region <= prgSz;
|
return IP + region <= prgSz;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t vmprog_extract_b(regex_near_ptr_t *IPptr, const uint8_t *prg) {
|
uint8_t vmprog_extract_b(near_ptr_t *IPptr, const uint8_t *prg) {
|
||||||
return prg[(*IPptr)++];
|
return prg[(*IPptr)++];
|
||||||
}
|
}
|
||||||
|
|
||||||
uint16_t vmprog_extract_w(regex_near_ptr_t *IPptr, const uint8_t *prg) {
|
uint16_t vmprog_extract_w(near_ptr_t *IPptr, const uint8_t *prg) {
|
||||||
uint16_t answer = *(uint16_t*)(&prg[*IPptr]);
|
uint16_t answer = 0;
|
||||||
*IPptr += 2;
|
(*IPptr) += 2;
|
||||||
|
for (int i = 1; i < 3; i++) {
|
||||||
|
answer <<= 8; answer |= prg[(*IPptr) - i];
|
||||||
|
}
|
||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t vmprog_extract_dw(regex_near_ptr_t *IPptr, const uint8_t *prg) {
|
uint32_t vmprog_extract_dw(near_ptr_t *IPptr, const uint8_t *prg) {
|
||||||
uint32_t answer = *(uint32_t *)(&prg[*IPptr]);
|
uint32_t answer = 0;
|
||||||
*IPptr += 4;
|
(*IPptr) += 4;
|
||||||
|
for (int i = 1; i < 5; i++) {
|
||||||
|
answer <<= 8; answer |= prg[(*IPptr) - i];
|
||||||
|
}
|
||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t vmprog_extract_qw(regex_near_ptr_t *IPptr, const uint8_t *prg) {
|
uint64_t vmprog_extract_qw(near_ptr_t *IPptr, const uint8_t *prg) {
|
||||||
uint64_t answer = *(uint64_t *)(&prg[*IPptr]);
|
uint64_t answer = 0;
|
||||||
*IPptr += 8;
|
(*IPptr) += 8;
|
||||||
|
for (int i = 1; i < 9; i++) {
|
||||||
|
answer <<= 8; answer |= prg[(*IPptr) - i];
|
||||||
|
}
|
||||||
return answer;
|
return answer;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t vmprog_extract_instruction(regex_near_ptr_t *IPptr, const uint8_t *prg) {
|
uint8_t vmprog_extract_instruction(near_ptr_t *IPptr, const uint8_t *prg) {
|
||||||
return vmprog_extract_b(IPptr, prg);
|
return vmprog_extract_b(IPptr, prg);
|
||||||
}
|
}
|
||||||
|
|
||||||
regex_sslot_id_t vmprog_extract_sslot_id(regex_near_ptr_t *IPptr, const uint8_t *prg) {
|
sslot_id_t vmprog_extract_sslot_id(near_ptr_t *IPptr, const uint8_t *prg) {
|
||||||
return vmprog_extract_dw(IPptr, prg);
|
return vmprog_extract_dw(IPptr, prg);
|
||||||
}
|
}
|
||||||
|
|
||||||
regex_near_ptr_t vmprog_extract_near_pointer(regex_near_ptr_t *IPptr, const uint8_t *prg) {
|
near_ptr_t vmprog_extract_near_pointer(near_ptr_t *IPptr, const uint8_t *prg) {
|
||||||
return vmprog_extract_qw(IPptr, prg);
|
return vmprog_extract_qw(IPptr, prg);
|
||||||
}
|
}
|
||||||
|
|
||||||
regex_tai_t vmprog_extrack_track_array_index(regex_near_ptr_t *IPptr, const uint8_t *prg) {
|
tai_t vmprog_extrack_track_array_index(near_ptr_t *IPptr, const uint8_t *prg) {
|
||||||
return vmprog_extract_w(IPptr, prg);
|
return vmprog_extract_w(IPptr, prg);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,9 +3,10 @@
|
|||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
typedef uint32_t regex_sslot_id_t;
|
namespace regexis024 {
|
||||||
typedef uint64_t regex_near_ptr_t;
|
typedef uint32_t sslot_id_t;
|
||||||
typedef uint16_t regex_tai_t;
|
typedef uint64_t near_ptr_t;
|
||||||
|
typedef uint16_t tai_t;
|
||||||
|
}
|
||||||
|
|
||||||
#endif //VM_OPCODES_TYPES_H
|
#endif //VM_OPCODES_TYPES_H
|
||||||
|
Loading…
Reference in New Issue
Block a user