From 12a7db912f69ec92defc3fcfe0836a96a8f607fa Mon Sep 17 00:00:00 2001 From: Andreev Gregory Date: Wed, 31 Jul 2024 18:55:43 +0300 Subject: [PATCH] debloatified my virtual machine a little bit, fixed stringmatching bug --- .../vm/libregexis024vm_debug.cpp | 7 ++-- .../graph_to_bytecode/core.cpp | 28 +++----------- src/libregexis024fa/graph_to_bytecode/core.h | 2 +- .../graph_to_bytecode/fa_compiler.cpp | 23 +++--------- .../graph_to_bytecode/writing_commands.cpp | 8 +--- .../graph_to_bytecode/writing_commands.h | 3 +- src/libregexis024test/test4.cpp | 37 ++++++------------- src/libregexis024tools/stringmatching.cpp | 23 ++---------- .../instruction_implementation.cpp | 37 ++----------------- .../libregex024opcodes_stringification.cpp | 2 - src/libregexis024vm/libregexis024vm.h | 7 +--- .../libregexis024vm_context.cpp | 17 +-------- .../libregexis024vm_interface.cpp | 22 +---------- .../libregexis024vm_interface.h | 4 -- src/libregexis024vm/utils.cpp | 4 -- src/libregexis024vm/vm_opcodes.h | 4 -- 16 files changed, 40 insertions(+), 188 deletions(-) diff --git a/src/debugging_regexis024/vm/libregexis024vm_debug.cpp b/src/debugging_regexis024/vm/libregexis024vm_debug.cpp index cac2b54..1ee2494 100644 --- a/src/debugging_regexis024/vm/libregexis024vm_debug.cpp +++ b/src/debugging_regexis024/vm/libregexis024vm_debug.cpp @@ -36,10 +36,9 @@ namespace regexis024 { FORK_slots += (stat & SLOT_OCCUPIED) ? "O" : "x"; } char buf[4096]; - snprintf(buf, 4096, "READ_slots: %s ; FORK_slots: %s ; READ_stack_new_main: %s ; " - "READ_stack_new_second: %s ; READ_stack_old: %s ; FORK_stack: %s", - READ_slots.c_str(), FORK_slots.c_str(), stack_to_str(ctx.READ_halted_stack_new_first).c_str(), - stack_to_str(ctx.READ_halted_stack_new_second).c_str(), + snprintf(buf, 4096, "READ_slots: %s ; FORK_slots: %s ; READ_stack_new: %s ; " + "READ_stack_old: %s ; FORK_stack: %s", + READ_slots.c_str(), FORK_slots.c_str(), stack_to_str(ctx.READ_halted_stack_new).c_str(), stack_to_str(ctx.READ_halted_stack_old).c_str(), stack_to_str(ctx.FORK_halted_stack).c_str()); return buf; } diff --git a/src/libregexis024fa/graph_to_bytecode/core.cpp b/src/libregexis024fa/graph_to_bytecode/core.cpp index 38c97fa..dc189bc 100644 --- a/src/libregexis024fa/graph_to_bytecode/core.cpp +++ b/src/libregexis024fa/graph_to_bytecode/core.cpp @@ -8,17 +8,13 @@ namespace regexis024 { #define nonthrowing_assert(expr) if (!(expr)) {error = -1; return; } void compilation_core(std::vector& result, FA_Container& fa, explicit_bookmarks& bookmark_manager, - size_t& first_read_ns, size_t& second_read_ns, size_t& fork_ss_ns, int& error) + size_t& read_ss_ns, size_t& fork_ss_ns, int& error) { bookmark_id_t node_start_bm_offset = bookmark_manager.new_range_of_bookmarks(fa.all.size()); - std::vector not_yet_dedicated_second_read_ns_ssids; - first_read_ns = 0; - second_read_ns = 0; + read_ss_ns = 0; fork_ss_ns = 0; assert(fa.start); std::vector todo = {fa.start}; - // std::vector promised(fa.all.size(), false); - // promised[fa.start->nodeId] = true; auto nodesBookmark = [&](FA_Node* node) -> bookmark_id_t { assert(node); @@ -29,15 +25,6 @@ namespace regexis024 { todo.push_back(node); }; - auto reading_head = [&](bool is_in_second_ns) { - if (is_in_second_ns) { - cmd_READ_second_ns(result, not_yet_dedicated_second_read_ns_ssids); - second_read_ns++; - } else { - cmd_READ_first_ns(result, first_read_ns++); - } - }; - while (!todo.empty()) { FA_Node* node = todo.back(); todo.pop_back(); if (bookmark_manager.has_landed(nodesBookmark(node))) { @@ -55,8 +42,8 @@ namespace regexis024 { break; } else if (node->type == one_char_read) { FA_NodeOfOneCharRead* ocr = dynamic_cast(node); - nonthrowing_assert(first_read_ns + second_read_ns < UINT32_MAX); - reading_head(ocr->second_ns); + nonthrowing_assert(read_ss_ns < UINT32_MAX); + cmd_READ(result, read_ss_ns++); write_filter(result, bookmark_manager, {ocr->filter},{nodesBookmark(ocr->nxt_node)}); node = ocr->nxt_node; } else if (node->type == look_one_behind) { @@ -92,10 +79,10 @@ namespace regexis024 { node = tamh->nxt_node; } else if (node->type == det_char_crossroads) { FA_NodeOfDetCharCrossroads* dcc = dynamic_cast(node); - nonthrowing_assert(first_read_ns + second_read_ns < UINT32_MAX); + nonthrowing_assert(read_ss_ns < UINT32_MAX); if (dcc->matching) cmd_MATCH(result); - reading_head(dcc->second_ns); + cmd_READ(result, read_ss_ns++); std::vector codesets; std::vector branches; for (const DFA_CrossroadPath& p: dcc->crossroads) { @@ -111,8 +98,5 @@ namespace regexis024 { assert(false); } } - for (size_t j = 0; j < not_yet_dedicated_second_read_ns_ssids.size(); j++) { - belated_sslot_id(result, not_yet_dedicated_second_read_ns_ssids[j], j + first_read_ns); - } } } diff --git a/src/libregexis024fa/graph_to_bytecode/core.h b/src/libregexis024fa/graph_to_bytecode/core.h index c1fb732..c9f328d 100644 --- a/src/libregexis024fa/graph_to_bytecode/core.h +++ b/src/libregexis024fa/graph_to_bytecode/core.h @@ -6,7 +6,7 @@ namespace regexis024 { void compilation_core(std::vector& result, FA_Container& fa, explicit_bookmarks& bookmark_manager, - size_t& first_read_ns, size_t& second_read_ns, size_t& fork_ss_ns, int& error); + size_t& read_ss_ns, size_t& fork_ss_ns, int& error); } #endif diff --git a/src/libregexis024fa/graph_to_bytecode/fa_compiler.cpp b/src/libregexis024fa/graph_to_bytecode/fa_compiler.cpp index 8584491..2e22615 100644 --- a/src/libregexis024fa/graph_to_bytecode/fa_compiler.cpp +++ b/src/libregexis024fa/graph_to_bytecode/fa_compiler.cpp @@ -28,15 +28,11 @@ namespace regexis024 { struct belate_initialization_parameters { size_t todo_pos_read_ss_n; size_t todo_pos_fork_ss_n; - size_t todo_pos_second_ns_size; - void complete_it(std::vector& result, - sslot_id_t first_read_ns, sslot_id_t second_read_ns, sslot_id_t fork_ss_ns) + void complete_it(std::vector& result, sslot_id_t read_ss_ns, sslot_id_t fork_ss_ns) { - assert((uint64_t)first_read_ns + (uint64_t)second_read_ns <= UINT32_MAX); - belated_sslot_id(result, todo_pos_read_ss_n , first_read_ns + second_read_ns); + belated_sslot_id(result, todo_pos_read_ss_n , read_ss_ns); belated_sslot_id(result, todo_pos_fork_ss_n, fork_ss_ns); - belated_sslot_id(result, todo_pos_second_ns_size, second_read_ns); } }; @@ -59,15 +55,6 @@ namespace regexis024 { write_byte(result, opcodes::PARAM_SELARR_LEN); write_tai(result, selarr_size); - write_byte(result, opcodes::MSG_MULTISTART_ALLOWED); - write_byte(result, 1); - - write_byte(result, opcodes::MSG_FED_INPUT_EXTENDED); - write_byte(result, info1.fed_chars_extend_one_left ? 1 : 0); - write_byte(result, info1.fed_chars_extend_one_right ? 1 : 0); - todo.todo_pos_second_ns_size = result.size(); - write_sslot_id(result, 0); // Belate - write_byte(result, opcodes::INIT); return todo; } @@ -94,11 +81,11 @@ namespace regexis024 { belate_initialization_parameters init_param_todo = write_some_normal_initialization(result, selarr_size, info1); - size_t first_read_ns, second_read_ns, fork_ss_ns; - compilation_core(result, fa, bookmark_manager, first_read_ns, second_read_ns, fork_ss_ns, error); + size_t read_ss_ns, fork_ss_ns; + compilation_core(result, fa, bookmark_manager, read_ss_ns, fork_ss_ns, error); if (error < 0) return; - init_param_todo.complete_it(result, first_read_ns, second_read_ns, fork_ss_ns); + init_param_todo.complete_it(result, read_ss_ns, fork_ss_ns); bookmark_manager.finish(result); } } diff --git a/src/libregexis024fa/graph_to_bytecode/writing_commands.cpp b/src/libregexis024fa/graph_to_bytecode/writing_commands.cpp index b57ff8f..5bb337c 100644 --- a/src/libregexis024fa/graph_to_bytecode/writing_commands.cpp +++ b/src/libregexis024fa/graph_to_bytecode/writing_commands.cpp @@ -56,7 +56,7 @@ namespace regexis024 { write_byte(result, opcodes::MATCH); } - void cmd_READ_first_ns(std::vector& result, size_t slot) { + void cmd_READ(std::vector& result, size_t slot) { assert(slot <= UINT32_MAX); write_byte(result, opcodes::READ); write_sslot_id(result, slot); @@ -68,10 +68,4 @@ namespace regexis024 { write_sslot_id(result, slot); bookmark_manager.write_unresolved_reference(result, dest); } - - void cmd_READ_second_ns(std::vector& result, std::vector& belate_second_read_ns_slot_args) { - write_byte(result, opcodes::READ); - belate_second_read_ns_slot_args.push_back(result.size()); - write_sslot_id(result, 0); - } } \ No newline at end of file diff --git a/src/libregexis024fa/graph_to_bytecode/writing_commands.h b/src/libregexis024fa/graph_to_bytecode/writing_commands.h index 64efac5..ccf192b 100644 --- a/src/libregexis024fa/graph_to_bytecode/writing_commands.h +++ b/src/libregexis024fa/graph_to_bytecode/writing_commands.h @@ -14,8 +14,7 @@ namespace regexis024 { void cmd_DIE(std::vector& result); void cmd_MATCH(std::vector& result); - void cmd_READ_first_ns(std::vector& result, size_t slot); - void cmd_READ_second_ns(std::vector& result, std::vector& belate_second_read_ns_slot_args); + void cmd_READ(std::vector& result, size_t slot); void cmd_FORK(std::vector &result, explicit_bookmarks& bookmark_manager, size_t slot, bookmark_id_t dest); } diff --git a/src/libregexis024test/test4.cpp b/src/libregexis024test/test4.cpp index cca5335..222ed00 100644 --- a/src/libregexis024test/test4.cpp +++ b/src/libregexis024test/test4.cpp @@ -21,24 +21,19 @@ void test(const string& input, const string& pattern, const MatchInfo& right_ans } int main() { - test("11aa", "^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo({}, {})); - test("aa11", "^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo({}, {})); - test("a111", "^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo()); - test("aa11", "^!A;\\B!A;\\B!any;\\B!any;$", MatchInfo()); - test("1a11", "^!A;\\B!A;\\B!any;\\B!any;$", MatchInfo()); - test("11aa", "!dfa;^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo({}, {})); - test("aa11", "!dfa;^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo({}, {})); - test("a111", "!dfa;^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo()); - test("aa11", "!dfa;^!A;\\B!A;\\B!any;\\B!any;$", MatchInfo()); - test("1a11", "!dfa;^!A;\\B!A;\\B!any;\\B!any;$", MatchInfo()); + test("bababbaa", "[ab]*", MatchInfo({}, {})); + test("bababbaa", "!dfa;[ab]*", MatchInfo({}, {})); + test("d3", "[abc]3", MatchInfo()); + test("a3", "[abc]3", MatchInfo({}, {})); + test("", "", MatchInfo({}, {})); + test("a", "a", MatchInfo({}, {})); + test("abba", "!select{M{max}}a#M(b*)a", MatchInfo({}, {1, 3})); + test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n", - "!dfa;!select{fieldname{ca}fieldbody{ca}}^^^LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n$$$", + "!dfa;!select{fieldname{ca}fieldbody{ca}}LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n", MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {20, 25, 26, 28})); test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n", - "!dfa;!select{fieldname{ca}fieldbody{ca}}^LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+\\>):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n", - MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {20, 25, 26, 28})); - test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n", - "!dfa;!select{fieldname{ca}fieldbody{ca}}^LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n", + "!dfa;!select{fieldname{ca}fieldbody{ca}}LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n", MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {20, 25, 26, 28})); test("LINE\r\nFirst:Second\r\n\r\n", "!select{fieldname{ca}}LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n", @@ -53,12 +48,11 @@ int main() { test("абвввввввгд", "абв*г+д", MatchInfo({}, {})); test("абвввввввд", "абв*г+д", MatchInfo()); test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n", - "!dfa;^LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n", + "!dfa;LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n", MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {})); test("LINE\r\nFirst:Second\r\n\r\n", "LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n", MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}}, {})); - test("C111111111111", "C\\>1*", MatchInfo({}, {})); test("GET / HTTP/1.1\r\nHost: example.com\r\nAAAAA: a\rfaafafdf\r\n\r\n", "!dfa;(GET|POST) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n", MatchInfo()); @@ -66,24 +60,17 @@ int main() { test("\n3432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo()); test("3:::;;432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {})); test("3:::;;432 \r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {})); - test("GET / HTTP/0.9\r\nHost: bibur at\r\nContent-type: html\r\n\r\n", - "^(GET|POST\\>) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n", - MatchInfo({}, {})); test("b", "#boba(b)", MatchInfo({{0, 0}, {1, 1}}, {})); - test("abc", "!selarr{boba{ca}}^a#boba(b)c$", MatchInfo({{0, 1}, {1, 2}}, {1, 2})); + test("abc", "!selarr{boba{ca}}a#boba(b)c", MatchInfo({{0, 1}, {1, 2}}, {1, 2})); for (int i = 0; i < 64; i++) { std::string T; T += ('a' + (i >> 3)); T+= ('a' + (i % 8)); test(T, "(((a|b)|(c|d))|((e|f)|(g|h)))!r{2}", MatchInfo({}, {})); } - test("abba", "!select{M{max}}a#M(b*)a", MatchInfo({}, {1, 3})); test("abba", "!dfa;!select{M{max}}a#M(b*)a", MatchInfo({}, {1, 3})); test("abba", "!select{M{max}}a#M(!any;*)a", MatchInfo({}, {1, 3})); test("abba", "!dfa;!select{M{max}}a#M(!any;*)a", MatchInfo({}, {1, 3})); - test("", "", MatchInfo({}, {})); - test("a", "a", MatchInfo({}, {})); - test("a3", "[abc]3", MatchInfo({}, {})); test("b3", "[abc]3", MatchInfo({}, {})); test("c3", "[abc]3", MatchInfo({}, {})); test("aa", "aa", MatchInfo({}, {})); diff --git a/src/libregexis024tools/stringmatching.cpp b/src/libregexis024tools/stringmatching.cpp index 9ae81a1..0a5b648 100644 --- a/src/libregexis024tools/stringmatching.cpp +++ b/src/libregexis024tools/stringmatching.cpp @@ -46,22 +46,11 @@ namespace regexis024 { retStatus = "Virtual machine initialization. " + getVMErrString(); return -1; } - int left_ext_feed = vm.getInputLeftExtensionSize(); - int right_ext_feed = vm.getInputRightExtensionSize(); - if (left_ext_feed > 1 || right_ext_feed > 1) { - retStatus = "Unnatural extended input request."; - return -1; - } if (vm.addNewMatchingThread() != error_codes::stable) { retStatus = "Virtual machine first kick. " + getVMErrString(); + return -1; } - if (left_ext_feed) { - if (vm.extendedFeedCharacter('\n') != error_codes::stable) { - retStatus = "VM left extended input. " + getVMErrString(); - return -1; - } - } - for (size_t cur_text_pos = 0;cur_text_pos < input.size();) { + for (size_t cur_text_pos = 0; cur_text_pos < input.size();) { int32_t inp_code; size_t adj; utf8_string_iterat(inp_code, adj, cur_text_pos, input.data(), input.size()); @@ -75,12 +64,6 @@ namespace regexis024 { } cur_text_pos += adj; } - if (right_ext_feed) { - if (vm.extendedFeedCharacter('\n') != error_codes::stable) { - retStatus = "VM right extended input. " + getVMErrString(); - return -1; - } - } assert(vm.isUsable()); if (vm.isMatched()) { retMatchInfo.have_match = true; @@ -107,4 +90,4 @@ namespace regexis024 { MatchInfo::MatchInfo(const std::vector &ca_history, const std::vector &sa): ca_history(ca_history), sa(sa), have_match(true) { } -} \ No newline at end of file +} diff --git a/src/libregexis024vm/instruction_implementation.cpp b/src/libregexis024vm/instruction_implementation.cpp index f1cd5bf..0c0fa52 100644 --- a/src/libregexis024vm/instruction_implementation.cpp +++ b/src/libregexis024vm/instruction_implementation.cpp @@ -49,15 +49,6 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } #define sift_mode_check() if (!ctx.sifting_with){ \ ctx.error = error_codes::instruction_not_for_collision_thread; return; } - /* Can append to both read_halted+new stacks of context */ - void read_halted_new_type_stacks_append(VMContext &ctx, sslot_id_t ssid){ - ctx_print_debug(ctx); - if (ssid < ctx.portion_of_FIRST_read_halt_ns){ - ctx.READ_halted_stack_new_first.append(ssid); - } else { - ctx.READ_halted_stack_new_second.append(ssid); - } - } void do_i_read(VMContext &ctx, sslot_id_t ssid) { ctx_print_debug(ctx); @@ -71,13 +62,13 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } } else { swap_old_settled_and_new_active(ctx, other); /* Even though ssid was registed in stack for elders, now young stack should also track this slot */ - read_halted_new_type_stacks_append(ctx, ssid); + ctx.READ_halted_stack_new.append(ssid); } } else { other = ctx.active_thread; other.slot_occupation_status = SLOT_NEW_val; ctx.active_thread.slot_occupation_status = SLOT_EMPTY_val; - read_halted_new_type_stacks_append(ctx, ssid); + ctx.READ_halted_stack_new.append(ssid); ctx.try_to_continue_scheduled(); } } @@ -221,22 +212,6 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx.have_sift_function = false; } - void i_MSG_MULTISTART_ALLOWED(VMContext& ctx){ - ctx_print_debug(ctx); - initialization_phase_check() - check_available_prg(1) - ctx.allows_multistart = (bool)ctx.extract_b(); - } - - void i_MSG_FED_INPUT_EXTENDED(VMContext& ctx){ - ctx_print_debug(ctx); - initialization_phase_check() - check_available_prg(1 + 1 + BYTECODE_SSLOT_ID_SZ) - ctx.fed_input_extends_left = ctx.extract_b(); - ctx.fed_input_extends_right = ctx.extract_b(); - ctx.portion_of_second_read_halt_ns = ctx.extract_sslot_id(); - } - uint64_t get_el_from_selarr(uint64_t* sa, near_ptr_t ind){ return sa ? sa[1UL + ind] : 0; } @@ -427,14 +402,10 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } smitsya(read_sslot_count_limit_violation); if (ctx.fork_slots_number > ctx.FORK_SS_LIMIT) smitsya(fork_sslot_count_limit_violation); - if (ctx.portion_of_second_read_halt_ns > ctx.read_slots_number) - smitsya(fork_sslot_out_of_range); ctx.READ_halted_slots = calloc_slots_array(ctx.read_slots_number); calloc_stack_slots(ctx.READ_halted_stack_old, ctx.read_slots_number); - ctx.portion_of_FIRST_read_halt_ns = ctx.read_slots_number - ctx.portion_of_second_read_halt_ns; - calloc_stack_slots(ctx.READ_halted_stack_new_first, ctx.portion_of_FIRST_read_halt_ns); - calloc_stack_slots(ctx.READ_halted_stack_new_second, ctx.portion_of_second_read_halt_ns); + calloc_stack_slots(ctx.READ_halted_stack_new, ctx.read_slots_number); ctx.FORK_halted_slots = calloc_slots_array(ctx.fork_slots_number); calloc_stack_slots(ctx.FORK_halted_stack, ctx.fork_slots_number); @@ -474,8 +445,6 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } rcase(PARAM_SELARR_LEN) rcase(PARAM_COLSIFTFUNC_SET) rcase(PARAM_COLSIFTFUNC_WIPE) - rcase(MSG_MULTISTART_ALLOWED) - rcase(MSG_FED_INPUT_EXTENDED) rcase(DMOV_RABX_SELARR) rcase(DDIST_RABX_SELARR) rcase(SIFTPRIOR_MIN_RABX) diff --git a/src/libregexis024vm/libregex024opcodes_stringification.cpp b/src/libregexis024vm/libregex024opcodes_stringification.cpp index 0ac869f..455c968 100644 --- a/src/libregexis024vm/libregex024opcodes_stringification.cpp +++ b/src/libregexis024vm/libregex024opcodes_stringification.cpp @@ -28,8 +28,6 @@ namespace regexis024 { rcase(PARAM_SELARR_LEN) rcase(PARAM_COLSIFTFUNC_SET) rcase(PARAM_COLSIFTFUNC_WIPE) - rcase(MSG_MULTISTART_ALLOWED) - rcase(MSG_FED_INPUT_EXTENDED) rcase(DMOV_RABX_SELARR) rcase(DDIST_RABX_SELARR) rcase(SIFTPRIOR_MIN_RABX) diff --git a/src/libregexis024vm/libregexis024vm.h b/src/libregexis024vm/libregexis024vm.h index b66f7dd..bf061bb 100644 --- a/src/libregexis024vm/libregexis024vm.h +++ b/src/libregexis024vm/libregexis024vm.h @@ -101,18 +101,13 @@ namespace regexis024 { bool have_sift_function = false; near_ptr_t sift_function; - bool allows_multistart = false; - uint8_t fed_input_extends_left = 0, fed_input_extends_right = 0; - sslot_id_t portion_of_second_read_halt_ns = 0, portion_of_FIRST_read_halt_ns = 0; - bool initialized = false; near_ptr_t unnatural_started_thread_IP = 1337; error_code_t error = error_codes::stable; Thread* READ_halted_slots; SSID_Stack READ_halted_stack_old; - SSID_Stack READ_halted_stack_new_first; - SSID_Stack READ_halted_stack_new_second; + SSID_Stack READ_halted_stack_new; Thread* FORK_halted_slots; SSID_Stack FORK_halted_stack; diff --git a/src/libregexis024vm/libregexis024vm_context.cpp b/src/libregexis024vm/libregexis024vm_context.cpp index c289e16..78d243a 100644 --- a/src/libregexis024vm/libregexis024vm_context.cpp +++ b/src/libregexis024vm/libregexis024vm_context.cpp @@ -144,8 +144,7 @@ namespace regexis024 { ctx_print_debug(*this); if (matched_thread.slot_occupation_status & SLOT_OCCUPIED) matched_thread.delete_thread(); - emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new_second); - fill_empty_old_read_halted_stack(*this, READ_halted_stack_new_first); + fill_empty_old_read_halted_stack(*this, READ_halted_stack_new); INP = input; passed_bytes += corresponding_byte_amount; passed_chars++; @@ -154,22 +153,10 @@ namespace regexis024 { return error; } - error_code_t VMContext::extendedFeedCharacter(uint64_t input) { - ctx_print_debug(*this); - if (matched_thread.slot_occupation_status & SLOT_OCCUPIED) - matched_thread.delete_thread(); - fill_empty_old_read_halted_stack(*this, READ_halted_stack_new_second); - INP = input; - try_to_continue_scheduled(); - kick(*this); - return error; - } - VMContext::~VMContext() { ctx_print_debug(*this); if (initialized){ - emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new_first); - emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new_second); + emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new); while (!READ_halted_stack_old.empty()){ Thread& thread = READ_halted_slots[READ_halted_stack_old.pop()]; assert(thread.slot_occupation_status & SLOT_OCCUPIED); diff --git a/src/libregexis024vm/libregexis024vm_interface.cpp b/src/libregexis024vm/libregexis024vm_interface.cpp index 0594e0f..f8506f5 100644 --- a/src/libregexis024vm/libregexis024vm_interface.cpp +++ b/src/libregexis024vm/libregexis024vm_interface.cpp @@ -41,25 +41,13 @@ namespace regexis024 { return isUsable() ? reveal->selection_array_len : 0; } - bool VirtualMachine::isAllowMultistart() { - return isUsable() ? reveal->allows_multistart : false; - } - - uint8_t VirtualMachine::getInputLeftExtensionSize() { - return isUsable() ? reveal->fed_input_extends_left : 0; - } - - uint8_t VirtualMachine::getInputRightExtensionSize() { - return isUsable() ? reveal->fed_input_extends_right : 0; - } error_code_t VirtualMachine::getErrno() { return reveal->error; } - /* Stupid kinda function. Checks if somebody is ready to continue reading the actual string or extended l-r input */ bool VirtualMachine::haveSurvivors() { - return isUsable() && (!reveal->READ_halted_stack_new_first.empty() || !reveal->READ_halted_stack_new_second.empty()); + return isUsable() && (!reveal->READ_halted_stack_new.empty()); } bool VirtualMachine::isMatched() { @@ -92,15 +80,9 @@ namespace regexis024 { return reveal->startThread(); } - error_code_t VirtualMachine::extendedFeedCharacter(uint64_t input) { - if (!isUsable()) - throw std::runtime_error("unusable\n"); - return reveal->extendedFeedCharacter(input); - } - error_code_t VirtualMachine::feedCharacter(uint64_t input, uint64_t bytesResembled) { if (!isUsable()) throw std::runtime_error("unusable\n"); return reveal->feedCharacter(input, bytesResembled); } -} \ No newline at end of file +} diff --git a/src/libregexis024vm/libregexis024vm_interface.h b/src/libregexis024vm/libregexis024vm_interface.h index b8bbf9a..fc6a510 100644 --- a/src/libregexis024vm/libregexis024vm_interface.h +++ b/src/libregexis024vm/libregexis024vm_interface.h @@ -25,9 +25,6 @@ namespace regexis024 { bool isUsable(); virtual ~VirtualMachine(); tai_t getSelectionArrayLength(); - bool isAllowMultistart(); - uint8_t getInputLeftExtensionSize(); - uint8_t getInputRightExtensionSize(); error_code_t getErrno(); bool haveSurvivors(); bool isMatched(); @@ -35,7 +32,6 @@ namespace regexis024 { uint64_t getMatchedThreadSAValue(uint16_t key); error_code_t addNewMatchingThread(); - error_code_t extendedFeedCharacter(uint64_t input); error_code_t feedCharacter(uint64_t input, uint64_t bytesResembled); private: diff --git a/src/libregexis024vm/utils.cpp b/src/libregexis024vm/utils.cpp index ef42dfc..79dde33 100644 --- a/src/libregexis024vm/utils.cpp +++ b/src/libregexis024vm/utils.cpp @@ -6,10 +6,6 @@ #include #include -#ifndef __ORDER_LITTLE_ENDIAN__ -#error "Big endian is currently unsupported" -#endif - namespace regexis024 { int utf8_retrieve_size(char firstByte) { if (!((uint8_t)firstByte & 0b10000000)) diff --git a/src/libregexis024vm/vm_opcodes.h b/src/libregexis024vm/vm_opcodes.h index c3aa7b2..30e8092 100644 --- a/src/libregexis024vm/vm_opcodes.h +++ b/src/libregexis024vm/vm_opcodes.h @@ -45,10 +45,6 @@ namespace regexis024 { PARAM_COLSIFTFUNC_SET = 21, /* PARAM_COLSIFTFUNC_WIPE */ PARAM_COLSIFTFUNC_WIPE = 22, - /* MSG_MULTISTART_ALLOWED <1B> */ - MSG_MULTISTART_ALLOWED = 23, - /* MSG_FED_INPUT_EXTENDED <1B> <1B> */ - MSG_FED_INPUT_EXTENDED = 24, /* DMOVRABXSELARR */ DMOV_RABX_SELARR = 25, /* DDISTRABXSELARR */