From 807d311c1531b50c62e10a1704f42368aed87f12 Mon Sep 17 00:00:00 2001 From: Andreev Gregory Date: Sat, 3 Aug 2024 22:48:40 +0300 Subject: [PATCH] Added wipeToInit method to virtual machine, fixed some bugs, removed debug output --- building/main.cpp | 8 ++- .../debug_through_graphviz.cpp | 2 +- .../natural_compiler_utils.h | 2 + src/libregexis024fa/tracking_variables.h | 18 +++++ src/libregexis024sol/expr_compiler.cpp | 6 +- .../expr_parse_functions/tracking_units.cpp | 2 +- .../part_of_expr_that_tracks.h | 4 +- .../subexpr_fa_transformed.cpp | 1 - src/libregexis024test/test4.cpp | 37 +++++++++- src/libregexis024tools/delayed_matching.cpp | 19 +++++ src/libregexis024tools/delayed_matching.h | 13 ++++ src/libregexis024tools/stringmatching.cpp | 21 +----- src/libregexis024tools/stringmatching.h | 17 ----- .../instruction_implementation.cpp | 62 ++++++++-------- .../instruction_implementation.h | 20 ++++-- src/libregexis024vm/libregexis024vm.h | 19 +---- .../libregexis024vm_context.cpp | 72 ++++++++++--------- .../libregexis024vm_disassembly.cpp | 36 +++++----- .../libregexis024vm_interface.cpp | 4 ++ .../libregexis024vm_interface.h | 2 +- 20 files changed, 214 insertions(+), 151 deletions(-) create mode 100644 src/libregexis024tools/delayed_matching.cpp create mode 100644 src/libregexis024tools/delayed_matching.h diff --git a/building/main.cpp b/building/main.cpp index e3b5906..8fe329f 100644 --- a/building/main.cpp +++ b/building/main.cpp @@ -14,7 +14,7 @@ struct Libregexis024BuildSystem { "-Wno-unused-but-set-variable", "-Wno-reorder"}; std::vector version_flags = {"--std", "c++14", "-D", "_POSIX_C_SOURCE=200809L"}; - std::vector debug_defines_release = {"_GLIBCXX_DEBUG"}; + std::vector debug_defines_release = {}; std::vector debug_defines_debug = {"_GLIBCXX_DEBUG", "LIBREGEXIS024_DEBUG", "LIBREGEXIS024_ALLOW_LOUD"}; std::vector opt_flags_release = {"-g", "-O2"}; std::vector opt_flags_debug = {"-g", "-ggdb", "-O0"}; @@ -79,6 +79,7 @@ struct Libregexis024BuildSystem { "libregexis024sol/expr_parse_functions/command_recognition.cpp", "libregexis024tools/stringmatching.cpp", + "libregexis024tools/delayed_matching.cpp", }; /* These are added to compilation_units_of_release */ @@ -102,6 +103,7 @@ struct Libregexis024BuildSystem { "libregexis024sol/expr_compiler.h", "libregexis024tools/stringmatching.h", + "libregexis024tools/delayed_matching.h", }; CTarget T{"libregexis024", "shared_library"}; @@ -131,14 +133,14 @@ struct Libregexis024BuildSystem { int main(int argc, char** argv) { try { - assert(argc > 0); + ASSERT_pl(argc > 0); std::vector args(argc - 1); for (int i = 0; i + 1 < argc; i++) { args[i] = argv[i + 1]; } NormalCBuildSystemCommandMeaning cmd; regular_bs_cli_cmd_interpret(args, cmd); - Libregexis024BuildSystem bs("debug", cmd); + Libregexis024BuildSystem bs("release", cmd); if (cmd.need_to_build) complete_tasks_of_build_units(bs.runlevel_1); if (cmd.need_to_install) diff --git a/src/debugging_regexis024/debug_through_graphviz.cpp b/src/debugging_regexis024/debug_through_graphviz.cpp index 811a7d2..4cf252e 100644 --- a/src/debugging_regexis024/debug_through_graphviz.cpp +++ b/src/debugging_regexis024/debug_through_graphviz.cpp @@ -198,7 +198,7 @@ namespace regexis024 { std::string infoText; for (auto& p: ktr.track_names){ - const SubtrackingNameInfo& tu = ktr.retrieval_info[p.second]; + const SubtrackingNameUsageInfo& tu = ktr.retrieval_info[p.second]; auto getRole = [](bool presence, tracking_var_type_t type, int first, int second, const std::string& ARR_NAME) -> std::string { diff --git a/src/libregexis024fa/graph_to_bytecode/natural_compiler_utils.h b/src/libregexis024fa/graph_to_bytecode/natural_compiler_utils.h index f10b41b..8dc9c84 100644 --- a/src/libregexis024fa/graph_to_bytecode/natural_compiler_utils.h +++ b/src/libregexis024fa/graph_to_bytecode/natural_compiler_utils.h @@ -4,6 +4,8 @@ #include #include #include +#include + namespace regexis024 { void write_byte(std::vector& result, uint8_t x); void write_word(std::vector& result, uint16_t x); diff --git a/src/libregexis024fa/tracking_variables.h b/src/libregexis024fa/tracking_variables.h index a882adb..2fa17f7 100644 --- a/src/libregexis024fa/tracking_variables.h +++ b/src/libregexis024fa/tracking_variables.h @@ -1,6 +1,9 @@ #ifndef LIBREGEXIS024_SRC_LIBREGEXIS024FA_TRACKING_VARIABLES_H #define LIBREGEXIS024_SRC_LIBREGEXIS024FA_TRACKING_VARIABLES_H +#include +#include + namespace regexis024 { namespace tracking_var_types { enum tracking_var_type_I { @@ -11,6 +14,21 @@ namespace regexis024 { } typedef tracking_var_types::tracking_var_type_I tracking_var_type_t; + + struct TrackingVariableInfo { + bool stored_in_ca = true; + bool stored_in_sa = false; + + tracking_var_type_t type; + /* These fields will be -1 if unused */ + int colarr_first = -1; + int colarr_second = -1; + + int selarr_first = -1; + int selarr_second = -1; + }; + + typedef std::map track_var_list; } #endif diff --git a/src/libregexis024sol/expr_compiler.cpp b/src/libregexis024sol/expr_compiler.cpp index 0f296b2..e65ce11 100644 --- a/src/libregexis024sol/expr_compiler.cpp +++ b/src/libregexis024sol/expr_compiler.cpp @@ -138,7 +138,7 @@ namespace regexis024 { chekushka BracketLvl_ParseCall::firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa) { result.assertDefault(); assert(readChar(ctx) == U'('); - /* sequence lvl already took care about resolving name and configuring SubtrackingNameInfo */ + /* sequence lvl already took care about resolving name and configuring SubtrackingNameUsageInfo */ if (namedSubexpressionId >= 0){ assert(ctx.ktr.retrieval_info[namedSubexpressionId].type == tracking_var_types::range); if (ctx.ktr.retrieval_info[namedSubexpressionId].stored_in_sa){ @@ -157,7 +157,7 @@ namespace regexis024 { readChar(ctx); result = tmp_ret_buff; if (namedSubexpressionId >= 0) { - SubtrackingNameInfo& tai_slots = ctx.ktr.retrieval_info[namedSubexpressionId]; + SubtrackingNameUsageInfo& tai_slots = ctx.ktr.retrieval_info[namedSubexpressionId]; if (tai_slots.stored_in_ca){ assert(tai_slots.colarr_first >= 0 && tai_slots.colarr_first < UINT16_MAX); assert(tai_slots.colarr_second >= 0 && tai_slots.colarr_second < UINT16_MAX); @@ -214,7 +214,7 @@ namespace regexis024 { } } /* Generating priority table (sifting program) */ - for (const SubtrackingNameInfo& sni: ctx.ktr.retrieval_info) { + for (const SubtrackingNameUsageInfo& sni: ctx.ktr.retrieval_info) { if (!sni.discovered) aux_THROW("tracking tool named in !select is not used anywhere"); if (sni.used_in_sifting) { diff --git a/src/libregexis024sol/expr_parse_functions/tracking_units.cpp b/src/libregexis024sol/expr_parse_functions/tracking_units.cpp index 148d048..e33735a 100644 --- a/src/libregexis024sol/expr_parse_functions/tracking_units.cpp +++ b/src/libregexis024sol/expr_parse_functions/tracking_units.cpp @@ -20,7 +20,7 @@ report(ctx, ("regex: " + ARR_NAME + ": key namespace overflow").c_str()); return void ensure_space_for_track_unit(REGEX_IS024_MeaningContext &ctx, const std::string& name, tracking_var_type_t type) { size_t id = ctx.ktr.track_names[name]; /* Size of this verctor won't be changed. THis is a safe reference */ - SubtrackingNameInfo& info = ctx.ktr.retrieval_info[id]; + SubtrackingNameUsageInfo& info = ctx.ktr.retrieval_info[id]; if (!info.discovered){ info.type = type; if (info.stored_in_ca) { diff --git a/src/libregexis024sol/part_of_expr_that_tracks.h b/src/libregexis024sol/part_of_expr_that_tracks.h index 7faf007..85ce883 100644 --- a/src/libregexis024sol/part_of_expr_that_tracks.h +++ b/src/libregexis024sol/part_of_expr_that_tracks.h @@ -7,7 +7,7 @@ #include namespace regexis024 { - struct SubtrackingNameInfo{ + struct SubtrackingNameUsageInfo: public TrackingVariableInfo{ bool stored_in_ca = true; bool stored_in_sa = false; @@ -25,7 +25,7 @@ namespace regexis024 { struct KnownTrackingTools { std::map track_names; - std::vector retrieval_info; + std::vector retrieval_info; }; } diff --git a/src/libregexis024sol/subexpr_fa_transformed.cpp b/src/libregexis024sol/subexpr_fa_transformed.cpp index 6e9f052..51b1bc1 100644 --- a/src/libregexis024sol/subexpr_fa_transformed.cpp +++ b/src/libregexis024sol/subexpr_fa_transformed.cpp @@ -76,7 +76,6 @@ namespace regexis024 { assert(patient.start); for (FA_Node** end: patient.ends){ assert(!(*end)); - printf("DEBUG %lu->->->->->%lu\n", patient.start->nodeId, node->nodeId); reattach_fa_node_edge(end, node); } } diff --git a/src/libregexis024test/test4.cpp b/src/libregexis024test/test4.cpp index 222ed00..8524331 100644 --- a/src/libregexis024test/test4.cpp +++ b/src/libregexis024test/test4.cpp @@ -2,6 +2,7 @@ #include #include #include +#include using namespace regexis024; using namespace std; @@ -20,13 +21,45 @@ void test(const string& input, const string& pattern, const MatchInfo& right_ans printf("Test passed\n"); } +void boba(const vector>& input, const string& pattern) { + std::vector regexp_program; + std::map variables; + std::string error; + int ret = compile(pattern, variables, regexp_program, error); + if (ret < 0) { + throw runtime_error("Compilation failure " + error); + } + VirtualMachine vm(regexp_program.size(), regexp_program.data(), + UINT64_MAX, UINT16_MAX, + UINT32_MAX, UINT32_MAX, UINT64_MAX); + if (vm.initialize() < 0) { + throw runtime_error("Can't initialize"); + } + for (int i = 0; i < 3; i++) { + for (auto& inp: input) { + vm.wipeToInit(); + if (vm.addNewMatchingThread() < 0) + throw runtime_error("addNewMatchingThread"); + for (char ch: inp.first) { + if (vm.feedCharacter(ch, 1) < 0) + throw runtime_error("feedCharacter"); + } + if (vm.isMatched() != inp.second) + throw runtime_error("AAAAAAAAAAAAAAAA"); + } + } +} + int main() { + boba({{"", true}, {"a", false}}, ""); + boba({{"", true}, {"a", true}, {"aab", true}, {"aac", false}, {"c", false}, {"abbababa", true}}, "[ab]*"); + test("", "", MatchInfo({}, {})); + test("a", "a", MatchInfo({}, {})); + test("1b", "1\\ + +namespace regexis024 { + int compile(const std::string& regexp, track_var_list &ret_track_var_list, std::vector &ret_program, + std::string &ret_message) { + ret_message.clear(); + REGEX_IS024_MeaningContext cmp_ctx(regexp.size(), regexp.data()); + if (cmp_ctx.error) { + ret_message = std::move(cmp_ctx.error_msg); + return -1; + } + ret_program = std::move(cmp_ctx.compiled_program); + ret_track_var_list.clear(); + for (auto& iip: cmp_ctx.ktr.track_names) + ret_track_var_list[iip.first] = (TrackingVariableInfo)cmp_ctx.ktr.retrieval_info[iip.second]; + return 0; + } +} diff --git a/src/libregexis024tools/delayed_matching.h b/src/libregexis024tools/delayed_matching.h new file mode 100644 index 0000000..bf289bd --- /dev/null +++ b/src/libregexis024tools/delayed_matching.h @@ -0,0 +1,13 @@ +#ifndef LIBREGEXIS024_SRC_LIBREGEXIS024TOOLS_DELAYED_MATCHING_H +#define LIBREGEXIS024_SRC_LIBREGEXIS024TOOLS_DELAYED_MATCHING_H + +#include +#include +#include + +namespace regexis024 { + int compile(const std::string& regexp, track_var_list& ret_track_var_list, std::vector& ret_program, + std::string& ret_message); +} + +#endif diff --git a/src/libregexis024tools/stringmatching.cpp b/src/libregexis024tools/stringmatching.cpp index f24fbee..a5a7cf4 100644 --- a/src/libregexis024tools/stringmatching.cpp +++ b/src/libregexis024tools/stringmatching.cpp @@ -5,36 +5,21 @@ #include #include -// using namespace regexis024; - namespace regexis024 { - void convert(TrackingVariableInfo& to, const SubtrackingNameInfo& from) { -#define plagiat(field) to.field = from.field; - plagiat(type); - plagiat(colarr_first); - plagiat(colarr_second); - plagiat(stored_in_ca); - plagiat(selarr_first); - plagiat(selarr_second); - plagiat(stored_in_sa); -#undef plagiat - } - int matchStrToRegexp(const std::string& input, const std::string& pattern, MatchInfo& retMatchInfo, track_var_list& retTrackVarList, std::string& retStatus) { retTrackVarList = {}; retMatchInfo = MatchInfo(); - retStatus = ""; + retStatus.clear(); REGEX_IS024_MeaningContext regexp(pattern.size(), pattern.data()); if (regexp.error) { retStatus = "Pattern compilation. " + regexp.error_msg; return -1; } retTrackVarList = {}; - for (auto& iip: regexp.ktr.track_names) { - convert(retTrackVarList[iip.first], regexp.ktr.retrieval_info[iip.second]); - } + for (auto& iip: regexp.ktr.track_names) + retTrackVarList[iip.first] = (TrackingVariableInfo)regexp.ktr.retrieval_info[iip.second]; VirtualMachine vm(regexp.compiled_program.size(), regexp.compiled_program.data(), UINT64_MAX, UINT16_MAX, UINT32_MAX, UINT32_MAX, UINT64_MAX); diff --git a/src/libregexis024tools/stringmatching.h b/src/libregexis024tools/stringmatching.h index ac96854..9dfe655 100644 --- a/src/libregexis024tools/stringmatching.h +++ b/src/libregexis024tools/stringmatching.h @@ -2,26 +2,9 @@ #define LIBREGEXIS024_SRC_LIBREGEXIS024TOOLS_STRINGMATCHING_H #include -#include -#include #include namespace regexis024 { - struct TrackingVariableInfo { - bool stored_in_ca = true; - bool stored_in_sa = false; - - tracking_var_type_t type; - /* These fields will be -1 if unused */ - int colarr_first = -1; - int colarr_second = -1; - - int selarr_first = -1; - int selarr_second = -1; - }; - - typedef std::map track_var_list; - struct MatchInfo { bool have_match = false; std::vector ca_history; diff --git a/src/libregexis024vm/instruction_implementation.cpp b/src/libregexis024vm/instruction_implementation.cpp index 0c0fa52..507d9a0 100644 --- a/src/libregexis024vm/instruction_implementation.cpp +++ b/src/libregexis024vm/instruction_implementation.cpp @@ -22,7 +22,7 @@ namespace regexis024 { ctx.RAX = ctx.RBX = 0; } else { ctx.active_thread.delete_thread(); - ctx.try_to_continue_scheduled(); + try_to_continue_scheduled(ctx); } } @@ -69,14 +69,14 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } other.slot_occupation_status = SLOT_NEW_val; ctx.active_thread.slot_occupation_status = SLOT_EMPTY_val; ctx.READ_halted_stack_new.append(ssid); - ctx.try_to_continue_scheduled(); + try_to_continue_scheduled(ctx); } } void i_READ(VMContext &ctx) { ctx_print_debug(ctx); check_available_prg(BYTECODE_SSLOT_ID_SZ) - sslot_id_t ssid = ctx.extract_sslot_id(); + sslot_id_t ssid = extract_sslot_id(ctx); do_i_read(ctx, ssid); } @@ -88,7 +88,7 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } void i_JUMP(VMContext& ctx){ ctx_print_debug(ctx); check_available_prg(BYTECODE_NEAR_POINTER_SZ) - ctx.active_thread.IP = ctx.extract_near_pointer(); + ctx.active_thread.IP = extract_near_pointer(ctx); } template @@ -97,7 +97,7 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); check_available_prg(immArgSzT::byte_sz + BYTECODE_NEAR_POINTER_SZ); uint64_t imm_val_B = immArgSzT::extract(ctx); - near_ptr_t dest = ctx.extract_near_pointer(); + near_ptr_t dest = extract_near_pointer(ctx); uint64_t imm_val_A = ctx.INP; if (conditionT::call(imm_val_A, imm_val_B)) ctx.active_thread.IP = dest; @@ -109,25 +109,25 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } struct immArgByte{ static constexpr int byte_sz = 1; - static uint64_t extract(VMContext& ctx){return ctx.extract_b();} + static uint64_t extract(VMContext& ctx){return extract_b(ctx);} }; struct immArgWord{ static constexpr int byte_sz = 2; - static uint64_t extract(VMContext& ctx){return ctx.extract_w();} + static uint64_t extract(VMContext& ctx){return extract_w(ctx);} }; struct immArgDoubleWord{ static constexpr int byte_sz = 4; - static uint64_t extract(VMContext& ctx){return ctx.extract_dw();} + static uint64_t extract(VMContext& ctx){return extract_dw(ctx);} }; struct immArgQuadWord{ static constexpr int byte_sz = 8; - static uint64_t extract(VMContext& ctx){return ctx.extract_qw();} + static uint64_t extract(VMContext& ctx){return extract_qw(ctx);} }; void clone_thread_into_slot(Thread& source, Thread& vessel){ thread_print_debug(source); - my_assert(!(vessel.slot_occupation_status & SLOT_OCCUPIED)); - my_assert((source.slot_occupation_status & SLOT_OCCUPIED)); + assert(!(vessel.slot_occupation_status & SLOT_OCCUPIED)); + assert((source.slot_occupation_status & SLOT_OCCUPIED)); vessel = source; if (vessel.CAHptr){ vessel.CAHptr->refs++; @@ -142,8 +142,8 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); general_matching_mode_check() check_available_prg(BYTECODE_SSLOT_ID_SZ + BYTECODE_NEAR_POINTER_SZ); - sslot_id_t ssid = ctx.extract_sslot_id(); - near_ptr_t dest = ctx.extract_near_pointer(); + sslot_id_t ssid = extract_sslot_id(ctx); + near_ptr_t dest = extract_near_pointer(ctx); if (ssid >= ctx.fork_slots_number) smitsya(fork_sslot_out_of_range); Thread& other = ctx.FORK_halted_slots[ssid]; @@ -170,14 +170,14 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); general_matching_mode_check() ctx.active_thread.delete_thread(); - ctx.try_to_continue_scheduled(); + try_to_continue_scheduled(ctx); } void i_PARAM_READ_SS_NUMBER(VMContext& ctx){ ctx_print_debug(ctx); initialization_phase_check() check_available_prg(BYTECODE_SSLOT_ID_SZ) - sslot_id_t read_slots_number = ctx.extract_sslot_id(); + sslot_id_t read_slots_number = extract_sslot_id(ctx); ctx.read_slots_number = read_slots_number; } @@ -185,7 +185,7 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); initialization_phase_check() check_available_prg(BYTECODE_SSLOT_ID_SZ) - sslot_id_t fork_slots_number = ctx.extract_sslot_id(); + sslot_id_t fork_slots_number = extract_sslot_id(ctx); ctx.fork_slots_number = fork_slots_number; } @@ -193,7 +193,7 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); initialization_phase_check() check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ) - tai_t selection_array_len = ctx.extract_track_array_index(); + tai_t selection_array_len = extract_track_array_index(ctx); ctx.selection_array_len = selection_array_len; } @@ -201,7 +201,7 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); initialization_phase_check() check_available_prg(BYTECODE_NEAR_POINTER_SZ) - near_ptr_t sift_function = ctx.extract_near_pointer(); + near_ptr_t sift_function = extract_near_pointer(ctx); ctx.have_sift_function = true; ctx.sift_function = sift_function; } @@ -220,7 +220,7 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); sift_mode_check() check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ) - tai_t i1 = ctx.extract_track_array_index(); + tai_t i1 = extract_track_array_index(ctx); if (i1 >= ctx.selection_array_len) smitsya(selection_arr_out_of_range); ctx.RAX = get_el_from_selarr(ctx.active_thread.SAptr, i1); @@ -237,10 +237,10 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); sift_mode_check() check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ * 2) - tai_t i_start = ctx.extract_track_array_index(); + tai_t i_start = extract_track_array_index(ctx); if (i_start >= ctx.selection_array_len) smitsya(selection_arr_out_of_range); - tai_t i_end = ctx.extract_track_array_index(); + tai_t i_end = extract_track_array_index(ctx); if (i_end >= ctx.selection_array_len) smitsya(selection_arr_out_of_range); ctx.RAX = get_selarr_el_dist(ctx.active_thread.SAptr, i_start, i_end); @@ -251,7 +251,7 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); if (ctx.who_started_sift == opcodes::READ){ ctx.active_thread.delete_thread(); - ctx.try_to_continue_scheduled(); + try_to_continue_scheduled(ctx); } else { /* FORK or MATCH (which will also be shown as FORK) */ /* Cloning conflict ends, active_thread jumps to offsprings IP */ @@ -268,7 +268,7 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } /* noncloning conflict won by intruder+ */ *ctx.sifting_with = ctx.active_thread; ctx.active_thread.slot_occupation_status = SLOT_EMPTY_val; - ctx.try_to_continue_scheduled(); + try_to_continue_scheduled(ctx); } else { /* End of cloning conflict (it involved cloning) */ clone_thread_into_slot(ctx.active_thread, *ctx.sifting_with); @@ -319,8 +319,8 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); general_matching_mode_check() check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ + 8) - tai_t ca_ind = ctx.extract_track_array_index(); - uint64_t imm = ctx.extract_qw(); + tai_t ca_ind = extract_track_array_index(ctx); + uint64_t imm = extract_qw(ctx); ca_branch_new_node(ctx, ca_ind, imm); } @@ -328,7 +328,7 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); general_matching_mode_check() check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ) - tai_t ca_ind = ctx.extract_track_array_index(); + tai_t ca_ind = extract_track_array_index(ctx); ca_branch_new_node(ctx, ca_ind, ctx.passed_bytes); } @@ -364,8 +364,8 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); general_matching_mode_check() check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ + 8) - tai_t sa_ind = ctx.extract_track_array_index(); - uint64_t imm = ctx.extract_qw(); + tai_t sa_ind = extract_track_array_index(ctx); + uint64_t imm = extract_qw(ctx); edit_selection_array(ctx, sa_ind, imm); } @@ -373,7 +373,7 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx_print_debug(ctx); general_matching_mode_check() check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ) - tai_t sa_ind = ctx.extract_track_array_index(); + tai_t sa_ind = extract_track_array_index(ctx); edit_selection_array(ctx, sa_ind, ctx.passed_chars); } @@ -421,7 +421,7 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } void instruction_table(VMContext &ctx) { ctx_print_debug(ctx); - uint8_t opcode = ctx.extract_instruction(); + uint8_t opcode = extract_instruction(ctx); #define rcase(inst) case opcodes::inst: return i_ ## inst (ctx); #define jumpC(UN, st) case opcodes::JC ## UN ## _B: return i_JC(ctx); \ @@ -460,4 +460,4 @@ ctx.error = error_codes::instruction_not_for_collision_thread; return; } ctx.error = error_codes::invalid_opcode; } } -} \ No newline at end of file +} diff --git a/src/libregexis024vm/instruction_implementation.h b/src/libregexis024vm/instruction_implementation.h index 36341ca..05b38c2 100644 --- a/src/libregexis024vm/instruction_implementation.h +++ b/src/libregexis024vm/instruction_implementation.h @@ -15,23 +15,35 @@ #define SLOT_NEW 2 #define SLOT_NEW_val (SLOT_OCCUPIED | SLOT_NEW) -#define check_available_prg(regionSz) if (!ctx.check_inboundness(regionSz)){ \ +#define check_available_prg(regionSz) if (!check_inboundness(ctx, regionSz)){ \ ctx.error = error_codes::improper_finish; return; } #if defined(LIBREGEXIS024_DEBUG) && defined(LIBREGEXIS024_ALLOW_LOUD) #include -#define my_assert(expr) assert(expr) #define ctx_print_debug(ctx) debug_print_context(ctx, __func__) #define thread_print_debug(thread) debug_print_thread(thread, __func__) #else -#define my_assert(expr) assert(expr) #define ctx_print_debug(ctx) #define thread_print_debug(thread) #endif namespace regexis024 { + bool check_inboundness(VMContext& ctx, int region); + + uint8_t extract_b(VMContext& ctx); + uint16_t extract_w(VMContext& ctx); + uint32_t extract_dw(VMContext& ctx); + uint64_t extract_qw(VMContext& ctx); + + uint8_t extract_instruction(VMContext& ctx); + sslot_id_t extract_sslot_id(VMContext& ctx); + near_ptr_t extract_near_pointer(VMContext& ctx); + tai_t extract_track_array_index(VMContext& ctx); + + void try_to_continue_scheduled(VMContext& ctx); + void instruction_table(VMContext& ctx); } -#endif //LIBREGEXIS024_INSTRUCTION_IMPLEMENTATION_H \ No newline at end of file +#endif //LIBREGEXIS024_INSTRUCTION_IMPLEMENTATION_H diff --git a/src/libregexis024vm/libregexis024vm.h b/src/libregexis024vm/libregexis024vm.h index 53df55e..fdf81cf 100644 --- a/src/libregexis024vm/libregexis024vm.h +++ b/src/libregexis024vm/libregexis024vm.h @@ -53,7 +53,6 @@ namespace regexis024 { uint64_t* SAptr = NULL; void delete_thread() noexcept; - void debug_print(const char* place); }; struct VMContext{ @@ -132,22 +131,8 @@ namespace regexis024 { uint64_t INP = 0; uint64_t passed_chars = 0; uint64_t passed_bytes = 0; - - void try_to_continue_scheduled(); - - bool check_inboundness(int region); - - uint8_t extract_b(); - uint16_t extract_w(); - uint32_t extract_dw(); - uint64_t extract_qw(); - - uint8_t extract_instruction(); - sslot_id_t extract_sslot_id(); - near_ptr_t extract_near_pointer(); - tai_t extract_track_array_index(); - - void debug_print(const char* place); }; + + void wipe(VMContext& ctx) noexcept; } #endif //LIBREGEXIS024_LIBREGEXIS024VM_H diff --git a/src/libregexis024vm/libregexis024vm_context.cpp b/src/libregexis024vm/libregexis024vm_context.cpp index 78d243a..503a7d4 100644 --- a/src/libregexis024vm/libregexis024vm_context.cpp +++ b/src/libregexis024vm/libregexis024vm_context.cpp @@ -42,7 +42,7 @@ namespace regexis024 { * gonna deoccupy slot_occupation_status*/ void Thread::delete_thread() noexcept { thread_print_debug(*this); - my_assert(slot_occupation_status & SLOT_OCCUPIED); + assert(slot_occupation_status & SLOT_OCCUPIED); slot_occupation_status = SLOT_EMPTY_val; CollectionArrayNode* cur_CAptr = CAHptr; while (cur_CAptr){ @@ -60,37 +60,29 @@ namespace regexis024 { } } - void emptify_one_of_new_read_halted_stacks(VMContext& ctx, SSID_Stack& type_new_stack){ - while (!type_new_stack.empty()){ - Thread& thread = ctx.READ_halted_slots[type_new_stack.pop()]; - assert(thread.slot_occupation_status & SLOT_OCCUPIED); - thread.delete_thread(); - } - } - /* First it will try to pop pending thread from FORK_halted_stack * Then it will try popping thread from READ_halted_stack_old (checking if top * thread here is not actually SLOT_NEW). If something succeded, corresponding slot will be deoccupied, and * active slot will be occupied with it. * * try_to_continue_scheduled() assumes that active thread is unoccupied.*/ - void VMContext::try_to_continue_scheduled(){ - ctx_print_debug(*this); - my_assert(!(active_thread.slot_occupation_status & SLOT_OCCUPIED)); - if (FORK_halted_stack.sz){ - sslot_id_t ssid = FORK_halted_stack.pop(); - active_thread = FORK_halted_slots[ssid]; - FORK_halted_slots[ssid].slot_occupation_status = SLOT_EMPTY_val; + void try_to_continue_scheduled(VMContext& ctx){ + ctx_print_debug(ctx); + assert(!(ctx.active_thread.slot_occupation_status & SLOT_OCCUPIED)); + if (ctx.FORK_halted_stack.sz){ + sslot_id_t ssid = ctx.FORK_halted_stack.pop(); + ctx.active_thread = ctx.FORK_halted_slots[ssid]; + ctx.FORK_halted_slots[ssid].slot_occupation_status = SLOT_EMPTY_val; return; } - while (READ_halted_stack_old.sz){ - sslot_id_t ssid = READ_halted_stack_old.pop(); - if (READ_halted_slots[ssid].slot_occupation_status & SLOT_NEW){ + while (ctx.READ_halted_stack_old.sz){ + sslot_id_t ssid = ctx.READ_halted_stack_old.pop(); + if (ctx.READ_halted_slots[ssid].slot_occupation_status & SLOT_NEW){ /* This is the case when old thread was silently replaced by settled new thread */ continue; } - active_thread = READ_halted_slots[ssid]; - READ_halted_slots[ssid].slot_occupation_status = SLOT_EMPTY_val; + ctx.active_thread = ctx.READ_halted_slots[ssid]; + ctx.READ_halted_slots[ssid].slot_occupation_status = SLOT_EMPTY_val; return; } /* Failure here will be detected. We started with unoccupied active thread. iterator inside kick will see it */ @@ -148,28 +140,44 @@ namespace regexis024 { INP = input; passed_bytes += corresponding_byte_amount; passed_chars++; - try_to_continue_scheduled(); + try_to_continue_scheduled(*this); kick(*this); return error; } VMContext::~VMContext() { ctx_print_debug(*this); - if (initialized){ - emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new); - while (!READ_halted_stack_old.empty()){ - Thread& thread = READ_halted_slots[READ_halted_stack_old.pop()]; + if (active_thread.slot_occupation_status & SLOT_OCCUPIED) + active_thread.delete_thread(); + wipe(*this); + if (initialized) { + free(READ_halted_slots); + free(FORK_halted_slots); + } + } + + void wipe(VMContext& ctx) noexcept { + ctx_print_debug(ctx); + assert(!(ctx.active_thread.slot_occupation_status & SLOT_OCCUPIED)); + if (ctx.initialized){ + ctx.timer = 0; + ctx.CAN_total = 0; + while (!ctx.READ_halted_stack_new.empty()){ + Thread& thread = ctx.READ_halted_slots[ctx.READ_halted_stack_new.pop()]; + assert(thread.slot_occupation_status & SLOT_OCCUPIED); + thread.delete_thread(); + } + while (!ctx.READ_halted_stack_old.empty()){ + Thread& thread = ctx.READ_halted_slots[ctx.READ_halted_stack_old.pop()]; assert(thread.slot_occupation_status & SLOT_OCCUPIED); if (!(thread.slot_occupation_status & SLOT_NEW)) thread.delete_thread(); } - free(READ_halted_slots); - while (!FORK_halted_stack.empty()) - FORK_halted_slots[FORK_halted_stack.pop()].delete_thread(); - free(FORK_halted_slots); + while (!ctx.FORK_halted_stack.empty()) + ctx.FORK_halted_slots[ctx.FORK_halted_stack.pop()].delete_thread(); - if (matched_thread.slot_occupation_status & SLOT_OCCUPIED){ - matched_thread.delete_thread(); + if (ctx.matched_thread.slot_occupation_status & SLOT_OCCUPIED){ + ctx.matched_thread.delete_thread(); } } } diff --git a/src/libregexis024vm/libregexis024vm_disassembly.cpp b/src/libregexis024vm/libregexis024vm_disassembly.cpp index fcdea12..4c5c276 100644 --- a/src/libregexis024vm/libregexis024vm_disassembly.cpp +++ b/src/libregexis024vm/libregexis024vm_disassembly.cpp @@ -2,39 +2,39 @@ #include namespace regexis024 { - bool VMContext::check_inboundness(int region){ - return vmprog_check_inboundness(program_size, active_thread.IP, region); + bool check_inboundness(VMContext& ctx, int region){ + return vmprog_check_inboundness(ctx.program_size, ctx.active_thread.IP, region); } - uint8_t VMContext::extract_b() { - return vmprog_extract_b(&active_thread.IP, prg); + uint8_t extract_b(VMContext& ctx) { + return vmprog_extract_b(&ctx.active_thread.IP, ctx.prg); } - uint16_t VMContext::extract_w() { - return vmprog_extract_w(&active_thread.IP, prg); + uint16_t extract_w(VMContext& ctx) { + return vmprog_extract_w(&ctx.active_thread.IP, ctx.prg); } - uint32_t VMContext::extract_dw() { - return vmprog_extract_dw(&active_thread.IP, prg); + uint32_t extract_dw(VMContext& ctx) { + return vmprog_extract_dw(&ctx.active_thread.IP, ctx.prg); } - uint64_t VMContext::extract_qw() { - return vmprog_extract_qw(&active_thread.IP, prg); + uint64_t extract_qw(VMContext& ctx) { + return vmprog_extract_qw(&ctx.active_thread.IP, ctx.prg); } - uint8_t VMContext::extract_instruction() { - return extract_b(); + uint8_t extract_instruction(VMContext& ctx) { + return extract_b(ctx); } - sslot_id_t VMContext::extract_sslot_id() { - return extract_dw(); + sslot_id_t extract_sslot_id(VMContext& ctx) { + return extract_dw(ctx); } - near_ptr_t VMContext::extract_near_pointer() { - return extract_qw(); + near_ptr_t extract_near_pointer(VMContext& ctx) { + return extract_qw(ctx); } - tai_t VMContext::extract_track_array_index() { - return extract_w(); + tai_t extract_track_array_index(VMContext& ctx) { + return extract_w(ctx); } } diff --git a/src/libregexis024vm/libregexis024vm_interface.cpp b/src/libregexis024vm/libregexis024vm_interface.cpp index f8506f5..f3c4ad5 100644 --- a/src/libregexis024vm/libregexis024vm_interface.cpp +++ b/src/libregexis024vm/libregexis024vm_interface.cpp @@ -85,4 +85,8 @@ namespace regexis024 { throw std::runtime_error("unusable\n"); return reveal->feedCharacter(input, bytesResembled); } + + void VirtualMachine::wipeToInit() { + wipe(*reveal); + } } diff --git a/src/libregexis024vm/libregexis024vm_interface.h b/src/libregexis024vm/libregexis024vm_interface.h index fc6a510..e24292f 100644 --- a/src/libregexis024vm/libregexis024vm_interface.h +++ b/src/libregexis024vm/libregexis024vm_interface.h @@ -33,7 +33,7 @@ namespace regexis024 { error_code_t addNewMatchingThread(); error_code_t feedCharacter(uint64_t input, uint64_t bytesResembled); - + void wipeToInit(); private: bool gave_SOF = false; void* opaque;