From b11afa72eabe4016ca619acc06f4022a16921bd7 Mon Sep 17 00:00:00 2001 From: Andreev Gregory Date: Mon, 29 Jul 2024 15:30:38 +0300 Subject: [PATCH] Fixed fa_first_stage_fix.cpp --- src/libregexis024fa/fa_first_stage_fix.cpp | 14 +++++++++----- src/libregexis024test/test2.cpp | 2 +- src/libregexis024test/test4.cpp | 20 ++++++++++++++++++-- src/libregexis024tools/stringmatching.cpp | 3 +-- 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/src/libregexis024fa/fa_first_stage_fix.cpp b/src/libregexis024fa/fa_first_stage_fix.cpp index a37ee8b..94d5801 100644 --- a/src/libregexis024fa/fa_first_stage_fix.cpp +++ b/src/libregexis024fa/fa_first_stage_fix.cpp @@ -30,7 +30,7 @@ REGEX_IS024_FA_FirstStageFixInfo first_stage_fix_fa(FA_Container& sourceFa, FA_C std::vector searched; searched.emplace_back(loa.nxt_node); - beg->search_mark = 0; + loa.nxt_node->search_mark = 0; for (size_t done = 0; done < searched.size(); done++){ FA_Node& cur = *searched[done].node; @@ -77,6 +77,11 @@ REGEX_IS024_FA_FirstStageFixInfo first_stage_fix_fa(FA_Container& sourceFa, FA_C if (my->type == match) info.fed_chars_extend_one_right = true; } + { + Marked& loa_nxt_aux = searched[loa.nxt_node->search_mark]; + if (loa_nxt_aux.making_copy) + reattach_nxt_node(&loa, loa_nxt_aux.copy); + } for (auto& v_sete: searched) v_sete.node->search_mark = -1; @@ -151,7 +156,6 @@ void regular_second_stage_fix(const FA_Container& sourceFa, FA_Container& result assert(resultFa.all.empty() && !resultFa.start); if (!sourceFa.start) return; - // todo: rewrite first stage using that cool technique I just invented resultFa.start = sourceFa.start; // A vector of pointers in resutFa to nodes that belong to sourceFa. They should undergo a little bit of copying. std::vector homework = {&(resultFa.start)}; @@ -165,9 +169,9 @@ void regular_second_stage_fix(const FA_Container& sourceFa, FA_Container& result if (sourceV->type == match) { FA_NodeOfMatch& mn = dynamic_cast(*sourceV); FA_NodeOfMatch* res_mn = resultFa.makeMatch(); - if (mn.ext_filter_added && mn.pending_filter != codeset_of_all) { - assert(info1.fed_chars_extend_one_right); - FA_NodeOfOneCharRead* res_ocr2n = resultFa.makeOneCharRead(mn.pending_filter, true); + if (info1.fed_chars_extend_one_right) { + FA_NodeOfOneCharRead* res_ocr2n = resultFa.makeOneCharRead( + mn.ext_filter_added ? mn.pending_filter : codeset_of_all, true); reattach_nxt_node(res_ocr2n, res_mn); sourceIdToResNode[sourceVId] = res_ocr2n; } else { diff --git a/src/libregexis024test/test2.cpp b/src/libregexis024test/test2.cpp index cddbfac..5a115c2 100644 --- a/src/libregexis024test/test2.cpp +++ b/src/libregexis024test/test2.cpp @@ -2,7 +2,7 @@ #include int main(){ - std::string regular_expression = "!selarr{boba{ca}}^a#boba(b)c$"; + std::string regular_expression = "\\>1*"; REGEX_IS024_MeaningContext regex(regular_expression.size(), regular_expression.c_str()); if (regex.error) fprintf(stderr, "%s\n", regex.error_msg.c_str()); diff --git a/src/libregexis024test/test4.cpp b/src/libregexis024test/test4.cpp index 4fd5ec0..a7f645a 100644 --- a/src/libregexis024test/test4.cpp +++ b/src/libregexis024test/test4.cpp @@ -10,14 +10,30 @@ void test(const string& input, const string& pattern, const MatchInfo& right_ans MatchInfo given_answer; track_var_list retTrackVarList; string retStatus; - matchStrToRegexp(input, pattern, given_answer, retTrackVarList, retStatus); + int ret = matchStrToRegexp(input, pattern, given_answer, retTrackVarList, retStatus); + if (ret < 0) { + throw runtime_error("Test failed. matching. " + retStatus); + } if (given_answer != right_answer) { - throw runtime_error("Test failed"); + throw runtime_error("Test failed."); } printf("Test passed\n"); } int main() { + test("C111111111111", "C\\>1*", MatchInfo({}, {})); + // return 0; + test("GET / HTTP/1.1\r\nHost: bibura sosat\r\nLos-es-raus: a\rfaafafdf\r\n\r\n", + "!dfa;(GET|POST) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n", + MatchInfo()); + test("\r24234\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo()); + test("\n3432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo()); + test("3:::;;432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {})); + test("3:::;;432 \r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {})); + test("GET / HTTP/0.9\r\nHost: bibura sosat\r\nLos-es-raus: afaafafdf\r\n\r\n", + "^(GET|POST\\>) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n", + MatchInfo({}, {})); + // return 0; test("b", "#boba(b)", MatchInfo({{0, 0}, {1, 1}}, {})); test("abc", "!selarr{boba{ca}}^a#boba(b)c$", MatchInfo({{0, 1}, {1, 2}}, {1, 2})); for (int i = 0; i < 64; i++) { diff --git a/src/libregexis024tools/stringmatching.cpp b/src/libregexis024tools/stringmatching.cpp index 41f10b3..f4f9338 100644 --- a/src/libregexis024tools/stringmatching.cpp +++ b/src/libregexis024tools/stringmatching.cpp @@ -89,9 +89,8 @@ int regexis024::matchStrToRegexp(const std::string& input, const std::string& pa retMatchInfo.sa[i] = vm.getMatchedThreadSAValue(i); retMatchInfo.ca_history = vm.getMatchedThreadCABranchReverse(); std::reverse(retMatchInfo.ca_history.begin(), retMatchInfo.ca_history.end()); - return 0; } - return -1; + return 0; } bool regexis024::MatchInfo::operator==(const MatchInfo &other) const {