Moved utf8.h/cpp to namespace json::
This commit is contained in:
		
							parent
							
								
									1c2e479012
								
							
						
					
					
						commit
						d4c48ea269
					
				| @ -1,82 +1,84 @@ | ||||
| #include "utf8.h" | ||||
| #include <assert.h> | ||||
| 
 | ||||
/*
 * Classifies the first byte of a UTF-8 sequence.
 * Returns the total number of bytes in the sequence (1 to 4), or -1 when
 * firstByte cannot start a sequence (continuation byte 10xxxxxx or 11111xxx).
 * Only the prefix bits are inspected; the payload bits are not validated.
 */
int _utf8_retrieve_size(uint8_t firstByte) {
    if ((firstByte & 0x80) == 0x00)   /* 0xxxxxxx */
        return 1;
    if ((firstByte & 0xE0) == 0xC0)   /* 110xxxxx */
        return 2;
    if ((firstByte & 0xF0) == 0xE0)   /* 1110xxxx */
        return 3;
    if ((firstByte & 0xF8) == 0xF0)   /* 11110xxx */
        return 4;
    return -1;
}
| 
 | ||||
/*
 * Decodes one UTF-8 sequence of sz bytes that starts at string[pos].
 *
 * sz     - sequence length as reported for the lead byte (1 to 4)
 * pos    - index of the lead byte inside string
 * string - byte buffer; the caller guarantees that bytes pos .. pos+sz-1 exist
 *
 * Returns the decoded code point, or -1 when the sequence is not well-formed:
 * sz outside 1..4, a single byte that is not ASCII, a continuation byte that
 * is not of the form 10xxxxxx, an overlong encoding, a UTF-16 surrogate
 * (0xD800-0xDFFF) or a value above 0x10FFFF.
 * The prefix bits of the lead byte are not compared against sz.
 */
int32_t _utf8_retrieve_character(int sz, size_t pos, const char *string) {
    /* A negative sz would make the shift below undefined */
    if (sz < 1 || sz > 4)
        return -1;
    const uint8_t lead = static_cast<uint8_t>(string[pos]);
    if (sz == 1)
        return lead < 0x80 ? static_cast<int32_t>(lead) : -1;
    /* Keep only the payload bits of the lead byte: 5, 4 or 3 of them */
    uint32_t v = lead & (0b01111111u >> sz);
    for (int i = 1; i < sz; i++) {
        const uint32_t th = static_cast<uint8_t>(string[pos + i]);
        if ((th & 0b11000000) != 0b10000000)
            return -1;
        v = (v << 6) | (th & 0b00111111);
    }
    /* Smallest code point that really needs sz bytes; anything below is overlong */
    static const uint32_t smallest[5] = {0, 0, 0x80, 0x800, 0x10000};
    if (v < smallest[sz])
        return -1;
    if (v >= 0xD800 && v <= 0xDFFF)
        return -1;
    if (v > 0x10FFFF)
        return -1;
    return static_cast<int32_t>(v);
}
| 
 | ||||
| void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size) { | ||||
|     if (pos >= string_size) {cp = -1; return;} | ||||
|     adj = _utf8_retrieve_size((uint8_t)string[pos]); | ||||
|     if (adj < 0 || pos + adj > string_size) {cp = -1; return;} | ||||
|     if ((cp = _utf8_retrieve_character(adj, pos, string)) < 0) {cp = -1; return;} | ||||
| } | ||||
| 
 | ||||
| bool isUtf8String(const std::string &str) { | ||||
|     size_t N = str.size(); | ||||
|     size_t cpos = 0; | ||||
|     while (cpos < N) { | ||||
|         int32_t codepoint; | ||||
|         size_t adj; | ||||
|         utf8_string_iterat(codepoint, adj, cpos, str.data(), N); | ||||
|         if (codepoint < 0) | ||||
|             return false; | ||||
|         cpos += adj; | ||||
|     } | ||||
|     return true; | ||||
| } | ||||
| 
 | ||||
| int codepoint_to_utf8(uint32_t cp, std::string &out) { | ||||
|     size_t N = out.size(); | ||||
|     auto make_compl = [cp](int imp) -> char { | ||||
|         return (char)(((cp >> imp) & 0x3f) | 0x80); | ||||
|     }; | ||||
|     if (cp > 0x10FFFF) | ||||
| namespace json { | ||||
|     int utf8_retrieve_size(uint8_t firstByte) { | ||||
|         if (!(firstByte & 0b10000000)) | ||||
|             return 1; | ||||
|         uint8_t a = 0b11000000; | ||||
|         uint8_t b = 0b00100000; | ||||
|         for (int i = 2; i <= 4; i++){ | ||||
|             if ((firstByte & (a | b)) == a) | ||||
|                 return i; | ||||
|             a |= b; | ||||
|             b >>= 1; | ||||
|         } | ||||
|         return -1; | ||||
|     if (cp <= 0x7F) { | ||||
|         out += (char)cp; | ||||
|     } else if (cp <= 0x7ff) { | ||||
|         out.resize(N + 2); | ||||
|         out[N] = (char)((cp >> 6) | 0xc0); | ||||
|         out[N + 1] = make_compl(0); | ||||
|     } else if (cp <= 0xffff) { | ||||
|         out.resize(N + 3); | ||||
|         out[N] = (char)((cp >> 12) | 0xe0); | ||||
|         out[N + 1] = make_compl(6); | ||||
|         out[N + 2] = make_compl(0); | ||||
|     } else { | ||||
|         out.resize(N + 4); | ||||
|         out[N] = (char)((cp >> 18) | 0xf0); | ||||
|         out[N + 1] = make_compl(12); | ||||
|         out[N + 2] = make_compl(6); | ||||
|         out[N + 3] = make_compl(0); | ||||
|     } | ||||
|     return 0; | ||||
| 
 | ||||
|     int32_t utf8_retrieve_character(int sz, size_t pos, const char *string) { | ||||
|         if (sz == 1) | ||||
|             return (int32_t)string[pos]; | ||||
|         uint32_t v = ((uint8_t)string[pos]) & (0b01111111 >> sz); | ||||
|         pos++; | ||||
|         for (int i = 1; i < sz; i++){ | ||||
|             uint32_t th = (uint8_t)string[pos]; | ||||
|             if ((th & 0b11000000) != 0b10000000) | ||||
|                 return -1; | ||||
|             v <<= 6; | ||||
|             v |= (th & 0b00111111); | ||||
|             pos++; | ||||
|         } | ||||
|         assert(v <= INT32_MAX); | ||||
|         return static_cast<int32_t>(v); | ||||
|     } | ||||
| 
 | ||||
|     void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size) { | ||||
|         if (pos >= string_size) {cp = -1; return;} | ||||
|         adj = utf8_retrieve_size((uint8_t)string[pos]); | ||||
|         if (adj < 0 || pos + adj > string_size) {cp = -1; return;} | ||||
|         if ((cp = utf8_retrieve_character(adj, pos, string)) < 0) {cp = -1; return;} | ||||
|     } | ||||
| 
 | ||||
|     bool isUtf8String(const std::string &str) { | ||||
|         size_t N = str.size(); | ||||
|         size_t cpos = 0; | ||||
|         while (cpos < N) { | ||||
|             int32_t codepoint; | ||||
|             size_t adj; | ||||
|             utf8_string_iterat(codepoint, adj, cpos, str.data(), N); | ||||
|             if (codepoint < 0) | ||||
|                 return false; | ||||
|             cpos += adj; | ||||
|         } | ||||
|         return true; | ||||
|     } | ||||
| 
 | ||||
|     int codepoint_to_utf8(uint32_t cp, std::string &out) { | ||||
|         size_t N = out.size(); | ||||
|         auto make_compl = [cp](int imp) -> char { | ||||
|             return (char)(((cp >> imp) & 0x3f) | 0x80); | ||||
|         }; | ||||
|         if (cp > 0x10FFFF) | ||||
|             return -1; | ||||
|         if (cp <= 0x7F) { | ||||
|             out += (char)cp; | ||||
|         } else if (cp <= 0x7ff) { | ||||
|             out.resize(N + 2); | ||||
|             out[N] = (char)((cp >> 6) | 0xc0); | ||||
|             out[N + 1] = make_compl(0); | ||||
|         } else if (cp <= 0xffff) { | ||||
|             out.resize(N + 3); | ||||
|             out[N] = (char)((cp >> 12) | 0xe0); | ||||
|             out[N + 1] = make_compl(6); | ||||
|             out[N + 2] = make_compl(0); | ||||
|         } else { | ||||
|             out.resize(N + 4); | ||||
|             out[N] = (char)((cp >> 18) | 0xf0); | ||||
|             out[N + 1] = make_compl(12); | ||||
|             out[N + 2] = make_compl(6); | ||||
|             out[N + 3] = make_compl(0); | ||||
|         } | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
|  | ||||
| @ -4,15 +4,17 @@ | ||||
| #include <stdint.h> | ||||
| #include <string> | ||||
| 
 | ||||
| int _utf8_retrieve_size(uint8_t firstByte); | ||||
| namespace json { | ||||
|     int utf8_retrieve_size(uint8_t firstByte); | ||||
| 
 | ||||
| int32_t _utf8_retrieve_character(int sz, size_t pos, const char *string); | ||||
|     int32_t utf8_retrieve_character(int sz, size_t pos, const char *string); | ||||
| 
 | ||||
| void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size); | ||||
|     void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size); | ||||
| 
 | ||||
| bool isUtf8String(const std::string& str); | ||||
|     bool isUtf8String(const std::string& str); | ||||
| 
 | ||||
| /* Returns -1 if cp is not in 0-0x10FFFF range */ | ||||
| int codepoint_to_utf8(uint32_t cp, std::string& out); | ||||
|     /* Returns -1 if cp is not in 0-0x10FFFF range */ | ||||
|     int codepoint_to_utf8(uint32_t cp, std::string& out); | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user