module mutils.serializer.lexer_utils;

import std.algorithm : canFind;
import std.meta;
import std.traits;

import mutils.container.vector;
import mutils.conv;

/**
 * Advances `line` and `column` over the text consumed between two lexer states.
 *
 * The consumed text is the first `oldSlice.length - newSLice.length` characters
 * of `oldSlice`. Every '\n' increments `line` and resets `column` to 0; every
 * other character increments `column`.
 *
 * Params:
 *   line     = line counter, updated in place
 *   column   = column counter, updated in place
 *   oldSlice = input as it was before the token was consumed
 *   newSLice = input that remains afterwards; must not be longer than `oldSlice`
 */
void updateLineAndCol(ref uint line, ref uint column, string oldSlice, string newSLice) {
    assert(newSLice.length <= oldSlice.length, "newSLice cannot be longer than oldSlice");
    const size_t consumed = oldSlice.length - newSLice.length;
    foreach (char ch; oldSlice[0 .. consumed]) {
        if (ch == '\n') {
            line++;
            column = 0;
        } else {
            column++;
        }
    }
}

/**
 * Loads or saves a run of whitespace (' ', '\t', '\n', '\r').
 *
 * Load: consumes the leading whitespace run of `con` into `token.str` and sets
 * the type to `white`; if `con` does not start with whitespace the type is set
 * to `notoken` and `con` is left untouched.
 * Save: appends `token.str` to `con` when the token type is `white`.
 */
void serializeWhiteTokens(bool load, Container)(ref TokenData token, ref Container con) {
    static if (load == true) {
        size_t whiteNum = 0;
        foreach (ch; con) {
            if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
                whiteNum++;
            } else {
                break;
            }
        }
        if (whiteNum == 0) {
            token.type = StandardTokens.notoken;
            return;
        }
        token.str = con[0 .. whiteNum];
        con = con[whiteNum .. $];
        token.type = StandardTokens.white;
    } else {
        if (token.type == StandardTokens.white) {
            con ~= cast(char[]) token.str;
        }
    }
}

/**
 * Loads or saves a `/* ... */` comment.
 *
 * Load: `con` must start with "/*" (asserted). The text between the delimiters
 * becomes `token.str` and `con` is advanced past the closing delimiter. When no
 * closing delimiter exists the rest of the input becomes the comment and `con`
 * is set to null.
 * Save: appends "/*", `token.str` and the closing delimiter to `con` when the
 * token type is `comment_multiline`.
 */
void serializeCommentMultiline(bool load, Container)(ref TokenData token, ref Container con) {
    static if (load == true) {
        char[2] slst = ['/', '*'];
        assert(con[0 .. 2] == slst[]);
        con = con[2 .. $];
        token.type = StandardTokens.comment_multiline;
        foreach (i, ch; con) {
            if (ch == '*' && i != con.length - 1 && con[i + 1] == '/') {
                token.str = con[0 .. i];
                con = con[i + 2 .. $];
                return;
            }
        }
        // Unterminated comment: everything that is left belongs to it.
        token.str = con;
        con = null;
    } else {
        if (token.type == StandardTokens.comment_multiline) {
            con ~= cast(char[]) "/*";
            con ~= cast(char[]) token.str;
            con ~= cast(char[]) "*/";
        }
    }
}

unittest {
    string str = "/* aaa bbb ccc */";
    TokenData tk;
    serializeCommentMultiline!(true)(tk, str);
    assert(tk.str == " aaa bbb ccc ");
}

/**
 * Loads or saves a `// ...` comment.
 *
 * Load: `con` must start with "//" (asserted). The text up to (not including)
 * the next '\n' becomes `token.str`; a '\r' directly before that '\n' is not
 * part of the comment. `con` is advanced so that it starts at the '\n'. When
 * there is no '\n' the rest of the input becomes the comment and `con` is set
 * to null.
 * Save: appends "//" and `token.str` to `con` when the token type is
 * `comment_line`.
 */
void serializeCommentLine(bool load, Container)(ref TokenData token, ref Container con) {
    static if (load == true) {
        char[2] slsl = ['/', '/'];
        assert(con[0 .. 2] == slsl[]);
        con = con[2 .. $];
        token.type = StandardTokens.comment_line;
        foreach (i, ch; con) {
            if (ch == '\n') {
                // Keep the whole comment text. Only a carriage return of a
                // CRLF line ending is trimmed; `end` can never underflow.
                size_t end = i;
                if (end > 0 && con[end - 1] == '\r') {
                    end--;
                }
                token.str = con[0 .. end];
                con = con[i .. $];
                return;
            }
        }
        // Comment runs to the end of the input.
        token.str = con;
        con = null;
    } else {
        if (token.type == StandardTokens.comment_line) {
            con ~= cast(char[]) "//";
            con ~= cast(char[]) token.str;
        }
    }
}

unittest {
    string str = "// abc\nrest";
    TokenData tk;
    serializeCommentLine!(true)(tk, str);
    assert(tk.str == " abc");
    assert(str == "\nrest");

    string empty = "//\nrest";
    serializeCommentLine!(true)(tk, empty);
    assert(tk.str == "");
    assert(empty == "\nrest");
}

/// Returns true for ASCII letters and '_', the characters an identifier may start with.
bool isIdentifierFirstChar(char ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
}

/**
 * Loads or saves an identifier: an ASCII letter or '_' followed by any number
 * of ASCII letters, digits or '_'.
 *
 * Load: consumes the identifier from the front of `con` into `token.str` and
 * sets the type to `identifier`. If `con` is empty or does not start with an
 * identifier character the type is set to `notoken` and `con` is untouched.
 * Save: appends `token.str` to `con` when the token type is `identifier`.
 */
void serializeIdentifier(bool load, Container)(ref TokenData token, ref Container con) {
    static if (load == true) {
        if (con.length == 0 || !isIdentifierFirstChar(con[0])) {
            token.type = StandardTokens.notoken;
            return;
        }
        size_t charactersNum = 1;
        foreach (ch; con[1 .. $]) {
            if (isIdentifierFirstChar(ch) || (ch >= '0' && ch <= '9')) {
                charactersNum++;
            } else {
                break;
            }
        }
        token.str = con[0 .. charactersNum];
        con = con[charactersNum .. $];
        token.type = StandardTokens.identifier;
    } else {
        if (token.type == StandardTokens.identifier) {
            con ~= token.str;
        }
    }
}

/**
 * Loads or saves a double-quoted string literal.
 *
 * Load: `con` must start with '"' (asserted). `token.str` receives the raw
 * text between the quotes with escape sequences left untouched (use
 * `TokenData.getUnescapedString` to decode them); a character following a
 * backslash never terminates the string. `con` is advanced past the closing
 * quote. If the closing quote is missing, the rest of the input becomes the
 * string and `con` is set to null.
 * Save: appends `token.str` to `con` when the token type is `string_`.
 * NOTE(review): the save branch does not write the surrounding quotes;
 * presumably the caller adds them - confirm against the lexers using this.
 */
void serializeStringToken(bool load, Container)(ref TokenData token, ref Container con) {
    static if (load == true) {
        token.type = StandardTokens.string_;
        assert(con.length > 0 && con[0] == '"');
        if (con.length < 2) {
            token.str = null;
            con = null;
            return;
        }
        if (con[0 .. 2] == `""`) {
            token.str = null;
            con = con[2 .. $];
            return;
        }
        con = con[1 .. $]; // drop the opening quote
        bool ignoreNext = false;
        // Scan from index 0 so that an escape right after the opening quote
        // is honoured as well.
        foreach (i; 0 .. con.length) {
            if (ignoreNext) {
                ignoreNext = false;
                continue;
            }
            if (con[i] == '\\') {
                ignoreNext = true;
                continue;
            }
            if (con[i] == '"') {
                token.str = con[0 .. i];
                con = con[i + 1 .. $];
                return;
            }
        }
        // Unterminated literal: take everything that is left.
        token.str = con;
        con = null;
    } else {
        if (token.type == StandardTokens.string_) {
            con ~= token.str;
        }
    }
}

unittest {
    string str = `"\"a" tail`;
    TokenData tk;
    serializeStringToken!(true)(tk, str);
    assert(tk.str == `\"a`);
    assert(str == " tail");
}

/// The returned string is valid only until the next call to any mutils.conv function.
string doubleToString(double num) {
    return num.to!string;
}

/// The returned string is valid only until the next call to any mutils.conv function.
string longToString(long num) {
    return num.to!string;
}

/**
 * Converts a decimal string with an optional leading '-' to a `long`.
 *
 * An empty string yields 0. The characters are not validated; every character
 * after the sign is treated as a decimal digit. At most 19 digits are
 * supported (asserted).
 */
long stringToLong(string str) {
    // Powers of ten, indexed by digit position counted from the right.
    static immutable long[19] table = [
        1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000,
        1000000000, 10000000000, 100000000000, 1000000000000, 10000000000000,
        100000000000000, 1000000000000000, 10000000000000000,
        100000000000000000, 1000000000000000000,
    ];
    if (str.length == 0) {
        return 0;
    }
    bool minus;
    if (str[0] == '-') {
        minus = true;
        str = str[1 .. $];
    }
    // Checked after the sign is removed so a signed 19-digit value is accepted.
    assert(str.length <= table.length);
    long num;
    size_t i;
    foreach_reverse (c; str) {
        num += (c - '0') * table[i];
        i++;
    }
    return minus ? -num : num;
}

/**
 * Loads or saves a numeric token.
 *
 * Load: parses an optional '-', a run of digits and, if a '.' follows, a
 * second run of digits with an optional trailing 'f'. With a '.' the token
 * becomes `double_`, otherwise `long_`. `con` is advanced past the consumed
 * characters.
 * Save: appends the textual form of `token.double_` / `token.long_` to `con`;
 * any other token type is a programming error (assert(0)).
 */
void serializeNumberToken(bool load, Container)(ref TokenData token, ref Container con) {
    static if (load == true) {
        bool minus = false;
        string firstPart;
        string secondPart;
        if (con.length > 0 && con[0] == '-') {
            minus = true;
            con = con[1 .. $];
        }
        foreach (i, ch; con) {
            if (ch >= '0' && ch <= '9') {
                firstPart = con[0 .. i + 1];
            } else {
                break;
            }
        }
        con = con[firstPart.length .. $];
        if (con.length > 0 && con[0] == '.') {
            con = con[1 .. $];
            foreach (i, ch; con) {
                if (ch >= '0' && ch <= '9') {
                    secondPart = con[0 .. i + 1];
                } else {
                    break;
                }
            }
            con = con[secondPart.length .. $];
            // The number may be the very last thing in the input, so check
            // the length before peeking for the float suffix.
            if (con.length > 0 && con[0] == 'f') {
                con = con[1 .. $];
            }
            double num = stringToLong(firstPart) + cast(double) stringToLong(
                    secondPart) / (10 ^^ secondPart.length);
            token.double_ = minus ? -num : num;
            token.type = StandardTokens.double_;
        } else {
            long num = stringToLong(firstPart);
            token.long_ = minus ? -num : num;
            token.type = StandardTokens.long_;
        }
    } else {
        if (token.type == StandardTokens.double_) {
            con ~= cast(char[]) doubleToString(token.double_);
        } else if (token.type == StandardTokens.long_) {
            con ~= cast(char[]) longToString(token.long_);
        } else {
            assert(0);
        }
    }
}

/// The four whitespace characters (the same set `serializeWhiteTokens` consumes).
alias whiteTokens = AliasSeq!('\n', '\t', '\r', ' ');

/// Token kinds produced by the helpers in this module.
enum StandardTokens {
    notoken = 0,
    white = 1,
    character = 2,
    identifier = 3,
    string_ = 4,
    double_ = 5,
    long_ = 6,
    comment_multiline = 7,
    comment_line = 8,
}

/**
 * A single token: a payload stored in a union, the kind of token in `type`
 * (which decides which union member is meaningful) and a source position.
 */
struct TokenData {
    union {
        string str;
        char ch;
        long long_;
        double double_;
    }

    uint line;
    uint column;
    uint type = StandardTokens.notoken;

    /// Returns the character payload; the token must be of type `character`.
    char getChar() {
        assert(type == StandardTokens.character);
        return ch;
    }

    /**
     * Returns the string payload with escape sequences decoded; the token
     * must be of type `string_`.
     *
     * Recognised escapes: `\n`, `\t`, `\r`, `\b`, `\f`, `\\`, `\/` and `\"`.
     * Any other escape sequence and a lone trailing backslash are dropped.
     * When the string contains no backslash it is returned as is, without
     * copying.
     */
    string getUnescapedString() {
        assert(type == StandardTokens.string_);

        bool hasEscapeChar = false;
        foreach (c; str) {
            if (c == '\\') {
                hasEscapeChar = true;
                break;
            }
        }
        if (!hasEscapeChar) {
            return str;
        }

        string copy;
        copy.reserve(str.length);
        bool ignoreNext = false;
        foreach (i, c; str) {
            if (ignoreNext) {
                // Second character of an escape sequence, already handled.
                ignoreNext = false;
                continue;
            }
            if (c != '\\') {
                copy ~= c;
                continue;
            }
            if (i == str.length - 1) {
                continue; // trailing lone backslash
            }
            ignoreNext = true;
            switch (str[i + 1]) {
            case 'n':
                copy ~= '\n';
                break;
            case 't':
                copy ~= '\t';
                break;
            case 'r':
                copy ~= '\r';
                break;
            case 'b':
                copy ~= '\b';
                break;
            case 'f':
                copy ~= '\f';
                break;
            case '\\':
                copy ~= '\\';
                break;
            case '/':
                copy ~= '/';
                break;
            case '"':
                copy ~= '"';
                break;
            default:
                break; // unknown escape: nothing is emitted
            }
        }
        return copy;
    }

    /// Returns the string payload exactly as stored (escape sequences untouched).
    string getEscapedString() {
        return str;
    }

    /// True when the token is of type `character` and holds `ch`.
    bool isChar(char ch) {
        return type == StandardTokens.character && this.ch == ch;
    }

    /// True when the token is of a string-carrying type and its text equals `ss`.
    bool isString(string ss) {
        const bool carriesString = type == StandardTokens.comment_line
            || type == StandardTokens.comment_multiline
            || type == StandardTokens.identifier
            || type == StandardTokens.string_ || type == StandardTokens.white;
        return carriesString && str == ss;
    }

    /// True for single-line and multi-line comment tokens.
    bool isComment() {
        return type == StandardTokens.comment_line || type == StandardTokens.comment_multiline;
    }

    /**
     * Stores `el` in the token and sets `type` accordingly: integral and bool
     * values become `long_`, floating point values `double_`, strings
     * `string_` and chars `character`.
     */
    void opAssign(T)(T el)
            if (isIntegral!T || isFloatingPoint!T || is(T == string)
                || is(Unqual!T == char) || is(T == bool)) {
        alias TP = Unqual!T;
        static if (isIntegral!TP || is(T == bool)) {
            type = StandardTokens.long_;
            this.long_ = el;
        } else static if (isFloatingPoint!TP) {
            type = StandardTokens.double_;
            this.double_ = el;
        } else static if (is(TP == string)) {
            type = StandardTokens.string_;
            this.str = el;
        } else static if (is(TP == char)) {
            type = StandardTokens.character;
            this.ch = el;
        } else {
            static assert(0);
        }
    }

    /**
     * True when `get!T` is valid for this token. Unlike `isType`, a floating
     * point `T` also accepts a `long_` token.
     */
    bool isAssignableTo(T)()
            if (isIntegral!T || isFloatingPoint!T || is(T == string) || is(T == char)
                || is(T == bool)) {
        static if (isIntegral!T || is(T == bool)) {
            return type == StandardTokens.long_;
        } else static if (isFloatingPoint!T) {
            return type == StandardTokens.double_ || type == StandardTokens.long_;
        } else static if (is(T == string)) {
            return type == StandardTokens.string_;
        } else static if (is(T == char)) {
            return type == StandardTokens.character;
        } else {
            static assert(0);
        }
    }

    /// True when the token type is exactly the one that corresponds to `T`.
    bool isType(T)()
            if (isIntegral!T || isFloatingPoint!T || is(T == string) || is(T == char)
                || is(T == bool)) {
        static if (isIntegral!T || is(T == bool)) {
            return type == StandardTokens.long_;
        } else static if (isFloatingPoint!T) {
            return type == StandardTokens.double_;
        } else static if (is(T == string)) {
            return type == StandardTokens.string_;
        } else static if (is(T == char)) {
            return type == StandardTokens.character;
        } else {
            static assert(0);
        }
    }

    /**
     * Returns the payload converted to `T`. The token type must match `T`
     * (asserted); a floating point `T` also accepts a `long_` token.
     */
    auto get(T)()
            if (isIntegral!T || isFloatingPoint!T || is(T == string) || is(T == char)
                || is(T == bool)) {
        static if (isIntegral!T || is(T == bool)) {
            assert(type == StandardTokens.long_);
            return cast(T) long_;
        } else static if (isFloatingPoint!T) {
            assert(type == StandardTokens.double_ || type == StandardTokens.long_);
            if (type == StandardTokens.double_) {
                return cast(T) double_;
            } else if (type == StandardTokens.long_) {
                return cast(T) long_;
            }
            return T.init; // For release build stability
        } else static if (is(T == string)) {
            assert(type == StandardTokens.string_);
            return cast(T) str;
        } else static if (is(T == char)) {
            assert(type == StandardTokens.character);
            return cast(T) ch;
        } else {
            static assert(0);
        }
    }

    /// Formats the token as `TK(type, payload, line, column)` for debugging.
    string toString() {
        import std.format;

        switch (type) {
        case StandardTokens.character:
            return format("TK(%5s, '%s', %s, %s)",
                    cast(StandardTokens) type, ch, line, column);
        case StandardTokens.string_:
        case StandardTokens.identifier:
        case StandardTokens.white:
        case StandardTokens.comment_line:
        case StandardTokens.comment_multiline:
            return format("TK(%5s, \"%s\", %s, %s)",
                    cast(StandardTokens) type, str, line, column);
        case StandardTokens.double_:
            return format("TK(%5s, %s, %s, %s)",
                    cast(StandardTokens) type, double_, line, column);
        case StandardTokens.long_:
            return format("TK(%5s, %s, %s, %s)",
                    cast(StandardTokens) type, long_, line, column);
        default:
            return format("TK(%5s, ???, %s, %s)", cast(StandardTokens) type, line, column);
        }
    }
}

alias TokenDataVector = Vector!(TokenData);

/**
 * Prints every token returned by `lex.getNextToken()` up to and including the
 * first token of type `notoken`.
 */
void printAllTokens(Lexer)(ref Lexer lex) {
    import std.stdio : writeln;

    TokenData token;
    // A fresh TokenData already has type `notoken`, so the condition has to be
    // checked after the first token was fetched.
    do {
        token = lex.getNextToken();
        writeln(token);
    }
    while (token.type != StandardTokens.notoken);
}

/**
 * Collects tokens from `lex.getNextToken()` until a `notoken` token is
 * returned; that terminating token is the last element of the result.
 */
TokenDataVector tokenizeAll(Lexer)(ref Lexer lex) {
    TokenDataVector tokens;
    do {
        tokens ~= lex.getNextToken();
    }
    while (tokens[$ - 1].type != StandardTokens.notoken);

    return tokens;
}

/// Converts `tokens` back to text by passing each one to `lex.toChars`.
Vector!char tokensToString(Lexer)(ref Lexer lex, TokenData[] tokens) {
    Vector!char code;
    foreach (tk; tokens) {
        lex.toChars(tk, code);
    }
    return code;
}