module mutils.serializer.lexer_utils;

import std.stdio;
import mutils.container.ct_map;
import std.traits;
import std.algorithm : canFind;
import std.meta;

/// Advances `line`/`column` over the characters that were consumed between
/// `oldSlice` and `newSLice`.
/// NOTE(review): assumes `newSLice` is a tail slice of `oldSlice` — TODO confirm callers.
void updateLineAndCol(ref uint line, ref uint column, string oldSlice, string newSLice){
	foreach(char ch; oldSlice[0 .. oldSlice.length - newSLice.length]){
		if(ch == '\n'){
			line++;
			column = 0;
		}else{
			column++;
		}
	}
}

/// load==true : consumes a run of whitespace (' ', '\t', '\n', '\r') from the
/// front of `con` into `token` (type=white), or sets type=notoken if none.
/// load==false: appends the token's text back to `con` if it is a white token.
void serializeWhiteTokens(bool load, Container)(ref TokenData token, ref Container con){
	static if(load == true){
		size_t whiteNum = 0;
		foreach(ch; con){
			if(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'){
				whiteNum++;
			}else{
				break;
			}
		}
		if(whiteNum > 0){
			token.str = con[0 .. whiteNum];
			con = con[whiteNum .. $];
			token.type = StandardTokens.white;
			return;
		}
		token.type = StandardTokens.notoken;
	}else{
		if(token.type == StandardTokens.white){
			con ~= cast(char[]) token.str;
		}
	}
}

/// load==true : consumes a `/* ... */` comment from the front of `con`
/// (the delimiters are stripped from token.str). An unterminated comment
/// consumes the rest of the input.
/// load==false: writes the comment back with its `/*` and `*/` delimiters.
void serializeCommentMultiline(bool load, Container)(ref TokenData token, ref Container con){
	static if(load == true){
		assert(con[0 .. 2] == ['/', '*']);
		con = con[2 .. $];
		foreach(i, ch; con){
			// `i != con.length-1` keeps con[i+1] in bounds.
			if(ch == '*' && i != con.length - 1 && con[i + 1] == '/'){
				token.str = con[0 .. i];
				con = con[i + 2 .. $];
				token.type = StandardTokens.comment_multiline;
				return;
			}
		}
		// No closing */ — treat the rest of the input as the comment body.
		token.str = con;
		con = null;
		token.type = StandardTokens.comment_multiline;
	}else{
		if(token.type == StandardTokens.comment_multiline){
			con ~= cast(char[]) "/*";
			con ~= cast(char[]) token.str;
			con ~= cast(char[]) "*/";
		}
	}
}

unittest{
	string str = "/* aaa bbb ccc */";
	TokenData tk;
	serializeCommentMultiline!(true)(tk, str);
	assert(tk.str == " aaa bbb ccc ");
}

/// load==true : consumes a `// ...` comment from the front of `con` up to (not
/// including) the terminating '\n'; the newline is left in `con`. An
/// unterminated comment consumes the rest of the input.
/// load==false: writes the comment back with its leading `//`.
void serializeCommentLine(bool load, Container)(ref TokenData token, ref Container con){
	static if(load == true){
		assert(con[0 .. 2] == ['/', '/']);
		con = con[2 .. $];
		foreach(i, ch; con){
			if(ch == '\n'){
				// BUG FIX: was con[0..i-1], which dropped the comment's last character.
				token.str = con[0 .. i];
				con = con[i .. $];
				token.type = StandardTokens.comment_line;
				return;
			}
		}
		token.str = con;
		con = null;
		token.type = StandardTokens.comment_line;
	}else{
		if(token.type == StandardTokens.comment_line){
			con ~= cast(char[]) "//";
			con ~= cast(char[]) token.str;
		}
	}
}

unittest{
	string str = "// abc\n";
	TokenData tk;
	serializeCommentLine!(true)(tk, str);
	assert(tk.str == " abc");
	assert(str == "\n");
}

/// True for characters allowed to start an identifier: [a-zA-Z_].
bool isIdentifierFirstChar(char ch){
	return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
}

/// load==true : consumes an identifier ([a-zA-Z_][a-zA-Z0-9_]*) from the front
/// of `con`, or sets type=notoken if `con` is empty or does not start with one.
/// load==false: appends the token's text back to `con` if it is an identifier.
void serializeIdentifier(bool load, Container)(ref TokenData token, ref Container con){
	static if(load == true){
		// Guard empty input and a non-identifier first character.
		if(con.length == 0 || !isIdentifierFirstChar(con[0])){
			token.type = StandardTokens.notoken;
			return;
		}
		size_t charactersNum = 1;
		foreach(ch; con[1 .. $]){
			if((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_'){
				charactersNum++;
			}else{
				break;
			}
		}
		token.str = con[0 .. charactersNum];
		con = con[charactersNum .. $];
		token.type = StandardTokens.identifier;
	}else{
		// BUG FIX: was testing StandardTokens.white (copy-paste from
		// serializeWhiteTokens), so identifiers were never written back.
		if(token.type == StandardTokens.identifier){
			con ~= cast(char[]) token.str;
		}
	}
}

/// load==true : consumes a double-quoted string from the front of `con`;
/// token.str holds the contents without the quotes. An unterminated string
/// consumes the rest of the input. Sets type=notoken if `con` does not start
/// with '"'.
/// load==false: writes the string back surrounded by quotes, mirroring load.
void serializeStringToken(bool load, Container)(ref TokenData token, ref Container con){
	import std.string;
	static if(load == true){
		// BUG FIX: original set type=string_ even when nothing was consumed.
		if(con.length == 0 || con[0] != '"'){
			token.type = StandardTokens.notoken;
			return;
		}
		// BUG FIX: indexOf returns ptrdiff_t (-1 when absent); was stored in size_t.
		ptrdiff_t end = con[1 .. $].indexOf('"');
		if(end == -1){
			// Unterminated string: take everything after the opening quote.
			token.str = con[1 .. $];
			con = con[$ .. $];
		}else{
			end += 1; // index of the closing quote within con
			token.str = con[1 .. end];
			con = con[end + 1 .. $];
		}
		token.type = StandardTokens.string_;
	}else{
		if(token.type == StandardTokens.string_){
			// BUG FIX: re-emit the quotes so save mirrors load, as the comment
			// serializers re-emit their delimiters.
			con ~= '"';
			con ~= cast(char[]) token.str;
			con ~= '"';
		}
	}
}

import mutils.conv;
/// Returns string is valid only to next call to any mutils.conv function
string doubleToString(double num){
	return num.to!string;
}

/// Returns string is valid only to next call to any mutils.conv function
string longToString(long num){
	return num.to!string;
}

/// Parses a decimal integer; delegates to mutils.conv.
long stringToLong(string str){
	return str.to!long;
}

/// load==true : consumes a number from the front of `con`: an optional '-',
/// digits, then optionally '.' + digits (+ optional 'f' suffix). Produces a
/// long_ token for integers and a double_ token when a '.' is present.
/// load==false: writes the numeric value back as text.
void serializeNumberToken(bool load, Container)(ref TokenData token, ref Container con){
	static if(load == true){
		bool minus = false;
		string firstPart;
		string secondPart;
		if(con.length > 0 && con[0] == '-'){
			minus = true;
			con = con[1 .. $];
		}
		foreach(i, ch; con){
			if(ch >= '0' && ch <= '9'){
				firstPart = con[0 .. i + 1];
			}else{
				break;
			}
		}
		con = con[firstPart.length .. $];
		if(con.length > 0 && con[0] == '.'){
			con = con[1 .. $];
			foreach(i, ch; con){
				if(ch >= '0' && ch <= '9'){
					secondPart = con[0 .. i + 1];
				}else{
					break;
				}
			}
			con = con[secondPart.length .. $];
			// BUG FIX: guard the length before reading the optional 'f' suffix;
			// the original indexed con[0] on possibly-empty input.
			if(con.length > 0 && con[0] == 'f'){
				con = con[1 .. $];
			}
			// Guard empty parts ("1." or ".5") instead of converting "".
			long intPart = firstPart.length ? stringToLong(firstPart) : 0;
			double frac = secondPart.length
				? cast(double) stringToLong(secondPart) / (10 ^^ secondPart.length)
				: 0;
			double num = intPart + frac;
			token.double_ = minus ? -num : num;
			token.type = StandardTokens.double_;
		}else{
			long num = firstPart.length ? stringToLong(firstPart) : 0;
			token.long_ = minus ? -num : num;
			token.type = StandardTokens.long_;
		}
	}else{
		if(token.type == StandardTokens.double_){
			con ~= cast(char[]) doubleToString(token.double_);
		}else if(token.type == StandardTokens.long_){
			con ~= cast(char[]) longToString(token.long_);
		}else{
			assert(0);
		}
	}
}

/// Characters considered whitespace by this lexer.
alias whiteTokens = AliasSeq!('\n', '\t', '\r', ' ');

import mutils.container.vector;

/// Token kinds shared by all lexers built on these helpers.
enum StandardTokens{
	notoken = 0,
	white = 1,
	character = 2,
	identifier = 3,
	string_ = 4,
	double_ = 5,
	long_ = 6,
	comment_multiline = 7,
	comment_line = 8,
}

/// One lexed token: a tagged union of its payload plus source position.
/// `type` selects which union member is valid.
struct TokenData{
	union{
		string str;     // white, identifier, string_, comment_*
		char ch;        // character
		long long_;     // long_
		double double_; // double_
	}
	uint line;
	uint column;
	uint type = StandardTokens.notoken;

	char getChar(){
		assert(type == StandardTokens.character);
		return ch;
	}

	string getUnescapedString(){
		assert(type == StandardTokens.string_);
		return str;
	}

	string getEscapedString(){
		return str;
	}

	/// True when this token is the single character `ch`.
	bool isChar(char ch){
		return type == StandardTokens.character && this.ch == ch;
	}

	/// True when this token carries string data equal to `ss`.
	bool isString(string ss){
		return (
			type == StandardTokens.comment_line ||
			type == StandardTokens.comment_multiline ||
			type == StandardTokens.identifier ||
			type == StandardTokens.string_ ||
			type == StandardTokens.white
			) &&
			str == ss;
	}

	bool isComment(){
		return type == StandardTokens.comment_line || type == StandardTokens.comment_multiline;
	}

	/// Assigns a value and sets `type` accordingly (bool/integral -> long_,
	/// floating -> double_, string -> string_, char -> character).
	void opAssign(T)(T el)
		if(isIntegral!T || isFloatingPoint!T || is(T == string) || is(Unqual!T == char) || is(T == bool))
	{
		alias TP = Unqual!T;
		static if(isIntegral!TP || is(T == bool)){
			type = StandardTokens.long_;
			this.long_ = el;
		}else static if(isFloatingPoint!TP){
			type = StandardTokens.double_;
			this.double_ = el;
		}else static if(is(TP == string)){
			type = StandardTokens.string_;
			this.str = el;
		}else static if(is(TP == char)){
			type = StandardTokens.character;
			this.ch = el;
		}else{
			static assert(0);
		}
	}

	/// True when `type` matches the token kind that `T` maps to (see opAssign).
	bool isType(T)()
		if(isIntegral!T || isFloatingPoint!T || is(T == string) || is(T == char) || is(T == bool))
	{
		static if(isIntegral!T || is(T == bool)){
			return type == StandardTokens.long_;
		}else static if(isFloatingPoint!T){
			return type == StandardTokens.double_;
		}else static if(is(T == string)){
			return type == StandardTokens.string_;
		}else static if(is(T == char)){
			return type == StandardTokens.character;
		}else{
			static assert(0);
		}
	}

	/// Returns the payload as `T`; asserts that `type` matches (see opAssign).
	auto get(T)()
		if(isIntegral!T || isFloatingPoint!T || is(T == string) || is(T == char) || is(T == bool))
	{
		static if(isIntegral!T || is(T == bool)){
			assert(type == StandardTokens.long_);
			return cast(T) long_;
		}else static if(isFloatingPoint!T){
			assert(type == StandardTokens.double_);
			return cast(T) double_;
		}else static if(is(T == string)){
			assert(type == StandardTokens.string_);
			return cast(T) str;
		}else static if(is(T == char)){
			assert(type == StandardTokens.character);
			return cast(T) ch;
		}else{
			static assert(0);
		}
	}

	string toString(){
		import std.format;
		switch(type){
			case StandardTokens.character:
				return format("TK(%5s, '%s', %s, %s)", cast(StandardTokens) type, ch, line, column);
			case StandardTokens.string_:
			case StandardTokens.identifier:
			case StandardTokens.white:
			case StandardTokens.comment_line:
			case StandardTokens.comment_multiline:
				return format("TK(%5s, \"%s\", %s, %s)", cast(StandardTokens) type, str, line, column);
			case StandardTokens.double_:
				return format("TK(%5s, %s, %s, %s)", cast(StandardTokens) type, double_, line, column);
			case StandardTokens.long_:
				return format("TK(%5s, %s, %s, %s)", cast(StandardTokens) type, long_, line, column);
			default:
				return format("TK(%5s, ???, %s, %s)", cast(StandardTokens) type, line, column);
		}
	}
}

alias TokenDataVector = Vector!(TokenData);

/// Debug helper: prints tokens from `lex` until it reports notoken.
void printAllTokens(Lexer)(ref Lexer lex){
	TokenData token;
	// BUG FIX: was `Token.notoken` — `Token` is undefined; the enum is StandardTokens.
	while(token.type != StandardTokens.notoken){
		token = lex.getNextToken();
		writeln(token);
	}
}

/// Drains `lex` into a vector; the final element is the notoken sentinel.
TokenDataVector tokenizeAll(Lexer)(ref Lexer lex){
	TokenDataVector tokens;
	do{
		tokens ~= lex.getNextToken();
	}while(tokens[$ - 1].type != StandardTokens.notoken);

	return tokens;
}

/// Serializes `tokens` back to source text using the lexer's toChars.
string tokensToString(Lexer)(ref Lexer lex, TokenData[] tokens){
	string code;
	foreach(tk; tokens)
		lex.toChars(tk, code);
	return code;
}