1 module mutils.serializer.lexer_utils;
2 
3 import std.stdio;
4 import mutils.container.ct_map;
5 import std.traits;
6 import std.algorithm:canFind;
7 import std.meta;
8 
9 
10 
/// Advances `line` and `column` over the text consumed between two slices.
///
/// The consumed text is the leading part of `oldSlice` whose length is
/// `oldSlice.length - newSLice.length`. Every '\n' in it increments `line`
/// and resets `column` to 0; every other character increments `column`.
void updateLineAndCol(ref uint line,ref uint column, string oldSlice, string newSLice){
	// Slice once up front so the bounds check happens before any counter changes.
	string consumed=oldSlice[0..$-newSLice.length];
	for(size_t idx=0; idx<consumed.length; idx++){
		if(consumed[idx]!='\n'){
			column++;
			continue;
		}
		line++;
		column=0;
	}
}
21 
/// Loads or saves a whitespace token.
///
/// load==true: counts the leading run of ' ', '\t', '\n' and '\r' in `con`.
/// A non-empty run becomes `token.str`, is removed from the front of `con`,
/// and the token type is set to `white`; otherwise the type is set to `notoken`
/// and `con` is left untouched.
/// load==false: appends `token.str` to `con` when the token type is `white`.
void serializeWhiteTokens(bool load, Container)(ref TokenData token, ref Container con){
	static if(load){
		size_t count=0;
		foreach(c;con){
			immutable bool isWhite = c==' ' || c=='\t' || c=='\n' || c=='\r';
			if(!isWhite){
				break;
			}
			count++;
		}
		if(count==0){
			token.type=StandardTokens.notoken;
			return;
		}
		token.str=con[0..count];
		con=con[count..$];
		token.type=StandardTokens.white;
	}else{
		if(token.type!=StandardTokens.white){
			return;
		}
		con~=cast(char[])token.str;
	}
}
45 
/// Loads or saves a `/* ... */` comment token.
///
/// load==true: `con` must start with "/*" (asserted). The text up to the first
/// "*/" becomes `token.str` and `con` is advanced past the closing delimiter.
/// When no "*/" is found, everything after "/*" becomes the text and `con` is
/// set to null. The token type is set to `comment_multiline` in both cases.
/// load==false: appends "/*", `token.str` and "*/" to `con` when the token type
/// is `comment_multiline`.
void serializeCommentMultiline(bool load, Container)(ref TokenData token, ref Container con){
	static if(load){
		assert(con[0..2]==['/','*']);
		con=con[2..$];
		foreach(pos, c;con){
			// A '*' only closes the comment when a '/' directly follows it.
			immutable bool closes = c=='*' && pos+1!=con.length && con[pos+1]=='/';
			if(!closes){
				continue;
			}
			token.str=con[0..pos];
			con=con[pos+2..$];
			token.type=StandardTokens.comment_multiline;
			return;
		}
		// Unterminated comment: take everything that is left.
		token.str=con;
		con=null;
		token.type=StandardTokens.comment_multiline;
	}else{
		if(token.type!=StandardTokens.comment_multiline){
			return;
		}
		con~=cast(char[])"/*";
		con~=cast(char[])token.str;
		con~=cast(char[])"*/";
	}
}
69 
// Loading a terminated block comment yields the text between the delimiters.
unittest{
	string input="/*  aaa bbb ccc */";
	TokenData parsed;
	serializeCommentMultiline!true(parsed, input);
	assert(parsed.str=="  aaa bbb ccc ");
}
76 
/// Loads or saves a `// ...` line comment token.
///
/// load==true: `con` must start with "//" (asserted). The text up to, but not
/// including, the first '\n' becomes `token.str`; `con` is advanced so that it
/// starts at that '\n'. When there is no '\n', everything after "//" becomes the
/// text and `con` is set to null. The token type is set to `comment_line`.
/// A '\r' directly before the '\n' is kept as part of the comment text.
/// load==false: appends "//" and `token.str` to `con` when the token type is
/// `comment_line`.
void serializeCommentLine(bool load, Container)(ref TokenData token, ref Container con){
	static if(load==true){
		assert(con[0..2]==['/','/']);
		con=con[2..$];
		foreach(i, ch;con){
			if (ch=='\n'){
				// Take the whole text before the newline. The previous `i-1` bound
				// cut off the last character and underflowed for an empty comment.
				token.str=con[0..i];
				con=con[i..$];
				token.type=StandardTokens.comment_line;
				return;
			}
		}
		// No newline: the comment runs to the end of the input.
		token.str=con;
		con=null;
		token.type=StandardTokens.comment_line;
	}else{
		if(token.type==StandardTokens.comment_line){
			con~=cast(char[])"//";
			con~=cast(char[])token.str;
		}			
	}
}
99 
/// True when `ch` is an ASCII letter or an underscore.
bool isIdentifierFirstChar(char ch){
	switch(ch){
		case 'a': .. case 'z':
		case 'A': .. case 'Z':
		case '_':
			return true;
		default:
			return false;
	}
}
103 
/// Loads or saves an identifier token.
///
/// load==true: when `con` is non-empty and starts with a character accepted by
/// `isIdentifierFirstChar`, the longest leading run of ASCII letters, digits and
/// '_' becomes `token.str`, is removed from the front of `con`, and the token
/// type is set to `identifier`. Otherwise the type is set to `notoken` and `con`
/// is left untouched.
/// load==false: appends `token.str` to `con` when the token type is `identifier`.
void serializeIdentifier(bool load, Container)(ref TokenData token, ref Container con){
	static if(load==true){
		// Empty input cannot hold an identifier; check before reading con[0].
		if(con.length==0 || !isIdentifierFirstChar(con[0])){
			token.type=StandardTokens.notoken;
			return;
		}
		// The first character has already been accepted.
		size_t charactersNum=1;
		foreach(ch;con[1..$]){
			if ( (ch>='a' && ch<='z') || (ch>='A' && ch<='Z') || (ch>='0' && ch<='9') || ch=='_'){
				charactersNum++;
			}else{
				break;
			}
		}
		token.str=con[0..charactersNum];
		con=con[charactersNum..$];
		token.type=StandardTokens.identifier;
	}else{
		// This used to test for `white`, so identifier tokens were never written.
		if(token.type==StandardTokens.identifier){
			con~=token.str;
		}			
	}
}
135 
/// Loads or saves a double-quoted string token.
///
/// load==true: when `con` is non-empty and starts with '"', the text up to the
/// next '"' becomes `token.str` (quotes excluded), `con` is advanced past the
/// closing quote and the token type is set to `string_`. When there is no closing
/// quote, everything after the opening quote becomes the text and `con` becomes
/// empty. When `con` is empty or does not start with '"', the type is set to
/// `notoken` and `con` is left untouched.
/// Backslash escapes are not interpreted: the first '"' after the opening one
/// always ends the string.
/// load==false: appends `token.str` to `con` when the token type is `string_`.
/// No quote characters are written by this function.
void serializeStringToken(bool load, Container)(ref TokenData token, ref Container con){
	import std.string;
	static if(load==true){
		// Not a string literal: report it instead of tagging a stale payload as string_.
		if(con.length==0 || con[0]!='"'){
			token.type=StandardTokens.notoken;
			return;
		}
		// indexOf returns a signed index, -1 when the character is absent.
		ptrdiff_t closing=con[1..$].indexOf('"');
		if(closing<0){
			token.str=con[1..$];
			con=con[$..$];
		}else{
			// Translate the index within con[1..$] to an index within con.
			size_t end=cast(size_t)closing+1;
			token.str=con[1..end];
			con=con[end+1..$];
		}
		token.type=StandardTokens.string_;
	}else{
		if(token.type==StandardTokens.string_){
			con~=token.str;
		}			
	}
}
160 
161 import mutils.conv;
/// Converts a double to its string form.
/// The returned string is valid only until the next call to any mutils.conv function.
string doubleToString(double num){
	string text=to!string(num);
	return text;
}
166 
/// Converts a long to its string form.
/// The returned string is valid only until the next call to any mutils.conv function.
string longToString(long num){
	string text=to!string(num);
	return text;
}
171 
/// Converts `str` to a long using the `to` conversion template.
long stringToLong(string str){
	long value=to!long(str);
	return value;
}
175 
/// Loads or saves a numeric token.
///
/// load==true: consumes an optional leading '-', a run of decimal digits and,
/// when a '.' follows, a second run of digits plus an optional 'f' suffix.
/// With a '.' the token becomes `double_`, otherwise `long_`; `con` is advanced
/// past everything consumed.
/// load==false: appends the text form of `token.double_` or `token.long_` to
/// `con`; any other token type hits `assert(0)`.
void serializeNumberToken(bool load, Container)(ref TokenData token, ref Container con){
	static if(load==true){
		bool minus=false;
		string firstPart;
		string secondPart;
		// Check the length first: con may be empty.
		if(con.length>0 && con[0]=='-'){
			minus=true;
			con=con[1..$];
		}
		// Integer digits.
		foreach(i,ch;con){
			if( ch>='0' && ch<='9'){
				firstPart=con[0..i+1];
			}else{
				break;
			}
		}
		con=con[firstPart.length..$];
		if(con.length>0 && con[0]=='.'){
			con=con[1..$];
			// Fraction digits.
			foreach(i,ch;con){
				if(ch>='0' && ch<='9'){
					secondPart=con[0..i+1];
				}else{
					break;
				}
			}
			con=con[secondPart.length..$];
			// Optional float suffix. The input may end right after the digits,
			// so the length has to be checked before reading con[0].
			if(con.length>0 && con[0]=='f'){
				con=con[1..$];
			}
			// NOTE(review): an empty firstPart or secondPart (".5", "1.") is passed
			// to stringToLong unchanged - confirm how the conversion treats "".
			// The fraction digits are scaled by 10^digits, so leading zeros are kept.
			double num=stringToLong(firstPart)+cast(double)stringToLong(secondPart)/(10^^secondPart.length);
			token.double_=minus?-num:num;
			token.type=StandardTokens.double_;
		}else{
			long num=stringToLong(firstPart);
			token.long_=minus?-num:num;
			token.type=StandardTokens.long_;
		}
	}else{
		if(token.type==StandardTokens.double_){
			con~=cast(char[])doubleToString(token.double_);
		}else if(token.type==StandardTokens.long_){
			con~=cast(char[])longToString(token.long_);
		}else{
			assert(0);
		}
	}
}
224 
225 
/// Compile-time sequence of newline, tab, carriage return and space -
/// the same four characters that serializeWhiteTokens accepts as whitespace.
alias whiteTokens=AliasSeq!('\n','\t','\r',' ');
227 
228 
229 
230 import mutils.container.vector;
231 
232 
/// Token kinds stored in `TokenData.type`.
/// The kind decides which member of the TokenData payload union is meaningful.
enum StandardTokens{
	/// No token; default type of a TokenData and the result of a loader that matched nothing.
	notoken=0,
	/// Run of whitespace characters; text in `str`.
	white=1,
	/// Single character; value in `ch`.
	character=2,
	/// Identifier; text in `str`.
	identifier=3,
	/// String literal; text in `str`.
	string_=4,
	/// Floating point number; value in `double_`.
	double_=5,
	/// Integer number; value in `long_`.
	long_=6,
	/// `/* ... */` comment; text without the delimiters in `str`.
	comment_multiline=7,
	/// `// ...` comment; text without the leading slashes in `str`.
	comment_line=8,
}
244 
/// One lexer token: a payload (text, character, integer or floating point value),
/// the token kind in `type`, and `line`/`column` fields.
struct TokenData{
	// Payload storage. The members overlap; `type` says which one is meaningful.
	union{
		string str;
		char ch;
		long long_;
		double double_;
	}
	uint line;
	uint column;
	uint type=StandardTokens.notoken;

	/// Character payload; asserts that the token is a `character`.
	char getChar(){
		assert(type==StandardTokens.character);
		return ch;
	}

	/// Text payload; asserts that the token is a `string_`. The text is returned as stored.
	string getUnescapedString(){
		assert(type==StandardTokens.string_);
		return str;
	}

	/// Text payload returned as stored, without a type check.
	string getEscapedString(){
		return str;
	}

	/// True when the token is a `character` holding exactly `ch`.
	bool isChar(char ch){
		if(type!=StandardTokens.character){
			return false;
		}
		return this.ch==ch;
	}

	/// True when the token is one of the text-carrying kinds and its text equals `ss`.
	bool isString(string ss){
		switch(type){
			case StandardTokens.comment_line:
			case StandardTokens.comment_multiline:
			case StandardTokens.identifier:
			case StandardTokens.string_:
			case StandardTokens.white:
				return str==ss;
			default:
				return false;
		}
	}

	/// True for line comments and multi-line comments.
	bool isComment(){
		immutable uint kind=type;
		return kind==StandardTokens.comment_multiline || kind==StandardTokens.comment_line;
	}

	/// Stores a value and sets `type` accordingly:
	/// integral and bool -> `long_`, floating point -> `double_`,
	/// string -> `string_`, char -> `character`.
	void opAssign(T)(T el)
		if(isIntegral!T || isFloatingPoint!T || is(T==string) || is(Unqual!T==char)  || is(T==bool) )
	{
		alias TP=Unqual!T;
		static if(isIntegral!TP || is(T==bool)){
			this.long_=el;
			type=StandardTokens.long_;
		}else static if(isFloatingPoint!TP){
			this.double_=el;
			type=StandardTokens.double_;
		}else static if( is(TP==string) ){
			this.str=el;
			type=StandardTokens.string_;
		}else static if( is(TP==char) ){
			this.ch=el;
			type=StandardTokens.character;
		}else {
			static assert(0);
		}
	}

	/// True when the token kind is the one that `opAssign` would pick for a value of type T.
	bool isType(T)()
		if(isIntegral!T || isFloatingPoint!T || is(T==string) || is(T==char) || is(T==bool) )
	{
		static if(isIntegral!T || is(T==bool) ){
			enum expected=StandardTokens.long_;
		}else static if(isFloatingPoint!T){
			enum expected=StandardTokens.double_;
		}else static if( is(T==string) ){
			enum expected=StandardTokens.string_;
		}else static if( is(T==char) ){
			enum expected=StandardTokens.character;
		}else{
			static assert(0);
		}
		return type==expected;
	}

	/// Returns the payload cast to T; asserts that the token kind matches T.
	auto get(T)()
		if(isIntegral!T || isFloatingPoint!T || is(T==string) || is(T==char) || is(T==bool) )
	{
		static if(isIntegral!T || is(T==bool)){
			assert(type==StandardTokens.long_);
			return cast(T)long_;
		}else static if(isFloatingPoint!T){
			assert(type==StandardTokens.double_);
			return cast(T)double_;
		}else static if( is(T==string) ){
			assert(type==StandardTokens.string_);
			return cast(T)str;
		}else static if( is(T==char) ){
			assert(type==StandardTokens.character);
			return cast(T)ch;
		}else {
			static assert(0);
		}
	}

	/// Debug representation: kind, payload (or ??? for kinds without a known payload), line, column.
	string toString(){
		import std.format;
		auto kind=cast(StandardTokens)type;
		if(kind==StandardTokens.character){
			return format("TK(%5s, '%s', %s, %s)",kind,ch,line,column);
		}
		immutable bool carriesText =
			kind==StandardTokens.string_ ||
			kind==StandardTokens.identifier ||
			kind==StandardTokens.white ||
			kind==StandardTokens.comment_line ||
			kind==StandardTokens.comment_multiline;
		if(carriesText){
			return format("TK(%5s, \"%s\", %s, %s)",kind,str,line,column);
		}
		if(kind==StandardTokens.double_){
			return format("TK(%5s, %s, %s, %s)",kind,double_,line,column);
		}
		if(kind==StandardTokens.long_){
			return format("TK(%5s, %s, %s, %s)",kind,long_,line,column);
		}
		return format("TK(%5s, ???, %s, %s)",kind,line,column);
	}
}
372 
/// Vector of TokenData; the container type returned by tokenizeAll.
alias TokenDataVector=Vector!(TokenData);
374 
375 
/// Pulls tokens from `lex` with `getNextToken` and writes each one to stdout,
/// stopping after the first token of type `notoken` (which is printed as well).
void printAllTokens(Lexer)(ref Lexer lex){
	TokenData token;
	// A default TokenData already has type notoken, so the condition has to be
	// tested after fetching; a leading `while` would never run the body.
	do{
		token=lex.getNextToken();
		writeln(token);
	}while(token.type!=StandardTokens.notoken);
}
383 
384 
385 
/// Collects every token produced by `lex.getNextToken()` into a vector.
/// The terminating `notoken` token is included as the last element.
TokenDataVector tokenizeAll(Lexer)(ref Lexer lex){
	TokenDataVector collected;
	while(true){
		collected~=lex.getNextToken();
		if(collected[$-1].type==StandardTokens.notoken){
			break;
		}
	}
	return collected;
}
394 
395 
396 
/// Builds source text by passing each token, in order, to `lex.toChars`
/// together with the string being accumulated.
string tokensToString(Lexer)(ref Lexer lex,TokenData[] tokens){
	string text;
	for(size_t idx=0; idx<tokens.length; idx++){
		// Work on a copy so the caller's array element is never handed out directly.
		TokenData current=tokens[idx];
		lex.toChars(current, text);
	}
	return text;
}