use crate::globals::{DC_VIEW_SUFFIXES, HISTORICAL_DC_KEYWORDS};
use plex::lexer;

#[rustfmt::skip]
#[derive(Debug, Clone, PartialEq)]
pub enum DCToken {
    // Ignored tokens
    Whitespace, Comment, Newline,

    // Literals
    BooleanLiteral(bool), DecimalLiteral(i64), OctalLiteral(String),
    HexLiteral(String), BinaryLiteral(String), FloatLiteral(f64),
    CharacterLiteral(char), StringLiteral(String), EscapeCharacter(String),

    // Data types
    CharT, Int8T, Int16T, Int32T, Int64T,
    UInt8T, BoolT, UInt16T, UInt32T, UInt64T,
    Float32T, Float64T,
    Int8ArrayT, Int16ArrayT, Int32ArrayT,
    UInt8ArrayT, UInt16ArrayT, UInt32ArrayT, UInt32UInt8ArrayT,
    StringT, BlobT, Blob32T,

    // Keywords
    DClass, Struct, Keyword, Typedef, From, Import,
    Switch, Case, Default, Break,

    // Identifiers and DC keywords
    Identifier(String), DCKeyword(String), ViewSuffix(String),

    // Operators
    Percent, Star, Plus, Hyphen, ForwardSlash, Period,

    // Delimiters
    OpenParenthesis, CloseParenthesis, OpenBraces, CloseBraces,
    OpenBrackets, CloseBrackets, Comma, Semicolon, Equals, Colon,
}
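
// The plex `lexer!` macro expands into the `next_token` function driven by
// `Lexer::next` below: it matches the rules against the start of the input
// and returns the winning `(DCToken, &str)` pair together with the remaining
// input, or `None` once the input is exhausted.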
lexer! {
    fn next_token(text: 'a) -> (DCToken, &'a str);

    // Ignored tokens: whitespace, line comments, and block comments.
    r#"[ \t\r]+"# => (DCToken::Whitespace, text),
    r#"//[^\n]*"# => (DCToken::Comment, text),
    // Block comments (`~` excludes any body that already contains "*/").
    r#"/[*](~(.*[*]/.*))[*]/"# => (DCToken::Comment, text),
    r#"\n"# => (DCToken::Newline, text),

    r#"true"# => (DCToken::BooleanLiteral(true), text),
    r#"false"# => (DCToken::BooleanLiteral(false), text),

    r#"0|([1-9][0-9]*)"# => (DCToken::DecimalLiteral(match text.parse::<i64>() {
        Ok(n) => n,
        Err(err) => {
            panic!("dclexer: Found DecimalLiteral token, but failed to parse as i64.\n\n{}", err);
        },
    }), text),

    r#"0[0-7]+"# => (DCToken::OctalLiteral(text.to_owned()), text),
    r#"0[xX][0-9a-fA-F]+"# => (DCToken::HexLiteral(text.to_owned()), text),
    r#"0[bB][0-1]+"# => (DCToken::BinaryLiteral(text.to_owned()), text),

    // Float literals may omit the integer part (e.g. ".9").
    r#"([0-9]?)+\.[0-9]+"# => (DCToken::FloatLiteral(match text.parse::<f64>() {
        Ok(f) => f,
        Err(err) => {
            panic!("dclexer: Found FloatLiteral token, but failed to parse as f64.\n\n{}", err);
        }
    }), text),

    r#"'.'"# => (DCToken::CharacterLiteral(text.chars().nth(1).unwrap()), text),
    r#""[^"]*""# => (DCToken::StringLiteral(text.to_owned().replace('\"', "")), text),

    // Data type keywords
    r#"char"# => (DCToken::CharT, text),
    r#"int8"# => (DCToken::Int8T, text),
    r#"int16"# => (DCToken::Int16T, text),
    r#"int32"# => (DCToken::Int32T, text),
    r#"int64"# => (DCToken::Int64T, text),
    r#"uint8"# => (DCToken::UInt8T, text),
    r#"bool"# => (DCToken::BoolT, text),
    r#"uint16"# => (DCToken::UInt16T, text),
    r#"uint32"# => (DCToken::UInt32T, text),
    r#"uint64"# => (DCToken::UInt64T, text),
    r#"float32"# => (DCToken::Float32T, text),
    r#"float64"# => (DCToken::Float64T, text),
    r#"int8array"# => (DCToken::Int8ArrayT, text),
    r#"int16array"# => (DCToken::Int16ArrayT, text),
    r#"int32array"# => (DCToken::Int32ArrayT, text),
    r#"uint8array"# => (DCToken::UInt8ArrayT, text),
    r#"uint16array"# => (DCToken::UInt16ArrayT, text),
    r#"uint32array"# => (DCToken::UInt32ArrayT, text),
    r#"uint32uint8array"# => (DCToken::UInt32UInt8ArrayT, text),
    r#"string"# => (DCToken::StringT, text),
    r#"blob"# => (DCToken::BlobT, text),
    r#"blob32"# => (DCToken::Blob32T, text),

    // DC language keywords
    r#"dclass"# => (DCToken::DClass, text),
    r#"struct"# => (DCToken::Struct, text),
    r#"keyword"# => (DCToken::Keyword, text),
    r#"from"# => (DCToken::From, text),
    r#"import"# => (DCToken::Import, text),
    r#"typedef"# => (DCToken::Typedef, text),
    r#"switch"# => (DCToken::Switch, text),
    r#"case"# => (DCToken::Case, text),
    r#"default"# => (DCToken::Default, text),
    r#"break"# => (DCToken::Break, text),

    // Identifiers: a match-time lookup decides whether the word is a
    // historical DC keyword, a view suffix, or a plain identifier.
    r#"[a-zA-Z_][a-zA-Z0-9_]*"# => {
        if HISTORICAL_DC_KEYWORDS.contains(&text) {
            (DCToken::DCKeyword(text.to_owned()), text)
        } else if DC_VIEW_SUFFIXES.contains(&text) {
            (DCToken::ViewSuffix(text.to_owned()), text)
        } else {
            (DCToken::Identifier(text.to_owned()), text)
        }
    },
    r#"\\(x[0-9a-fA-F]+|.)"# => (DCToken::EscapeCharacter(text.to_owned()), text),

    // Operators
    r#"%"# => (DCToken::Percent, text),
    r#"\*"# => (DCToken::Star, text),
    r#"\+"# => (DCToken::Plus, text),
    r#"-"# => (DCToken::Hyphen, text),
    r#"/"# => (DCToken::ForwardSlash, text),
    r#"\."# => (DCToken::Period, text),

    // Delimiters
    r#"\("# => (DCToken::OpenParenthesis, text),
    r#"\)"# => (DCToken::CloseParenthesis, text),
    r#"\{"# => (DCToken::OpenBraces, text),
    r#"\}"# => (DCToken::CloseBraces, text),
    r#"\["# => (DCToken::OpenBrackets, text),
    r#"\]"# => (DCToken::CloseBrackets, text),
    r#"\,"# => (DCToken::Comma, text),
    r#"\;"# => (DCToken::Semicolon, text),
    r#"\="# => (DCToken::Equals, text),
    r#"\:"# => (DCToken::Colon, text),

    // Anything else is a lexical error.
    r#"."# => {
        panic!("dclexer: Found an unexpected character: '{}'", text);
    }
}
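
/// A streaming lexer over DC file source text.
///
/// Iterating a `Lexer` yields `(DCToken, Span)` pairs, silently skipping
/// whitespace and comments while tracking the current line number for
/// diagnostics. A minimal usage sketch (marked `ignore` since the import path
/// depends on the crate layout):
///
/// ```ignore
/// // Assuming `DCToken` and `Lexer` from this module are in scope:
/// let mut lexer = Lexer::new("dclass DistributedDonut {\n};");
/// assert_eq!(lexer.next().unwrap().0, DCToken::DClass);
/// assert_eq!(
///     lexer.next().unwrap().0,
///     DCToken::Identifier(String::from("DistributedDonut")),
/// );
/// ```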
pub struct Lexer<'a> {
    original: &'a str,
    remaining: &'a str,
    line: usize,
}

impl<'a> Lexer<'a> {
    pub fn new(s: &'a str) -> Lexer<'a> {
        Lexer {
            original: s,
            remaining: s,
            line: 1,
        }
    }
}

/// Byte offsets of a token within the original source, plus the line it
/// starts on.
#[derive(Debug, PartialEq, Clone, Copy)]
pub struct Span {
    pub min: usize,
    pub max: usize,
    pub line: usize,
}

impl std::fmt::Display for Span {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "--- SPAN ---")?;
        write!(f, "line: ")?;
        self.line.fmt(f)?;
        write!(f, ", min: ")?;
        self.min.fmt(f)?;
        write!(f, ", max: ")?;
        self.max.fmt(f)
    }
}

/// Computes the span of the matched slice `s` within the original source `t`
/// by comparing their addresses; `s` must be a subslice of `t`.
fn span_in(s: &str, t: &str, l: usize) -> Span {
    let min = s.as_ptr() as usize - t.as_ptr() as usize;
    Span {
        min,
        max: min + s.len(),
        line: l,
    }
}

impl Iterator for Lexer<'_> {
    type Item = (DCToken, Span);
    fn next(&mut self) -> Option<(DCToken, Span)> {
        loop {
            let tok: (DCToken, &str) = if let Some((tok, new_remaining)) = next_token(self.remaining) {
                self.remaining = new_remaining;
                tok
            } else {
                return None;
            };
            match tok {
                // Whitespace and comments are dropped silently.
                (DCToken::Whitespace, _) | (DCToken::Comment, _) => {
                    continue;
                }
                // Newlines are dropped too, but bump the line counter first.
                (DCToken::Newline, _) => {
                    self.line += 1;
                    continue;
                }
                (tok, span) => {
                    return Some((tok, span_in(span, self.original, self.line)));
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::{DCToken, Lexer};

    fn lexer_test_for_target(test_string: &str, target_tokens: Vec<DCToken>) {
        let lexer = Lexer::new(test_string).inspect(|tok| eprintln!("token: {:?}", tok));
        let mut token_quota_reached: bool = false;

        for (i, (token, _span)) in lexer.enumerate() {
            if i >= (target_tokens.len() - 1) {
                token_quota_reached = true;
            }
            if i > (target_tokens.len() - 1) {
                panic!("Lexer returned more tokens than expected!");
            }
            assert_eq!(token, *target_tokens.get(i).unwrap());
        }
        if !token_quota_reached {
            panic!("Did not receive all the expected tokens!");
        }
    }

    #[test]
    fn ignored_tokens_test() {
        let test_string: String = String::from(
            "// Single line comment\n\
            /* multiline comment*/\n\
            \n \n",
        );
        let lexer = Lexer::new(&test_string).inspect(|tok| eprintln!("token: {:?}", tok));

        for (_token, _span) in lexer {
            panic!("No tokens should have been returned by the lexer!");
        }
    }

    #[test]
    fn keyword_definition_test() {
        let target: Vec<DCToken> = vec![
            DCToken::Keyword,
            DCToken::Identifier(String::from("test")),
            DCToken::Semicolon,
            DCToken::DClass,
            DCToken::Struct,
        ];
        lexer_test_for_target("keyword test;\n dclass struct", target);
    }

    #[test]
    fn switch_tokens_test() {
        let target: Vec<DCToken> =
            vec![DCToken::Switch, DCToken::Case, DCToken::Default, DCToken::Break];
        lexer_test_for_target("switch case default break", target);
    }

    #[test]
    fn dclass_import_statement() {
        let target: Vec<DCToken> = vec![
            DCToken::From,
            DCToken::Identifier(String::from("views")),
            DCToken::Period,
            DCToken::Identifier(String::from("Donut")),
            DCToken::Import,
            DCToken::Identifier(String::from("DistributedDonut")),
            DCToken::ForwardSlash,
            DCToken::ViewSuffix(String::from("AI")),
            DCToken::ForwardSlash,
            DCToken::ViewSuffix(String::from("OV")),
        ];
        lexer_test_for_target("from views.Donut import DistributedDonut/AI/OV", target);
    }

    #[test]
    fn number_literals() {
        let target: Vec<DCToken> = vec![
            // Decimal literals
            DCToken::DecimalLiteral(1),
            DCToken::DecimalLiteral(9),
            DCToken::DecimalLiteral(10),
            DCToken::DecimalLiteral(2010),
            // Octal literals
            DCToken::OctalLiteral(String::from("01")),
            DCToken::OctalLiteral(String::from("07")),
            DCToken::OctalLiteral(String::from("07472")),
            // Hexadecimal literals
            DCToken::HexLiteral(String::from("0xa")),
            DCToken::HexLiteral(String::from("0xA")),
            DCToken::HexLiteral(String::from("0Xa")),
            DCToken::HexLiteral(String::from("0XA")),
            DCToken::HexLiteral(String::from("0x123456789abcdef")),
            // Binary literals
            DCToken::BinaryLiteral(String::from("0b1")),
            DCToken::BinaryLiteral(String::from("0B1")),
            DCToken::BinaryLiteral(String::from("0b0")),
            DCToken::BinaryLiteral(String::from("0b010")),
            DCToken::BinaryLiteral(String::from("0b101110")),
            // Float literals
            DCToken::FloatLiteral(0.0),
            DCToken::FloatLiteral(9.0),
            DCToken::FloatLiteral(0.0),
            DCToken::FloatLiteral(0.9),
            DCToken::FloatLiteral(1.23456789),
            // Boolean literals
            DCToken::BooleanLiteral(true),
            DCToken::BooleanLiteral(false),
        ];
        lexer_test_for_target(
            "
            1 9 10 2010
            01 07 07472
            0xa 0xA 0Xa 0XA 0x123456789abcdef
            0b1 0B1 0b0 0b010 0b101110
            0.0 9.0 .0 .9 1.23456789
            true false
            ",
            target,
        );
    }

    #[test]
    fn text_literals() {
        let target: Vec<DCToken> = vec![
            // Character literals
            DCToken::CharacterLiteral('a'),
            DCToken::CharacterLiteral('1'),
            DCToken::CharacterLiteral('*'),
            // String literals
            DCToken::StringLiteral(String::from("x")),
            DCToken::StringLiteral(String::from("foo")),
            DCToken::StringLiteral(String::from("*")),
            // Escape characters
            DCToken::EscapeCharacter(String::from("\\n")),
            DCToken::EscapeCharacter(String::from("\\t")),
            DCToken::EscapeCharacter(String::from("\\xa19")),
        ];
        lexer_test_for_target(
            "'a' '1' '*' \
            \"x\" \"foo\" \"*\" \
            \\n \\t \\xa19",
            target,
        );
    }

    #[test]
    fn data_types() {
        #[rustfmt::skip]
        let target: Vec<DCToken> = vec![
            DCToken::CharT,
            DCToken::BoolT,
            DCToken::Int8T, DCToken::Int16T, DCToken::Int32T, DCToken::Int64T,
            DCToken::UInt8T, DCToken::UInt16T, DCToken::UInt32T, DCToken::UInt64T,
            DCToken::Int8ArrayT, DCToken::Int16ArrayT, DCToken::Int32ArrayT,
            DCToken::UInt8ArrayT, DCToken::UInt16ArrayT, DCToken::UInt32ArrayT,
            DCToken::UInt32UInt8ArrayT,
            DCToken::Float32T,
            DCToken::Float64T,
            DCToken::StringT,
            DCToken::BlobT,
            DCToken::Blob32T,
        ];
        lexer_test_for_target(
            "char bool \
            int8 int16 int32 int64 \
            uint8 uint16 uint32 uint64 \
            int8array int16array int32array \
            uint8array uint16array uint32array uint32uint8array \
            float32 float64 string blob blob32",
            target,
        );
    }

    #[test]
    fn operators_and_delimiters() {
        let target: Vec<DCToken> = vec![
            // Operators
            DCToken::Percent,
            DCToken::Star,
            DCToken::Plus,
            DCToken::Hyphen,
            DCToken::ForwardSlash,
            // Delimiters
            DCToken::OpenParenthesis,
            DCToken::CloseParenthesis,
            DCToken::OpenBraces,
            DCToken::CloseBraces,
            DCToken::OpenBrackets,
            DCToken::CloseBrackets,
            DCToken::Comma,
            DCToken::Colon,
        ];
        lexer_test_for_target("%*+-/(){}[],:", target);
    }

    #[test]
    fn dc_keywords_tokens() {
        let target: Vec<DCToken> = vec![
            DCToken::DCKeyword("ram".to_string()),
            DCToken::DCKeyword("required".to_string()),
            DCToken::DCKeyword("db".to_string()),
            DCToken::DCKeyword("airecv".to_string()),
            DCToken::DCKeyword("ownrecv".to_string()),
            DCToken::DCKeyword("clrecv".to_string()),
            DCToken::DCKeyword("broadcast".to_string()),
            DCToken::DCKeyword("ownsend".to_string()),
            DCToken::DCKeyword("clsend".to_string()),
        ];
        lexer_test_for_target(
            "ram required db airecv ownrecv \
            clrecv broadcast ownsend clsend",
            target,
        );
    }

    #[test]
    #[should_panic]
    fn unexpected_token_test() {
        let test_string: String = String::from("uint8 invalid_token = \\");
        let lexer = Lexer::new(&test_string).inspect(|tok| eprintln!("token: {:?}", tok));

        // Draining the iterator should hit the catch-all rule and panic.
        for (_token, _span) in lexer {}
    }

    #[test]
    fn register_newline() {
        let test_string: String = String::from("keyword\nkeyword\nkeyword");
        let lexer = Lexer::new(&test_string).inspect(|tok| eprintln!("token: {:?}", tok));

        for (i, (_, span)) in lexer.enumerate() {
            // The i-th `keyword` token should be reported on line i + 1.
            if span.line != i + 1 {
                panic!("Lexer failed to register a new line!");
            }
        }
    }
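
    #[test]
    fn token_spans() {
        // Sketch of the span bookkeeping: `min`/`max` are byte offsets into
        // the original source, and whitespace between tokens is skipped.
        let mut lexer = Lexer::new("keyword test");

        let (token, span) = lexer.next().unwrap();
        assert_eq!(token, DCToken::Keyword);
        assert_eq!((span.min, span.max, span.line), (0, 7, 1));

        let (token, span) = lexer.next().unwrap();
        assert_eq!(token, DCToken::Identifier(String::from("test")));
        assert_eq!((span.min, span.max, span.line), (8, 12, 1));

        assert_eq!(lexer.next(), None);
    }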
}