Elementa v8.0.0
Minimalistic library for any C++ application (C++11 and up)
Loading...
Searching...
No Matches
parsing.h
Go to the documentation of this file.
1
3#include "elementa/license.inc"
4#include "elementa/checks.inc"
5
6#ifndef ELEMENTA_PARSING_PARSING_H
7#define ELEMENTA_PARSING_PARSING_H
8
9
10#include <utility>
11#include <tuple>
12#include <memory>
13#include <functional>
14#include <type_traits>
18#include "elementa/base/enums.h"
20#include "elementa/base/serial_channels.h"
22#include "elementa/adts/trees.h"
25
26namespace elementa
27{
28
29namespace parsing
30{
31
52/* **********************************************************************
53
54 Base error for parsers
55
56*************************************************************************/
57
59{
60 public:
61
62 ParsingError(const std::string & explanation):
63 elementa::base::Exc{std::string{"SYNTACTICAL ERROR. "} +
64 explanation} {}
65
67};
68
70{
71 public:
72
73 UnexpParsedSymbol(const std::string & details):
75 "Unexpected parsed element",
76 details)} {}
77
79};
80
81
83{
84 public:
85
86 ExpParsedSymbol(const std::string & details):
89 "Expected some parsed element",
90 details)} {}
91
93};
94
95
97{
98 public:
99
100 InvParsedElem(const std::string & details):
103 "Invalid parsed element",details)} {}
104
106};
107
108
110{
111 public:
112
113 RedefinedParsedElem(const std::string & details):
116 "Redefined parsed element",details)} {}
117
119};
120
121
123{
124 public:
125
126 UndefinedParsedElem(const std::string & details):
129 "Undefined parsed element",details)} {}
130
132};
133
134
135/* **********************************************************************
136
137 Class: StepInfo
138
139*************************************************************************/
140
142
144class StepInfo final
145{
146 public:
147
150 kMatch,
151 kRule
152 );
153
154
155 Kind kind;
157
159
169 struct Info
170 {
173
176
178 Info(const Token::Ptr & v): vt{v} {}
179
181 Info(const CFGrammar::RuleIterator & ri): rit{ri} {}
182
183 Info(const Info & oth) = default;
184 Info(Info && oth) = default;
185 Info & operator=(const Info & oth) = default;
186 Info & operator=(Info && oth) = default;
187 ~Info(void) = default;
188
189 } info; // info content
190
192 StepInfo(const Token::Ptr & vt,
193 const elementa::base::SerChLoc::Ptr & l): kind{Kind::kMatch},
194 loc{l},
195 info{vt}
197 ELE_CODE_TRACE({},"StepInfo::(ma)constr.Creating match SI this =" <<
199
202 const elementa::base::SerChLoc::Ptr & l): kind{Kind::kRule},
203 loc{l},
204 info{ri}
206 ELE_CODE_TRACE({},"StepInfo::(ri)constr. Creating rule SI this =" <<
208
210 StepInfo(const StepInfo & oth): kind{oth.kind}, loc{oth.loc}, info{oth.info}
211 {ELE_CODE_TRACE_OFF; ELE_CODE_TRACE({},"StepInfo::copy-constr."); }
212
216 ELE_CODE_TRACE({},"StepInfo::assign, oth!=this = " <<
217 (this != &oth));
218 if (this != &oth)
219 { kind = oth.kind; loc = oth.loc;
220 info = oth.info; } return(*this); }
221
223 StepInfo(StepInfo && oth): kind{oth.kind},
224 loc{std::move(oth.loc)},
225 info{std::move(oth.info)}
226 {ELE_CODE_TRACE_OFF; ELE_CODE_TRACE({},"StepInfo::move-constr."); }
227
231 ELE_CODE_TRACE({},"StepInfo::move-assign, oth!=this = " <<
232 (this != &oth));
233 if (this != &oth)
234 { kind = oth.kind; loc = std::move(oth.loc);
235 info = std::move(oth.info); } return(*this); }
236
239 {ELE_CODE_TRACE_OFF; ELE_CODE_TRACE({},"StepInfo::destructor of " <<
241
243 bool operator==(const StepInfo & o) const
244 { return((kind == o.kind)&&(*loc == *(o.loc))&&
245 (kind == Kind::kMatch ? (*info.vt) == (*(o.info.vt)) :
246 (*info.rit) == (*(o.info.rit)))); }
247
249 bool operator!=(const StepInfo & o) const { return(!operator==(o)); }
250
252 std::string to_string(const CFGrammar & gr) const;
253
254};
255
256
257/* **********************************************************************
258
259 Class: ErrorInfo
260
261*************************************************************************/
262
264
266class ErrorInfo final
267{
268 public:
269
272 kUnexpTerminal,
273 kEmptyTable
274 );
275
277 using UnexpectedTerminal = std::pair< CFGrammar::CodeElement, Token::Ptr >;
278
279
280 Kind kind;
282
284
285 struct Info
286 {
289
292
295 {}
296
299 {}
300
301 Info(const Info & oth) = default;
302 Info(Info && oth) = default;
303 Info & operator=(const Info & oth) = default;
304 Info & operator=(Info && oth) = default;
305 ~Info(void) = default;
306
307 } info; // error content
308
312 kind{Kind::kEmptyTable},
313 loc{l},
314 info{ti} {}
315
319 kind{Kind::kUnexpTerminal},
320 loc{l},
321 info{ut} {}
322
324 ErrorInfo(const ErrorInfo & oth):
325 kind{oth.kind},
326 loc{oth.loc},
327 info{oth.info}
328 {}
329
332 { if (this!=&oth)
333 { kind = oth.kind; loc = oth.loc; info = oth.info; } return(*this); }
334
337 kind{oth.kind},
338 loc{std::move(oth.loc)},
339 info{std::move(oth.info)}
340 {}
341
344 { if (this!=&oth)
345 { kind = oth.kind; loc = std::move(oth.loc);
346 info = std::move(oth.info); } return(*this); }
347
350 {ELE_CODE_TRACE_OFF; ELE_CODE_TRACE({},"ErrorInfo::destructor"); }
351
353 bool operator==(const ErrorInfo & o) const
354 { return((kind == o.kind)&&(*loc == *(o.loc))&&
355 (kind == Kind::kUnexpTerminal ?
356 (
357 (info.unexppair.first == o.info.unexppair.first) &&
358 (*info.unexppair.second == *o.info.unexppair.second)
359 ):
360 (info.emptytableindex == o.info.emptytableindex))); }
361
363 bool operator!=(const ErrorInfo & o) const { return(!operator==(o)); }
364
365
367 std::string to_string(const CFGrammar & gr) const;
368
369};
370
371
372/* **********************************************************************
373
374 Abstract Base Class: Parser
375
376*************************************************************************/
377
379
390{
391 public:
392
398 kFinished,
399 kMoreSteps,
400 kNotInTable,
401 kMismatch
402 );
403
405
407 {
408 public:
409
411
412 virtual void okStep(const StepInfo & i, Parser & p) = 0;
413
415
416 virtual void errorStep(const ErrorInfo & e, Parser & p) = 0;
417 };
418
420
423 using TransfTokenId = std::function< void(Token &) >;
424
432
439 Parser(const CFGrammar & gr, Lexer & lex,
440 const TransfTokenId & transftid = [](Token &)->void{} ):
441 gr_{gr},lexer_{lex},transftid_{transftid} {}
442
443 Parser(const Parser &) = delete;
444 Parser & operator=(const Parser &) = delete;
445 Parser(Parser &&) = delete;
446 Parser & operator=(Parser &&) = delete;
447
449 Parser(void) = default;
450
458 const CFGrammar & grammar(void) const noexcept { return(gr_); }
459
461 Lexer & lexer(void) const noexcept { return(lexer_); }
462
464 virtual void reset(void) = 0;
465
467
469 void parse(Observer & obs);
470
472
482 virtual Result parseStep(Observer & obs) = 0;
483
485 virtual void printStatus(elementa::base::OutSerCh & chout) const = 0;
486
490 protected:
491
492 const CFGrammar & gr_;
493 Lexer & lexer_;
494 TransfTokenId transftid_;
495};
496
497
498/* **********************************************************************
499
500 Class: AST
501
502*************************************************************************/
503
505
508class AST: public elementa::adts::Tree<StepInfo,char>
509{
510 public:
511
517
519 using EdgeData = char;
520
523
526
528
529 template <template <class,class> class Gr>
530 using GraphImpl = Gr<VertexData,EdgeData>;
531
538
539 static BaseGraph::VRange_Pred::Pred kPredTerminal;
540
548 using BaseTree::BaseTree;
549
557
558 const StepInfo & edgeEnd(const BaseGraph::eiterator & eit) const;
559
561
563 const std::string & edgeEndMatch(const BaseGraph::eiterator & eit,
564 CFGrammar::CodeElement c) const;
565
567
570 CFGrammar::CodeElement c) const;
571
573
575
577
579
582};
583
584
585/* **********************************************************************
586
587 Class: ASTObserver
588
589*************************************************************************/
590
592
594{
595 public:
596
598
601 ASTObserver(const AST::BaseTree::BGraph & exemplar):ast_{exemplar} {}
602
603 void okStep(const StepInfo & i, Parser & p);
604
605 void errorStep(const ErrorInfo & e, Parser & p)
607
609
611 AST & ast(void) { return(ast_); }
612
614
615 typename AST::Ptr lastSubtree(void)
616 { return(pendingtrees_.empty() ? typename AST::Ptr{} :
617 pendingtrees_.back()); }
618
619 private:
620
621 typename AST::Forest pendingtrees_; // pointers to trees, actually
622 AST ast_;
623};
624
625 // Parsing
627
628} // End parsing namespace
629
630} // End elementa namespace
631
632
633#endif
An index in a LL(1) parsing table: non-terminal + terminal.
Definition: cfgrammars.h:218
An index of a rule in the grammar, i.e., an iterator on the rules.
Definition: cfgrammars.h:125
Abstract class for observing the steps of the parser.
Definition: parsing.h:407
virtual void errorStep(const ErrorInfo &e, Parser &p)=0
A method to receive any error indication related to parsing.
virtual void okStep(const StepInfo &i, Parser &p)=0
A method to receive the result of a parsing step when it is ok.
std::pair< CodeElement, RHS > Rule
A rule in the grammar (only one alternative for the left side).
Definition: cfgrammars.h:119
TokenId CodeElement
Unique code of an element of the grammar, either terminal or non-term.
Definition: cfgrammars.h:97
A context-free grammar.
Definition: cfgrammars.h:71
#define ELE_CODE_TRACE_OFF
Place this inside local scope (e.g., routine) to deactivate traces there.
Definition: debugging.h:283
const char * explanation(void) const noexcept
Return the explanation only. It will live as long as this exception.
Definition: exceptions.h:164
Base class for all errors / exceptions in Elementa. Just derive from it.
Definition: exceptions.h:113
#define ELE_CLASS_EXCOVERRIDE(C)
Shortening macro that must be used inside classes derived from Exc.
Definition: exceptions.h:64
#define ELE_CODE_UNIMPLEMENTED
To throw an unimplemented exception.
Definition: exceptions.h:289
LongestUnsigned to_number(const void *p)
Convert a pointer address to a number.
All graph classes derive from this one.
Definition: graphs.h:107
A mutable iterator that can have multiple iteration behaviors. Forward case.
Definition: iterators.h:299
std::shared_ptr< Token > Ptr
Safe pointer to a token.
Definition: lexer.h:149
A lexer that produces terminals.
Definition: lexer.h:236
Base class for any token (i.e., lexical element) produced by a lexer.
Definition: lexer.h:146
ErrorInfo(const UnexpectedTerminal &ut, const elementa::base::SerChLoc::Ptr &l)
Constructor for unexpected terminal.
Definition: parsing.h:317
AST::Ptr lastSubtree(void)
Return a pointer to the last formed subtree in the AST.
Definition: parsing.h:615
ErrorInfo(const ErrorInfo &oth)
Copy constructor.
Definition: parsing.h:324
Lexer & lexer(void) const noexcept
Return a reference to the internal lexer.
Definition: parsing.h:461
std::pair< CFGrammar::CodeElement, Token::Ptr > UnexpectedTerminal
An unexpected terminal: first is the expected one, second the found one.
Definition: parsing.h:277
ErrorInfo & operator=(const ErrorInfo &oth)
Copy assignment.
Definition: parsing.h:331
virtual Result parseStep(Observer &obs)=0
Must do one step of the parsing, leaving the parser ready for the next.
ASTObserver(const AST::BaseTree::BGraph &exemplar)
Create an empty AST based on the given implementation of graphs.
Definition: parsing.h:601
virtual void reset(void)=0
Must prepare to begin a new parsing (not needed after construction).
Parser(const CFGrammar &gr, Lexer &lex, const TransfTokenId &transftid=[](Token &) ->void{})
Constructor for a grammar.
Definition: parsing.h:439
AST & ast(void)
Return a reference to the completely formed AST.
Definition: parsing.h:611
Kind kind
kind of error.
Definition: parsing.h:280
bool operator==(const StepInfo &o) const
Equality operator.
Definition: parsing.h:243
char EdgeData
Edge data type.
Definition: parsing.h:519
static BaseGraph::VRange_Pred::Pred kPredTerminal
A predicate to iterate on AST nodes that are terminals.
Definition: parsing.h:539
bool operator!=(const ErrorInfo &o) const
Unequality.
Definition: parsing.h:363
void moveToNextMatch(BaseGraph::eiterator &eit) const
Increments EIT until its destination is a match or end() is reached.
virtual void printStatus(elementa::base::OutSerCh &chout) const =0
Must serialize current status in text format through CHOUT, for debug.
void errorStep(const ErrorInfo &e, Parser &p)
A method to receive any error indication related to parsing.
Definition: parsing.h:605
const CFGrammar & grammar(void) const noexcept
Return a reference to the internal grammar.
Definition: parsing.h:458
std::string to_string(const CFGrammar &gr) const
Conversor to a string.
const StepInfo & edgeEnd(const BaseGraph::eiterator &eit) const
Return a reference to the StepInfo of the destination of eit.
~StepInfo(void)
Destructor.
Definition: parsing.h:238
const CFGrammar::Rule & edgeEndRule(const BaseGraph::eiterator &eit, CFGrammar::CodeElement c) const
Return the rule of the grammar of a rule node in the tree.
ELE_CLASS_ENUM(Kind, kMatch, kRule)
Kinds of result after a successful parsing step.
Gr< VertexData, EdgeData > GraphImpl
Possible implementations of the graph that supports the tree.
Definition: parsing.h:530
Parser(void)=default
Destructor.
bool operator!=(const StepInfo &o) const
Unequality.
Definition: parsing.h:249
void parse(Observer &obs)
Parse the input until a complete sentence of the language is read.
StepInfo(const StepInfo &oth)
Copy constructor.
Definition: parsing.h:210
StepInfo(const Token::Ptr &vt, const elementa::base::SerChLoc::Ptr &l)
Constructor of kind kMatch.
Definition: parsing.h:192
std::function< void(Token &) > TransfTokenId
Transformation of TokenId into CFGrammar::CodeElement.
Definition: parsing.h:423
StepInfo(const CFGrammar::RuleIterator &ri, const elementa::base::SerChLoc::Ptr &l)
Constructor of kind kRule.
Definition: parsing.h:201
StepInfo & operator=(StepInfo &&oth)
Move assignment.
Definition: parsing.h:229
elementa::base::SerChLoc::Ptr loc
Position in the input of token/rule.
Definition: parsing.h:156
typename elementa::adts::Tree< VertexData, EdgeData > BaseTree
Tree type for the AST.
Definition: parsing.h:525
ELE_CLASS_ENUM(Result, kFinished, kMoreSteps, kNotInTable, kMismatch)
Result codes after a parsing step.
StepInfo(StepInfo &&oth)
Move constructor.
Definition: parsing.h:223
StepInfo & operator=(const StepInfo &oth)
Copy assignment.
Definition: parsing.h:214
std::string to_string(const CFGrammar &gr) const
Conversor to a string.
ErrorInfo & operator=(ErrorInfo &&oth)
Move assignment.
Definition: parsing.h:343
Kind kind
kind of step.
Definition: parsing.h:155
elementa::base::SerChLoc::Ptr loc
loc of parser when error.
Definition: parsing.h:281
const std::string & edgeEndMatch(const BaseGraph::eiterator &eit, CFGrammar::CodeElement c) const
Return the token value of a match node in the tree.
bool operator==(const ErrorInfo &o) const
Equality operator.
Definition: parsing.h:353
~ErrorInfo(void)
Destructor.
Definition: parsing.h:349
void okStep(const StepInfo &i, Parser &p)
A method to receive the result of a parsing step when it is ok.
ErrorInfo(ErrorInfo &&oth)
Move constructor.
Definition: parsing.h:336
void moveToNextRule(BaseGraph::eiterator &eit) const
Increments EIT until its destination is a rule or end() is reached.
ErrorInfo(const CFGrammar::LL1TableIndex &ti, const elementa::base::SerChLoc::Ptr &l)
Constructor for table index.
Definition: parsing.h:310
ELE_CLASS_ENUM(Kind, kUnexpTerminal, kEmptyTable)
Kinds of errors during parsing.
An AST (abstract syntax tree) for a parsing.
Definition: parsing.h:509
A Parser::Observer that creates an AST during parsing.
Definition: parsing.h:594
Parameters to inform about a parsing error.
Definition: parsing.h:267
Abtract base class for any parser of context-free grammars.
Definition: parsing.h:390
Info associated to a sucessful parsing step.
Definition: parsing.h:145
std::shared_ptr< SerChLoc > Ptr
Safe pointer for polymorphic behaviours.
Definition: basics.h:776
std::ostream OutSerCh
"Base class" that represents any output serial channel in Elementa.
Definition: basics.h:247
std::string concatWithMiddle(const std::string &s1, const std::string &s2, const std::string &m=". ")
Concatenate two strings putting a middle one only if the second is not empty.
std::shared_ptr< Tree > Ptr
Pointer to a tree.
Definition: trees.h:75
std::vector< Ptr > Forest
A forest is a set of pointers to trees.
Definition: trees.h:78
This class is the base for any tree implementation.
Definition: trees.h:65
Info data about the error, depending on the kind.
Definition: parsing.h:286
CFGrammar::LL1TableIndex emptytableindex
kEmptyTable: a table index is not filled with anything.
Definition: parsing.h:291
Info(const UnexpectedTerminal &ut)
Constructor from unexpected pair.
Definition: parsing.h:298
UnexpectedTerminal unexppair
kUnexpTerminal: the expected and found terminals, respectively.
Definition: parsing.h:288
Info(const CFGrammar::LL1TableIndex &ti)
Constructor from table index.
Definition: parsing.h:294
Information depending on the kind.
Definition: parsing.h:170
Info(const CFGrammar::RuleIterator &ri)
Constructor from a rule iterator.
Definition: parsing.h:181
Token::Ptr vt
kMatch: code of the terminal plus its value.
Definition: parsing.h:172
CFGrammar::RuleIterator rit
kRule: rule that is to be emitted.
Definition: parsing.h:175
Info(const Token::Ptr &v)
Constructor from a valued terminal.
Definition: parsing.h:178