3#include "elementa/license.inc"
4#include "elementa/checks.inc"
6#ifndef ELEMENTA_PARSING_CFGRAMMARS
7#define ELEMENTA_PARSING_CFGRAMMARS
119 using Rule = std::pair<CodeElement,RHS>;
147 ELE_CODE_TRACE({},
"RuleIterator::dflt-constr. of " <<
167 ELE_CODE_TRACE({},
"RuleIterator::assign, oth!=this : " <<
169 if (
this != &oth) { Validatable::operator=(oth); copyFrom(oth);}
173 ELE_CODE_TRACE({},
"RuleIterator::move-assign, oth!=this : " <<
175 if (
this != &oth) { Validatable::operator=(oth); copyFrom(oth);}
181 ELE_CODE_TRACE({},
"RuleIterator::destructor of " <<
204 {
return(!this->
operator==(oth)); }
208 std::pair<
typename MapOfRules::const_iterator,
209 typename RHSs::size_type> pos_;
210 MapOfRules::const_iterator grbeg_, grend_;
221 using BaseClass = std::pair<CodeElement,CodeElement>;
232 BaseClass{std::move(oth)} {}
236 {
if (
this != &oth) { Validatable::operator=(oth);
237 if (valid()) BaseClass::operator=(oth); }
242 {
if (
this != &oth) { Validatable::operator=(std::move(oth));
244 BaseClass::operator=(std::move(oth)); }
251 std::string{
"First part of a LL1 table index is not a "} +
252 "non-terminal (code " + std::to_string(nt) +
")");
255 std::string{
"Second part of a LL1 table index is not a "} +
256 "terminal or endofsentence (code " +std::to_string(t) +
")");
260 {
return(valid() && o.valid() &&
261 (this->first == o.first) && (this->second == o.second)); }
264 {
return(!
operator==(o)); }
267 {
return(valid() && oth.valid() &&
268 ( (first < oth.first) ||
269 ( (first == oth.first) && (second < oth.second) ))); }
278 class CatId:
public std::pair<int,int>
282 using Base = std::pair<int,int>;
287 using LL1Table = std::map<LL1TableIndex,RuleIterator>;
358 {
return(
static_cast<size_t>(
kCodeEnd - lastterm_)); }
362 {
return(
static_cast<size_t>(lastnonterm_ -
kCodeEmpty)); }
367 {
if (!
isTerminal(c))
return(
false);
return(lastterm_ <= c); }
372 {
if (!
isNonTerminal(c))
return(
false);
return(lastnonterm_ >= c); }
378 return(catsterms_[cat].size()); }
384 return(catsnonterms_[cat].size()); }
390 {
return(
static_cast<VectorOfCats::size_type
>(cat) <
391 catsterms_.size()); }
397 {
return(
static_cast<VectorOfCats::size_type
>(cat) <
398 catsnonterms_.size()); }
405 (
static_cast<VectorOfCatIDs::size_type
>(catid.second) <
406 catsterms_[catid.first].size())); }
413 (
static_cast<VectorOfCatIDs::size_type
>(catid.second) <
414 catsnonterms_[catid.first].size())); }
421 return(catsterms_[catid.first][catid.second]); }
428 return(catsnonterms_[catid.first][catid.second]); }
435 return(
CatId{linkstotermcats_[-c].first,linkstotermcats_[-c].second});}
442 return(
CatId{linkstonontermcats_[c].first,
443 linkstonontermcats_[c].second});}
450 return(linkstotermcats_[-c].first); }
457 return(linkstonontermcats_[c].first); }
515 static_cast<int>(catsterms_.size())}); }
521 static_cast<int>(catsnonterms_.size())}); }
529 static_cast<int>(catsterms_[cat].size())}); }
537 static_cast<int>(catsnonterms_[cat].size())}); }
674 std::string * trace =
nullptr)
const;
728 using MapOfNames = std::map<std::string,CodeElement>;
729 using MapOfNamesSearchRes = std::pair< MapOfNames::iterator,bool >;
730 using MapOfNamesItem = std::pair<std::string,CodeElement>;
732 using VectorOfCatIDs = std::vector<int>;
733 using VectorOfCats = std::vector< VectorOfCatIDs >;
734 using CatAndID = std::pair<int,int>;
735 using VectorOfCatAndIDs = std::vector<CatAndID>;
740 std::vector<const std::string *> linkstotermnames_, linkstonontermnames_;
742 MapOfNames namesterms_,namesnonterms_;
746 VectorOfCats catsterms_,catsnonterms_;
747 VectorOfCatAndIDs linkstotermcats_, linkstonontermcats_;
757 static_assert(std::is_same<TokenId,CFGrammar::CodeElement>::value,
758 "CFGrammar::CodeElement must equal TokenId");
826 SzerError{
"Invalid element" + details} {}
834 SzerError{
"Invalid list of categorized elements"
842 Repeated(
const std::string & details):
843 SzerError{
"Repeated in the specification"
851 Unused(
const std::string & details):
852 SzerError{
"Unused in the specification"
861 SzerError{
"Unexpected element" + details} {}
912 Section whichSection(
const std::string & token)
const noexcept;
913 std::string concatLoc(
const std::string & str)
const;
914 void throwUnexpected(
const std::string & whatexp,
915 const std::string & whatfound,
916 const std::string & place =
"")
const;
An id of an element of the grammar divided into category and index.
An index in a LL(1) parsing table: non-terminal + terminal.
LL1TableIndex(LL1TableIndex &&oth)
Move constructor.
LL1TableIndex & operator=(LL1TableIndex &&oth)
Move assignment.
LL1TableIndex(const LL1TableIndex &oth)
Copy constructor.
LL1TableIndex(CodeElement nt, CodeElement t)
Constructor for a valid index.
LL1TableIndex & operator=(const LL1TableIndex &oth)
Assignment.
LL1TableIndex(void)=default
Default constructor: invalid index.
An index of a rule in the grammar, i.e., an iterator on the rules.
bool operator==(const RuleIterator &oth) const
Equality comparison.
RuleIterator operator--(int)
Point to the previous rule if it can (postfix).
RuleIterator operator++(int)
Point to the next rule if it can (postfix).
Rule operator*(void) const
De-referencing operator: return a copy of the given rule.
bool operator!=(const RuleIterator &oth) const
Inequality.
std::bidirectional_iterator_tag Category
Category of the iterator (for STL-compatibility)
RuleIterator & operator++(void)
Point to the next rule if it can (prefix).
std::ptrdiff_t Distance
Distance between iterators (for STL-compatibility)
Rule T
Iterated data (for STL-compatibility)
~RuleIterator(void)
Destructor.
T * Pointer
Pointer to data (for STL-compatibility)
RuleIterator & operator--(void)
Point to the previous rule if it can (prefix).
RuleIterator(const CFGrammar &gr, bool beginorend=true)
Constructor: an index pointing to the first or end rule in grammar.
T & Reference
Reference to data (for STL-compatibility)
RuleIterator(void)
Default constructor: invalid iterator.
CFGrammar(void)
Default constructor: empty grammar.
elementa::base::IotaView< int > nonTerminalCatRange(void) const noexcept
Return an IotaView to iterate on the current non-terminal categories.
std::string to_string(const Rule &r) const
Return a string with the given rule expressed with its names.
std::string to_string(const RHS &rhs) const
Return a string with the sequence of elements in the given rhs.
int catOfNonTerminal(CodeElement c) const
Return the category only corresponding to a given non-terminal element.
static const std::string kEndName
A name for the "end of source" elem.
std::string to_string(const SetOfElements &soe) const
Return a string with the given set of alements (their names).
elementa::base::IotaView< CodeElement > terminalRange(void) const noexcept
Return an IotaView to iterate on the current terminals.
static const std::string kOpenCat
Keyword to open any category list.
int catOfTerminal(CodeElement c) const
Return the category only corresponding to a given terminal element.
CodeElement codeOfNonTerminal(const CatId &catid) const
Return the code corresponding to the given category/index of a non-term.
RuleIterator end(void) const
Return an iterator past the last rule in the grammar.
static void checkRule(const Rule &r)
Check that the rule R is well constructed for any possible grammar.
std::pair< CodeElement, RHS > Rule
A rule in the grammar (only one alternative for the left side).
std::map< LL1TableIndex, RuleIterator > LL1Table
An LL(1) parsing table (sparse).
void deser(elementa::base::InSerCh &chin, CFGrammar &cfg)
Deserialize.
CatId catidOfNonTerminal(CodeElement c) const
Return the catid corresponding to a given non-terminal element.
static const std::string kRuleEndMark
Text ending mark of a rule in a default-serialized rule.
std::vector< CodeElement > VectorOfElements
Vector of elements.
bool existsTerminalCatID(const CatId &catid) const
Return FALSE if the given terminal cat/id does not exists in the gr.
void calculateFirsts(MapOfElements &firsts) const
Fill FIRSTS with the first sets of all non-terminals in the grammar.
RuleIterator begin(void) const
Return an iterator to the first rule in the grammar.
static bool isTerminal(CodeElement c, bool extended=false) noexcept
Return TRUE if C is a valid terminal code for any grammar.
CodeElement addNonTerminal()
Register a new non-terminal.
void calculateFollows(const MapOfElements &firsts, MapOfElements &follows, std::string *trace=nullptr) const
Fill FOLLOWS with the follow sets of all non-terminals in the grammar.
void ser(elementa::base::OutSerCh &chout, const CFGrammar &cfg)
Serialize.
void fillLL1Table(const MapOfElements &firsts, const MapOfElements &follows, LL1Table &table) const
Try to fill TABLE with the parsing table of the grammar.
void checkIntegrity(bool withnames=true) const
Check that the content of the grammar is properly filled.
TokenId CodeElement
Unique code of an element of the grammar, either terminal or non-term.
std::string to_string(const LL1Table t) const
Return a string representation of an LL(1) table.
static const std::string kNonTermsSect
Keyword to enter the non-terminals section.
bool existsTerminal(CodeElement c) const
Return TRUE if the given code is of a registered terminal.
CodeElement addTerminal()
Register a new termninal.
static const std::string kRHSSep
Text separator between alternatives in a default-serialized rule.
static const CodeElement kCodeEmpty
Code of the "empty" element.
size_t sizeNonTerminals(void) const noexcept
Return the number of non-terminals in the grammar.
static bool hasRuleFirstRight(const Rule &r, CodeElement firstright)
Check whether the given rule has as first right-side element FIRSTRIGHT.
bool existsNonTerminal(CodeElement c) const
Return TRUE if the given code is of a registered non-terminal.
static const std::string kOpenSect
Keyword to open any section.
virtual ~CFGrammar(void)=default
Destructor.
static const std::string kRulesSect
Keyword to enter the rules section.
CodeElement findNonTerminal(const std::string &name) const
Return the code for non-terminal called NAME,or kCodeEmpty if not found.
size_t sizeNonTerminals(int cat) const
Return the number of non-terminals in a category.
CodeElement starting(void) const
Return the code for the non-terminal that is the starting symbol.
elementa::base::IotaView< int > terminalCatRange(void) const noexcept
Return an IotaView to iterate on the current terminal categories.
static const std::string kTermsSect
Keyword to enter the terminals section.
elementa::base::IotaView< int > terminalCatIDRange(int cat) const
Return an IotaView to iterate on the current ids of a terminal category.
bool existsNonTerminalCategory(int cat) const
Return FALSE if the given non-terminal category does not exists in gr.
static const std::string kSideSep
Text separator of left and right hand side in a default-serialized rule.
size_t sizeTerminals(void) const noexcept
Return the number of terminals in the grammar.
bool isLL1(const MapOfElements &firsts, const MapOfElements &follows, SetOfElements &fifi, MapOfElements &fifo) const
Return TRUE if the grammar is LL(1).
int newNonTerminalCategory(void)
Create a new category where the next non-terminals will be placed.
VectorOfElements RHS
The right-hand side of a rule of the grammar.
static const std::string kEmptyName
A name for the "empty" element.
bool existsTerminalCategory(int cat) const
Return FALSE if the given terminal category does not exists in the gr.
std::map< CodeElement, RHSs > MapOfRules
A mapping from left-side non-terminals to vectors of right sides.
bool leftRecursive(CodeElement &rec) const
Check any left-recursivity, direct or not.
elementa::base::IotaView< int > nonTerminalCatIDRange(int cat) const
Return an IotaView to iterate on the current ids of a non-terminal cat.
CatId catidOfTerminal(CodeElement c) const
Return the catid corresponding to a given terminal element.
bool existsNonTerminalCatID(const CatId &catid) const
Return FALSE if the given non-terminal cat/id does not exists in gr.
bool directlyLeftRecursive(Rule &r) const
Check direct left-recursivity in the grammar.
static const std::string kCloseSect
Keyword to close any section.
static const CodeElement kCodeEnd
Code of the "end of source" el.
void firstOfRHS(const RHS &rhs, const MapOfElements &firsts, SetOfElements &f) const
Fill F with the first set of the entire RHS, according to FIRSTS.
static bool isRuleHeader(const Rule &r, CodeElement left, CodeElement firstright)
Check whether R has LEFT at its left and FIRSTRIGHT at its first right.
CodeElement findTerminal(const std::string &name) const
Return the code for a terminal called NAME, or kCodeEmpty if not found.
CodeElement codeOfTerminal(const CatId &catid) const
Return the code corresponding to the given category and index of a term.
static const std::string kCloseCat
Keyword to close any category list.
std::map< CodeElement, SetOfElements > MapOfElements
Set of elements associated to another element.
elementa::base::IotaView< CodeElement > nonTerminalRange(void) const noexcept
Return an IotaView to iterate on the current non-terminals.
std::string nameOfTerminal(CodeElement c) const
Return a copy of the name associated to the terminal C.
int newTerminalCategory(void)
Create a new category where the next terminals will be placed.
std::string to_string(const MapOfElements &moe) const
Return a string with the given map of elements.
void firstFollowConflicts(const MapOfElements &firsts, const MapOfElements &follows, MapOfElements &nts) const
Fill in NTS with the terminals shared in firsts and follows of non-term.
void clear(void)
Delete all the current content of the grammar.
void assignName(CodeElement c, const std::string &name)
Assign a name to an element.
void firstFirstConflicts(const MapOfElements &firsts, SetOfElements &nts) const
Fill in NTS with the non-terminals that cause first-first conflicts.
std::string nameOfNonTerminal(CodeElement c) const
Return a copy of the name associated to the non-terminal C.
size_t sizeTerminals(int cat) const
Return the number of terminals in a category.
std::vector< RHS > RHSs
A number of RHS (usually for the same left hand side non-terminal).
void addRule(const Rule &r)
Add a new rule for a non-terminal.
std::set< CodeElement > SetOfElements
Set of elements.
static bool isNonTerminal(CodeElement c) noexcept
Return TRUE if C is a valid non-terminal code for any grammar.
const RHSs & rightsOf(CodeElement n) const
Return a reference to all the rules associated to a non-terminal N.
Serializations of CFGrammars specified in a certain textual language.
#define ELE_CODE_TRACE_OFF
Place this inside local scope (e.g., routine) to deactivate traces there.
#define ELE_CODE_PLACE
Produces a std::string with the place of source where the macro is placed.
#define ELE_CLASS_ENUM(NameEnumClass,...)
Define an extended enum type.
Base class for all errors / exceptions in Elementa. Just derive from it.
Exception for indicating that some element has not been found in some place.
#define ELE_CLASS_EXCOVERRIDE(C)
Shortening macro that must be used inside classes derived from Exc.
#define ELE_CODE_INVARG(expl)
To throw an invalid-argument exception with an explanation.
LongestUnsigned to_number(const void *p)
Convert a pointer address to a number.
A view that refers to a sequence of incrementable elements, bounded or not.
int TokenId
A unique identifier for a token.
std::istream InSerCh
"Base class" that represents any input serial channel in Elementa.
std::ostream OutSerCh
"Base class" that represents any output serial channel in Elementa.
Base abstract class / interface for any serializer.
Base class for all errors thrown by serializers.
#define ELE_CLASS_VALIDATABLE
Declare a class derived from Validatable.