3#include "elementa/license.inc"
4#include "elementa/checks.inc"
6#ifndef ELEMENTA_PARSING_LEXER_H
7#define ELEMENTA_PARSING_LEXER_H
17#include "elementa/base/serial_channels.h"
72 "Unexpected lexical element",details)}
86 "Expected some lexical element",
97 UnexpEnd(
const std::string & details):
100 "Unexpected end of channel",details)}
114 "Invalid lexical element",details)}
/** Shared-ownership pointer to a token. */
149 using Ptr = std::shared_ptr<Token>;
155 Token(
void) =
default;
161 virtual ~Token(
void) =
default;
163 virtual bool operator==(
const Token &)
const = 0;
164 virtual bool operator!=(
const Token &o)
const
165 {
return(!
operator==(o)); }
168 virtual const std::string &
value(
void)
const = 0;
184 class List:
public std::list<TextToken>
188 using Base = std::list<TextToken>;
201 TextToken(TextToken && oth):Token{std::move(oth)}
202 { v_ = std::move(oth.v_); }
203 TextToken & operator=(
const TextToken & oth)
205 { Token::operator=(oth); v_ = oth.v_; }
207 TextToken & operator=(TextToken && oth)
209 { Token::operator=(std::move(oth)); v_ = std::move(oth.v_); }
215 bool operator==(
const Token & o)
const
216 {
return(v_ ==
dynamic_cast<const TextToken &
>(o).v_); }
218 const std::string &
value(
void)
const {
return(v_); }
256 {
return(inputch_.channel()); }
261 {
return(inputch_); }
265 {
return(*(inputch_.filter().ploc)); }
/** Inherit the constructors of BaseVector. */
314 using BaseVector::BaseVector;
328 for (
const auto & rp: *
this)
if (rp.first == tid)
return(rp.second);
355 Recognizers::size_type firstrecogfinished_;
369 offending_{unrecogtxt}
377 {
return(offending_); }
381 static const std::string kUnrecogPreff;
383 std::string offending_;
401 recognizers_{definition}
511static_assert(std::is_integral<TokenId>::value &&
512 std::is_signed<TokenId>::value,
513 "TokenId must be an integral, signed type");
Unrecognizable text error.
const std::string & getOffendingText(void) const noexcept
Get the text after the prefix placed in this error.
size_type whoWins(void) const
Return the recognizer that has recognized all characters fed so far.
void reset(void)
Initiate the recognition through all the FSMs, all becoming active.
const ListOfInds & active(void) const noexcept
Return the indexes of FSMs that are active right now.
void debug(void) const
Print in console the state of recognizers.
void feed(char o)
Feed FSMs with the given observation O.
std::list< size_type > ListOfInds
A list with indexes into some recognizers of the vector.
std::vector< TermRecog > BaseVector
Shortcut.
FSMClass::Ptr find(TokenId tid) const
Return one of the recognizers, or a false (null) one if not found.
std::string to_string(void) const
Return a text describing the list.
Base class for all errors / exceptions in Elementa. Just derive from it.
#define ELE_CLASS_EXCOVERRIDE(C)
Shortening macro that must be used inside classes derived from Exc.
std::shared_ptr< FSM > Ptr
Pointer to a FSM that consumes observations of type Obs.
Shortcut for FSMs that work with chars.
virtual std::string to_string(void) const
Return a string representation of the token (can be overridden).
const Token::PtrVector & log(void) const noexcept
Get a reference to the internal log of tokens.
std::shared_ptr< Token > Ptr
Safe pointer to a token.
virtual void setValue(const std::string &v)=0
Must change the textual value of the token to V.
void setValue(const std::string &v)
Must change the textual value of the token to V.
TokenId c
Id of the element, e.g., in a grammar.
FSMLexer(elementa::base::SerChIFilter< elementa::base::SerChFilt_Loc > &inputch, Recognizers &definition)
Default constructor.
std::pair< TokenId, FSMClass::Ptr > TermRecog
A FSM associated to the id of a terminal that it recognizes.
std::function< bool(Lexer &, const LexerError *, Token::Ptr &)> Observer
An observer for being called after each token is collected.
std::vector< Ptr > PtrVector
A vector of token pointers.
Lexer(elementa::base::SerChIFilter< elementa::base::SerChFilt_Loc > &inputch)
Constructor.
elementa::base::InSerCh & inputChannel(void) const noexcept
Return a reference to the associated, non-location input channel.
elementa::base::SerChLoc & location(void) const noexcept
Return a reference to the location associated with the channel used by the lexer.
virtual const std::string & value(void) const =0
Must return a reference to the textual value for the token.
virtual Token::Ptr getToken(void)=0
Must create and return a pointer to the token built from the input.
void collect(bool log=false)
Collect all tokens from the channel of the lexer, calling the observer.
elementa::base::SerChIFilter< elementa::base::SerChFilt_Loc > & locInputChannel(void) const noexcept
Return a reference to the associated location input channel.
const std::string & value(void) const
Must return a reference to the textual value for the token.
Token::Ptr getToken(void) final
Derived classes cannot implement this.
LexerCollector(Lexer &lexer, Observer &obs)
Constructor.
virtual ~Lexer(void)=default
Virtual destructor for polymorphic deletions.
A lexical analyzer that uses a number of FSMs to recognize tokens.
A lexer that produces terminals.
A class to use a lexer for collecting all tokens sequentially.
Base error class. Lexer errors should derive from this.
A token that consists only of its textual value.
Base class for any token (i.e., lexical element) produced by a lexer.
int TokenId
A unique identifier for a token.
Location in a channel, at least in linear form, either at reading or writing.
std::istream InSerCh
"Base class" that represents any input serial channel in Elementa.
A filter for InSerCh. It filters their inputs after getting them or putting them back.
std::string concatWithMiddle(const std::string &s1, const std::string &s2, const std::string &m=". ")
Concatenate two strings putting a middle one only if the second is not empty.