1 /*============================================================================== 2 | 3 | NAME 4 | 5 | ppParser.h 6 | 7 | DESCRIPTION 8 | 9 | Header file for polynomial parser classes. 10 | 11 | User manual and technical documentation are described in detail in my web page at 12 | http://seanerikoconnor.freeservers.com/Mathematics/AbstractAlgebra/PrimitivePolynomials/overview.html 13 | 14 | LEGAL 15 | 16 | Primpoly Version 16.1 - A Program for Computing Primitive Polynomials. 17 | Copyright (C) 1999-2021 by Sean Erik O'Connor. All Rights Reserved. 18 | 19 | This program is free software: you can redistribute it and/or modify 20 | it under the terms of the GNU General Public License as published by 21 | the Free Software Foundation, either version 3 of the License, or 22 | (at your option) any later version. 23 | 24 | This program is distributed in the hope that it will be useful, 25 | but WITHOUT ANY WARRANTY; without even the implied warranty of 26 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 27 | GNU General Public License for more details. 28 | 29 | You should have received a copy of the GNU General Public License 30 | along with this program. If not, see http://www.gnu.org/licenses/. 31 | 32 | The author's address is seanerikoconnor!AT!gmail!DOT!com 33 | with the !DOT! replaced by . and the !AT! replaced by @ 34 | 35 ==============================================================================*/ 36 37 // Wrap this header file to prevent duplication if it is included 38 // accidentally more than once. 39 #ifndef __PPPARSER_H__ 40 41 42 // Some miscellaneous constants. 43 #define MAX_NUM_COMMAND_LINE_ARGS 3 44 const ppuint minModulus{ 2 } ; 45 const ppuint minDegree{ 2 } ; 46 47 48 /*============================================================================= 49 | 50 | NAME 51 | 52 | Action 53 | 54 | DESCRIPTION 55 | 56 | Types of LR parser actions. 57 | 58 +============================================================================*/ 59 60 enum class Action 61 { 62 Shift, Reduce, Accept, Error 63 } ; 64 65 66 /*============================================================================= 67 | 68 | NAME 69 | 70 | enumToString 71 | 72 | DESCRIPTION 73 | 74 | Helper function to translate enum types into human readable format. 75 | 76 +============================================================================*/ 77 78 template< typename SymbolType > 79 string enumToString( const SymbolType & st ) ; 80 81 82 /*============================================================================= 83 | 84 | NAME 85 | 86 | Symbol 87 | 88 | DESCRIPTION 89 | 90 | Define the VALUE of a symbol. Used in the parser's syntax directed 91 | translation and also for the final polynomial value returned from the 92 | parser. 93 | 94 +============================================================================*/ 95 96 // Tokens and other types of symbols used in parsing. 97 template< typename SymbolType, typename ValueType > 98 class Symbol 99 { 100 public: 101 Symbol() ; 102 103 Symbol( SymbolType type, int state ) ; 104 105 Symbol( SymbolType type, int state, ValueType value ) ; 106 107 Symbol( const Symbol< SymbolType, ValueType > & s ) ; 108 109 // Destructor. Virtual so derived class destructor 110 // automatically calls its base class destructor. 111 virtual ~Symbol() 112 { 113 } 114 115 Symbol< SymbolType, ValueType > & operator=( const Symbol<SymbolType,ValueType> & s ) ; 116 117 // Print function for Symbol class. 118 // Define this templated non-member friend function here, otherwise the compiler will generate 119 // a link error. If the compiler sees only a declaration here, it doesn't know yet it's a template. 120 // See https://isocpp.org/wiki/faq/templates#template-friends 121 friend ostream & operator<<( ostream & out, const Symbol<SymbolType,ValueType> & s ) 122 { 123 out << enumToString<SymbolType>( s.type_ ) << " " ; 124 out << s.value_ << " " ; 125 out << "state " << s.state_ ; 126 127 return out ; 128 } 129 130 // Allow direct access to this simple data type for convenience. 131 public: 132 SymbolType type_ ; // Type of terminal or nonterminal. 133 ValueType value_ ; // Value of the symbol. 134 int state_ ; // State or production number. 135 } ; 136 137 /*============================================================================= 138 | 139 | NAME 140 | 141 | ActionState 142 | 143 | DESCRIPTION 144 | 145 | A pair consisting of action and state for the LR parser. 146 | Actions are (shift s, reduce p, accept, error) and the new state 147 | number to transition to. 148 | 149 +============================================================================*/ 150 151 class ActionState 152 { 153 public: 154 ActionState() ; 155 156 ActionState( Action type, int state ) ; 157 158 // Destructor. Virtual so derived class destructor 159 // automatically calls its base class destructor. 160 virtual ~ActionState() 161 { 162 } 163 164 ActionState( const ActionState & as ) ; 165 166 ActionState & operator=( const ActionState & as ) ; 167 168 friend ostream & operator<<( ostream & out, const ActionState & as ) ; 169 170 // Allow direct access to this simple data type for convenience. 171 public: 172 Action action_ ; 173 int state_ ; 174 } ; 175 176 /*============================================================================= 177 | 178 | NAME 179 | 180 | ParserError 181 | 182 | DESCRIPTION 183 | 184 | Exceptions for the parser class. 185 | 186 +============================================================================*/ 187 188 class ParserError : public runtime_error 189 { 190 public: 191 // Throw with error message, file name and line number. 192 ParserError( const string & description, const string & file, const int & line ) 193 : runtime_error( description + " in file " + file + " at line " + to_string(line) ) 194 { 195 } ; 196 197 // Throw with an error message. 198 ParserError( const string & description ) 199 : runtime_error( description ) 200 { 201 } ; 202 203 // Throw with default error message. 204 ParserError() 205 : runtime_error( "Parser error: " ) 206 { 207 } ; 208 209 } ; 210 211 212 213 /*============================================================================= 214 | 215 | NAME 216 | 217 | Parser 218 | 219 | DESCRIPTION 220 | 221 | Abstract base class for parsing using an LALR(1) parser. 222 | 223 | EXAMPLE 224 | 225 | Example of how to use the PolyParser child class for polynomials, 226 | 227 | // Create the parser. 228 | PolyParser<PolySymbol, PolyValue> parser() ; 229 | 230 | // Sample polynomial. 231 | string s = "2 x ^ 3 + 3 x + 4, 5" ; 232 | 233 | try 234 | { 235 | PolyValue v = parser.parse( s ) ; 236 | cout << "Parsing the string = " << s << endl ; 237 | cout << "We have a polynomial = " << v << endl ; 238 | } 239 | catch( ParserError & e ) 240 | { 241 | cout << "Parser error: " << e.what() << endl ; 242 | } 243 | 244 | NOTES 245 | 246 | I used a parser generator I wrote in Common LISP to generate the LALR(1) parse tables. See my 247 | notes, 248 | http://seanerikoconnor.freeservers.com/ComputerScience/Compiler/ParserGeneratorAndParser/LRandLALRParserGeneratorAndParser.html 249 | You can also use Flex and Bison (Yacc) to generate the equivalent tables, but I like my own parser generator. 250 | 251 | While the LALR(1) parser machinery stays the same in this base class, the grammar, 252 | i.e. parse tables and error messages, will be redefined in the child class, also the syntax 253 | directed translation. We try to use the base class lexical analyzer for all child classes, if possible. 254 | 255 +============================================================================*/ 256 257 template< typename SymbolType, typename ValueType > 258 class Parser 259 { 260 // The basic member functions to create and initialize the parser, 261 // make copies and do a syntax-directed translation of a string. 262 public: 263 // Default construct the parser. 264 Parser() ; 265 266 // Destructor. Virtual so derived class destructor 267 // automatically calls its base class destructor. 268 virtual ~Parser() ; 269 270 // Copy constructor. 271 Parser( const Parser & parser ) ; 272 273 // Assignment operator. 274 virtual Parser & operator=( const Parser & parser ) ; 275 276 // Parse an input string, doing a syntax-directed translation, returning the value. 277 ValueType parse( string s ) ; 278 279 // Pure virtual functions to be defined only in the child classes, since the lexical analyzer, parse tables and 280 // syntax-directed translation depend uniquely on the grammar. 281 // 282 // Defined within the scope of the Parser class or its derived classes only, 283 // so not visible from outside the parser. 284 protected: 285 // Parse string into tokens. 286 virtual void tokenize( string sentence ) = 0 ; 287 288 // NOTE: call this in the child class constructor. It fills in the ACTION, GOTO, and ERROR_MESSAGE tables. 289 virtual void initializeTables() = 0 ; 290 291 // Reduce using production A -> beta, computing A's value from values of the tokens in beta. 292 virtual void syntaxDirectedTranslation( int productionNum, int topIndex, Symbol<SymbolType,ValueType> & as ) = 0 ; 293 294 // These member values and functions are called directly in the child classes. 295 protected: 296 // Number of states, productions, etc. 297 int num_states_ ; // Includes the 0 state. 298 int num_productions__ ; // Number of productions. 299 int num_terminals_ ; // Number of terminal symbols. 300 int num_non_terminals_ ; // Number of nonterminal symbols. 301 302 void insertAction( int state, SymbolType terminal, Action actionType, int actionState ) ; 303 void insertGoto( int state, SymbolType nonterm, int newState ) ; 304 void insertProduction( int prodNum, SymbolType nonTerm, int rhsLength ) ; 305 void insertErrorMessage( int state, string errorMessage ) ; 306 307 protected: 308 // Parser stack containing symbols tokenized by lexical analyzer. 309 vector< Symbol<SymbolType, ValueType> > input_stack_ ; 310 311 // Parse stack containing symbols and states. 312 vector< Symbol<SymbolType, ValueType> > parse_stack_ ; 313 314 // Action table. 315 // action = ACTION_TABLE[ <state at top of parse stack> ][ <lookahead input token> ] 316 vector< vector<ActionState> > action_table_ ; 317 318 // GOTO table. 319 // For reduce actions, 320 // new goto state to push = GOTO[ current state ][ nonterminal production symbol ] 321 vector< vector<int> > goto_table_ ; 322 323 // Error messages. 324 vector<string> error_message_ ; 325 326 // Productions: Left side non-terminal symbol + production number. 327 vector< Symbol<SymbolType, ValueType> > production_ ; 328 } ; 329 330 331 332 // --------------- Derived class parsers and their symbols and values --------- 333 334 335 336 /*============================================================================= 337 | 338 | NAME 339 | 340 | PolyValue 341 | 342 | DESCRIPTION 343 | 344 | Define a symbol with a polynomial value. Used in the parser's syntax directed 345 | translation and also for the final polynomial value returned from the 346 | parser. 347 | 348 +============================================================================*/ 349 350 class PolyValue 351 { 352 public: 353 PolyValue() ; 354 355 PolyValue( const PolyValue & v ) ; 356 357 PolyValue & operator=( const PolyValue & v ) ; 358 359 friend ostream & operator<<( ostream & , const PolyValue & v ) ; 360 361 // Allow direct access to this simple data type for convenience. 362 public: 363 ppuint scalar_ ; // Number which can be a polynomial coefficient, power of x, or the modulus p. 364 // Let it be signed for maximum flexibility. We'll check if it exceeds the allowed 365 // range for internal primitive polynomial computations in the parser. 366 vector<ppuint> f_ ; // Polynomial f(x). 367 } ; 368 369 370 /*============================================================================= 371 | 372 | NAME 373 | 374 | PolySymbol 375 | 376 | DESCRIPTION 377 | 378 | Define polynomial symbol (tokens). Used in the parser's syntax directed 379 | translation. 380 | 381 +============================================================================*/ 382 383 enum class PolySymbol 384 { 385 // Terminal symbols or tokens. 386 Dollar = 0, Integer, Comma, Ecks, Plus, Exp, 387 NumTerminals, 388 389 // Nonterminal symbols. 390 S, Mod, Poly, Term, Multiplier, Power, 391 NumSymbols 392 } ; 393 394 /*============================================================================= 395 | 396 | NAME 397 | 398 | PolyParser 399 | 400 | DESCRIPTION 401 | 402 | Child class for parsing a polynomial grammar inheriting from abstract 403 | base class Parser. 404 | 405 | EXAMPLE 406 | 407 | Here is the LALR(1) grammar. Note the use of epsilon productions to 408 | make the grammar simpler. 409 | 410 | Productions: 411 | 412 | S -> Poly Mod 413 | Mod -> comma integer | EPSILON 414 | Poly -> Poly + Term | Term 415 | Term -> Multiplier Power 416 | Multiplier -> integer | EPSILON 417 | Power -> x | x ^ integer | EPSILON 418 | 419 | Terminal symbols: 420 | 421 | comma + integer ^ x $ 422 | 423 +============================================================================*/ 424 425 template< typename SymbolType, typename ValueType > 426 class PolyParser : public Parser< SymbolType, ValueType > 427 { 428 public: 429 // Construct the parser. 430 PolyParser() ; 431 432 // Destructor. 433 ~PolyParser() ; 434 435 // Copy constructor. 436 PolyParser( const PolyParser< SymbolType, ValueType > & parser ) ; 437 438 // Assignment operator. 439 PolyParser< SymbolType, ValueType > & operator=( const PolyParser< SymbolType, ValueType > & parser ) ; 440 441 // Command line argument parsing. 442 void parseCommandLine( int argc, const char * argv[] ) ; 443 444 // Allow direct access to these command line parameters for convenience. 445 bool test_polynomial_for_primitivity_ ; 446 bool list_all_primitive_polynomials_ ; 447 bool print_operation_count_ ; 448 bool print_help_ ; 449 bool slow_confirm_ ; 450 ppuint p ; 451 int n ; 452 Polynomial test_polynomial_ ; 453 454 // Location of the factorization tables. 455 static string current_working_dir_ ; 456 457 private: 458 // Parse string into tokens. 459 void tokenize( string sentence ) ; 460 461 // Fill in the ACTION, GOTO, and ERROR_MESSAGE tables. 462 void initializeTables() ; 463 464 // Syntax directed translation. Reduce using production A -> beta, computing A's value from values of 465 // the tokens in beta. 466 void syntaxDirectedTranslation( int productionNum, int topIndex, Symbol<SymbolType,ValueType> & as ) ; 467 468 // Even though these member variables are declared in the base class as protected, they are not visible in this 469 // child class. Because we have a templated class, we must tell the C++ compiler explicitly that these symbols 470 // are from the base class. 471 using Parser<SymbolType, ValueType>::num_states_ ; 472 using Parser<SymbolType, ValueType>::num_productions__ ; 473 using Parser<SymbolType, ValueType>::num_terminals_ ; 474 using Parser<SymbolType, ValueType>::num_non_terminals_ ; 475 476 using Parser<SymbolType, ValueType>::insertAction ; 477 using Parser<SymbolType, ValueType>::insertGoto ; 478 using Parser<SymbolType, ValueType>::insertProduction ; 479 using Parser<SymbolType, ValueType>::insertErrorMessage ; 480 481 using Parser< SymbolType, ValueType >::input_stack_ ; 482 using Parser< SymbolType, ValueType >::parse_stack_ ; 483 using Parser< SymbolType, ValueType >::action_table_ ; 484 using Parser< SymbolType, ValueType >::goto_table_ ; 485 using Parser< SymbolType, ValueType >::error_message_ ; 486 using Parser< SymbolType, ValueType >::production_ ; 487 } ; 488 489 // Static member variable needs to be initialized outside of the class declaration. 490 template< typename SymbolType, typename ValueType > 491 string PolyParser<SymbolType, ValueType>::current_working_dir_ = "" ; 492 493 494 // --------------- Derived class parsers and their symbols and values --------- 495 496 497 498 /*============================================================================= 499 | 500 | NAME 501 | 502 | FactorizationValue 503 | 504 | DESCRIPTION 505 | 506 | Define a parser symbol which containst the number string in one line of 507 | the factorization table and also the prime factorization itself. 508 | This is used in the parser's syntax directed translation and also for 509 | the final value returned from the parser. 510 | 511 +============================================================================*/ 512 513 template< typename IntType > 514 class FactorizationValue 515 { 516 public: 517 FactorizationValue() ; 518 519 FactorizationValue( const FactorizationValue<IntType> & v ) ; 520 521 FactorizationValue( const IntType & p, const int & count ) ; 522 523 FactorizationValue<IntType> & operator=( const FactorizationValue<IntType> & v ) ; 524 525 // Print out the factorization. 526 // Define this templated non-member friend function here, otherwise the compiler will generate 527 // a link error. If the compiler sees only a declaration here, and not the complete implementation, 528 // it doesn't know yet it's a template. 529 // See https://isocpp.org/wiki/faq/templates#template-friends 530 friend ostream & operator<<( ostream & out, const FactorizationValue<IntType> & factorization ) 531 { 532 if (factorization.factor_.size() > 0) 533 { 534 for (auto & f : factorization.factor_) 535 out << f ; 536 } 537 if (factorization.numberString_.length() > 0) 538 out << factorization.numberString_ ; 539 540 return out ; 541 } 542 543 static IntType numberStringToInteger( const string & numberString ) ; 544 545 // Allow direct access to data for convenience. 546 public: 547 string numberString_ ; // Sequence of digits representing an integer. 548 vector< PrimeFactor<IntType> > factor_ ; // Vector of distinct prime factors to powers. 549 } ; 550 551 552 /*============================================================================= 553 | 554 | NAME 555 | 556 | FactorizationSymbols 557 | 558 | DESCRIPTION 559 | 560 | Define factoriziton symbols (tokens). Used in the parser's syntax directed 561 | translation. 562 | 563 +============================================================================*/ 564 565 enum class FactorizationSymbol 566 { 567 // Terminal symbols or tokens. 568 Dollar = 0, Integer, Period, Caret, Backslash, 569 NumTerminals, 570 571 // Nonterminal symbols. 572 S, Factorization, Factor, BigInteger, 573 574 // Count number of total symbols. 575 NumSymbols 576 } ; 577 578 579 580 /*============================================================================= 581 | 582 | NAME 583 | 584 | FactorizationParser 585 | 586 | DESCRIPTION 587 | 588 | Child class for parsing a factorization table of Cunningham numbers p ^ n - 1 589 | inherited from abstract base class Parser. 590 | 591 | EXAMPLE 592 | 593 | Each table for p has one line containing the factorization of p ^ n - 1 of the type 594 | 595 | n <num factors p1 ^ m1 . p2 ^ m2 ... 596 | 597 | Dots separate the factors. Long numbers are continued on the next line with backslashes. 598 | For example, in the table for p = 3, the line below is for n = 398 and has the 12 factors 599 | 2 ^ 4, 3, 3583 ... 11881576435363115110426317067813673631182057431802975257340612015708984828478898969687279497327343 600 | 601 | 398 12 2^4.3.3583.4588543.34266607.2146612951394313997.8670122322845042\ 602 | 61471.3742361194240057889227626965547117.118815764353631151\ 603 | 104263170678136736311820574318029752573406120157089\ 604 | 84828478898969687279497327343 605 | 606 | Here is the LALR(1) grammar. 607 | 608 | Productions: 609 | 610 | S -> integer integer Factorization 611 | Factorization -> Factorization . Factor | Factor 612 | Factor -> BigInteger ^ BigInteger | BigInteger 613 | BigInteger -> BigInteger \ integer | integer 614 | 615 | Terminal symbols: 616 | integer . ^ \ $ 617 | 618 +============================================================================*/ 619 620 template< typename SymbolType, typename ValueType > 621 class FactorizationParser : public Parser< SymbolType, ValueType > 622 { 623 public: 624 // Construct the parser. 625 FactorizationParser() ; 626 627 // Destructor. 628 ~FactorizationParser() ; 629 630 // Copy constructor. 631 FactorizationParser( const FactorizationParser< SymbolType, ValueType > & parser ) ; 632 633 // Assignment operator. 634 FactorizationParser< SymbolType, ValueType > & operator=( const FactorizationParser< SymbolType, ValueType > & parser ) ; 635 636 private: 637 // Parse string into tokens. 638 void tokenize( string sentence ) ; 639 640 // Fill in the ACTION, GOTO, and ERROR_MESSAGE tables. 641 void initializeTables() ; 642 643 // Syntax directed translation. Reduce using production A -> beta, computing A's value from values of 644 // the tokens in beta. 645 void syntaxDirectedTranslation( int productionNum, int topIndex, Symbol<SymbolType,ValueType> & as ) ; 646 647 // Even though these member variables are declared in the base class as protected, they are not visible in this 648 // child class. We have to tell the compiler explicitly that these symbols are base class templated types so it 649 // knows what they are. 650 using Parser<SymbolType, ValueType>::num_states_ ; 651 using Parser<SymbolType, ValueType>::num_productions__ ; 652 using Parser<SymbolType, ValueType>::num_terminals_ ; 653 using Parser<SymbolType, ValueType>::num_non_terminals_ ; 654 655 using Parser<SymbolType, ValueType>::insertAction ; 656 using Parser<SymbolType, ValueType>::insertGoto ; 657 using Parser<SymbolType, ValueType>::insertProduction ; 658 using Parser<SymbolType, ValueType>::insertErrorMessage ; 659 660 using Parser< SymbolType, ValueType >::input_stack_ ; 661 using Parser< SymbolType, ValueType >::parse_stack_ ; 662 using Parser< SymbolType, ValueType >::action_table_ ; 663 using Parser< SymbolType, ValueType >::goto_table_ ; 664 using Parser< SymbolType, ValueType >::error_message_ ; 665 using Parser< SymbolType, ValueType >::production_ ; 666 } ; 667 668 #endif // __PPPARSER_H__