1 /*==============================================================================
  2 |
  3 |  NAME
  4 |
  5 |     ppParser.h
  6 |
  7 |  DESCRIPTION
  8 |
  9 |     Header file for polynomial parser classes.
 10 |
 11 |     User manual and technical documentation are described in detail in my web page at
 12 |     http://seanerikoconnor.freeservers.com/Mathematics/AbstractAlgebra/PrimitivePolynomials/overview.html
 13 |
 14 |  LEGAL
 15 |
 16 |     Primpoly Version 16.1 - A Program for Computing Primitive Polynomials.
 17 |     Copyright (C) 1999-2021 by Sean Erik O'Connor.  All Rights Reserved.
 18 |
 19 |     This program is free software: you can redistribute it and/or modify
 20 |     it under the terms of the GNU General Public License as published by
 21 |     the Free Software Foundation, either version 3 of the License, or
 22 |     (at your option) any later version.
 23 |
 24 |     This program is distributed in the hope that it will be useful,
 25 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
 26 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 27 |     GNU General Public License for more details.
 28 |
 29 |     You should have received a copy of the GNU General Public License
 30 |     along with this program.  If not, see http://www.gnu.org/licenses/.
 31 |     
 32 |     The author's address is seanerikoconnor!AT!gmail!DOT!com
 33 |     with the !DOT! replaced by . and the !AT! replaced by @
 34 |
 35 ==============================================================================*/
 36 
 37 // Wrap this header file to prevent duplication if it is included
 38 // accidentally more than once.
 39 #ifndef __PPPARSER_H__
 40 
 41 
 42 // Some miscellaneous constants.
 43 #define MAX_NUM_COMMAND_LINE_ARGS 3
 44 const ppuint minModulus{ 2 } ;
 45 const ppuint minDegree{ 2 } ;
 46 
 47 
 48 /*=============================================================================
 49  |
 50  | NAME
 51  |
 52  |     Action
 53  |
 54  | DESCRIPTION
 55  |
 56  |     Types of LR parser actions.
 57  |
 58  +============================================================================*/
 59 
 60 enum class Action
 61 {
 62     Shift, Reduce, Accept, Error
 63 } ;
 64 
 65 
 66 /*=============================================================================
 67  |
 68  | NAME
 69  |
 70  |     enumToString
 71  |
 72  | DESCRIPTION
 73  |
 74  |     Helper function to translate enum types into human readable format.
 75  |
 76  +============================================================================*/
 77 
 78 template< typename SymbolType >
 79 string enumToString( const SymbolType & st ) ;
 80 
 81 
 82 /*=============================================================================
 83  |
 84  | NAME
 85  |
 86  |     Symbol
 87  |
 88  | DESCRIPTION
 89  |
 90  |     Define the VALUE of a symbol.  Used in the parser's syntax directed
 91  |     translation and also for the final polynomial value returned from the
 92  |     parser.
 93  |
 94  +============================================================================*/
 95 
 96 // Tokens and other types of symbols used in parsing.
 97 template< typename SymbolType, typename ValueType >
 98 class Symbol
 99 {
100     public:
101         Symbol() ;
102 
103         Symbol( SymbolType type, int state ) ;
104 
105         Symbol( SymbolType type, int state, ValueType value )  ;
106 
107         Symbol( const Symbol< SymbolType, ValueType > & s ) ;
108 
109         // Destructor.  Virtual so derived class destructor
110         // automatically calls its base class destructor.
111         virtual ~Symbol()
112         {
113         }
114 
115         Symbol< SymbolType, ValueType > & operator=( const Symbol<SymbolType,ValueType> & s ) ;
116 
117         // Print function for Symbol class.
118         // Define this templated non-member friend function here, otherwise the compiler will generate
119         // a link error.  If the compiler sees only a declaration here, it doesn't know yet it's a template.
120         // See https://isocpp.org/wiki/faq/templates#template-friends
121         friend ostream & operator<<( ostream & out, const Symbol<SymbolType,ValueType> & s )
122         {
123             out << enumToString<SymbolType>( s.type_ ) << "   " ;
124             out << s.value_ << "   " ;
125             out << "state " << s.state_ ;
126 
127             return out ;
128         }
129 
130     // Allow direct access to this simple data type for convenience.
131     public:
132         SymbolType type_ ;  // Type of terminal or nonterminal.
133         ValueType  value_ ; // Value of the symbol.
134         int        state_ ; // State or production number.
135 } ;
136 
137 /*=============================================================================
138  |
139  | NAME
140  |
141  |     ActionState
142  |
143  | DESCRIPTION
144  |
145  |     A pair consisting of action and state for the LR parser.
146  |     Actions are (shift s, reduce p, accept, error) and the new state
147  |     number to transition to.
148  |
149  +============================================================================*/
150 
151 class ActionState
152 {
153     public:
154         ActionState() ;
155 
156         ActionState( Action type, int state ) ;
157 
158         // Destructor.  Virtual so derived class destructor
159         // automatically calls its base class destructor.
160         virtual ~ActionState()
161         {
162         }
163 
164         ActionState( const ActionState & as ) ;
165 
166         ActionState & operator=( const ActionState & as ) ;
167 
168         friend ostream & operator<<( ostream & out, const ActionState & as ) ;
169 
170     // Allow direct access to this simple data type for convenience.
171     public:
172         Action action_ ;
173         int    state_ ;
174 } ;
175 
176 /*=============================================================================
177  |
178  | NAME
179  |
180  |     ParserError
181  |
182  | DESCRIPTION
183  |
184  |     Exceptions for the parser class.
185  |
186  +============================================================================*/
187 
188 class ParserError : public runtime_error
189 {
190     public:
191         // Throw with error message, file name and line number.
192         ParserError( const string & description, const string & file, const int & line )
193         : runtime_error( description + " in file " + file + " at line " + to_string(line) )
194         {
195         } ;
196 
197         // Throw with an error message.
198         ParserError( const string & description )
199             : runtime_error( description )
200         {
201         } ;
202 
203         // Throw with default error message.
204         ParserError()
205             : runtime_error( "Parser error:  " )
206         {
207         } ;
208 
209 } ;
210 
211 
212 
213 /*=============================================================================
214 |
215 | NAME
216 |
217 |     Parser
218 |
219 | DESCRIPTION
220 |
221 |     Abstract base class for parsing using an LALR(1) parser.
222 |
223 | EXAMPLE
224 |
225 |     Example of how to use the PolyParser child class for polynomials,
226 |
227 |        // Create the parser.
228 |        PolyParser<PolySymbol, PolyValue> parser() ;
229 |
230 |        // Sample polynomial.
231 |        string s = "2 x ^ 3 + 3 x + 4, 5" ;
232 |
233 |        try 
234 |        {
235 |            PolyValue v = parser.parse( s ) ;
236 |            cout << "Parsing the string = " << s << endl ;
237 |            cout << "We have a polynomial = " << v << endl ;
238 |        }
239 |        catch( ParserError & e )
240 |        {
241 |            cout << "Parser error:  " << e.what() << endl ;
242 |        }
243 |
244 | NOTES
245 |
246 |     I used a parser generator I wrote in Common LISP to generate the LALR(1) parse tables.  See my
247 |     notes,
248 |         http://seanerikoconnor.freeservers.com/ComputerScience/Compiler/ParserGeneratorAndParser/LRandLALRParserGeneratorAndParser.html
249 |     You can also use Flex and Bison (Yacc) to generate the equivalent tables, but I like my own parser generator.
250 |
251 |     While the LALR(1) parser machinery stays the same in this base class, the grammar,
252 |     i.e. parse tables and error messages, will be redefined in the child class, also the syntax
253 |     directed translation.  We try to use the base class lexical analyzer for all child classes, if possible.
254 |  
255 +============================================================================*/
256 
257 template< typename SymbolType, typename ValueType >
258 class Parser
259 {
260     // The basic member functions to create and initialize the parser,
261     // make copies and do a syntax-directed translation of a string.
262     public:
263         // Default construct the parser.
264         Parser() ;
265 
266         // Destructor.  Virtual so derived class destructor
267         // automatically calls its base class destructor.
268         virtual ~Parser() ;
269 
270         // Copy constructor.
271         Parser( const Parser & parser ) ;
272 
273         // Assignment operator.
274         virtual Parser & operator=( const Parser & parser ) ;
275 
276         // Parse an input string, doing a syntax-directed translation, returning the value.
277         ValueType parse( string s ) ;
278 
279     // Pure virtual functions to be defined only in the child classes, since the lexical analyzer, parse tables and
280     // syntax-directed translation depend uniquely on the grammar.
281     //
282     // Defined within the scope of the Parser class or its derived classes only,
283     // so not visible from outside the parser.
284     protected:
285         // Parse string into tokens.
286         virtual void tokenize( string sentence ) = 0 ;
287 
288         // NOTE:  call this in the child class constructor.  It fills in the ACTION, GOTO, and ERROR_MESSAGE tables.
289         virtual void initializeTables() = 0 ;
290 
291         // Reduce using production A -> beta, computing A's value from values of the tokens in beta.
292         virtual void syntaxDirectedTranslation( int productionNum, int topIndex, Symbol<SymbolType,ValueType> & as ) = 0 ;
293 
294     // These member values and functions are called directly in the child classes.
295     protected:
296         // Number of states, productions, etc.
297         int num_states_ ;        // Includes the 0 state.
298         int num_productions__ ;   // Number of productions.
299         int num_terminals_ ;     // Number of terminal symbols.
300         int num_non_terminals_ ;  // Number of nonterminal symbols.
301 
302         void insertAction(       int state,   SymbolType terminal, Action actionType, int actionState ) ;
303         void insertGoto(         int state,   SymbolType nonterm,  int    newState ) ;
304         void insertProduction(   int prodNum, SymbolType nonTerm,  int    rhsLength ) ;
305         void insertErrorMessage( int state,   string errorMessage ) ;
306 
307     protected:
308         // Parser stack containing symbols tokenized by lexical analyzer.
309         vector< Symbol<SymbolType, ValueType> > input_stack_ ;
310 
311         // Parse stack containing symbols and states.
312         vector< Symbol<SymbolType, ValueType> > parse_stack_ ;
313 
314         // Action table.  
315         //     action = ACTION_TABLE[ <state at top of parse stack> ][ <lookahead input token> ]
316         vector< vector<ActionState> > action_table_ ;
317 
318         // GOTO table.
319         // For reduce actions,
320         //    new goto state to push = GOTO[ current state ][ nonterminal production symbol ]
321         vector< vector<int> > goto_table_ ;
322 
323         // Error messages.
324         vector<string> error_message_ ;
325 
326         // Productions:  Left side non-terminal symbol + production number.
327         vector< Symbol<SymbolType, ValueType> > production_ ;
328 } ;
329 
330 
331 
332 // --------------- Derived class parsers and their symbols and values ---------
333 
334 
335 
336 /*=============================================================================
337  | 
338  | NAME
339  | 
340  |     PolyValue
341  | 
342  | DESCRIPTION
343  | 
344  |     Define a symbol with a polynomial value.  Used in the parser's syntax directed
345  |     translation and also for the final polynomial value returned from the 
346  |     parser.
347  |
348  +============================================================================*/
349 
350 class PolyValue
351 {
352     public:
353         PolyValue() ;
354 
355         PolyValue( const PolyValue & v ) ;
356 
357         PolyValue & operator=( const PolyValue & v ) ;
358 
359         friend ostream & operator<<( ostream & , const PolyValue & v ) ;
360 
361     // Allow direct access to this simple data type for convenience.
362     public:
363         ppuint           scalar_ ;  // Number which can be a polynomial coefficient, power of x, or the modulus p.
364                                     // Let it be signed for maximum flexibility.  We'll check if it exceeds the allowed
365                                     // range for internal primitive polynomial computations in the parser. 
366         vector<ppuint> f_ ;       // Polynomial f(x).
367 } ;
368 
369 
370 /*=============================================================================
371  |
372  | NAME
373  |
374  |     PolySymbol
375  |
376  | DESCRIPTION
377  |
378  |     Define polynomial symbol (tokens).  Used in the parser's syntax directed
379  |     translation.
380  |
381  +============================================================================*/
382 
383 enum class PolySymbol
384 {
385     // Terminal symbols or tokens.
386     Dollar = 0, Integer, Comma, Ecks, Plus, Exp,
387     NumTerminals,
388 
389     // Nonterminal symbols.
390     S, Mod, Poly, Term, Multiplier, Power,
391     NumSymbols
392 } ;
393 
394 /*=============================================================================
395 |
396 | NAME
397 |
398 |     PolyParser
399 |
400 | DESCRIPTION
401 |
402 |     Child class for parsing a polynomial grammar inheriting from abstract
403 |     base class Parser.
404 |
405 | EXAMPLE
406 |
407 |     Here is the LALR(1) grammar.  Note the use of epsilon productions to
408 |     make the grammar simpler.
409 |
410 |     Productions:
411 |
412 |         S          -> Poly Mod
413 |         Mod        -> comma integer | EPSILON
414 |         Poly       -> Poly + Term | Term
415 |         Term       -> Multiplier Power
416 |         Multiplier -> integer | EPSILON
417 |         Power      -> x | x ^ integer | EPSILON
418 |
419 |     Terminal symbols:
420 |   
421 |         comma + integer ^ x $
422 |
423 +============================================================================*/
424 
425 template< typename SymbolType, typename ValueType >
426 class PolyParser : public Parser< SymbolType, ValueType >
427 {
428     public:
429         // Construct the parser.
430         PolyParser() ;
431 
432         // Destructor.
433         ~PolyParser() ;
434 
435         // Copy constructor.
436         PolyParser( const PolyParser< SymbolType, ValueType > & parser ) ;
437 
438         // Assignment operator.
439         PolyParser< SymbolType, ValueType > & operator=( const PolyParser< SymbolType, ValueType > & parser ) ;
440 
441         // Command line argument parsing.
442         void parseCommandLine( int argc, const char * argv[] ) ;
443 
444         // Allow direct access to these command line parameters for convenience.
445         bool   test_polynomial_for_primitivity_ ;
446         bool   list_all_primitive_polynomials_ ;
447         bool   print_operation_count_ ;
448         bool   print_help_ ;
449         bool   slow_confirm_ ;
450         ppuint p ;
451         int    n ;
452         Polynomial test_polynomial_ ;
453 
454         // Location of the factorization tables.
455         static string current_working_dir_ ;
456 
457     private:
458         // Parse string into tokens.
459         void tokenize( string sentence ) ;
460 
461         // Fill in the ACTION, GOTO, and ERROR_MESSAGE tables.
462         void initializeTables() ;
463 
464         // Syntax directed translation.  Reduce using production A -> beta, computing A's value from values of 
465         // the tokens in beta.
466         void syntaxDirectedTranslation( int productionNum, int topIndex, Symbol<SymbolType,ValueType> & as ) ;
467 
468         // Even though these member variables are declared in the base class as protected, they are not visible in this
469         // child class.  Because we have a templated class, we must tell the C++ compiler explicitly that these symbols
470         // are from the base class.
471         using Parser<SymbolType, ValueType>::num_states_ ;
472         using Parser<SymbolType, ValueType>::num_productions__ ;
473         using Parser<SymbolType, ValueType>::num_terminals_ ;
474         using Parser<SymbolType, ValueType>::num_non_terminals_ ;
475 
476         using Parser<SymbolType, ValueType>::insertAction ;
477         using Parser<SymbolType, ValueType>::insertGoto ;
478         using Parser<SymbolType, ValueType>::insertProduction ;
479         using Parser<SymbolType, ValueType>::insertErrorMessage ;
480 
481         using Parser< SymbolType, ValueType >::input_stack_ ;
482         using Parser< SymbolType, ValueType >::parse_stack_ ;
483         using Parser< SymbolType, ValueType >::action_table_ ;
484         using Parser< SymbolType, ValueType >::goto_table_ ;
485         using Parser< SymbolType, ValueType >::error_message_ ;
486         using Parser< SymbolType, ValueType >::production_ ;
487 } ;
488 
489 // Static member variable needs to be initialized outside of the class declaration.
490 template< typename SymbolType, typename ValueType >
491 string PolyParser<SymbolType, ValueType>::current_working_dir_ = "" ;
492 
493 
494 // --------------- Derived class parsers and their symbols and values ---------
495 
496 
497 
498 /*=============================================================================
499  | 
500  | NAME
501  | 
502  |     FactorizationValue
503  | 
504  | DESCRIPTION
505  | 
506  |     Define a parser symbol which containst the number string in one line of
507  |     the factorization table and also the prime factorization itself.
508  |     This is used in the parser's syntax directed translation and also for
509  |     the final value returned from the parser.
510  |
511  +============================================================================*/
512 
513 template< typename IntType >
514 class FactorizationValue
515 {
516     public:
517         FactorizationValue() ;
518 
519         FactorizationValue( const FactorizationValue<IntType> & v ) ;
520 
521         FactorizationValue( const IntType & p, const int & count ) ;
522 
523         FactorizationValue<IntType> & operator=( const FactorizationValue<IntType> & v ) ;
524 
525         // Print out the factorization.
526         // Define this templated non-member friend function here, otherwise the compiler will generate
527         // a link error.  If the compiler sees only a declaration here, and not the complete implementation,
528         // it doesn't know yet it's a template.
529         // See https://isocpp.org/wiki/faq/templates#template-friends
530         friend ostream & operator<<( ostream & out, const FactorizationValue<IntType> & factorization )
531         {
532             if (factorization.factor_.size() > 0)
533             {
534                 for (auto & f : factorization.factor_)
535                     out << f ;
536             }
537             if (factorization.numberString_.length() > 0)
538                 out << factorization.numberString_ ;
539 
540             return out ;
541         }
542 
543         static IntType numberStringToInteger( const string & numberString ) ;
544 
545     // Allow direct access to data for convenience.
546     public:
547         string                         numberString_ ;  // Sequence of digits representing an integer.
548         vector< PrimeFactor<IntType> > factor_ ;        // Vector of distinct prime factors to powers.
549 } ;
550 
551 
552 /*=============================================================================
553  |
554  | NAME
555  |
556  |     FactorizationSymbols
557  |
558  | DESCRIPTION
559  |
560  |     Define factoriziton symbols (tokens).  Used in the parser's syntax directed
561  |     translation.
562  |
563  +============================================================================*/
564 
565 enum class FactorizationSymbol
566 {
567     // Terminal symbols or tokens.
568     Dollar = 0, Integer, Period, Caret, Backslash,
569     NumTerminals,
570 
571     // Nonterminal symbols.
572     S, Factorization, Factor, BigInteger,
573 
574     // Count number of total symbols.
575     NumSymbols
576 } ;
577 
578 
579 
580 /*=============================================================================
581 |
582 | NAME
583 |
584 |     FactorizationParser
585 |
586 | DESCRIPTION
587 |
588 |     Child class for parsing a factorization table of Cunningham numbers p ^ n - 1
589 |     inherited from abstract base class Parser.
590 |
591 | EXAMPLE
592 |
593 |     Each table for p has one line containing the factorization of p ^ n - 1 of the type
594 |
595 |          n <num factors p1 ^ m1 . p2 ^ m2 ...
596 |
597 |     Dots separate the factors.  Long numbers are continued on the next line with backslashes.
598 |     For example, in the table for p = 3, the line below is for n = 398 and has the 12 factors
599 |     2 ^ 4, 3, 3583 ... 11881576435363115110426317067813673631182057431802975257340612015708984828478898969687279497327343
600 |       
601 |         398    12  2^4.3.3583.4588543.34266607.2146612951394313997.8670122322845042\
602 |                    61471.3742361194240057889227626965547117.118815764353631151\
603 |                    104263170678136736311820574318029752573406120157089\
604 |                    84828478898969687279497327343
605 |
606 |     Here is the LALR(1) grammar.  
607 | 
608 |     Productions:
609 |
610 |         S             -> integer integer Factorization
611 |         Factorization -> Factorization . Factor | Factor
612 |         Factor        -> BigInteger ^ BigInteger | BigInteger
613 |         BigInteger    -> BigInteger \ integer | integer
614 |
615 |     Terminal symbols:
616 |         integer . ^ \ $
617 |
618 +============================================================================*/
619 
620 template< typename SymbolType, typename ValueType >
621 class FactorizationParser : public Parser< SymbolType, ValueType >
622 {
623     public:
624         // Construct the parser.
625         FactorizationParser() ;
626 
627         // Destructor.
628         ~FactorizationParser() ;
629 
630         // Copy constructor.
631         FactorizationParser( const FactorizationParser< SymbolType, ValueType > & parser ) ;
632 
633         // Assignment operator.
634         FactorizationParser< SymbolType, ValueType > & operator=( const FactorizationParser< SymbolType, ValueType > & parser ) ;
635 
636     private:
637         // Parse string into tokens.
638         void tokenize( string sentence ) ;
639 
640         // Fill in the ACTION, GOTO, and ERROR_MESSAGE tables.
641         void initializeTables() ;
642 
643         // Syntax directed translation.  Reduce using production A -> beta, computing A's value from values of 
644         // the tokens in beta.
645         void syntaxDirectedTranslation( int productionNum, int topIndex, Symbol<SymbolType,ValueType> & as ) ;
646 
647         // Even though these member variables are declared in the base class as protected, they are not visible in this
648         // child class.  We have to tell the compiler explicitly that these symbols are base class templated types so it
649         // knows what they are.
650         using Parser<SymbolType, ValueType>::num_states_ ;
651         using Parser<SymbolType, ValueType>::num_productions__ ;
652         using Parser<SymbolType, ValueType>::num_terminals_ ;
653         using Parser<SymbolType, ValueType>::num_non_terminals_ ;
654 
655         using Parser<SymbolType, ValueType>::insertAction ;
656         using Parser<SymbolType, ValueType>::insertGoto ;
657         using Parser<SymbolType, ValueType>::insertProduction ;
658         using Parser<SymbolType, ValueType>::insertErrorMessage ;
659 
660         using Parser< SymbolType, ValueType >::input_stack_ ;
661         using Parser< SymbolType, ValueType >::parse_stack_ ;
662         using Parser< SymbolType, ValueType >::action_table_ ;
663         using Parser< SymbolType, ValueType >::goto_table_ ;
664         using Parser< SymbolType, ValueType >::error_message_ ;
665         using Parser< SymbolType, ValueType >::production_ ;
666 } ;
667 
668 #endif // __PPPARSER_H__