1 #|----------------------------------------------------------------------------- 2 3 NAME 4 5 LR(1)AndLALR(1)Parser.lsp 6 7 DESCRIPTION 8 9 Bottom up LR(1)/LALR(1) parser. It halts and either accepts a sentence in 10 an LR(1) or LALR(1) grammar or it prints an error message. 11 12 13 CALLING SEQUENCE 14 15 Once you are in a Common Lisp interpreter, load this file, 16 17 (load "LR(1)AndLALR(1)Parser.lsp") 18 19 The normal calling sequence is 20 21 (parser "parse-tables.dat" "parse-input.dat" "parse-output.dat") 22 23 You can do an automated test it by calling, 24 25 (test-parser) 26 27 you may have to change the base directory location in the function test-parser. 28 29 Online documentation when you're in the lisp interpreter is given by the 30 standard documentation function, 31 32 (apropos 'element-of?) 33 => ELEMENT-OF? 34 (documentation 'element-of? 'function) 35 (documentation '*productions* 'variable) 36 37 38 INPUT FILES: 39 40 parse-tables.dat A numbered list of productions for the grammar, 41 followed by the LR(1) or LALR(1) parsing action and 42 goto tables, followed by a table of error messages. 43 See the file parse-tables.dat for an example. 44 45 parse-input.dat A sequence of sentences to parse. See the file 46 parse-input.dat for an example. 47 48 You can use UNIX's yacc compiler-compiler to generate the parse tables 49 above. Run yacc with the -v option. It generates the y.output file 50 which contains the parsing action and goto tables. 51 52 You can also run my program LR(1)AndLALR(1)ParserGenerator.lsp 53 to get the action and goto tables. 54 55 You'll need to create the error messages yourself, either by looking at 56 the goto graph output of LR(1)AndLALR(1)ParserGenerator.lsp 57 or by the state of the parse in yacc's y.output file. 58 59 60 OUTPUT FILES: 61 62 parse-output.dat The results of the parse on the input file. See 63 "parse-output.dat" for an example of correct output. 64 65 66 METHOD 67 68 We use algorithm 4.7 [Aho 86, pgs. 216-220] which works like this: 69 70 The initial parser configuration is 71 72 (s0 | a1 ... an $) 73 74 where a1 ... an is the input and s0 = 0 is the initial state. 75 The parse stack is to the left of the bar and the unprocessed 76 input is to the right. Now suppose the configuration is 77 78 (s0 x1 ... xm sm | ai ai+1 ... an $) 79 80 There are four possible things we can do: 81 82 (1) Shift the input. ACTION[ sm, ai ] = shift s 83 84 (s0 X1 ... Xm sm ai s | ai+1 ... an $) 85 86 (2) Reduce. ACTION[ sm, ai ] = reduce( A -> beta ) 87 88 (s0 X1 ... Xm-r sm-r A s | ai ai+1 ... an $) 89 90 where s = GOTO[ sm-r, A ] and r = length( beta ) 91 92 (3) Accept (i.e. halt). ACTION[ sm, ai ] = accept 93 94 The sentence is in the grammar; we halt and accept it. 95 96 (4) Abort with error. ACTION[ sm, ai ] = error 97 98 We produce the error message using the current parsing state 99 lookahead symbol ai. 100 101 REFERENCES 102 103 See http://www.seanerikoconnor.freeservers.com for a review of the 104 parsing theory behind this program. 105 106 107 [Aho 86] COMPILERS: PRINCIPLES, TECHNIQUES, AND TOOLS, 108 Alfred V. Aho, Ravi Sethi, and Jeffrey D. Ullman, 109 Addison-Wesley, 1986. 110 111 [Aho 74] "LR Parsing", Alfred V. Aho and Stephen C. Johnson, 112 Computing Surveys, Vol. 6, No. 2, June 1974, pg. 99-124. 113 114 115 AUTHOR 116 117 Sean E. O'Connor 6 Jun 1989 Version 1.0 118 03 Feb 2006 Version 5.4 released. 119 17 Jan 2008 Version 5.6 released. 120 121 LEGAL 122 123 LR(1)AndLALR(1)ParserGenerator Version 5.6 124 An LR(1) and LALR(1) Parser Generator written in Common Lisp. 125 126 Copyright (C) 1989-2024 by Sean Erik O'Connor. All Rights Reserved. 127 128 This program is free software: you can redistribute it and/or modify 129 it under the terms of the GNU General Public License as published by 130 the Free Software Foundation, either version 3 of the License, or 131 (at your option) any later version. 132 133 This program is distributed in the hope that it will be useful, 134 but WITHOUT ANY WARRANTY; without even the implied warranty of 135 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 136 GNU General Public License for more details. 137 138 You should have received a copy of the GNU General Public License 139 along with this program. If not, see <http://www.gnu.org/licenses/>. 140 141 The author's address is seanerikoconnor!AT!gmail!DOT!com 142 with the !DOT! replaced by . and the !AT! replaced by @ 143 144 BUGS 145 146 We'd like to modify the data type of the stack elements so we can 147 associate a semantic action with each reduction. 148 149 -----------------------------------------------------------------------------|# 150 151 152 ; ------------------------------------------------------------------------------ 153 ; | Global Variables | 154 ; ------------------------------------------------------------------------------ 155 156 (defvar *productions* nil 157 " List of productions of the unaugmented grammar." 158 ) 159 160 (defvar *action-table* nil) ; LR(1) or LALR(1) action table. 161 162 (defvar *goto-table* nil) ; LR(1) or LALR(1) goto table. 163 164 (defvar *error-message* nil) ; Table of error message for each state. 165 166 (defvar *input-stack* nil) ; Input stack of not yet processed symbols. 167 168 (defvar *old-input-stack* nil) ; Already processed input. 169 170 (defvar *parse-stack* nil) ; Parser stack. 171 172 (defvar *terminals* nil) 173 174 (defvar *goto-graph* nil) 175 176 (proclaim '(special *goto-table*)) 177 (proclaim '(special *error-messages*)) 178 179 (proclaim '(special *input-stack*)) 180 (proclaim '(special *old-input-stack*)) 181 (proclaim '(special *parse-stack*)) 182 183 ; DATA STRUCTURES 184 185 ; *productions* = (PRODUCTION1 PRODUCTION2...) 186 ; production = (A -> B C D ...) 187 188 189 ; *action-table* = (TABLE-LINE1 TABLE-LINE2 ...) 190 ; table-line = ((STATE1 STATE2 ...) LIST-OF-ACTIONS) 191 ; list-of-actions = (ACTION-PAIR1 ACTION-PAIR2 ...) 192 ; action-pair = (TRIGGER-SYMBOL ACTION) 193 ; action = (S i), (R i), (ACC NIL) 194 195 ; *goto-table* = (TABLE-LINE1 TABLE-LINE2 ...) 196 ; table-line = ((STATE1 STATE2 ...) LIST-OF-GOTOS) 197 ; list-of-gotos = (GOTO-PAIR1 GOTO-PAIR2 ...) 198 ; goto-pair = (TRIGGER-SYMBOL GOTO-STATE) 199 ; trigger-symbol = any nonterminal or DEFAULT 200 ; 201 ; ------------------------------------------------------------------------------ 202 203 204 205 ; ------------------------------------------------------------------------------ 206 ; | print-legal-notice | 207 ; ------------------------------------------------------------------------------ 208 ; 209 ; DESCRIPTION 210 ; 211 ; Write legal notice when the program starts up. 212 ; 213 ; CALLING SEQUENCE 214 ; 215 ; Returns: Legal notice to standard output. 216 ; 217 ; EXAMPLE 218 ; 219 ; ------------------------------------------------------------------------------ 220 221 (defun print-legal-notice() 222 223 ; Print a few newlines, the notice and a few more newlines. 224 (format t "~%~%~A~%~%" 225 " 226 LR(1)AndLALR(1)Parser Version 5.6 227 228 An LR(1) and LALR(1) Parser written in Common Lisp. 229 230 Copyright (C) 1989-2024 by Sean Erik O'Connor. All Rights Reserved. 231 232 This program is free software: you can redistribute it and/or modify 233 it under the terms of the GNU General Public License as published by 234 the Free Software Foundation, either version 3 of the License, or 235 (at your option) any later version. 236 237 This program is distributed in the hope that it will be useful, 238 but WITHOUT ANY WARRANTY; without even the implied warranty of 239 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 240 GNU General Public License for more details. 241 242 You should have received a copy of the GNU General Public License 243 along with this program. If not, see <http://www.gnu.org/licenses/>. 244 245 The author's address is seanerikoconnor!AT!gmail!DOT!com 246 with the !DOT! replaced by . and the !AT! replaced by @" 247 248 ) 249 ) 250 251 252 253 ; ********************************* Input I/O ******************************** 254 255 256 ; ------------------------------------------------------------------------------ 257 ; | print-file-to-console | 258 ; ------------------------------------------------------------------------------ 259 ; 260 ; DESCRIPTION 261 ; 262 ; List the lines of a file to the console. 263 ; 264 ; CALLING SEQUENCE 265 ; 266 ; (print-file-to-console filename) 267 ; 268 ; filename Name of the file. 269 ; 270 ; Returns: 271 ; 272 ; EXAMPLE 273 ; 274 ; (print-file-to-console "grammar.dat") 275 ; => ; GrammarE=E+T_T.dat 276 ; --------------------------------------------------------------------------- 277 ; 278 ; A grammar of arithmetic expressions, 279 ; 280 ; E -> E + T | T 281 ; ... 282 ; 283 ; ------------------------------------------------------------------------------ 284 285 (defun print-file-to-console( file-name ) 286 (format t "~%~%=========================== ~A =============================~%~%~%" file-name) 287 288 (with-open-file (stream file-name) 289 (do ( (line (read-line stream nil) ; nil inhibits throw at eof 290 (read-line stream nil) ) ; and read-line returns nil at eof 291 ) 292 ( (null line) ) ; Terminate at eof 293 (format t "~A~%" line) 294 ) 295 ) 296 ) 297 298 ; ------------------------------------------------------------------------------ 299 ; | load-input-initialize-parser | 300 ; ------------------------------------------------------------------------------ 301 ; 302 ; DESCRIPTION 303 ; 304 ; Load the productions and the parsing action and goto tables from file. 305 ; 306 ; CALLING SEQUENCE 307 ; 308 ; (load-input-initialize-parser filename) 309 ; 310 ; filename Name of the file containing a numbered list of productions, 311 ; the parsing action and goto tables, and error messages. 312 ; 313 ; Returns: 314 ; *productions*, *action-table*, *goto-table*, *error-messages*, 315 ; set to their values in the file. *parse-stack*, *input-stack*, 316 ; and *old-input-stack* are set to nil. 317 ; 318 ; EXAMPLE 319 ; 320 ; (load-input-initialize-parser "parse-tables.dat") 321 ; *productions* => ( ((1) (E -> E + T)) 322 ; ((2) (E -> T)) 323 ; ((3) (T -> T * F)) 324 ; ((4) (T -> F)) 325 ; ((5) (F -> [ E ])) 326 ; ((6) (F -> ID)) ) 327 ; 328 ; ------------------------------------------------------------------------------ 329 330 (defun load-input-initialize-parser( parsing-tables-file ) 331 332 333 (let ( (fp (open parsing-tables-file :direction :input)) ) 334 335 (setq *terminals* (read fp)) 336 (setq *productions* (read fp)) 337 (setq *goto-graph* (read fp)) 338 (setq *action-table* (read fp)) 339 (setq *goto-table* (read fp)) 340 (setq *error-messages* (read fp)) 341 342 (setq *parse-stack* nil) 343 (setq *input-stack* nil) 344 (setq *old-input-stack* nil) 345 346 (close fp)) 347 ) 348 349 350 351 352 ; ************************** General List Manipulation ************************* 353 354 (defun element-of?( element list &key (test NIL) ) 355 " 356 DESCRIPTION 357 358 Find out if an atom or a list is a member of a given list. 359 360 CALLING SEQUENCE 361 362 (element-of? element list :test test) 363 => T if element is in list; NIL if not. 364 365 test The name of the function which tests if two symbols are 366 equal. It should be a function of two arguments which 367 returns T if the symbols are equal and NIL otherwise. 368 test defaults to NIL, in which case we use #'equal to 369 compare. 370 371 EXAMPLE 372 373 (element-of? '(hot dog) '((cool cat) (cool dog))) => NIL 374 375 (defun no-value-judgements( s1 s2 ) (equal (second s1) (second s2))) 376 377 (element-of? '(hot dog) '((cool cat) (cool dog)) 378 :test 'no-value-judgements) => T 379 " 380 381 (cond ( (null list) nil) ; Not in the list. 382 383 ( (if (not (null test)) ; First item matches... 384 385 (funcall test element (first list)) ; ... according to test function 386 387 (equal element (first list))) ; ... according to equal. 388 389 t) 390 391 ( t (element-of? element (rest list) ; Try again on rest of list. 392 :test test))) 393 ) 394 395 396 397 398 ; ------------------------------------------------------------------------------ 399 ; | rfirst, rrest and rcons | 400 ; ------------------------------------------------------------------------------ 401 ; 402 ; DESCRIPTION 403 ; 404 ; Reversed versions of first (car), rest (cdr) and cons. 405 ; 406 ; CALLING SEQUENCE 407 ; 408 ; (rfirst list) => Last element in list. 409 ; (rfirst nil) => nil 410 ; (rrest list) => List with last element deleted. 411 ; (rfirst nil) => nil 412 ; (rcons atom list) => List with atom appended to the end. 413 ; 414 ; EXAMPLE 415 ; 416 ; (rfirst '(I am fnugled)) => fnugled 417 ; (rrest '(I am fnugled)) => (I am) 418 ; (rcons 'fnugled '(I am)) => (I am fnugled) 419 ; 420 ; ------------------------------------------------------------------------------ 421 422 (defun rfirst( list ) 423 424 (first (reverse list)) 425 ) 426 427 (defun rrest( list ) 428 429 (reverse (rest (reverse list))) 430 ) 431 432 (defun rcons( atom list ) 433 434 `(,@list ,atom) 435 ) 436 437 438 439 440 ; ********************************* Predicates ********************************* 441 442 ; ------------------------------------------------------------------------------ 443 ; | shift?, reduce?, accept? and error? | 444 ; ------------------------------------------------------------------------------ 445 ; 446 ; DESCRIPTION 447 ; 448 ; Operators to recognize different parsing actions. 449 ; 450 ; CALLING SEQUENCE 451 ; 452 ; (shift? action) 453 ; (reduce? action) 454 ; (accept? action) 455 ; (error? action) 456 ; 457 ; action The code for the action to perform from the parsing 458 ; action table. 459 ; 460 ; Returns: T if the action matches the code. 461 ; 462 ; EXAMPLE 463 ; 464 ; (shift? '(s 5)) => T 465 ; (reduce? '(r 3)) => T 466 ; (accept? '(acc nil)) => T 467 ; (error? '(error "sample error message")) => T 468 ; 469 ; ------------------------------------------------------------------------------ 470 471 (defun shift?( action ) 472 473 (equal (first action) 's) 474 ) 475 476 (defun reduce?( action ) 477 478 (equal (first action) 'r) 479 ) 480 481 (defun accept?( action ) 482 483 (equal (first action) 'acc) 484 ) 485 486 (defun error?( action ) 487 488 (equal (first action) 'error) 489 ) 490 491 492 493 494 ; ********************************* Table Lookup ******************************* 495 496 ; ------------------------------------------------------------------------------ 497 ; | list-lookup | 498 ; ------------------------------------------------------------------------------ 499 ; 500 ; DESCRIPTION 501 ; 502 ; Lookup an item in a line of an action, goto or error message table by 503 ; state and by symbol. 504 ; 505 ; CALLING SEQUENCE 506 ; 507 ; (list-lookup symbol table :table-type type) 508 ; 509 ; symbol The transition symbol for action or goto. Ignored when 510 ; looking up an error message. You can set it to nil. 511 ; 512 ; list A line of an action, goto or error message table to search. 513 ; 514 ; table-type The type of table to lookup in: 'action or 'goto. 515 ; Defaults to 'action. 516 ; 517 ; Returns: The action to take (for an action table) or the new state 518 ; (for a goto table). 519 ; 520 ; EXAMPLE 521 ; 522 ; Please refer to the file parse-tables.dat. 523 ; 524 ; (list-lookup 'ID '( ([ (S 4)) (ID (S 5)) (DEFAULT ERROR) ) 525 ; :table-type 'action) => (S 5) 526 ; 527 ; (list-lookup 'F '( (T 9) (F 3) (DEFAULT ERROR) ) :table-type 'goto) => 3 528 ; 529 ; ------------------------------------------------------------------------------ 530 531 (defun list-lookup( symbol list &key (table-type 'action) ) 532 533 (cond ( (or (equal (first (first list)) symbol) ; Found symbol. 534 (equal (length list) 1)) ; Not found - use default 535 ; (last) item. 536 (cond ( (or (equal table-type 'action) 537 (equal table-type 'goto)) 538 539 (second (first list))))) 540 541 542 ( t (list-lookup symbol (rest list) :table-type table-type))) 543 ) 544 545 546 ; ------------------------------------------------------------------------------ 547 ; | table-lookup | 548 ; ------------------------------------------------------------------------------ 549 ; 550 ; DESCRIPTION 551 ; 552 ; Lookup an item in an action, goto or error message table by state and 553 ; by symbol. 554 ; 555 ; CALLING SEQUENCE 556 ; 557 ; (table-lookup state symbol table :table-type type) 558 ; 559 ; state The number of the state or production. 560 ; 561 ; symbol The transition symbol for action or goto. Ignored when 562 ; looking up an error message or production. Set it to 563 ; nil for these two cases. 564 ; 565 ; table The action, goto or error message table to search in. 566 ; 567 ; table-type The type of table to lookup in: 'action, 'goto, 568 ; 'error-message or 'production. Defaults to 'action. 569 ; 570 ; Returns: The action to take (for an action table), the new state 571 ; (for a goto table), the error message (for an error message 572 ; table) the production (for the list of productions). 573 ; We return NIL if state was not found. 574 ; 575 ; EXAMPLE 576 ; 577 ; Please refer to the file parse-tables.dat. 578 ; 579 ; (table-lookup 6 'ID *action-table* :table-type 'action) => (S 5) 580 ; 581 ; (table-lookup 6 'F *goto-table* :table-type 'goto) => 3 582 ; 583 ; (table-lookup 66 'F *goto-table* :table-type 'goto) => NIL 584 ; 585 ; (table-lookup 6 nil *error-messages* :table-type 'error-message) 586 ; => "Missing id or left parenthesis" 587 ; 588 ; (table-lookup 6 nil *productions* :table-type 'production) 589 ; => (F -> ID) 590 ; 591 ; ------------------------------------------------------------------------------ 592 593 (defun table-lookup( state symbol table &key (table-type 'action) ) 594 595 (let ((first-line-of-table (first table))) 596 597 ; Does this line contain the state we want? 598 599 (cond ( (null table) nil) 600 601 ( (element-of? state (first first-line-of-table)) 602 603 ; If so, find the entry for the corresponding symbol. 604 605 (cond ( (equal table-type 'error-message) 606 607 (first (second first-line-of-table)) ) 608 609 ( (equal table-type 'production) 610 611 (second first-line-of-table) ) 612 613 (t (list-lookup symbol (second first-line-of-table) 614 :table-type table-type)))) 615 616 ; State wasn't found in the first line of the table. Search the rest of the 617 ; table. 618 619 ( t (table-lookup state symbol (rest table) :table-type table-type)))) 620 ) 621 622 623 624 625 626 ; ****************************** Parsing Functions ***************************** 627 628 ; ------------------------------------------------------------------------------ 629 ; | shift! | 630 ; ------------------------------------------------------------------------------ 631 ; 632 ; DESCRIPTION 633 ; 634 ; Make one parsing shift move. 635 ; 636 ; CALLING SEQUENCE 637 ; 638 ; (shift! state-to-shift) 639 ; 640 ; state-to-shift State to be shifted onto the top of the stack. 641 ; 642 ; Returns: (SHIFT state-to-shift) 643 ; We transfer one token from the input stack to the 644 ; parse stack and also append it to the old input stack. 645 ; We also place state-to-shift on top of the parse stack. 646 ; 647 ; EXAMPLE 648 ; 649 ; (setq *parse-stack* '(0)) 650 ; (setq *input-stack* '(id + [ id * id ] $)) 651 ; 652 ; (shift! 5) => (SHIFT 5) 653 ; 654 ; *parse-stack* => (0 id 5) 655 ; *input-stack* => (+ [ id * id ] $) 656 ; 657 ; ------------------------------------------------------------------------------ 658 659 (defun shift!( state-to-shift ) 660 661 ; Shift one token from the input stack to the parse stack. 662 ; Save the token on the old input stack. 663 ; Pop this token from the input stack. 664 665 (setq *parse-stack* (rcons (first *input-stack*) *parse-stack*)) 666 667 (setq *old-input-stack* (rcons (first *input-stack*) *old-input-stack*)) 668 669 (setq *input-stack* (rest *input-stack*)) 670 671 672 ; Shift the new state onto the parse stack. 673 674 (setq *parse-stack* (rcons state-to-shift *parse-stack*)) 675 676 `(shift ,state-to-shift) 677 ) 678 679 680 681 682 ; ------------------------------------------------------------------------------ 683 ; | reduce! | 684 ; ------------------------------------------------------------------------------ 685 ; 686 ; DESCRIPTION 687 ; 688 ; Make one parsing reduce move. 689 ; 690 ; CALLING SEQUENCE 691 ; 692 ; (reduce! production-number) 693 ; 694 ; production-number State to be shifted onto the top of the stack. 695 ; 696 ; Returns: (REDUCE production-number production) 697 ; 698 ; Pop twice the number of tokens in the right hand side 699 ; of the production off the parse stack. 700 ; 701 ; Find out the goto state, then push the left hand side 702 ; of the production and the goto state onto the parse 703 ; stack. 704 ; 705 ; EXAMPLE 706 ; 707 ; (setq *parse-stack* '(0 id 5)) 708 ; (setq *input-stack* '(id + [ id * id ] $)) 709 ; 710 ; (reduce! 6) => (REDUCE 6 (F -> ID)) 711 ; 712 ; *parse-stack* => (0 F 3) 713 ; *input-stack* => (ID + [ ID * ID ] $) 714 ; 715 ; ------------------------------------------------------------------------------ 716 717 (defun reduce!( production-number ) 718 719 ; Fetch the production. 720 721 (let* ( (production (table-lookup production-number nil *productions* 722 :table-type 'production)) 723 724 ; Get the production's right hand side length and its left hand side 725 ; non-terminal. If it is an epsilon production, A -> EPSILON, the 726 ; length is zero. 727 728 (production-length 729 730 (if (equal (last production) '(EPSILON)) 731 732 0 733 (length (nthcdr 2 production)))) 734 735 (non-term (first production)) 736 (goto-state nil) ) 737 738 739 740 ; Pop off the grammar symbols and states corresponding to the production. 741 742 (setq *parse-stack* (reverse (nthcdr (* 2 production-length) 743 (reverse *parse-stack*)))) 744 745 ; Find out the goto state. 746 747 (setq goto-state (table-lookup (rfirst *parse-stack*) 748 non-term 749 *goto-table*)) 750 751 ; Push the non-terminal onto the parse stack. 752 753 (setq *parse-stack* (rcons non-term *parse-stack*)) 754 755 756 ; Push the goto state. 757 758 (setq *parse-stack* (rcons goto-state *parse-stack*)) 759 760 761 `(reduce ,production-number ,production)) 762 763 ) 764 765 766 767 768 ; ------------------------------------------------------------------------------ 769 ; | error-message | 770 ; ------------------------------------------------------------------------------ 771 ; 772 ; DESCRIPTION 773 ; 774 ; Return an error message. 775 ; 776 ; CALLING SEQUENCE 777 ; 778 ; (error-message) 779 ; 780 ; Returns: (ERROR "error message") 781 ; The error message is based upon the current state on 782 ; top of the stack. 783 ; 784 ; EXAMPLE 785 ; 786 ; (setq *parse-stack* '(0 E 1 + 6)) 787 ; 788 ; (error-message) => (ERROR "Missing id or left parenthesis") 789 ; 790 ; ------------------------------------------------------------------------------ 791 792 (defun error-message() 793 794 (let ( (state (rfirst *parse-stack*)) ) 795 796 797 ; Lookup the error message. 798 799 `(error ,(table-lookup state nil *error-messages* 800 :table-type 'error-message))) 801 ) 802 803 804 805 806 ; ------------------------------------------------------------------------------ 807 ; | parse-one-step | 808 ; ------------------------------------------------------------------------------ 809 ; 810 ; DESCRIPTION 811 ; 812 ; Make one parsing step. 813 ; 814 ; CALLING SEQUENCE 815 ; 816 ; (parse-one-step) 817 ; 818 ; Returns: (ACCEPT), (ERROR <error message>), (SHIFT state) 819 ; or (REDUCE production-number production). 820 ; 821 ; Make changes to *parse-stack*, *input-stack* and 822 ; *old-input-stack* using the algorithm described 823 ; under METHOD in the introduction. 824 ; 825 ; EXAMPLE 826 ; 827 ; (setq *parse-stack* '(0 id 5)) 828 ; (setq *input-stack* '(id + [ id * id ] $)) 829 ; 830 ; (parse-one-step) => (REDUCE 6 (F -> ID)) 831 ; 832 ; *parse-stack* => (0 F 3) 833 ; *input-stack* => (ID + [ ID * ID ] $) 834 ; 835 ; ------------------------------------------------------------------------------ 836 837 (defun parse-one-step() 838 839 ; Action from action table based on state on top of stack and lookahead. 840 841 (let ( (action (table-lookup (rfirst *parse-stack*) 842 (first *input-stack*) 843 *action-table*)) ) 844 845 ; Based on the action, update the parse and input stacks. 846 (cond ( (shift? action) (shift! (second action))) 847 ( (reduce? action) (reduce! (second action))) 848 ( (accept? action) '(accept)) 849 ( (error? action) (error-message))) 850 ) 851 852 ) 853 854 855 856 857 ; ********************************* Main program ******************************* 858 859 ; ------------------------------------------------------------------------------ 860 ; | parser | 861 ; ------------------------------------------------------------------------------ 862 ; 863 ; DESCRIPTION 864 ; 865 ; Main program which parses a sentence in an LR(1) or LALR(1) grammar. 866 ; 867 ; CALLING SEQUENCE 868 ; 869 ; (parser parse-file in-file out-file) 870 ; 871 ; parse-file Name of file containing productions, action and goto 872 ; tables. 873 ; 874 ; in-file Name of file containing the sentences to be parsed. 875 ; 876 ; out-file Parsing results. 877 ; 878 ; EXAMPLE 879 ; 880 ; See the files "parse-input.dat" and "parse-output.dat" for sample 881 ; input and output. 882 ; 883 ; ------------------------------------------------------------------------------ 884 885 (defun parser( parse-file in-file out-file) 886 887 (let ( (fp1 (open in-file :direction :input)) 888 (fp2 (open out-file :direction :output :if-exists :supersede)) 889 (parse-action nil) 890 (raw-input nil) ) 891 892 ; Obligatory legal notice. 893 (print-legal-notice) 894 895 (load-input-initialize-parser parse-file) 896 897 ; Parse each sentence in the input. 898 899 (loop 900 901 ; Exit upon end of file. 902 903 (setq raw-input (read fp1 nil 'eof)) 904 905 (if (equal raw-input 'eof) (return)) 906 907 908 ; Read the next input sentence and append the end of input delimiter, $. 909 910 (setq *input-stack* (rcons '$ raw-input)) 911 912 913 ; Print an introductory header. 914 915 (write-line (format nil "~%~%~%~A~S~%" 916 "Parsing the sentence: " raw-input) fp2) 917 918 (write-line (format nil "~35A~20@A~3A~25A~%" 919 "PARSE STACK" 920 "INPUT STACK" " " 921 "ACTION") fp2) 922 923 (setq *parse-stack* '(0)) 924 (setq *old-input-stack* nil) 925 926 927 ; Parse each sentence. 928 929 (loop 930 931 ; Print the current parser configuration. 932 (format fp2 "~35S~20@S~3A" 933 *parse-stack* 934 *input-stack* " ") 935 936 ; One step of parsing, with updates to input and parse stacks. 937 (setq parse-action (parse-one-step)) 938 939 940 ; Print the parser action. 941 (cond ( (equal (first parse-action) 'error) 942 943 ; Declare error. 944 (write-line (format nil "~25A~%" 945 "ERROR") fp2) 946 947 ; Print the error message. 948 (write-line (format nil "~A~%" (second parse-action)) fp2) 949 ) 950 951 (t 952 (write-line (format nil "~25S~%" parse-action) fp2)) 953 ) 954 955 956 ; Accept the sentence, or halt with error. 957 958 (cond ( (equal (first parse-action) 'accept) 959 960 (write-line (format nil "~A~%" 961 "Sentence was grammatical.") 962 fp2) 963 (fresh-line fp2) 964 (fresh-line fp2) 965 (return) ) 966 967 ( (equal (first parse-action) 'error) 968 969 (write-line (format nil "~A~%" 970 "Sentence was not in the grammar.") 971 fp2) 972 (fresh-line fp2) 973 (fresh-line fp2) 974 (return) )))) 975 976 (close fp1) 977 (close fp2)) 978 ) 979 980 981 982 ; ------------------------------------------------------------------------------ 983 ; | parser-compile-all | 984 ; ------------------------------------------------------------------------------ 985 ; 986 ; DESCRIPTION 987 ; 988 ; Compile all the functions in this program, except parser-compile-all itself. 989 ; 990 ; CALLING SEQUENCE 991 ; 992 ; (parser-compile-all) 993 ; 994 ; EXAMPLE 995 ; 996 ; (parser-compile-all) => 997 ; 998 ; ;;; Compiling function LOAD-INPUT-AND-INITIALIZE...tail-merging... 999 ; assembling...emitting...done. 1000 ; 1001 ; --- and so on, with pauses for garbage collection --- 1002 ; 1003 ; ;;; Compiling function PARSER-GENERATOR...assembling...emitting...done 1004 ; NIL 1005 ; 1006 ; ------------------------------------------------------------------------------ 1007 1008 (defun parser-compile-all() 1009 1010 ; Tell the compiler the following variables are global (have dynamic binding). 1011 1012 (proclaim '(special *terminals*)) 1013 (proclaim '(special *goto-graph*)) 1014 (proclaim '(special *productions*)) 1015 (proclaim '(special *action-table*)) 1016 (proclaim '(special *goto-table*)) 1017 (proclaim '(special *error-messages*)) 1018 1019 (proclaim '(special *input-stack*)) 1020 (proclaim '(special *old-input-stack*)) 1021 (proclaim '(special *parse-stack*)) 1022 1023 (let ( (functions-to-compile 1024 1025 '(load-input-initialize-parser 1026 1027 element-of? 1028 rfirst 1029 rrest 1030 rcons 1031 1032 shift? 1033 reduce? 1034 accept? 1035 error? 1036 1037 table-lookup 1038 list-lookup 1039 1040 shift! 1041 reduce! 1042 error-message 1043 1044 parse-one-step 1045 1046 parser 1047 1048 print-file-to-console 1049 file-exists? 1050 base-path! 1051 test-parser))) 1052 1053 1054 ; Compile all the functions, except parser-compile-all itself. 1055 1056 (dolist (function-to-compile functions-to-compile) 1057 1058 (compile function-to-compile))) 1059 ) 1060 1061 1062 (defun component-present-p (value) 1063 (and value (not (eql value :unspecific)))) 1064 1065 (defun directory-pathname-p (p) 1066 (and 1067 (not (component-present-p (pathname-name p))) 1068 (not (component-present-p (pathname-type p))) 1069 p)) 1070 1071 (defun pathname-as-directory (name) 1072 (let ((pathname (pathname name))) 1073 (when (wild-pathname-p pathname) 1074 (error "Can't reliably convert wild pathnames.")) 1075 (if (not (directory-pathname-p name)) 1076 (make-pathname 1077 :directory (append (or (pathname-directory pathname) (list :relative)) 1078 (list (file-namestring pathname))) 1079 :name nil 1080 :type nil 1081 :defaults pathname) 1082 pathname))) 1083 1084 1085 ; ------------------------------------------------------------------------------ 1086 ; | file-exists? | 1087 ; ------------------------------------------------------------------------------ 1088 ; 1089 ; DESCRIPTION 1090 ; 1091 ; Portable way to check if a file or directory exists. 1092 ; 1093 ; CALLING SEQUENCE 1094 ; 1095 ; (file-exists? directory-or-file) 1096 ; 1097 ; directory-or-file Pathname for directory or file 1098 ; Returns: t if it is there, nil if not. 1099 ; 1100 ; 1101 ; EXAMPLES 1102 ; 1103 ; (file-exists? "/NotThere") => nil 1104 ; (file-exists? "/Volumes/seanoconnor") => t 1105 ; 1106 ; ------------------------------------------------------------------------------ 1107 1108 (defun file-exists? (pathname) 1109 "Check if the file exists" 1110 #+(or sbcl lispworks openmcl) 1111 (probe-file pathname) 1112 1113 #+(or allegro cmu) 1114 (or (probe-file (pathname-as-directory pathname)) 1115 (probe-file pathname)) 1116 1117 #+clisp 1118 (or (ignore-errors 1119 (probe-file (pathname-as-file pathname))) 1120 (ignore-errors 1121 (let ((directory-form (pathname-as-directory pathname))) 1122 (when (ext:probe-directory directory-form) 1123 directory-form)))) 1124 1125 #-(or sbcl cmu lispworks openmcl allegro clisp) 1126 (error "file-exists-p not implemented") 1127 ) 1128 1129 1130 1131 ; ------------------------------------------------------------------------------ 1132 ; | base-path! | 1133 ; ------------------------------------------------------------------------------ 1134 ; 1135 ; DESCRIPTION 1136 ; 1137 ; Try to find out where the base directory for the web page is located. 1138 ; 1139 ; CALLING SEQUENCE 1140 ; 1141 ; (base-path!) 1142 ; 1143 ; Returns: String of base path or nil if it can't find it. 1144 ; 1145 ; 1146 ; EXAMPLES 1147 ; 1148 ; (base-path!) => "C:/Sean/WebSite" ; Got it. 1149 ; (base-path!) => nil ; Could't find it. 1150 ; 1151 ; ------------------------------------------------------------------------------ 1152 1153 (defun base-path!() 1154 (let ( (possible-directories-list '( 1155 "/cygdrive/c/Sean/WebSite" ; Windows / Cygwin 1156 "/Users/seanoconnor/Desktop/Sean/WebSite" ; Mac OS 1157 "/home/seanoconnor/Desktop/Sean/WebSite" ; Ubuntu Linux 1158 ))) 1159 1160 (dolist (base-path possible-directories-list) 1161 ; (format t "base path = ~S exists = ~S~%" base-path (file-exists? base-path) ) 1162 (if (file-exists? base-path) (return (concatenate 'string base-path "/")))) 1163 ) 1164 ) 1165 1166 1167 1168 ; ------------------------------------------------------------------------------ 1169 ; | test-parser | 1170 ; ------------------------------------------------------------------------------ 1171 ; 1172 ; DESCRIPTION 1173 ; 1174 ; Run the parser on test input. 1175 ; 1176 ; CALLING SEQUENCE 1177 ; 1178 ; (test-parser) 1179 ; 1180 ; Set the files and paths to your requirements. I'm assuming you've 1181 ; installed cygwin if you're on a Windows machine. 1182 ; 1183 ; ------------------------------------------------------------------------------ 1184 1185 (defun test-parser() 1186 1187 ; Compile all the functions for speed. 1188 (parser-compile-all) 1189 1190 ; Parse sentences using the parse tables. 1191 (let* ( 1192 ; Set up the base directory paths. 1193 (base-path (base-path!)) 1194 (sub-path "ComputerScience/Compiler/ParserGeneratorAndParser/") 1195 (parse-table-path "ParseTables/") 1196 (sentence-path "Sentences/" ) 1197 1198 ; List the parse table files and sentences (inputs) and 1199 ; parsed sentence files (output). 1200 (parse-table-file '( "ParseTablesLALR(1)_S=SaSbEPSILON.dat" 1201 "ParseTablesLALR(1)_E=E+T_T.dat" 1202 "ParseTablesLALR(1)_Poly.dat" ) ) 1203 (sentence-file '( "SentencesS=SaSbEPSILON.dat" 1204 "SentencesE=E+T_T.dat" 1205 "SentencesPoly.dat" ) ) 1206 (parsed-file '( "ParsedSentencesLALR(1)S=SaSbEPSILON.dat" 1207 "ParsedSentencesLALR(1)E=E+T_T.dat" 1208 "ParsedSentencesLALR(1)Poly.dat") ) 1209 ) 1210 1211 (dotimes (i (length parse-table-file)) 1212 (let* ( 1213 ; Create the full file path. 1214 (full-parse-table-file 1215 (concatenate 'string 1216 base-path sub-path parse-table-path 1217 (nth i parse-table-file)) 1218 ) 1219 1220 (full-sentence-file 1221 (concatenate 'string 1222 base-path sub-path sentence-path 1223 (nth i sentence-file)) 1224 ) 1225 1226 (full-parsed-file 1227 (concatenate 'string 1228 base-path sub-path sentence-path 1229 (nth i parsed-file)) 1230 ) 1231 ) 1232 1233 ; Call the parser. 1234 (parser full-parse-table-file full-sentence-file 1235 full-parsed-file) 1236 1237 ; Display the results to the console. 1238 (print-file-to-console full-parsed-file) 1239 (print-file-to-console full-sentence-file) 1240 (print-file-to-console full-parsed-file) 1241 ) 1242 ) 1243 ) 1244 )