#!/usr/local/bin/perl -w use strict; # Dusko Koncaliev # CS63 - Principles of Computer Organization # Programming Project # # This program is a simple expression compiler for the 0-address machine. # It takes an expression of the form X=........ and translates it to # instructions for the 0-address machine, 1-address machine, 2-address machine # and 3-address machine. Note that the 2- and 3-address machines have not # been tested extensively so there may be some errors there. # # Limitations: # Only 1-character variables can be used. # A 2- or more- character variable would result in a wrong result in the # 0-address machine and a crash in the 1-address machine. # # Bugs: # Some inadequate expression may crash the 1-address machine and # produce wrong results for the 0-address machine. # # Wishlist: # Would like the program to be more robust, i.e. to have a better way of # knowing that an entry is a wrong expression. # Accept multi-character variables. # subroutines used: # Get_Expression -- this subroutine prints an introductory paragraph about # the program and gets the expression (checking that the expression is in a # valid form). # Get_Reverse_Polish -- gets the Reverse Polish Notation of the expression. # The algorithm used is similar to that of Tanenbaum, p.268-269 # Zero_Address -- uses the reverse polish notation of the expression to # compile the expression into the 0-address machine instructions, i.e. # PUSH M, POP M, ADD, SUB, MUL, DIV. # Generate_Zero_Address_File -- generates a file # (/clients/users/dusko/www/cs63/progproject/outfile.txt) for the 0-address # machine instructions output that Kennedy would use for his project. # Differs from Zero_Address in that it uses somewhat changed instructions, # i.e. PUSHI, POPI, and HALT, and also defines the addresses for any # variables used. # One_Address -- uses the reverse polish notation of the expression to # compile the expression into the 1-address machine instructions, i.e. # LOAD M, STORE M, ADD M, SUB M, MUL M, DIV M. # Two_Address -- uses the reverse polish notation of the expression to # compile the expression into the 2-address machine instructions, i.e. # MOV M1 M2, ADD M1 M2, SUB M1 M2, MUL M1 M2, DIV M1 M2 # Three_Address -- uses the reverse polish notation of the expression to # compile the expression into the 3-address machine instructions, i.e. # ADD M1 M2 M3, SUB M1 M2 M3, MUL M1 M2 M3, DIV M1 M2 M3 # declare variables: # $expression is the expression that needs to be compiled # $expression_length is the length (number of characters) of $expression # @expression_parts is an array that holds each element of the expression # @reverse_polish contains the reverse polish notation of the expression # @stack is a help stack for the conversion to reverse polish (the Texas track # in Tanenbaum's algorithm) # the rest of the variables are simply helpers (counters, etc) my ($expression, $expression_length, @expression_parts); my ($counter, $tab, $expression_counter, $array_length); my (@reverse_polish, @stack, $previous, @helpstack); my ($element, $increment); # SUBROUTINE Get_Reverse_Polish () # gets the Reverse Polish Notation of the expression # the algorithm used is similar to that of Tanenbaum, p.268-269 sub Get_Reverse_Polish () { push @reverse_polish, $expression_parts[0]; $counter = 1; while ($counter < $array_length) { $increment = 0; if ($expression_parts[$counter] eq "=") { push @stack, $expression_parts[$counter]; } if ($expression_parts[$counter] eq "+") { $previous = pop @stack; if ($previous eq "+" || $previous eq "-" || $previous eq "*" || $previous eq "/") { push @reverse_polish, $previous; $increment = 1; } else { push @stack, $previous; push @stack, $expression_parts[$counter]; } } if ($expression_parts[$counter] eq "-") { $previous = pop @stack; if ($previous eq "+" || $previous eq "-" || $previous eq "*" || $previous eq "/") { push @reverse_polish, $previous; $increment = 1; } else { push @stack, $previous; push @stack, $expression_parts[$counter]; } } if ($expression_parts[$counter] eq "*") { $previous = pop @stack; if ($previous eq "*" || $previous eq "/") { push @reverse_polish, $previous; $increment = 1; } else { push @stack, $previous; push @stack, $expression_parts[$counter]; } } if ($expression_parts[$counter] eq "/") { $previous = pop @stack; if ($previous eq "*" || $previous eq "/") { push @reverse_polish, $previous; $increment = 1; } else { push @stack, $previous; push @stack, $expression_parts[$counter]; } } if ($expression_parts[$counter] eq "(") { push @stack, $expression_parts[$counter]; } if ($expression_parts[$counter] eq ")") { $previous = pop @stack; if ($previous eq "(") { # we do not want anything to happen in this case } else { push @reverse_polish, $previous; $element =""; while ($element ne "(") { $element = pop @stack; if ($element ne "(") { push @reverse_polish, $element; } else { # found the left bracket # delete it from the stack # by not doing anything with it # (it has already been popped from the stack) } } } } if ($expression_parts[$counter] ne "=" && $expression_parts[$counter] ne "+" && $expression_parts[$counter] ne "-" && $expression_parts[$counter] ne "*" && $expression_parts[$counter] ne "/" && $expression_parts[$counter] ne "(" && $expression_parts[$counter] ne ")") { push @reverse_polish, $expression_parts[$counter]; } if ($increment == 0) { $counter++; } } # put the remainder of the elements in @stack into @reverse_polish while (@stack) { push @reverse_polish, pop @stack; } $array_length = @reverse_polish; print "\nRPN : "; for ($counter=0; $counter<$array_length; $counter++) { print "$reverse_polish[$counter] "; } } # SUBROUTINE Zero_Address () # uses the reverse polish notation of the expression to # compile the expression into the 0-address machine instructions sub Zero_Address () { for ($counter=1; $counter<$array_length-1; $counter++) { if ($reverse_polish[$counter] eq "+") { print "ADD\n"; } if ($reverse_polish[$counter] eq "-") { print "SUB\n"; } if ($reverse_polish[$counter] eq "*") { print "MUL\n"; } if ($reverse_polish[$counter] eq "/") { print "DIV\n"; } if ($reverse_polish[$counter] ne "+" && $reverse_polish[$counter] ne "-" && $reverse_polish[$counter] ne "*" && $reverse_polish[$counter] ne "/") { print "PUSH $reverse_polish[$counter]\n"; } } print "POP $reverse_polish[0]\n"; } # SUBROUTINE Generate_Zero_Address_File () # generates a file for the 0-address machine instructions output that Kennedy # would use for his project sub Generate_Zero_Address_File () { my ($output_file, @variable_holder, $variable_count, $var_hold_length); $variable_count = 1; $output_file = "/clients/users/dusko/www/cs63/progproject/outfile.txt"; open (OUTQ, ">$output_file"); for ($counter=1; $counter<$array_length-1; $counter++) { if ($reverse_polish[$counter] eq "+") { print OUTQ "ADD\n"; } if ($reverse_polish[$counter] eq "-") { print OUTQ "SUB\n"; } if ($reverse_polish[$counter] eq "*") { print OUTQ "MUL\n"; } if ($reverse_polish[$counter] eq "/") { print OUTQ "DIV\n"; } if ($reverse_polish[$counter] ne "+" && $reverse_polish[$counter] ne "-" && $reverse_polish[$counter] ne "*" && $reverse_polish[$counter] ne "/") { if ($reverse_polish[$counter] gt "9") { $variable_count++; $variable_holder[$variable_count] = $reverse_polish[$counter]; print OUTQ "PUSHD $reverse_polish[$counter]\n"; } else { print OUTQ "PUSHI $reverse_polish[$counter]\n"; } } } print OUTQ "HALT\n"; $variable_holder[1] = $reverse_polish [0]; $var_hold_length = @variable_holder; for ($variable_count = 1; $variable_count < $var_hold_length; $variable_count ++) { print OUTQ "$variable_holder[$variable_count] "; print OUTQ 1000+(($variable_count-1)*8); print OUTQ "\n"; } close (OUTQ); } # SUBROUTINE One_Address () # uses the reverse polish notation of the expression to # compile the expression into the 1-address machine instructions sub One_Address () { my ($addcounter, $loopcounter, $rpnlength); $addcounter = 0; $counter = 1; while ($reverse_polish[2] ne "=") { $counter = 1; while (($reverse_polish[$counter] ne '+') && ($reverse_polish[$counter] ne '-') && ($reverse_polish[$counter] ne "*") && ($reverse_polish[$counter] ne "/")) { $counter ++; } print "LOAD $reverse_polish[$counter-2]\n"; if ($reverse_polish[$counter] eq "+") { print "ADD $reverse_polish[$counter-1]\n"; } if ($reverse_polish[$counter] eq "-") { print "SUB $reverse_polish[$counter-1]\n"; } if ($reverse_polish[$counter] eq "*") { print "MUL $reverse_polish[$counter-1]\n"; } if ($reverse_polish[$counter] eq "/") { print "DIV $reverse_polish[$counter-1]\n"; } $addcounter++; if ($reverse_polish[$counter+1] ne "=") { print "STORE R$addcounter\n"; $reverse_polish[$counter] = "R$addcounter"; } else { print "STORE $reverse_polish[0]\n"; $reverse_polish[$counter] = $reverse_polish[0]; } $rpnlength = @reverse_polish; for ($loopcounter = $counter; $loopcounter < $rpnlength; $loopcounter++) { $reverse_polish[$loopcounter-2] = $reverse_polish[$loopcounter]; } pop @reverse_polish; pop @reverse_polish; } } # SUBROUTINE Two_Address () # uses the reverse polish notation of the expression to # compile the expression into the 2-address machine instructions sub Two_Address () { my ($addcounter, $loopcounter, $rpnlength); $addcounter = 0; $counter = 1; while ($reverse_polish[2] ne "=") { $counter = 1; while (($reverse_polish[$counter] ne '+') && ($reverse_polish[$counter] ne '-') && ($reverse_polish[$counter] ne "*") && ($reverse_polish[$counter] ne "/")) { $counter ++; } $addcounter++; if ($reverse_polish[$counter+1] ne "=") { print "MOV R$addcounter $reverse_polish[$counter-2]\n"; } else { print "MOV $reverse_polish[0] $reverse_polish[$counter-2]\n"; } if ($reverse_polish[$counter] eq "+") { if ($reverse_polish[$counter+1] ne "=") { print "ADD R$addcounter $reverse_polish[$counter-1]\n"; } else { print "ADD $reverse_polish[0] $reverse_polish[$counter-1]\n"; } } if ($reverse_polish[$counter] eq "-") { if ($reverse_polish[$counter+1] ne "=") { print "SUB R$addcounter $reverse_polish[$counter-1]\n"; } else { print "SUB $reverse_polish[0] $reverse_polish[$counter-1]\n"; } } if ($reverse_polish[$counter] eq "*") { if ($reverse_polish[$counter+1] ne "=") { print "MUL R$addcounter $reverse_polish[$counter-1]\n"; } else { print "MUL $reverse_polish[0] $reverse_polish[$counter-1]\n"; } } if ($reverse_polish[$counter] eq "/") { if ($reverse_polish[$counter+1] ne "=") { print "DIV R$addcounter $reverse_polish[$counter-1]\n"; } else { print "DIV $reverse_polish[0] $reverse_polish[$counter-1]\n"; } } if ($reverse_polish[$counter+1] ne "=") { $reverse_polish[$counter] = "R$addcounter"; } else { $reverse_polish[$counter] = $reverse_polish[0]; } $rpnlength = @reverse_polish; for ($loopcounter = $counter; $loopcounter < $rpnlength; $loopcounter++) { $reverse_polish[$loopcounter-2] = $reverse_polish[$loopcounter]; } pop @reverse_polish; pop @reverse_polish; } } # SUBROUTINE Three_Address () # uses the reverse polish notation of the expression to # compile the expression into the 3-address machine instructions sub Three_Address () { my ($addcounter, $loopcounter, $rpnlength); $addcounter = 0; $counter = 1; while ($reverse_polish[2] ne "=") { $counter = 1; while (($reverse_polish[$counter] ne '+') && ($reverse_polish[$counter] ne '-') && ($reverse_polish[$counter] ne "*") && ($reverse_polish[$counter] ne "/")) { $counter ++; } $addcounter++; if ($reverse_polish[$counter] eq "+") { if ($reverse_polish[$counter+1] ne "=") { print "ADD R$addcounter $reverse_polish[$counter-2] $reverse_polish[$counter-1]\n"; } else { print "ADD $reverse_polish[0] $reverse_polish[$counter-2] $reverse_polish[$counter-1]\n"; } } if ($reverse_polish[$counter] eq "-") { if ($reverse_polish[$counter+1] ne "=") { print "SUB R$addcounter $reverse_polish[$counter-2] $reverse_polish[$counter-1]\n"; } else { print "SUB $reverse_polish[0] $reverse_polish[$counter-2] $reverse_polish[$counter-1]\n"; } } if ($reverse_polish[$counter] eq "*") { if ($reverse_polish[$counter+1] ne "=") { print "MUL R$addcounter $reverse_polish[$counter-2] $reverse_polish[$counter-1]\n"; } else { print "MUL $reverse_polish[0] $reverse_polish[$counter-2] $reverse_polish[$counter-1]\n"; } } if ($reverse_polish[$counter] eq "/") { if ($reverse_polish[$counter+1] ne "=") { print "DIV R$addcounter $reverse_polish[$counter-2] $reverse_polish[$counter-1]\n"; } else { print "DIV $reverse_polish[0] $reverse_polish[$counter-2] $reverse_polish[$counter-1]\n"; } } if ($reverse_polish[$counter+1] ne "=") { $reverse_polish[$counter] = "R$addcounter"; } else { $reverse_polish[$counter] = $reverse_polish[0]; } $rpnlength = @reverse_polish; for ($loopcounter = $counter; $loopcounter < $rpnlength; $loopcounter++) { $reverse_polish[$loopcounter-2] = $reverse_polish[$loopcounter]; } pop @reverse_polish; pop @reverse_polish; } } # SUBROUTINE Get_Expression () # this subroutine prints an introductory paragraph about the program # and gets the expression (checking that the expression is in a valid # form). sub Get_Expression () { my ($error, $message); $error = 1; $message = "\n"; print "\nThis program takes an expression of the form X=.....,\n"; print "converts it to reverse polish notation and then compiles it\n"; print "to 0-, 1-, 2-, and 3-address machine instructions (as in\n"; print "Andrew S. Tanenbaum's, Structured Computer Organization,\n"; print "(Prentice Hall, 1990), chapter 5, problem 8.\n"; print "Note: the program can work with both constant numbers and\n"; print "variables, but is restricted to only 1-character variables.\n"; while ($error == 1) { $error = 0; print "$message"; print "Please enter the expression (in the form X=.......): "; $expression = ; if (!($expression =~ m/=/)) { $expression = "X=".$expression; } chop ($expression); # cut all the spaces (if any) from the expression while ($expression =~ s/ //){ }; #add spaces before and after each element of the expression $expression = join ' ',split /|/, $expression; $expression =~ s/\d+/ $& /g; $expression =~ s/ //g; $expression =~ s/ / /g; print ("\nExpression: $expression\n"); # get the length (number of characters) of $expression $expression_length = length ($expression); # store all the elements of the expression into @expression_parts @expression_parts = split / /, $expression; # get the length (number of elements) of @expression_parts $array_length = @expression_parts; # check validity of expression (as much as possible) my ($left_bracket_counter, $right_bracket_counter, $loopcounter); my ($equals_counter); $left_bracket_counter = 0; $right_bracket_counter = 0; $equals_counter = 0; for ($loopcounter = 0; $loopcounter < $array_length; $loopcounter++) { if ($expression_parts[$loopcounter] eq "(") { $left_bracket_counter ++; } if ($expression_parts[$loopcounter] eq ")") { $right_bracket_counter ++; } if ($expression_parts[$loopcounter] eq "=") { $equals_counter ++; } } if ($left_bracket_counter != $right_bracket_counter) { $error = 1; $message = "Entry invalid. Unequal number of left and right brackets.\n"; } if ($equals_counter > 1) { $error = 1; $message = "Entry invalid. More than one = (equality) sign.\n"; } } # while } ### MAIN BODY # get the expression Get_Expression; # convert expression into reverse polish notation Get_Reverse_Polish; print "\n\n0-address machine instructions: \n"; print "The result of this compiling (in a slightly changed form for\n"; print "Kennedy's use) can be found at\n"; print "/clients/users/dusko/www/cs63/progproject/outfile.txt\n"; Zero_Address; Generate_Zero_Address_File; my (@reverse_polish2); @reverse_polish2 = @reverse_polish; print "\n\n1-address machine instructions: \n"; One_Address; @reverse_polish = @reverse_polish2; print "\n\n2-address machine instructions: \n"; Two_Address; @reverse_polish = @reverse_polish2; print "\n\n3-address machine instructions: \n"; Three_Address;