#!/usr/bin/perl
#
# Copyright (C) 2001 Mark Bush
#
#
# Cross-Platform Assembler
#
# This is an assembler for arbitrary assembly languages and chips.
# This is achieved through a config file specifying a map from op
# mnemonic/addressing mode to op-code and a list of addressing modes
# with a perl RE showing how to recognise their use and a specification
# of what bytes then follow the op-code.
# Currently, only single-byte op-codes are supported, but this is being
# updated.  Also, branches are assumed to be single byte branches (so
# back up to -128 or forward up to +127), though this will also be fixed.
# Branches are also assumed to count from the next op-code.  This will be
# updated for other formats.
#
# Options:
# -------
#   -d <dir>    Specify an alternate directory to search for
#               machine spec files.
#
#   -m <mach>   Specify the machine architecture to assemble
#               for.  The spec must appear in a file of this
#               name (with an optional .spec extension) in the
#               config directory.
#
#   -s <addr>   Specify the address that the assembled code is
#               intended to start at.
#
#   -b          The architecture is big-endian.
#
#   -o <file>   Specify a file to leave the assembled code in
#               (goes to STDOUT by default).
#
#   -D          Turn on debugging output (can produce quite a
#               lot of output!).
#
# Errors with the input are listed on STDERR and will usually abort
# further processing.  Line numbers of errors along with the fatal
# command are printed.
#
# Syntax is based mainly on standard BBC assembly syntax as that is
# the context this application was originally written for.  The following
# describes the main points:
#
#   Only one command per line is allowed (multiple-command lines
#   make code difficult to read and should be avoided anyway! ;).
#
#   Comments are introduced by a backslash character and extend to
#   the end of the line.
#
#   A label may appear on a line of its own and refers to the next
#   address that a command would occupy.
#
#   Otherwise, a line consists of an optional label followed by a
#   command mnemonic separated by white space.  An optional argument
#   may follow further white space and must match one of the addressing
#   mode patterns in the spec file.
#
#   Mnemonics and pseudo-commands are case insensitive.
#
#   The following pseudo-commands are recognised:
#     EQUB - the arg is a byte to add directly to the output at the
#            current address;
#     EQUW - the arg is a word (2 bytes) to add directly to the output
#            at the current address in the target machine's byte order;
#     EQUR - the arg is a word to add directly to the output at the
#            current address in high,low byte order;
#     EQUS - the arg is a string (optionally surrounded by double
#            quotes) and the byte codes for each character are added
#            directly to the output at the current address.
#
#   Expressions in arguments (and byte specifiers in the spec file)
#   are allowed as follows:
#
#     positive and negative integers;
#     "pc" specifies the current program counter (will refer to the address
#          of the first byte of the op-code);
#     (expr) used to group expressions - doesn't work quite how you think
#          so beware!
#     expr + expr specifies the sum of two expressions;
#     expr - expr specifies the difference between two expressions;
#     expr mod expr specifies the first expression modulo the second;
#     expr div expr specifies integer division;
#     &expr means the expression treated as a hex number;
#     asc "x" specifies the ascii code for the character between double quotes;
#     a label will be expanded to its address.
#
#   Fixed address labels can be specified on a line of their own as:
#     label=value
#   where the value must be preceded by "&" or "0x" to be recognised
#   as hex.
#
#   A label specification with the code must be preceded by a dot and
#   use of a label in an expression must not include the dot.
#
# Spec File
# ---------
# The spec file contains two types of line.
#   Addressing mode lines:
#     A:<descr>:<adjust>:<format>:<output>
#   where:
#     <descr>   is a unique label for this addressing mode
#     <adjust>  specifies the number of argument bytes that will be needed
#               followed by "r" if this mode uses relative addressing
#     <format>  this is either a perl regular expression to match an
#               argument, or a ! separated list of other addressing
#               modes (this allows one addressing mode to have different
#               patterns with different ways of calculating the bytes).
#     <output>  a colon separated list of expressions used to calculate
#               the argument bytes
#
#   Op code lines:
#     O:<code>:<mnemonic>:<addr_mode>
#   Different spellings of mnemonic can be used to map to the same code.
#   Addressing modes must be specified before they are referenced.

use Getopt::Std;
use Data::Dumper;

# ---------------------------------------------------------------------
# Global state (package variables shared by every sub below).
# ---------------------------------------------------------------------
$CONFDIR = "$ENV{HOME}/lib/as";
$CONFDIR = "/usr/local/lib/as" unless (-d $CONFDIR);
$MACH = "65C02";        # default machine spec name
$PC = 0;                # current program counter
$DEBUG = 0;
$indent = 2;            # indentation of expr() debug trace
$BIG_ENDIAN = 0;
$PASS = 0;              # 0 during the first pass, 1 afterwards
%cmd = (equb=>1, equw=>1, equr=>1, equs=>1);    # pseudo-commands

# From here on a bare print/printf goes to STDERR; assembled bytes are
# written explicitly to OUTPUT.
select(STDERR);

getopts("d:m:s:Dbo:");

$CONFDIR = $opt_d if ($opt_d);
$MACH = $opt_m if ($opt_m);
$PC = $opt_s if ($opt_s);
$DEBUG = $opt_D if ($opt_D);
$BIG_ENDIAN = $opt_b if ($opt_b);

if ($opt_o) {
    # Three-argument open so the file name is never parsed for a mode.
    open(OUTPUT, ">", $opt_o) || die "Can't open $opt_o: $!\n";
}
else {
    *OUTPUT = *STDOUT;
}
# The output is raw machine code: make sure no newline translation or
# encoding layer is applied to it.
binmode(OUTPUT);

# Only checks that the config directory is readable.
opendir(DIR, $CONFDIR) || die "Can't access conf dir: $CONFDIR: $!\n";
closedir(DIR);

$SPEC = "$CONFDIR/$MACH";
$SPEC .= ".spec" unless (-f $SPEC);

# A start address given as &hhhh or 0xhhhh is hexadecimal.
if ($PC =~ s/^(\&|0x)//i) {
    $PC = hex($PC);
}

&parse_spec();

# ---------------------------------------------------------------------
# Pass 1: read the source, record one entry in @lines per input line,
# collect labels and encode everything that can already be resolved.
# ---------------------------------------------------------------------
$line_no = 0;
while (<>) {
    printf "%04x ", $PC if ($DEBUG);
    print if ($DEBUG);
    chomp;
    $full_line = $_;
    $line_no++;
    s/\\.*//;               # strip comments
    s/^\s+//; s/\s+$//;     # strip leading and trailing white space
    push(@lines, {label=>undef, op=>undef, arg=>undef, address=>$PC, HEXaddr=>sprintf("%04x", $PC)});
    $lines[$line_no-1]{op} = {name=>undef, mode=>undef, code=>undef, encoded=>0};

    # Fixed address label: label=value
    if (/^(\w+)\s*=\s*(.+)$/) {
        $label = lc $1;
        $addr = $2;
        printf STDERR "mapping %s => %s\n", $label, $addr if ($DEBUG);
        if ($addr =~ /^(\&|0x)([0-9a-fA-F]+)/) {
            $addr = hex($2);
        }
        $f_label{$addr} = $label;
        $f_addr{$label} = $addr;
        next;
    }

    # Code label: .name
    if (/^\.(\w+)\s*/) {
        if (defined $addr{$1}) {
            printf STDERR "Duplicate label '%s' at line %d (ignored):\n %s\n", $1, $line_no, $full_line;
        }
        else {
            $label{$PC} = $1;
            $addr{$1} = $PC;
            $lines[$line_no-1]{label} = $1;
        }
        s/^\.$1\s*//;       # strip label
    }

    # Nothing left on the line: nothing to encode.
    if (/^$/) {
        $lines[$line_no-1]{op}{encoded} = 1;
        next;
    }

    ($op, $arg) = /(\S+)\s*(.*)/;
    $op = lc $op;
    $lines[$line_no-1]{op}{name} = $op;
    $lines[$line_no-1]{op}{encoded} = 0;
    $lines[$line_no-1]{arg} = {text=>$arg, bytes=>undef};

    if ($cmd{$op}) {
        &cmd($op, $arg, $line_no);
        next;
    }
    if (! defined $addr_modes{$op}) {
        # The offending source line is the third printf argument.
        printf STDERR "Unknown op code '%s' at line %d (aborting):\n %s\n", $op, $line_no, $full_line;
        exit(1);
    }
    print "Before: ", Dumper($lines[$line_no-1]), "\n" if ($DEBUG);
    &op($op, $arg, $line_no, 0);
    print "After: ", Dumper($lines[$line_no-1]), "\n" if ($DEBUG);
}

# Debug listing of the state after pass 1 ("*" = not yet encoded,
# "r" = the line references a code label).
if ($DEBUG) {
    foreach $line (@lines) {
        next unless (defined $line->{op}{name} || defined $line->{arg}{text} || defined $line->{label});
        $label = $line->{label};
        $label = ".".$label if ($label);
        if ($line->{op}{encoded}) {
            print " ";
        }
        else {
            print "*";
        }
        if ($line->{ref}) {
            print "r";
        }
        else {
            print " ";
        }
        printf STDERR "%15s", $label;
        printf STDERR " %-4s %-15s %04x", $line->{op}{name}, $line->{arg}{text}, $line->{address};
        if ($line->{op}{encoded}) {
            printf STDERR " %s", $line->{op}{code} if (defined $line->{op}{code});
            foreach $byte (@{$line->{arg}{bytes}}) {
                printf STDERR " %02x", $byte;
            }
        }
        print "\n";
    }
}

# ---------------------------------------------------------------------
# Pass 2: retry every line that could not be encoded in pass 1 (forward
# references).  op() is called with $adjust set so that addresses after
# the line are shifted once its real size is known.
# ---------------------------------------------------------------------
$PASS = 1;
print "\n\nPass 2\n\n" if ($DEBUG);
$line_no = 0;
foreach $line (@lines) {
    $line_no++;
    next if $line->{op}{encoded};
    next unless (defined $line->{op}{name} && $line->{op}{name});
    print "Before: ", Dumper($lines[$line_no-1]), "\n" if ($DEBUG);
    $PC = $line->{address};
    $op = $line->{op}{name};
    $arg = $line->{arg}{text};
    printf STDERR "%s %s (pass 2)\n", $op, $arg if ($DEBUG);
    if ($cmd{$op}) {
        &cmd($op, $arg, $line_no);
        next;
    }
    &op($op, $arg, $line_no, 1);
    print "After: ", Dumper($lines[$line_no-1]), "\n" if ($DEBUG);
}

# ---------------------------------------------------------------------
# Anything still unencoded after pass 2 is an error.
# The cmd() sub is deliberately NOT called here: invoking it without a
# line number rewrote the last entry of @lines, and its return value
# (true only in debug mode) used to hide the error.
# ---------------------------------------------------------------------
$line_no = 0;
foreach $line (@lines) {
    $line_no++;
    next if ($line->{op}{encoded});
    next unless (defined $line->{op}{name} && $line->{op}{name});
    printf STDERR "Error: Unencoded line %d:\n %s %s %s\n", $line_no, $line->{label}, $line->{op}{name}, $line->{arg}{text};
    $ERRORS++;
}

if ($ERRORS) {
    printf STDERR "Aborting due to errors.\n";
    exit(1);
}

#print Dumper(\%addr_mode), "\n", Dumper(\@lines), "\n" if ($DEBUG);

# ---------------------------------------------------------------------
# Emit: a listing on STDERR and the assembled bytes on OUTPUT.
# ---------------------------------------------------------------------
foreach $line (@lines) {
    next unless (defined $line->{op}{code} || defined $line->{arg}{bytes} || defined $line->{label});
    $label = $line->{label};
    $label = ".".$label if ($label);
    printf STDERR "%15s", $label;
    printf STDERR " %-4s %-15s %04x", $line->{op}{name}, $line->{arg}{text}, $line->{address};
    if ($line->{op}{encoded}) {
        if (defined $line->{op}{code}) {
            # The op-code is kept as the hex string read from the spec.
            printf STDERR " %s", $line->{op}{code};
            $c = sprintf "%c", hex($line->{op}{code});
            syswrite(OUTPUT, $c, 1);
        }
        foreach $byte (@{$line->{arg}{bytes}}) {
            printf STDERR " %02x", $byte;
            $c = sprintf "%c", $byte;
            syswrite(OUTPUT, $c, 1);
        }
    }
    print "\n";
}
close(OUTPUT) if ($opt_o);

# parse_spec()
#
# Reads the machine spec file named by the global $SPEC and dispatches
# each "A:" line to address_mode() and each "O:" line to parse_op().
# All other lines are ignored.  Dies if the file cannot be opened.
sub parse_spec {
    open(SPEC, "<", $SPEC) || die "Can't open mach spec file: $SPEC: $!\n";
    my $line_no = 0;
    local $_;   # while (<FH>) assigns to the global $_; keep it contained
    while (<SPEC>) {
        chomp;
        $line_no++;
        if (/^A:/i) {
            &address_mode($_);
            next;
        }
        if (/^O:/i) {
            &parse_op($_, $SPEC, $line_no);
            next;
        }
    }
    close(SPEC);
}

# address_mode($line)
#
# Parses one "A:<descr>:<adjust>:<format>:<output>" spec line and stores
# the result in %addr_mode{<descr>}.  A trailing "r" on <adjust> marks
# the mode as relative.  When <format> contains "!" it is a list of other
# mode names; each of those is recorded in %map as belonging to <descr>.
sub address_mode {
    my ($line) = @_;
    my ($descr, $adjust, $format, $output, $mode, @bytes, $rel);

    $line =~ s/^A://i;
    ($descr, $adjust, $format, $output) = $line =~ /^([^:]*):([^:]*):([^:]*):(.*)/;
    @bytes = split /:/, $output;
    if ($adjust =~ /^(\d+)r$/) {
        $rel = 1;
        $adjust = $1;
    }
    else {
        $rel = 0;
    }
    $addr_mode{$descr} = {num_bytes=>scalar @bytes, bytes=>\@bytes, pattern=>$format, adjust=>$adjust, rel=>$rel};
    if ($format =~ /!/) {
        foreach $mode (split /!/, $format) {
            $map{$mode} = $descr;
        }
    }
}

# parse_op($line, $file, $line_no)
#
# Parses one "O:<code>:<mnemonic>:<addr_mode>" spec line and records the
# op-code in %addr_modes{<mnemonic>}{<addr_mode>} (mnemonic lower-cased).
# Duplicate definitions and unknown addressing modes are reported on
# STDERR, quoting $file and $line_no, and the line is skipped.
sub parse_op {
    my ($line, $file, $line_no) = @_;
    my ($type, $code, $op, $addr_mode);

    ($type, $code, $op, $addr_mode) = split /:/, $line;
    $op = lc $op;
    if (defined $addr_modes{$op}{$addr_mode}) {
        printf STDERR "Duplicate op in %s line %d:\n %s\n", $file, $line_no, $line;
        return;
    }
    if (! defined $addr_mode{$addr_mode}) {
        printf STDERR "Unknown addressing mode in %s line %d:\n %s\n", $file, $line_no, $line;
        return;
    }
    $addr_modes{$op}{$addr_mode} = $code;
}

# cmd($op, $arg, $line_no)
#
# Encodes one pseudo-command (equb, equw, equr or equs) for source line
# $line_no (1-based index into @lines) and advances $PC by the number of
# bytes it occupies.  A successfully encoded pseudo-command is marked
# with {op}{encoded} = 2 (instructions use 1).  If the argument cannot
# be evaluated yet the bytes are stored as undef and the line stays
# unencoded.  Sets the line's {ref} flag when the argument referenced a
# code label.
sub cmd {
    my ($op, $arg, $line_no) = @_;
    my ($val, $byte1, $byte2, @bytes);

    $REF = 0;
    if ($op eq "equb") {
        $val = &expr($arg, $line_no, undef);
        $lines[$line_no-1]{ref} = 1 if ($REF);
        $lines[$line_no-1]{op}{encoded} = 2 if (defined $val);
        $lines[$line_no-1]{arg}{bytes} = [$val];
        $PC++;
    }
    elsif ($op eq "equw") {
        $val = &expr($arg, $line_no, undef);
        $lines[$line_no-1]{ref} = 1 if ($REF);
        if (defined $val) {
            $lines[$line_no-1]{op}{encoded} = 2;
            $byte1 = $val % 256;
            $byte2 = int($val/256);
            ($byte1, $byte2) = ($byte2, $byte1) if ($BIG_ENDIAN);
        }
        else {
            $byte1 = $byte2 = undef;
        }
        $lines[$line_no-1]{arg}{bytes} = [$byte1, $byte2];
        $PC += 2;
    }
    elsif ($op eq "equr") {
        $val = &expr($arg, $line_no, undef);
        $lines[$line_no-1]{ref} = 1 if ($REF);
        if (defined $val) {
            $lines[$line_no-1]{op}{encoded} = 2;
            # high;low order regardless of architecture
            $byte1 = int($val/256);
            $byte2 = $val % 256;
        }
        else {
            $byte1 = $byte2 = undef;
        }
        $lines[$line_no-1]{arg}{bytes} = [$byte1, $byte2];
        $PC += 2;
    }
    elsif ($op eq "equs") {
        # Drop one optional surrounding pair of double quotes.
        $arg =~ s/^\s*\"//;
        $arg =~ s/\"\s*$//;
        $lines[$line_no-1]{op}{encoded} = 2;
        @bytes = map {ord($_);} (split //, $arg);
        $lines[$line_no-1]{arg}{bytes} = [@bytes];
        $PC += @bytes;
    }
    print "After: ", Dumper($lines[$line_no-1]), "\n" if ($DEBUG && $PASS);
}

# op($op, $arg, $line_no, $adjust)
#
# Tries to encode instruction $op with argument text $arg for source
# line $line_no.  Every addressing mode defined for $op is matched
# against $arg; among those whose argument bytes all evaluate to 0..255
# the one needing the fewest bytes wins.  $PC is always advanced past
# the op-code byte; on success it is also advanced past the argument
# bytes and the line is marked encoded.  When $adjust is true (pass 2)
# and the instruction has argument bytes, adjust() is called to shift
# every later address by that many bytes.
sub op {
    my ($op, $arg, $line_no, $adjust) = @_;
    my (@addr_modes, $mode, $match, $byte, $res, $defined, $match_mode,
        $level, $final_mode, $num_bytes, %matches, %ref);

    $level = 255;
    $defined = 0;
    # Sorted so that a tie between equally short modes is resolved the
    # same way on every run instead of following random hash order.
    @addr_modes = sort keys %{$addr_modes{$op}};
    foreach $mode (@addr_modes) {
        if ($match = &match_mode($arg, $mode)) {
            # Element 0 is the name of the mode that actually matched;
            # the rest are the pattern's captures ($1, $2, ... in specs).
            $match_mode = shift @$match;
            $num_bytes = scalar @{$addr_mode{$match_mode}{bytes}};
            next if ($num_bytes>=$level);
            my @bytes;
            $defined = 1;
            $REF = 0;
            foreach $byte (@{$addr_mode{$match_mode}{bytes}}) {
                $res = &expr($byte, $line_no, $match);
                $ref{$match_mode} = 1 if ($REF);
                # Relative offsets are stored as an unsigned byte.
                # NOTE(review): the flag is read from $mode, not from
                # $match_mode; these differ for "!" list modes - confirm
                # which one is intended.
                $res += 256 if ($addr_mode{$mode}{rel} && ($res<0));
                if (! defined $res || ($res<0) || ($res>255)) {
                    $defined = 0;
                    last;
                }
                push(@bytes, $res);
            }
            next unless ($defined);
            printf STDERR " matched %s with %s [%s]\n", $arg, $mode, join(", ", @$match) if ($DEBUG);
            $matches{$match_mode} = \@bytes;
            if ($level>$num_bytes) {
                $level = $num_bytes;
                $final_mode = $match_mode;
            }
        }
    }
    $PC++;
    return unless ($final_mode);
    $num_bytes = scalar @{$matches{$final_mode}};
    $lines[$line_no-1]{op}{encoded} = 1;
    $lines[$line_no-1]{ref} = 1 if ($ref{$final_mode});
    $lines[$line_no-1]{rel} = 1 if ($addr_mode{$final_mode}{rel});
    $lines[$line_no-1]{arg}{bytes} = $matches{$final_mode};
    $lines[$line_no-1]{op}{mode} = $final_mode;
    # A mode that is a member of a "!" list is looked up under the name
    # of the list mode, which is what op lines in the spec refer to.
    $final_mode = $map{$final_mode} if (defined $map{$final_mode});
    $lines[$line_no-1]{op}{code} = $addr_modes{$op}{$final_mode};
    if ($adjust && $num_bytes) {
        &adjust($PC-1, $num_bytes);
    }
    $PC += $num_bytes;
}

# adjust($pc, $offset)
#
# Called when the instruction at address $pc turns out to need $offset
# argument bytes that were not accounted for in pass 1.  Every code label
# and every line located after $pc is moved up by $offset, and each
# already-encoded line that references a code label is re-encoded so
# that it picks up the new addresses.  $PC is preserved across the
# re-encoding.
sub adjust {
    my ($pc, $offset) = @_;
    my $keep_PC;

    # Highest address first: a label moving up can then never land on
    # the %label slot of another label that has not been moved yet.
    foreach $label (sort { $addr{$b} <=> $addr{$a} } keys %addr) {
        next unless $addr{$label} > $pc;
        printf STDERR "adjusting %s from %04x to %04x\n", $label, $addr{$label}, $addr{$label}+$offset if ($DEBUG);
        delete $label{$addr{$label}};
        $addr{$label} += $offset;
        $label{$addr{$label}} = $label;
    }
    $this_line = 0;
    foreach $line (@lines) {
        $this_line++;
        $line->{address} += $offset if ($line->{address}>$pc);
        $line->{HEXaddr} = sprintf("%04x", $line->{address});
        next unless ($line->{op}{encoded});
        next unless ($line->{ref});
        printf STDERR "checking to adjust %04x %s %s\n", $line->{address}, $line->{op}{name}, $line->{arg}{text} if ($DEBUG);
        $keep_PC = $PC;
        $PC = $line->{address};
        if ($line->{op}{encoded}>1) {
            # encoded == 2 marks a pseudo-command
            &cmd($line->{op}{name}, $line->{arg}{text}, $this_line);
        }
        else {
            &op($line->{op}{name}, $line->{arg}{text}, $this_line, 0);
        }
        $PC = $keep_PC;
    }
}

# match_mode($arg, $mode)
#
# Matches argument text $arg against addressing mode $mode.  If the
# mode's pattern is a "!" separated list, each listed mode is tried in
# order and the first match is returned.  Returns a reference to an
# array whose first element is the name of the mode that matched,
# followed by the pattern's captures, or undef when nothing matches.
sub match_mode {
    my ($arg, $mode) = @_;
    my (@res, $res, $pat, $sub_mode);

    $pat = $addr_mode{$mode}{pattern};
    if ($pat =~ /!/) {
        foreach $sub_mode (split /!/, $pat) {
            $res = &match_mode($arg, $sub_mode);
            return $res if (defined $res);
        }
        return;
    }

    @res = $arg =~ /^($pat)$/;
    return unless (@res);
    $res[0] = $mode;    # overwrite extra outer bracket match
    return \@res;
}

# expr($text, $line_no, $args)
#
# Evaluates the expression $text (see the syntax notes at the top of the
# file).  $args is an optional array reference holding the captures of
# the matched addressing mode, addressed as $1, $2, ... in spec files.
# Returns the numeric value, or undef when the expression contains a
# name that is not (yet) defined; after pass 1 such names are reported
# on STDERR.  Sets the global $REF and records the name under the line's
# {refs} when a code label is used.
#
# The branches are tried in order and the binary operators split at the
# LAST occurrence of the operator, "+" before "-" before mod before div.
sub expr {
    my (@args) = @_;
    local $_ = shift @args;
    my $line_no = shift @args;
    my $args = shift @args;

    printf STDERR "%sexpr(%s) with args %s\n", ' 'x$indent, $_, join(", ", @$args) if ($DEBUG);
    $indent += 2;
    s/^\s+//; s/\s+$//;     # strip leading and trailing space
    my ($e1, $e2, $res);
    if (/^(\-?\d+)$/) {
        $res = $1;
    }
    elsif (/^pc$/i) {
        $res = $PC;
    }
    elsif (/^\$(\d+)$/) {
        $res = &expr($args->[$1-1], $line_no, $args);
    }
    elsif (/^\((.*)\)$/) {
        $res = &expr($1, $line_no, $args);
    }
    elsif (/^(.*)\+(.*)$/) {
        $e1 = $1;
        $e2 = $2;
        $e1 = &expr($e1, $line_no, $args);
        $e2 = &expr($e2, $line_no, $args);
        $res = (defined $e1 && defined $e2)? ($e1 + $e2): undef;
    }
    elsif (/^(.*)\-(.*)$/) {
        $e1 = $1;
        $e2 = $2;
        $e1 = &expr($e1, $line_no, $args);
        $e2 = &expr($e2, $line_no, $args);
        $res = (defined $e1 && defined $e2)? ($e1 - $e2): undef;
    }
    elsif (/^(.*)\s+mod\s+(.*)$/i) {
        $e1 = $1;
        $e2 = $2;
        $e1 = &expr($e1, $line_no, $args);
        $e2 = &expr($e2, $line_no, $args);
        $res = (defined $e1 && defined $e2)? ($e1 % $e2): undef;
    }
    elsif (/^(.*)\s+div\s+(.*)$/i) {
        $e1 = $1;
        $e2 = $2;
        $e1 = &expr($e1, $line_no, $args);
        $e2 = &expr($e2, $line_no, $args);
        $res = (defined $e1 && defined $e2)? int($e1 / $e2): undef;
    }
    elsif (/^\&([0-9a-fA-F]+)$/) {
        $res = hex($1);
    }
    elsif (/^asc\s+\"(.)\"$/i) {
        $res = ord $1;
    }
    elsif (defined $f_addr{lc $_}) {
        # fixed address label (stored lower-cased)
        printf STDERR "%s lookup %s => %d (%04x)\n", ' 'x$indent, $_, $f_addr{lc $_}, $f_addr{lc $_} if ($DEBUG && $PASS);
        $res = $f_addr{lc $_};
    }
    elsif (defined $addr{$_}) {
        # code label: its address may still move, so flag the reference
        $REF = 1;
        $lines[$line_no-1]{refs}{$_} = 1;
        $res = $addr{$_};
    }
    else {
        $res = undef;
        printf STDERR "Undefined reference: %s\n", $_ if ($PASS);
    }
    $indent -= 2;
    printf STDERR "%sresult: %d\n", ' 'x$indent, $res if ($DEBUG);
    return $res;
}