Well, if you have taken a simple undergraduate-level compiler theory course (CSE@UoM teaches this under the name 'computational theory' or similar), then you can simply understand that.
Others may start to read a book like "Modern Compiler Implementation in Java Second Edition".
So here is the source code.
It's simple and it's only 500 lines. You can simply read and understand it.
Code: Select all
/*
* simu - Simple assembler.
*
* Copyright (c) 1995 Fabrice Bellard
*
* Contact addresses:
* mail: Fabrice Bellard, 451 chemin du mas de Matour, 34790 Grabels, France
* email: [email protected]
* url: http://www.enst.fr/~bellard
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Statement formats: the `format` field of INSTR_DESC selects how
   ParseSource() handles the rest of the statement. */
/* Instruction without operand: the opcode byte is emitted as is. */
#define FORMAT_OP0 0
/* Instruction with one operand: the low 4 bits of the operand expression
   are OR-ed into the opcode byte. */
#define FORMAT_OP1 1
/* DB directive: handled by ParseDB(). */
#define FORMAT_DB 2
/* END directive: ends the current pass. */
#define FORMAT_END 3
/* PRINT directive: prints the value of an expression during the second pass. */
#define FORMAT_PRINT 4
/* DBLED directive: handled by ParseDBLED(). */
#define FORMAT_DBLED 5
/* Short aliases for the unsigned types used throughout the file. */
typedef unsigned char uchar;
typedef unsigned int uint;
/* Description of one mnemonic or directive known to the assembler. */
typedef struct {
/* Upper-case name as it is matched against the source identifier. */
char *name;
/* Opcode byte emitted for FORMAT_OP0 / FORMAT_OP1 entries (0 for directives). */
uchar opcode;
/* One of the FORMAT_* constants. */
int format;
} INSTR_DESC;
/* Table of all directives and instructions, searched linearly by
   ParseSource(); the entry with a NULL name terminates the table. */
INSTR_DESC instr_desc[]= {
/* directives */
{"DBLED",0,FORMAT_DBLED},
{"PRINT",0,FORMAT_PRINT},
{"END",0,FORMAT_END},
{"DB",0,FORMAT_DB},
/* instructions without operand: opcode emitted unchanged */
{"JNZ",0x00,FORMAT_OP0},
{"JZ",0x01,FORMAT_OP0},
{"JNC",0x02,FORMAT_OP0},
{"JC",0x03,FORMAT_OP0},
{"LDPC",0x04,FORMAT_OP0},
{"STPC",0x05,FORMAT_OP0},
{"LDMEM",0x06,FORMAT_OP0},
{"STMEM",0x07,FORMAT_OP0},
{"CLC",0x08,FORMAT_OP0},
/* instructions with one operand: the low nibble of the opcode is zero here
   and is filled in with the low 4 bits of the operand */
{"MOVR",0x10,FORMAT_OP1},
{"MOVA",0x20,FORMAT_OP1},
{"MOVI",0x30,FORMAT_OP1},
{"MOVIH",0x40,FORMAT_OP1},
{"RORR",0x50,FORMAT_OP1},
{"ADDA",0x60,FORMAT_OP1},
{"ADDR",0x70,FORMAT_OP1},
{"SUBA",0x80,FORMAT_OP1},
{"SUBR",0x90,FORMAT_OP1},
{"ANDA",0xA0,FORMAT_OP1},
{"ANDR",0xB0,FORMAT_OP1},
{"ORA",0xC0,FORMAT_OP1},
{"ORR",0xD0,FORMAT_OP1},
{"XORA",0xE0,FORMAT_OP1},
{"XORR",0xF0,FORMAT_OP1},
/* end-of-table sentinel */
{NULL,0,0}
};
/*
 * Convert an ASCII code into the byte used to display it (table still to be
 * completed, per the original author's note).
 *
 * a: character code to convert; only digits, some upper-case letters, '-'
 *    and ' ' have an entry.
 * Returns the table pattern for `a` with all 8 bits inverted (XOR 0xFF);
 * a code without an entry uses pattern 0x00 and therefore yields 0xFF.
 * NOTE(review): the patterns look like 7-segment encodings driven
 * active-low -- confirm against the target hardware.
 */
int CvtLed(int a) {
    static const struct {
        int code;
        int pattern;
    } glyphs[] = {
        {'0', 0x3F}, {'1', 0x06}, {'2', 0x5B}, {'3', 0x4F},
        {'4', 0x66}, {'5', 0x6D}, {'6', 0x7D}, {'7', 0x07},
        {'8', 0x7F}, {'9', 0x6F}, {'A', 0x77}, {'B', 0x7C},
        {'C', 0x39}, {'D', 0x5E}, {'E', 0x79}, {'F', 0x71},
        {'R', 0x50}, {'S', 0x6D}, {'Y', 0x6E}, {'L', 0x38},
        {'H', 0x76}, {'I', 0x06}, {'-', 0x40}, {'T', 0x78},
        {' ', 0x00}, {'U', 0x1C}, {'V', 0x3E}, {'M', 0x37},
        {'O', 0x3F}, {'N', 0x54}, {'P', 0x73}, {'G', 0x6F}
    };
    const unsigned count = sizeof glyphs / sizeof glyphs[0];
    int pattern = 0x00; /* pattern used when `a` has no entry */
    unsigned i;

    for (i = 0; i < count; i++) {
        if (glyphs[i].code == a) {
            pattern = glyphs[i].pattern;
            break;
        }
    }
    return pattern ^ 0xFF;
}
/* Source file being assembled and binary image being written; both are
   opened by main(). */
FILE *file_in,*file_out;
/* Current source line number, incremented by get_ch1() on each '\n' and
   printed by Error(). */
int line_n;
/*
 * Report a fatal error on stderr and terminate the program with status 1.
 *
 * name: the offending item (token, identifier or file name), printed between
 *       quotes.
 * s:    the error message.
 *
 * The message is prefixed with the current value of line_n. Both strings are
 * only read, so they are const-qualified; callers may pass string literals.
 * This function does not return.
 */
void Error(const char *name, const char *s) {
    fprintf(stderr, "\nError line %d: '%s': %s\n", line_n, name, s);
    exit(1);
}
/* lexer */
/* Token kinds stored in lex_sym by get_lex(); any other token is a single
   character and is stored as its own character code. */
/* number, "$" hexadecimal number or quoted character; value in lex_num */
#define SYM_NUM 1
/* identifier; text in lex_id */
#define SYM_ID 2
/* ">>" operator */
#define SYM_SHR 3
/* "<<" operator */
#define SYM_SHL 4
/* double-quoted string; text in lex_str */
#define SYM_STR 5
/* size in bytes of the buffers that hold an identifier */
#define ID_SIZEMAX 32
/* current lookahead character read from file_in (EOF at end of file) */
int ch;
/* value of the last SYM_NUM token */
uint lex_num;
/* text of the last SYM_ID token */
char lex_id[ID_SIZEMAX];
/* kind of the current token */
int lex_sym;
/* text of the last SYM_STR token */
char lex_str[256];
/*
 * Read the next raw character of the source into `ch` (EOF at end of file).
 *
 * A '#' starts a comment that runs to the end of the line: the comment text
 * is discarded and the character that ended it ('\n' or EOF) is delivered
 * instead. Delivering that newline, rather than reading past it, keeps
 * line_n in step with the source file and keeps the tokens before and after
 * the comment separated.
 */
void get_ch1(void) {
    ch = fgetc(file_in);
    if (ch == '#') {
        do {
            ch = fgetc(file_in);
        } while (ch != EOF && ch != '\n');
    }
    if (ch == '\n') line_n++;
}
/*
 * Read the next source character into `ch` through get_ch1() and fold
 * lower-case letters to upper case; every other value, including EOF, is
 * left unchanged.
 */
void get_ch(void) {
    get_ch1();
    if (islower(ch)) {
        ch = toupper(ch);
    }
}
void get_lex(void) {
uint n;
char *s;
while (ch==' ' || ch=='\t' || ch=='\n' || ch=='\r') get_ch();
if (ch=='"') {
get_ch1();
s=lex_str;
do {
*s++=ch;
get_ch1();
} while (ch!='"');
*s=0;
lex_sym=SYM_STR;
get_ch();
} else if (ch=='\'') {
get_ch();
lex_sym=SYM_NUM;
lex_num=ch;
get_ch();
if (ch!='\'') Error("'","Expected");
get_ch();
} else if (ch=='>') {
get_ch();
if (ch=='>') {
get_ch();
lex_sym=SYM_SHR;
} else {
lex_sym='>';
}
} else if (ch=='<') {
get_ch();
if (ch=='<') {
get_ch();
lex_sym=SYM_SHL;
} else {
lex_sym='<';
}
} else if (ch=='$') {
n=0;
get_ch();
while ((ch>='0' && ch<='9') || (ch>='A' && ch<='F')) {
if (ch>='A') ch=ch-'A'+10; else ch=ch-'0';
n=16*n+ch;
get_ch();
}
lex_sym=SYM_NUM;
lex_num=n;
} else if (ch>='0' && ch<='9') {
n=0;
do {
n=n*10+(ch-'0');
get_ch();
} while (ch>='0' && ch<='9');
lex_sym=SYM_NUM;
lex_num=n;
} else if (ch>='A' && ch<='Z') {
s=lex_id;
do {
*s++=ch;
get_ch();
} while ((ch>='A' && ch<='Z') || (ch>='0' && ch<='9') || ch=='_');
*s=0;
lex_sym=SYM_ID;
} else {
lex_sym=ch;
get_ch();
}
}
/* parser */
/* Size in bytes of the buffer that holds the assembled image. */
#define MEMSIZE 32768
/* One defined label; labels form a singly linked list headed by label_first. */
typedef struct _LABEL {
/* label name as written in the source (upper-cased by the lexer) */
char name[ID_SIZEMAX];
/* value given to AddLabel(); ParseSource() passes the current pc */
uint value;
/* next label in the list, or NULL */
struct _LABEL *next;
} LABEL;
/* Head of the label list; AddLabel() inserts new labels at the front. */
LABEL *label_first;
/* Current pass, set by ParseSource(): 0 for the first pass, 1 for the second. */
int pass_n;
/* Index in mem[] where the next byte is stored. */
uint pc;
/* Assembled image; main() writes its first pc bytes to the output file. */
uchar mem[MEMSIZE];
void AddLabel(char *name,int value) {
LABEL *l;
if (pass_n==1) return;
l=malloc(sizeof(LABEL));
strcpy(l->name,name);
l->value=value;
l->next=label_first;
label_first=l;
}
uint SearchLabel(char *name) {
LABEL *l;
if (pass_n==0) return 0;
l=label_first;
while (l!=NULL) {
if (strcmp(name,l->name)==0) {
return l->value;
}
l=l->next;
}
Error(name,"Label not found");
return 0;
}
uint ParseExpr(void);
uint ParseAtom(void) {
uint n;
switch (lex_sym) {
case SYM_NUM:
n=lex_num;
get_lex();
return n;
case SYM_ID:
n=SearchLabel(lex_id);
get_lex();
return n;
case '@':
get_lex();
return pc;
case '(':
get_lex();
n=ParseExpr();
if (lex_sym!=')') Error(")","Expected");
get_lex();
return n;
case '+':
get_lex();
return ParseAtom();
case '-':
get_lex();
return -ParseAtom();
default:
Error("?","Syntax Error");
}
}
uint ParseShift(void) {
uint n,m,op;
n=ParseAtom();
while (lex_sym==SYM_SHR || lex_sym==SYM_SHL) {
op=lex_sym;
get_lex();
m=ParseAtom();
switch(op) {
case SYM_SHR: n=n>>m; break;
case SYM_SHL: n=n<<m; break;
}
}
return n;
}
uint ParseProd(void) {
uint n,m,op;
n=ParseShift();
while (lex_sym=='/' || lex_sym=='*' || lex_sym=='&') {
op=lex_sym;
get_lex();
m=ParseShift();
switch(op) {
case '*': n=n*m; break;
case '/': n=n/m; break;
case '&': n=n&m; break;
}
}
return n;
}
/*
 * Parse a chain of additive operations: prod { ("+" | "-" | "|") prod }.
 *
 * Operators are applied left to right on unsigned values. Returns the value
 * of the chain.
 */
uint ParseSum(void) {
    uint acc = ParseProd();

    for (;;) {
        int op = lex_sym;
        uint rhs;

        if (op != '+' && op != '-' && op != '|') {
            break;
        }
        get_lex();
        rhs = ParseProd();
        if (op == '+') {
            acc += rhs;
        } else if (op == '-') {
            acc -= rhs;
        } else {
            acc |= rhs;
        }
    }
    return acc;
}
/*
 * Parse a complete expression and return its value.
 * This is the entry point of the expression parser; it simply delegates to
 * ParseSum(), the outermost level.
 */
uint ParseExpr(void) {
return ParseSum();
}
void ParseDB(void) {
char *s;
uint k;
while (1) {
switch (lex_sym) {
case SYM_STR:
s=lex_str;
while (*s) mem[pc++]=*s++;
get_lex();
break;
default:
k=ParseExpr();
mem[pc++]=k;
break;
}
if (lex_sym!=',') break;
get_lex();
}
}
void ParseDBLED(void) {
char *s;
uint k;
while (1) {
switch (lex_sym) {
case SYM_STR:
s=lex_str;
while (*s) mem[pc++]=CvtLed(*s++);
get_lex();
break;
default:
k=ParseExpr();
mem[pc++]=CvtLed(k);
break;
}
if (lex_sym!=',') break;
get_lex();
}
}
void ParseSource(int pass) {
char id[ID_SIZEMAX];
INSTR_DESC *p;
uint k;
pass_n=pass;
get_ch();
get_lex();
printf("Pass %d\n",pass+1);
while (1) {
while (lex_sym==';') get_lex();
if (lex_sym!=SYM_ID) Error("?","Opcode expected");
strcpy(id,lex_id);
get_lex();
if (lex_sym==':') {
get_lex();
AddLabel(id,pc);
} else {
p=instr_desc;
while (p->name!=NULL) {
if (strcmp(p->name,id)==0) break;
p++;
}
if (p->name==NULL) Error(id,"Unknown opcode");
switch(p->format) {
case FORMAT_OP0:
mem[pc++]=p->opcode;
break;
case FORMAT_OP1:
k=ParseExpr();
mem[pc++]=p->opcode | (k & 0xF);
break;
case FORMAT_DB:
ParseDB();
break;
case FORMAT_PRINT:
k=ParseExpr();
if (pass_n==1) printf("=%u\n",k);
break;
case FORMAT_DBLED:
ParseDBLED();
break;
case FORMAT_END:
return;
}
}
}
}
/* Clear the whole assembled image buffer mem[] to zero. */
void InitMem(void) {
    memset(mem, 0, sizeof mem);
}
int main(int argc,char *argv[]) {
if (argc!=3) {
fprintf(stderr,"Microprocessor Assembler (c) 1995 Fabrice Bellard\n");
fprintf(stderr,"usage: bdasm asm_source binary_image\n");
exit(0);
}
file_in=fopen(argv[1],"rt");
if (file_in==NULL) Error(argv[1],"File not found");
InitMem();
/* passe 1 */
pc=0;
line_n=1;
ParseSource(0);
/* passe 2 */
rewind(file_in);
pc=0;
line_n=1;
ParseSource(1);
fclose(file_in);
file_out=fopen(argv[2],"wb");
fwrite(mem,1,pc,file_out);
fclose(file_out);
return 0;
}
method.