Simple 8-bit assembler in C
Posted: Sun Jan 15, 2012 3:47 pm
Hi all, have you ever wondered how an assembler works?
Well, if you have taken a simple undergraduate-level compiler theory course (CSE@UoM teaches this under the name 'Computational Theory' or similar), then you will easily understand it.
Others may start to read a book like "Modern Compiler Implementation in Java Second Edition".
So here is the source code.
It's simple and only 500 lines long, so you can easily read and understand it.
The code is simple, so take a pencil and some scratch paper and start analyzing it. Hint: start from the main function.
Well, if you have taken a simple undergraduate-level compiler theory course (CSE@UoM teaches this under the name 'Computational Theory' or similar), then you will easily understand it.
Others may start to read a book like "Modern Compiler Implementation in Java Second Edition".
So here is the source code.
It's simple and only 500 lines long, so you can easily read and understand it.
Code: Select all
/*
* simu - Simple assembler.
*
* Copyright (c) 1995 Fabrice Bellard
*
* Contact addresses:
* mail: Fabrice Bellard, 451 chemin du mas de Matour, 34790 Grabels, France
* email: [email protected]
* url: http://www.enst.fr/~bellard
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Instruction formats. Each entry of instr_desc carries one of these and
 * ParseSource switches on it to decide how the mnemonic is assembled.
 */
/* Emits the opcode byte alone, no operand. */
#define FORMAT_OP0 0
/* Emits one byte: opcode OR-ed with the low 4 bits of an expression. */
#define FORMAT_OP1 1
/* Data directive: comma-separated strings/expressions emitted as raw bytes. */
#define FORMAT_DB 2
/* Stops assembling the current pass. */
#define FORMAT_END 3
/* Evaluates an expression and prints its value during the second pass. */
#define FORMAT_PRINT 4
/* Like FORMAT_DB, but every byte is passed through CvtLed first. */
#define FORMAT_DBLED 5
/* Short aliases for the unsigned types used throughout the assembler. */
typedef unsigned char uchar;
typedef unsigned int uint;
/* Describes one mnemonic recognised by the assembler. */
typedef struct {
/* Mnemonic text; compared with strcmp against the identifier read by the lexer. */
char *name;
/* Base opcode byte; only read for FORMAT_OP0 and FORMAT_OP1 entries. */
uchar opcode;
/* One of the FORMAT_* constants. */
int format;
} INSTR_DESC;
/*
 * Mnemonic table, searched linearly by ParseSource and terminated by an
 * entry whose name is NULL. Names are upper case because the lexer folds
 * identifiers to upper case before the lookup.
 */
INSTR_DESC instr_desc[]= {
/* Directives: the opcode field is not used for these formats. */
{"DBLED",0,FORMAT_DBLED},
{"PRINT",0,FORMAT_PRINT},
{"END",0,FORMAT_END},
{"DB",0,FORMAT_DB},
/* Operand-less instructions: the opcode byte is emitted as is. */
{"JNZ",0x00,FORMAT_OP0},
{"JZ",0x01,FORMAT_OP0},
{"JNC",0x02,FORMAT_OP0},
{"JC",0x03,FORMAT_OP0},
{"LDPC",0x04,FORMAT_OP0},
{"STPC",0x05,FORMAT_OP0},
{"LDMEM",0x06,FORMAT_OP0},
{"STMEM",0x07,FORMAT_OP0},
{"CLC",0x08,FORMAT_OP0},
/* One-operand instructions: the low nibble is filled with the operand's low 4 bits. */
{"MOVR",0x10,FORMAT_OP1},
{"MOVA",0x20,FORMAT_OP1},
{"MOVI",0x30,FORMAT_OP1},
{"MOVIH",0x40,FORMAT_OP1},
{"RORR",0x50,FORMAT_OP1},
{"ADDA",0x60,FORMAT_OP1},
{"ADDR",0x70,FORMAT_OP1},
{"SUBA",0x80,FORMAT_OP1},
{"SUBR",0x90,FORMAT_OP1},
{"ANDA",0xA0,FORMAT_OP1},
{"ANDR",0xB0,FORMAT_OP1},
{"ORA",0xC0,FORMAT_OP1},
{"ORR",0xD0,FORMAT_OP1},
{"XORA",0xE0,FORMAT_OP1},
{"XORR",0xF0,FORMAT_OP1},
/* End-of-table sentinel. */
{NULL,0,0}
};
/*
 * Convert an ASCII code into a displayable LED code (the table is still
 * incomplete).
 *
 * a: character code to convert; only digits, a subset of upper-case
 *    letters, '-' and ' ' have an entry.
 * Returns the table pattern with its low eight bits inverted. Characters
 * without an entry use pattern 0x00, i.e. the result is 0xFF.
 * NOTE(review): the patterns look like 7-segment encodings - confirm
 * against the target hardware.
 */
int CvtLed(int a) {
    static const struct {
        int code;
        int pattern;
    } glyphs[] = {
        {'0', 0x3F}, {'1', 0x06}, {'2', 0x5B}, {'3', 0x4F},
        {'4', 0x66}, {'5', 0x6D}, {'6', 0x7D}, {'7', 0x07},
        {'8', 0x7F}, {'9', 0x6F}, {'A', 0x77}, {'B', 0x7C},
        {'C', 0x39}, {'D', 0x5E}, {'E', 0x79}, {'F', 0x71},
        {'R', 0x50}, {'S', 0x6D}, {'Y', 0x6E}, {'L', 0x38},
        {'H', 0x76}, {'I', 0x06}, {'-', 0x40}, {'T', 0x78},
        {' ', 0x00}, {'U', 0x1C}, {'V', 0x3E}, {'M', 0x37},
        {'O', 0x3F}, {'N', 0x54}, {'P', 0x73}, {'G', 0x6F}
    };
    unsigned int idx;
    int pattern = 0x00; /* anything not in the table is shown blank */

    for (idx = 0; idx < sizeof glyphs / sizeof glyphs[0]; idx++) {
        if (glyphs[idx].code == a) {
            pattern = glyphs[idx].pattern;
            break;
        }
    }
    return pattern ^ 0xFF;
}
/* file_in: assembly source being read; file_out: binary image written by main. */
FILE *file_in,*file_out;
/* Current source line number; bumped by get_ch1 and reported by Error. */
int line_n;
/*
 * Report a fatal assembly error on stderr and terminate the program.
 *
 * name: the offending token, label or file name (printed in quotes).
 * s:    human-readable description of the problem.
 *
 * Both strings are only read, so they are const-qualified; every caller
 * passing a string literal or a char buffer keeps working unchanged.
 * This function never returns.
 */
void Error(const char *name,const char *s) {
    fprintf(stderr,"\nError line %d: '%s': %s\n",line_n,name,s);
    exit(EXIT_FAILURE);
}
/* lexer */
/*
 * Token kinds stored in lex_sym. Single-character tokens that have no
 * dedicated kind are stored as their own character code instead.
 */
/* Numeric literal (decimal, $hex or 'c'); value in lex_num. */
#define SYM_NUM 1
/* Identifier; text in lex_id. */
#define SYM_ID 2
/* The ">>" operator. */
#define SYM_SHR 3
/* The "<<" operator. */
#define SYM_SHL 4
/* Double-quoted string; text in lex_str. */
#define SYM_STR 5
/* Size in bytes of the identifier buffers, terminator included. */
#define ID_SIZEMAX 32
/* Current look-ahead character (may be EOF). */
int ch;
/* Value of the last SYM_NUM token. */
uint lex_num;
/* Text of the last SYM_ID token. */
char lex_id[ID_SIZEMAX];
/* Kind of the current token. */
int lex_sym;
/* Text of the last SYM_STR token. */
char lex_str[256];
/*
 * Read the next raw character of file_in into ch, without case folding.
 *
 * A '#' starts a comment that runs to the end of the line. The comment
 * text is discarded and the character that ended it (the newline, or EOF)
 * becomes the current character. Delivering that newline keeps line_n
 * correct for error messages and makes a comment act as whitespace, so it
 * can never glue the tokens before and after it together.
 *
 * NOTE(review): '#' is also treated as a comment start inside string
 * literals, because get_lex reads string contents through this function.
 */
void get_ch1(void) {
    ch=fgetc(file_in);
    if (ch=='#') {
        do {
            ch=fgetc(file_in);
        } while (ch!=EOF && ch!='\n');
    }
    if (ch=='\n') line_n++;
}
/*
 * Advance to the next source character and fold it to upper case, which
 * makes everything read through this function case-insensitive.
 */
void get_ch(void) {
    int folded;

    get_ch1();
    folded = toupper(ch);
    ch = folded;
}
void get_lex(void) {
uint n;
char *s;
while (ch==' ' || ch=='\t' || ch=='\n' || ch=='\r') get_ch();
if (ch=='"') {
get_ch1();
s=lex_str;
do {
*s++=ch;
get_ch1();
} while (ch!='"');
*s=0;
lex_sym=SYM_STR;
get_ch();
} else if (ch=='\'') {
get_ch();
lex_sym=SYM_NUM;
lex_num=ch;
get_ch();
if (ch!='\'') Error("'","Expected");
get_ch();
} else if (ch=='>') {
get_ch();
if (ch=='>') {
get_ch();
lex_sym=SYM_SHR;
} else {
lex_sym='>';
}
} else if (ch=='<') {
get_ch();
if (ch=='<') {
get_ch();
lex_sym=SYM_SHL;
} else {
lex_sym='<';
}
} else if (ch=='$') {
n=0;
get_ch();
while ((ch>='0' && ch<='9') || (ch>='A' && ch<='F')) {
if (ch>='A') ch=ch-'A'+10; else ch=ch-'0';
n=16*n+ch;
get_ch();
}
lex_sym=SYM_NUM;
lex_num=n;
} else if (ch>='0' && ch<='9') {
n=0;
do {
n=n*10+(ch-'0');
get_ch();
} while (ch>='0' && ch<='9');
lex_sym=SYM_NUM;
lex_num=n;
} else if (ch>='A' && ch<='Z') {
s=lex_id;
do {
*s++=ch;
get_ch();
} while ((ch>='A' && ch<='Z') || (ch>='0' && ch<='9') || ch=='_');
*s=0;
lex_sym=SYM_ID;
} else {
lex_sym=ch;
get_ch();
}
}
/* parser */
/* Size in bytes of the output image buffer. */
#define MEMSIZE 32768
/* One defined label; labels form a singly linked list headed by label_first. */
typedef struct _LABEL {
/* Label name as read by the lexer. */
char name[ID_SIZEMAX];
/* Value of pc at the point where the label was defined. */
uint value;
/* Next (earlier defined) label, or NULL. */
struct _LABEL *next;
} LABEL;
/* Head of the label list; the most recently added label comes first. */
LABEL *label_first;
/* Pass currently running: 0 collects labels, 1 resolves them. */
int pass_n;
/* Index in mem of the next byte to emit. */
uint pc;
/* Assembled image; main writes its first pc bytes to the output file. */
uchar mem[MEMSIZE];
void AddLabel(char *name,int value) {
LABEL *l;
if (pass_n==1) return;
l=malloc(sizeof(LABEL));
strcpy(l->name,name);
l->value=value;
l->next=label_first;
label_first=l;
}
/*
 * Look up the value of a label.
 *
 * During the first pass (pass_n == 0) the label list is still being built,
 * so 0 is returned as a placeholder without searching. During the second
 * pass an unknown label is a fatal error reported through Error.
 */
uint SearchLabel(char *name) {
    LABEL *node;

    if (pass_n==0) return 0;

    for (node=label_first; node!=NULL; node=node->next) {
        if (strcmp(name,node->name)!=0) continue;
        return node->value;
    }

    Error(name,"Label not found");
    return 0;
}
/* Forward declaration: ParseAtom recurses into ParseExpr for parentheses. */
uint ParseExpr(void);

/*
 * Parse the innermost element of an expression and return its value.
 *
 * Accepted forms:
 *   number         value of the literal
 *   identifier     value of the label (see SearchLabel)
 *   @              current value of pc
 *   ( expr )       parenthesised sub-expression
 *   + atom         unary plus
 *   - atom         unary minus (unsigned arithmetic, so it wraps around)
 *
 * Anything else is a syntax error reported through Error. The token
 * following the atom is left in lex_sym.
 */
uint ParseAtom(void) {
    uint n;

    switch (lex_sym) {
    case SYM_NUM:
        n=lex_num;
        get_lex();
        return n;
    case SYM_ID:
        n=SearchLabel(lex_id);
        get_lex();
        return n;
    case '@':
        get_lex();
        return pc;
    case '(':
        get_lex();
        n=ParseExpr();
        if (lex_sym!=')') Error(")","Expected");
        get_lex();
        return n;
    case '+':
        get_lex();
        return ParseAtom();
    case '-':
        get_lex();
        return -ParseAtom();
    default:
        Error("?","Syntax Error");
        break;
    }
    /* Not reached, since Error exits; keeps every path returning a value. */
    return 0;
}
/*
 * Parse a chain of shift operations:  atom { (">>" | "<<") atom }.
 *
 * Operators are applied left to right. Shifting by a count that is at
 * least the bit width of uint is undefined behaviour in C, so such shifts
 * are evaluated here as 0 (every bit shifted out) instead of being handed
 * to the hardware.
 */
uint ParseShift(void) {
    uint n,m,op;

    n=ParseAtom();
    while (lex_sym==SYM_SHR || lex_sym==SYM_SHL) {
        op=lex_sym;
        get_lex();
        m=ParseAtom();
        if (m>=sizeof(n)*CHAR_BIT) {
            n=0;
        } else if (op==SYM_SHR) {
            n=n>>m;
        } else {
            n=n<<m;
        }
    }
    return n;
}
uint ParseProd(void) {
uint n,m,op;
n=ParseShift();
while (lex_sym=='/' || lex_sym=='*' || lex_sym=='&') {
op=lex_sym;
get_lex();
m=ParseShift();
switch(op) {
case '*': n=n*m; break;
case '/': n=n/m; break;
case '&': n=n&m; break;
}
}
return n;
}
/*
 * Parse a chain of additive operations:
 *   product { ("+" | "-" | "|") product }
 * applied left to right in unsigned arithmetic.
 */
uint ParseSum(void) {
    uint acc,rhs;
    int oper;

    acc=ParseProd();
    for (;;) {
        oper=lex_sym;
        if (oper!='+' && oper!='-' && oper!='|') break;
        get_lex();
        rhs=ParseProd();
        if (oper=='+') {
            acc+=rhs;
        } else if (oper=='-') {
            acc-=rhs;
        } else {
            acc|=rhs;
        }
    }
    return acc;
}
/*
 * Entry point of the expression parser: evaluates a full expression
 * starting at the current token and returns its value. The lowest
 * precedence level is the additive one handled by ParseSum.
 */
uint ParseExpr(void) {
    uint value;

    value=ParseSum();
    return value;
}
void ParseDB(void) {
char *s;
uint k;
while (1) {
switch (lex_sym) {
case SYM_STR:
s=lex_str;
while (*s) mem[pc++]=*s++;
get_lex();
break;
default:
k=ParseExpr();
mem[pc++]=k;
break;
}
if (lex_sym!=',') break;
get_lex();
}
}
void ParseDBLED(void) {
char *s;
uint k;
while (1) {
switch (lex_sym) {
case SYM_STR:
s=lex_str;
while (*s) mem[pc++]=CvtLed(*s++);
get_lex();
break;
default:
k=ParseExpr();
mem[pc++]=CvtLed(k);
break;
}
if (lex_sym!=',') break;
get_lex();
}
}
void ParseSource(int pass) {
char id[ID_SIZEMAX];
INSTR_DESC *p;
uint k;
pass_n=pass;
get_ch();
get_lex();
printf("Pass %d\n",pass+1);
while (1) {
while (lex_sym==';') get_lex();
if (lex_sym!=SYM_ID) Error("?","Opcode expected");
strcpy(id,lex_id);
get_lex();
if (lex_sym==':') {
get_lex();
AddLabel(id,pc);
} else {
p=instr_desc;
while (p->name!=NULL) {
if (strcmp(p->name,id)==0) break;
p++;
}
if (p->name==NULL) Error(id,"Unknown opcode");
switch(p->format) {
case FORMAT_OP0:
mem[pc++]=p->opcode;
break;
case FORMAT_OP1:
k=ParseExpr();
mem[pc++]=p->opcode | (k & 0xF);
break;
case FORMAT_DB:
ParseDB();
break;
case FORMAT_PRINT:
k=ParseExpr();
if (pass_n==1) printf("=%u\n",k);
break;
case FORMAT_DBLED:
ParseDBLED();
break;
case FORMAT_END:
return;
}
}
}
}
/* Clear the whole output image (all MEMSIZE bytes of mem) to zero. */
void InitMem(void) {
    memset(mem,0,sizeof mem);
}
/*
 * Program entry point: bdasm asm_source binary_image
 *
 * Assembles argv[1] in two passes over the same file (the first collects
 * labels, the second resolves them) and writes the first pc bytes of mem
 * to argv[2].
 *
 * Returns 0 on success. Failing to create or completely write the output
 * file is reported on stderr and yields exit status 1, instead of passing
 * a NULL stream to fwrite or silently producing a truncated image.
 */
int main(int argc,char *argv[]) {
    if (argc!=3) {
        fprintf(stderr,"Microprocessor Assembler (c) 1995 Fabrice Bellard\n");
        fprintf(stderr,"usage: bdasm asm_source binary_image\n");
        exit(0);
    }
    /* "r" is the portable spelling of text mode; "rt" is an extension. */
    file_in=fopen(argv[1],"r");
    if (file_in==NULL) Error(argv[1],"File not found");
    InitMem();

    /* pass 1 */
    pc=0;
    line_n=1;
    ParseSource(0);

    /* pass 2 */
    rewind(file_in);
    pc=0;
    line_n=1;
    ParseSource(1);
    fclose(file_in);

    file_out=fopen(argv[2],"wb");
    if (file_out==NULL) {
        fprintf(stderr,"\nError: '%s': Cannot create output file\n",argv[2]);
        return 1;
    }
    if (fwrite(mem,1,pc,file_out)!=pc) {
        fprintf(stderr,"\nError: '%s': Write failed\n",argv[2]);
        fclose(file_out);
        return 1;
    }
    /* fclose flushes buffered data, so its result matters for output. */
    if (fclose(file_out)!=0) {
        fprintf(stderr,"\nError: '%s': Write failed\n",argv[2]);
        return 1;
    }
    return 0;
}
method.