Simple 8-bit assembler in C

C, C++, Visual C++, C++.Net Topics
Post Reply
User avatar
SemiconductorCat
Major
Major
Posts: 455
Joined: Mon Aug 22, 2011 8:42 pm
Location: currently in hyperspace

Simple 8-bit assembler in C

Post by SemiconductorCat » Sun Jan 15, 2012 3:47 pm

Hi all. Have you ever wondered how an assembler works?
If you have taken a basic undergraduate-level compiler theory course (CSE@UoM offers one under the name 'computational theory' or similar), you should be able to follow it easily.

Others may want to start by reading a book such as "Modern Compiler Implementation in Java, Second Edition".

So here is the source code.
It is simple and only about 500 lines long, so you can easily read and understand it.

Code: Select all

/*
 *  simu - Simple assembler.
 * 
 *  Copyright (c) 1995 Fabrice Bellard
 *
 *  Contact addresses:
 *  mail: Fabrice Bellard, 451 chemin du mas de Matour, 34790 Grabels, France
 *  email: [email protected]
 *  url: http://www.enst.fr/~bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Operand formats: stored in INSTR_DESC.format and dispatched on by ParseSource. */
#define FORMAT_OP0  0   /* emits the opcode byte alone */
#define FORMAT_OP1  1   /* emits opcode | (operand & 0xF) */
#define FORMAT_DB   2   /* DB directive, handled by ParseDB */
#define FORMAT_END  3   /* END directive, stops the current pass */
#define FORMAT_PRINT 4  /* PRINT directive, prints an expression value during the second pass */
#define FORMAT_DBLED 5  /* DBLED directive, handled by ParseDBLED */

/* Short aliases for the unsigned types used throughout the file. */
typedef unsigned char uchar;
typedef unsigned int uint;

/* One row of the mnemonic table: binds a name to an opcode and an operand format. */
typedef struct {
	 char *name;    /* mnemonic, compared with strcmp; a NULL name ends the table */
	 uchar opcode;  /* opcode byte; FORMAT_OP1 rows get the operand's low 4 bits OR-ed in */
	 int format;    /* one of the FORMAT_* constants */
} INSTR_DESC;


/* Mnemonic table searched linearly by ParseSource; terminated by a NULL name. */
INSTR_DESC instr_desc[]= {
	/* assembler directives: the opcode field is not used for these formats */
	{"DBLED",0,FORMAT_DBLED},
	{"PRINT",0,FORMAT_PRINT},
	{"END",0,FORMAT_END},
  {"DB",0,FORMAT_DB},
  /* instructions without operand: the opcode byte is emitted as is */
  {"JNZ",0x00,FORMAT_OP0},
  {"JZ",0x01,FORMAT_OP0},
  {"JNC",0x02,FORMAT_OP0},
  {"JC",0x03,FORMAT_OP0},
  {"LDPC",0x04,FORMAT_OP0},
  {"STPC",0x05,FORMAT_OP0},
  {"LDMEM",0x06,FORMAT_OP0},
  {"STMEM",0x07,FORMAT_OP0},
  {"CLC",0x08,FORMAT_OP0},

  /* instructions with one operand: high nibble is the opcode, low nibble the operand */
  {"MOVR",0x10,FORMAT_OP1},
  {"MOVA",0x20,FORMAT_OP1},
  {"MOVI",0x30,FORMAT_OP1},
  {"MOVIH",0x40,FORMAT_OP1},
  {"RORR",0x50,FORMAT_OP1},
  {"ADDA",0x60,FORMAT_OP1},
  {"ADDR",0x70,FORMAT_OP1},
  {"SUBA",0x80,FORMAT_OP1},
  {"SUBR",0x90,FORMAT_OP1},
  {"ANDA",0xA0,FORMAT_OP1},
  {"ANDR",0xB0,FORMAT_OP1},
  {"ORA",0xC0,FORMAT_OP1},
  {"ORR",0xD0,FORMAT_OP1},
  {"XORA",0xE0,FORMAT_OP1},
  {"XORR",0xF0,FORMAT_OP1},
  {NULL,0,0}
};

/*
 * Convert an ASCII code into the byte stored for the LED display
 * (original note: the character set is still to be completed).
 *
 * a: character code.  Digits, a subset of the upper-case letters, '-'
 *    and ' ' have an entry in the table below.
 *
 * Returns the table pattern with all eight low bits inverted (XOR 0xFF).
 * A code without an entry is treated as pattern 0x00, so 0xFF is returned.
 * NOTE(review): the patterns look like 7-segment encodings - confirm
 * against the target hardware.
 */
int CvtLed(int a) {
	 static const struct {
			int code;
			int pattern;
	 } glyphs[] = {
			{'0',0x3F}, {'1',0x06}, {'2',0x5B}, {'3',0x4F},
			{'4',0x66}, {'5',0x6D}, {'6',0x7D}, {'7',0x07},
			{'8',0x7F}, {'9',0x6F}, {'A',0x77}, {'B',0x7C},
			{'C',0x39}, {'D',0x5E}, {'E',0x79}, {'F',0x71},
			{'R',0x50}, {'S',0x6D}, {'Y',0x6E}, {'L',0x38},
			{'H',0x76}, {'I',0x06}, {'-',0x40}, {'T',0x78},
			{' ',0x00}, {'U',0x1C}, {'V',0x3E}, {'M',0x37},
			{'O',0x3F}, {'N',0x54}, {'P',0x73}, {'G',0x6F}
	 };
	 unsigned int i;

	 for (i=0; i<sizeof(glyphs)/sizeof(glyphs[0]); i++) {
			if (glyphs[i].code==a) return glyphs[i].pattern ^ 0xFF;
	 }
	 /* unknown character: blank pattern, inverted */
	 return 0x00 ^ 0xFF;
}


/* Source file read by the lexer and binary image written by main. */
FILE *file_in,*file_out;
/* Current source line number, printed by Error. */
int line_n;

/*
 * Report a fatal error on stderr and terminate the program.
 *
 * name: offending token or file name, printed between quotes.
 * s:    short description of the problem.
 *
 * The message is prefixed with the current value of line_n.  This
 * function does not return: it calls exit(1).  Both parameters are
 * const-qualified because callers pass string literals and the strings
 * are only read.
 */
void Error(const char *name,const char *s) {
	 fprintf(stderr,"\nError line %d: '%s': %s\n",line_n,name,s);
	 exit(1);
}


/* lexer */

/* Token kinds stored in lex_sym; every other token is its own character code. */
#define SYM_NUM 1   /* numeric or character literal, value in lex_num */
#define SYM_ID  2   /* identifier, text in lex_id */
#define SYM_SHR 3   /* ">>" */
#define SYM_SHL 4   /* "<<" */
#define SYM_STR 5   /* quoted string, text in lex_str */

/* Size of the identifier buffers, terminating NUL included. */
#define ID_SIZEMAX 32 
/* Look-ahead character: the first character not yet consumed by the lexer. */
int ch;

uint lex_num;             /* value of the last SYM_NUM token */
char lex_id[ID_SIZEMAX];  /* text of the last SYM_ID token */
int lex_sym;              /* kind of the current token */
char lex_str[256];        /* text of the last SYM_STR token */

/*
 * Read the next raw character of file_in into ch.
 *
 * A '#' starts a comment that extends to the end of the line.  The
 * comment text is discarded, but the newline (or EOF) that ends it is
 * delivered to the caller like any other character, so that
 *   - line_n is also incremented for lines that end with a comment, and
 *   - the comment still separates the token before it from the token
 *     on the next line.
 */
void get_ch1(void) {
	 ch=fgetc(file_in);
	 if (ch=='#') {
			/* skip the comment body; stop on the newline or on EOF */
			do {
				 ch=fgetc(file_in);
			} while (ch!=EOF && ch!='\n');
	 }
	 if (ch=='\n') line_n++;
}

/*
 * Read the next character through get_ch1 and convert it with toupper,
 * so that everything scanned through this function is upper-case.
 */
void get_ch(void) {
	 get_ch1();
	 ch=toupper(ch);
}

void get_lex(void) {
	 uint n;
	 char *s;

	 while (ch==' ' || ch=='\t' || ch=='\n' || ch=='\r') get_ch();

	 if (ch=='"') {
			get_ch1();
			s=lex_str;
			do {
				 *s++=ch;
				 get_ch1();
			} while (ch!='"');
			*s=0;
			lex_sym=SYM_STR;
			get_ch();
	 } else if (ch=='\'') {
			get_ch();
			lex_sym=SYM_NUM;
			lex_num=ch;
			get_ch();
			if (ch!='\'') Error("'","Expected");
			get_ch();
	 } else if (ch=='>') {
			get_ch();
			if (ch=='>') {
				 get_ch();
				 lex_sym=SYM_SHR;
			} else {
				 lex_sym='>';
			}
	 } else if (ch=='<') {
			get_ch();
			if (ch=='<') {
				 get_ch();
				 lex_sym=SYM_SHL;
			} else {
				 lex_sym='<';
			}
	 } else if (ch=='$') {
	   n=0;
	   get_ch();	   
	   while ((ch>='0' && ch<='9') || (ch>='A' && ch<='F'))  {
	     if (ch>='A') ch=ch-'A'+10; else ch=ch-'0';
	     n=16*n+ch;
	     get_ch();
	   }
	   lex_sym=SYM_NUM;
	   lex_num=n;
	 } else if (ch>='0' && ch<='9') {
			n=0;
			do {
				 n=n*10+(ch-'0');
				 get_ch();
			} while (ch>='0' && ch<='9');
			lex_sym=SYM_NUM;
			lex_num=n;
	 } else if (ch>='A' && ch<='Z') {
			s=lex_id;
			do {
				 *s++=ch;
				 get_ch();
			} while ((ch>='A' && ch<='Z') || (ch>='0' && ch<='9') || ch=='_');
			*s=0;
			lex_sym=SYM_ID;
	 } else {
			lex_sym=ch;
			get_ch();
	 }
}


/* parser */

/* Size in bytes of the buffer that holds the assembled image. */
#define MEMSIZE 32768

/* Symbol-table entry: node of a singly linked list binding a label name to a value. */
typedef struct _LABEL {
	 char name[ID_SIZEMAX];   /* label identifier, NUL-terminated */
	 uint value;              /* value returned by SearchLabel */
	 struct _LABEL *next;     /* next entry, NULL at the end of the list */
} LABEL;

LABEL *label_first;  /* head of the label list; AddLabel inserts new entries here */
int pass_n;          /* current pass: 0 for the first pass, 1 for the second */
uint pc;             /* index in mem of the next byte to emit */
uchar mem[MEMSIZE];  /* assembled image; main writes its first pc bytes to the output file */

void AddLabel(char *name,int value) {
	 LABEL *l;
	 if (pass_n==1) return;
	 l=malloc(sizeof(LABEL));
	 strcpy(l->name,name);
	 l->value=value;
	 l->next=label_first;
	 label_first=l;
}

/*
 * Return the value bound to the label called name.
 *
 * While pass_n==0 the table is not consulted and 0 is returned for
 * every name.  Otherwise the label list is walked from label_first and
 * the value of the first entry whose name matches is returned; a name
 * that is not in the list is a fatal "Label not found" error.
 */
uint SearchLabel(char *name) {
	 LABEL *node;

	 if (pass_n==0) return 0;
	 for (node=label_first; node!=NULL; node=node->next) {
			if (strcmp(name,node->name)!=0) continue;
			return node->value;
	 }
	 Error(name,"Label not found");
	 return 0;
}
	 

/* Forward declaration: ParseAtom recurses into ParseExpr for parentheses. */
uint ParseExpr(void);

/*
 * Parse the innermost level of an expression and return its value:
 *   number        value of the literal
 *   identifier    value of the label (see SearchLabel)
 *   @             current value of pc
 *   ( expr )      value of the parenthesised expression
 *   + atom        value of the atom
 *   - atom        negated value of the atom (unsigned arithmetic)
 * Any other token is a fatal "Syntax Error".
 */
uint ParseAtom(void) {
	 uint n;

	 switch (lex_sym) {
	 case SYM_NUM:
	   n=lex_num;
	   get_lex();
	   return n;
	 case SYM_ID:
	   n=SearchLabel(lex_id);
	   get_lex();
	   return n;
	 case '@':
	   get_lex();
	   return pc;
	 case '(':
	   get_lex();
	   n=ParseExpr();
	   if (lex_sym!=')') Error(")","Expected");
	   get_lex();
	   return n;
	 case '+':
	   get_lex();
	   return ParseAtom();
	 case '-':
	   get_lex();
	   return -ParseAtom();
	 default:
	   Error("?","Syntax Error");
	 }
	 /* Error exits, so this is not reached; it gives every path of this
	    non-void function a return value, as SearchLabel already does */
	 return 0;
}


uint ParseShift(void) {
	 uint n,m,op;
	 n=ParseAtom();
	 while (lex_sym==SYM_SHR || lex_sym==SYM_SHL) {
			op=lex_sym;
			get_lex();
			m=ParseAtom();
			switch(op) {
			 case SYM_SHR: n=n>>m; break;
			 case SYM_SHL: n=n<<m; break;
			}
	 }
	 return n;
}


uint ParseProd(void) {
	 uint n,m,op;
	 n=ParseShift();
	 while (lex_sym=='/' || lex_sym=='*' || lex_sym=='&') {
			op=lex_sym;
			get_lex();
			m=ParseShift();
			switch(op) {
			 case '*': n=n*m; break;
			 case '/': n=n/m; break;
			 case '&': n=n&m; break;
			}
	 }
	 return n;
}

/*
 * Parse a chain of products joined by '+', '-' or '|' (left
 * associative) and return its value, computed in unsigned arithmetic.
 */
uint ParseSum(void) {
	 uint acc,rhs;
	 int op;

	 acc=ParseProd();
	 for (;;) {
			op=lex_sym;
			if (op!='+' && op!='-' && op!='|') break;
			get_lex();
			rhs=ParseProd();
			if (op=='+') {
				 acc+=rhs;
			} else if (op=='-') {
				 acc-=rhs;
			} else {
				 acc|=rhs;
			}
	 }
	 return acc;
}


/* Entry point of the expression parser: an expression is a ParseSum chain. */
uint ParseExpr(void) {
	 return ParseSum();
}



void ParseDB(void) {
	 char *s;
	 uint k;
	 
	 while (1) {
			switch (lex_sym) {
			 case SYM_STR:
				 s=lex_str;
				 while (*s) mem[pc++]=*s++;
				 get_lex();
				 break;
			 default:
				 k=ParseExpr();
				 mem[pc++]=k;
				 break;
			}
			if (lex_sym!=',') break;
			get_lex();
	 }
}

void ParseDBLED(void) {
	 char *s;
	 uint k;
	 
	 while (1) {
			switch (lex_sym) {
			 case SYM_STR:
				 s=lex_str;
				 while (*s) mem[pc++]=CvtLed(*s++);
				 get_lex();
				 break;
			 default:
				 k=ParseExpr();
				 mem[pc++]=CvtLed(k);
				 break;
			}
			if (lex_sym!=',') break;
			get_lex();
	 }
}

void ParseSource(int pass) {
  char id[ID_SIZEMAX];
  INSTR_DESC *p;
  uint k;

	pass_n=pass;
  get_ch();
  get_lex();
  printf("Pass %d\n",pass+1);
  while (1) {
    while (lex_sym==';') get_lex();
    if (lex_sym!=SYM_ID) Error("?","Opcode expected");
    strcpy(id,lex_id);
    get_lex();
    if (lex_sym==':') {
      get_lex();
      AddLabel(id,pc);
    } else {
      p=instr_desc;
      while (p->name!=NULL) {
	if (strcmp(p->name,id)==0) break;
	p++;
      }
      if (p->name==NULL) Error(id,"Unknown opcode");
			 switch(p->format) {
				case FORMAT_OP0:
					mem[pc++]=p->opcode;
				 break;
				case FORMAT_OP1:
				 k=ParseExpr();
					mem[pc++]=p->opcode | (k & 0xF);
				 break;
				case FORMAT_DB:
					ParseDB();
					break;
				case FORMAT_PRINT:
					k=ParseExpr();
					if (pass_n==1) printf("=%u\n",k);
					break;
				case FORMAT_DBLED:
					ParseDBLED();
					break;
				case FORMAT_END:
					return;
			 }
    }
  }
}

/* Clear the whole memory image: every byte of mem is set to 0. */
void InitMem(void) {
  memset(mem,0,sizeof(mem));
}

/*
 * Program entry point: bdasm asm_source binary_image
 *
 * Assembles argv[1] in two passes over the same file (the first pass
 * collects the labels, the second resolves them) and writes the first
 * pc bytes of mem to argv[2].
 *
 * Returns 0 on success.  A wrong argument count prints the usage text
 * and exits with status 0; an input file that cannot be opened is
 * reported through Error; an output file that cannot be created or
 * written completely is reported on stderr and makes main return 1.
 */
int main(int argc,char *argv[]) {
	 size_t written;

	 if (argc!=3) {
	   fprintf(stderr,"Microprocessor Assembler (c) 1995 Fabrice Bellard\n");
	   fprintf(stderr,"usage: bdasm asm_source binary_image\n");
	   exit(0);
	 }
	 file_in=fopen(argv[1],"rt");
	 if (file_in==NULL) Error(argv[1],"File not found");

	 InitMem();

/* pass 1 */
	 pc=0;
	 line_n=1;
	 ParseSource(0);

/* pass 2 */
	 rewind(file_in);
	 pc=0;
	 line_n=1;
	 ParseSource(1);

	 fclose(file_in);

	 file_out=fopen(argv[2],"wb");
	 if (file_out==NULL) {
	   fprintf(stderr,"\nError: '%s': Cannot create output file\n",argv[2]);
	   return 1;
	 }
	 written=fwrite(mem,1,pc,file_out);
	 /* fclose flushes the stream, so its result matters as much as fwrite's;
	    it is evaluated first so that the file is always closed */
	 if (fclose(file_out)!=0 || written!=pc) {
	   fprintf(stderr,"\nError: '%s': Write error\n",argv[2]);
	   return 1;
	 }
	 return 0;
}

The code is simple, so take a pencil and some scrap paper and start analyzing it. Hint: start from the main
function.
User avatar
Saman
Lieutenant Colonel
Lieutenant Colonel
Posts: 828
Joined: Fri Jul 31, 2009 10:32 pm
Location: Mount Lavinia

Re: Simple 8-bit assembler in C

Post by Saman » Sun Jan 15, 2012 4:28 pm

Nice, high-quality code by Fabrice Bellard, one of the expert C programmers in the world. You would expect this quality and accuracy from a world-class coder like him. I have worked with one of his popular projects, the transcoder ffmpeg. It is known as one of the best open-source transcoders in the world and is written entirely in C.

FYI: The standard reference for compiler theory is Compilers: Principles, Techniques, and Tools (2nd Edition).

Also, I doubt Java can do a better job than C in compiler programming since almost all popular compilers in the world are written in C including Java compiler itself ;)
User avatar
SemiconductorCat
Major
Major
Posts: 455
Joined: Mon Aug 22, 2011 8:42 pm
Location: currently in hyperspace

Re: Simple 8-bit assembler in C

Post by SemiconductorCat » Sun Jan 15, 2012 9:35 pm

FYMI: http://www.compilerconnection.com/books/books.htm with

I found it easier to learn the mathematical theory from the Java book.
Its chapter 3, section 2, "Predictive Parsing", explains the mechanism used by this code.
---

Another point: some people think that compilers are based on advanced mathematical
principles. That may be true for the most popular industry-standard compilers, but you can
write a simple compiler with just the basic Engineering Mathematics I knowledge of set theory
and number theory (undergraduate level).

Even if you are not a C programmer and only work as a Java, JavaScript or PHP programmer,
you may find many places where this knowledge and these theories are useful again and again
in your job as a web programmer. For example: parsing an address input, extracting the city name,
and finding out whether delivery to that city/location is available in your shopping cart.

Also, these are algorithms (the conventional theory that every CS student learns): we cannot
predict their speed from the implementation language, only from the complexity of the algorithms
we used. For example, if we use a singly linked list to iterate over the symbol table rather than a
doubly linked list, it makes no difference whether it is written in Java or C.
[This is known as time complexity.]

It is a myth that you cannot write big applications without C, or that these theories are useless
for a Java programmer.
http://bellard.org/jslinux/
^ a x86 emulator that run linux on your web browser written completely with javascript.there are
standalone javascript photoshop like program too developed by web developers. :| ,hay
don't look at me , I didn't wrote it. But it's booting and loading the linux isn't it?

--if you have more information , that will help code readers , please feel free to add them--
Post Reply

Return to “C/C++ Programming”