I'm creating a program to separate nonterminals from terminals for a programming language. An enumerator keeps track of the tag names for the nonterminals. The nonterminal ID is set as 0 and all others enumerate after (as it should). However, when the program finds the nonterminals, it prints a bizarre value of 1953167781, though some do come out as 0. The ones that seem to come out as 1953... are ones that are ordered like so:open int num = 7
, num
being the nonterminal. num
is not seen as 0 but as 1953167781. Though, if it's a declaration like: guarded class test{}
the nonterminal test
is 0, rather than 1953167781. Is there a possible reason for this phenomena? Thank you in advance for your feedback! Below are a picture of the output and an example of the process that take place.
EDIT
For example let's create headers to hold our declarations:
#ifndef __TOK_H__
#define __TOK_H__
#define MAX (1024)
typedef enum {
ID = 0,OPEN,CLOSED,GUARDED,ARTIFICIAL,STATIC,GLOBAL,CONT,SUPER,INT,FLOAT,CHAR,STRING,
BOOL,COLLECTION,CLASS,FUNCT,METHOD,STRUCT,ENUM,IF,ELSE,_OR_,DO,UNTIL,UNLESS,FOR,
FOREACH,IN,TRY,CATCH,EXCEPTION,RETURN,SKIP,BREAK,TERM,NEW,CALL,TRU,FALS,NIL,
INUMBER,FNUMBER,VCHAR,VSTRING,UNDEF,LT,GT,LE,GE,EQ,NE,AND,OR,ADD,ADDE,INCRE,SUB,SUBE,
DECRE,MULT,MULTE,DIV,DIVE,MOD,MODE,EX,EXE,INC,ASI,NOT,INHER,DOT,COM,LP,RP,LB,RB,LBR,
RBR,APO,QUO,SEMCO,EOFT,NAT
} tokentype;
typedef struct {
char str[MAX];
tokentype type;
} token;
void sscan(FILE *);
token generate(FILE *fp);
#endif
This one too:
#ifndef __RESERVED_H__
#define __RESERVED_H__
char *keywords[] = { //40 keywords/reserved words
"open","closed","guarded","artificial","static","global","cont","super",
"int","float","char","string","bool","collection","class","funct","method",
"struct","enum","if","else","or","do","until","unless","for","foreach","in",
"try","catch","exception","return","skip","break","term","new","call","true",
"false","null",
};
char *relative_operators[] = { //8 relops
"<",">","<=",">=","==","!=","&&","||",
};
char operators[6] = { //6 operators
"+-*/%^",
};
char delimeters[11] = { //11 delimeters
".,(){}[];'\"",
};
char unique_operators[] = { //5 unops
"#!&|="
};
#endif
And then the (while not efficient in any way) scanner and separator:
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "reserved.h"
#include "tok.h"
int linenum = 1;
char ch;
token TOKEN;
int isLangPunct(char ch);
int isKEY(char *word);
int isDELIM(char ch);
int isRELOP(char ch);
int isUNIQUE(char ch);
int isOPERA(char ch);
tokentype getKey(char *word);
tokentype getDel(char ch);
void sscan(FILE *fp) {
while ((ch = fgetc(fp)) != EOF) {
if (ch == '\n') {
linenum++;
continue;
} else if (isLangPunct(ch) || isalnum(ch) || isspace(ch)) {
continue;
} else {
fprintf(stderr, "Undefined character: %c at line %d.\n", ch, linenum);
}
}
rewind(fp); linenum = 1;
do {
generate(fp);
} while (TOKEN.type != EOFT);
}
int isLangPunct(char ch) {
int r = 0;
if (ch == '#' || ch == '|' || ch == '&' || ch == '=' || ch == '%' ||
ch == '!' || ch == '+' || ch == '-' || ch == '*' || ch == '/' ||
ch == '.' || ch == ',' || ch == '(' || ch == ')' || ch == ';' ||
ch == '{' || ch == '}' || ch == '[' || ch == ']' || ch == '\'' ||
ch == '"' || ch == '<' || ch == '>' || ch == '^')
{
r = 1;
} return r;
}
token generate(FILE *fp) {
char *word = malloc(sizeof(char) * MAX);
char *number = malloc(sizeof(char) * MAX);
int wi = 0, ni = 0;
while((ch = fgetc(fp)) != EOF) {
if (ch == '\n') {
linenum++; continue;
} else if (ch == '\t' || ch == ' ' || ch == '\r') {
continue;
} else if (isalpha(ch)) {
do {
word[wi++] = ch;
} while(isalpha(ch = fgetc(fp)));
word[wi] = '\0';
wi = 0;
strcpy(TOKEN.str, word);
if (isKEY(word)) {
TOKEN.type = getKey(word);
} else {
TOKEN.type = ID;
}
fseek(fp, -1, SEEK_CUR);
printf("%d ", (int)TOKEN.type);
return TOKEN;
} else if (isdigit(ch)) {
do {
number[ni++] = ch;
} while(isdigit(ch = fgetc(fp)));
if (ch == '.') {
do {
number[ni++] = ch;
} while(isdigit(ch = fgetc(fp)));
TOKEN.type = FNUMBER;
} else {
TOKEN.type = INUMBER;
}
number[ni] = '\0';
ni = 0;
strcpy(TOKEN.str, number);
printf("%s ", TOKEN.str);
fseek(fp, -1, SEEK_CUR);
return TOKEN;
} else if (isDELIM(ch)) {
TOKEN.type = getDel(ch);
char *str = &ch;
//working on getting strings and chars to have
//their own value types
strcpy(TOKEN.str, str);
printf("%s ", TOKEN.str);
return TOKEN;
} else if (isRELOP(ch) || isUNIQUE(ch)) {
switch (ch) {
case '<':
if ((ch = fgetc(fp)) == '=') {
TOKEN.type = LE;
strcpy(TOKEN.str, "<=");
} else {
TOKEN.type = LT;
strcpy(TOKEN.str, "<");
fseek(fp, -1, SEEK_CUR);
} break;
case '>':
if ((ch = fgetc(fp)) == '=') {
TOKEN.type = GE;
strcpy(TOKEN.str, ">=");
} else {
TOKEN.type = GT;
strcpy(TOKEN.str, ">");
fseek(fp, -1, SEEK_CUR);
} break;
case '=':
if ((ch = fgetc(fp)) == '=') {
TOKEN.type = EQ;
strcpy(TOKEN.str, "==");
} else {
TOKEN.type = ASI;
strcpy(TOKEN.str, "=");
fseek(fp, -1, SEEK_CUR);
} break;
case '!':
if ((ch = fgetc(fp)) == '=') {
TOKEN.type = NE;
strcpy(TOKEN.str, "!=");
} else {
TOKEN.type = NOT;
strcpy(TOKEN.str, "!");
fseek(fp, -1, SEEK_CUR);
} break;
case '&':
if ((ch = fgetc(fp)) == '&') {
TOKEN.type = AND;
strcpy(TOKEN.str, "&&");
} else {
fprintf(stderr, "Token Error: & missing at line %d.\n", linenum);
fseek(fp, -1, SEEK_CUR);
} break;
case '|':
if ((ch = fgetc(fp)) == '|') {
TOKEN.type = OR;
strcpy(TOKEN.str, "||");
} else {
TOKEN.type = INHER;
strcpy(TOKEN.str, "|");
fseek(fp, -1, SEEK_CUR);
} break;
case '#':
TOKEN.type = INC;
strcpy(TOKEN.str, "#");
}
printf("%s ", TOKEN.str);
return TOKEN;
} else if (isOPERA(ch)) {
switch (ch) {
case '+':
if ((ch = fgetc(fp)) == '+') {
TOKEN.type = INCRE;
strcpy(TOKEN.str, "++");
} else if (ch == '=') {
TOKEN.type = ADDE ;
strcpy(TOKEN.str, "+=");
fseek(fp, -1, SEEK_CUR);
} else {
TOKEN.type = ADD ;
strcpy(TOKEN.str, "+");
fseek(fp, -1, SEEK_CUR);
} break;
case '-':
if ((ch = fgetc(fp)) == '-') {
TOKEN.type = DECRE;
strcpy(TOKEN.str, "--");
} else if (ch == '=') {
TOKEN.type = SUBE;
strcpy(TOKEN.str, "-=");
fseek(fp, -1, SEEK_CUR);
} else {
TOKEN.type = SUB;
strcpy(TOKEN.str, "-");
fseek(fp, -1, SEEK_CUR);
} break;
case '*':
if ((ch = fgetc(fp)) == '=') {
TOKEN.type = MULTE;
strcpy(TOKEN.str, "*=");
} else {
TOKEN.type = MULT;
strcpy(TOKEN.str, "*");
fseek(fp, -1, SEEK_CUR);
} break;
case '/':
if ((ch = fgetc(fp)) == '=') {
TOKEN.type = DIVE;
strcpy(TOKEN.str, "/=");
} else if (ch == '/') {
do {
continue;
} while ((ch = fgetc(fp)) != '\n');
} else {
TOKEN.type = DIV;
strcpy(TOKEN.str, "/");
fseek(fp, -1, SEEK_CUR);
} break;
case '%':
if ((ch = fgetc(fp)) == '=') {
TOKEN.type = MODE;
strcpy(TOKEN.str, "%=");
} else {
TOKEN.type = MOD;
strcpy(TOKEN.str, "%");
fseek(fp, -1, SEEK_CUR);
} break;
case '^':
if ((ch = fgetc(fp)) == '=') {
TOKEN.type = EXE;
strcpy(TOKEN.str, "^=");
} else {
TOKEN.type = EX;
strcpy(TOKEN.str, "^");
fseek(fp, -1, SEEK_CUR);
} break;
}
printf("%s ", TOKEN.str);
return TOKEN;
}
}
free(word);
free(number);
TOKEN.type = EOFT;
strcpy(TOKEN.str, "\0");
printf("%s ", TOKEN.str);
return TOKEN;
}
int isKEY(char *word) {
int r = 0;
for (int i = 0; i < 40; i++) {
if (*word == *keywords[i]) {
r = 1;
}
}
return r;
}
int isDELIM(char ch) {
int r = 0;
for (int i = 0; i < 11; i++) {
if (ch == delimeters[i]) {
r = 1;
}
}
return r;
}
int isRELOP(char ch) {
int r = 0;
if (ch == '<' || ch == '>') {
r = 1;
}
return r;
}
int isUNIQUE(char ch) {
int r = 0;
for (int i = 0; i < 5; i++) {
if (ch == unique_operators[i]) {
r = 1;
}
}
return r;
}
int isOPERA(char ch) {
int r = 0;
for (int i = 0; i < 6; i++) {
if (ch == operators[i]) {
r = 1;
}
}
return r;
}
tokentype getKey(char *word) {
tokentype type;
if (!strcmp(word, "open")) { type = OPEN; }
else if (!strcmp(word, "closed")) { type = CLOSED; }
else if (!strcmp(word, "guarded")) { type = GUARDED; }
else if (!strcmp(word, "artificial")) { type = ARTIFICIAL; }
else if (!strcmp(word, "static")) { type = STATIC; }
else if (!strcmp(word, "global")) { type = GLOBAL; }
else if (!strcmp(word, "cont")) { type = CONT; }
else if (!strcmp(word, "super")) { type = SUPER; }
else if (!strcmp(word, "int")) { type = INT; }
else if (!strcmp(word, "float")) { type = FLOAT; }
else if (!strcmp(word, "char")) { type = CHAR; }
else if (!strcmp(word, "string")) { type = STRING; }
else if (!strcmp(word, "bool")) { type = BOOL; }
else if (!strcmp(word, "collection")) { type = COLLECTION; }
else if (!strcmp(word, "class")) { type = CLASS; }
else if (!strcmp(word, "funct")) { type = FUNCT; }
else if (!strcmp(word, "method")) { type = METHOD; }
else if (!strcmp(word, "struct")) { type = STRUCT; }
else if (!strcmp(word, "enum")) { type = ENUM; }
else if (!strcmp(word, "if")) { type = IF; }
else if (!strcmp(word, "else")) { type = ELSE; }
else if (!strcmp(word, "or")) { type = _OR_; }
else if (!strcmp(word, "do")) { type = DO; }
else if (!strcmp(word, "until")) { type = UNTIL; }
else if (!strcmp(word, "unless")) { type = UNLESS; }
else if (!strcmp(word, "for")) { type = FOR; }
else if (!strcmp(word, "foreach")) { type = FOREACH; }
else if (!strcmp(word, "in")) { type = IN; }
else if (!strcmp(word, "try")) { type = TRY; }
else if (!strcmp(word, "catch")) { type = CATCH; }
else if (!strcmp(word, "exception")) { type = EXCEPTION; }
else if (!strcmp(word, "return")) { type = RETURN; }
else if (!strcmp(word, "skip")) { type = SKIP; }
else if (!strcmp(word, "break")) { type = BREAK; }
else if (!strcmp(word, "term")) { type = TERM; }
else if (!strcmp(word, "new")) { type = NEW; }
else if (!strcmp(word, "call")) { type = CALL; }
else if (!strcmp(word, "true")) { type = TRU; }
else if (!strcmp(word, "false")) { type = FALS; }
else if (!strcmp(word, "null")) { type = NIL; }
return type;
}
tokentype getDel(char ch) {
tokentype type;
if (ch == '.') { type = DOT; }
if (ch == ',') { type = COM; }
if (ch == '(') { type = LP; }
if (ch == ')') { type = RP; }
if (ch == '{') { type = LB; }
if (ch == '}') { type = RB; }
if (ch == '[') { type = LBR; }
if (ch == ']') { type = RBR; }
if (ch == '"') { type = QUO; }
if (ch == '\'') { type = APO; }
if (ch == ';') { type = SEMCO; }
return type;
}
In a separate or even on the same file as above, main()
is able to open a file and read it sending the file stream to sscan(FILE *)
. And for the last piece a example file to read:
# call Sys;
# call SysIO;
static int num = 7;
cont global float fl = 12.895;
cont char letter = 'h';
collection Program {
cont closed string aswkey = "abdcbabcdabdbcdab";
open class quiz {
bool decision = true;
artificial method.study (int time) {};
open quiz() {
//do something
};
};
class test | quiz {
bool descision;
super method.study (int time) {
if (decision == true) {
//do something
} or if (time == 0) {
//do something
} else {
//do nothing
};
time = 50;
};
guarded test (bool n) {
descision = n;
};
};
funct.Enter () {
quiz Quiz = new quiz();
test Test = new test(false);
Test.study(60);
if (something != this) {
//do something
};
term;
};
};