-1

Since my 8051 compiler doesn't have a feature that detects unused variables, I decide to try to implement my own, but it doesn't work.

When the program runs, it successfully identifies all labels by reading the main file then for each label name, it scans the entire file and for each line scanned, the following function is called:

findlabel(labelname,fileline);

Upon executing the program, it incorrectly identifies the following variables as unused:

PQR, MNO, TUV, and the null

The file that I am having this program continuously scan has the following contents:

ABC equ 1h
GHI equ 2h
JKL equ 3h
TUV equ 6h
MNO equ 4h
PQR equ 5h

cjne A,#ABC,def
  mov GHI,#1h
  mov JKL,MNO
def:

But MNO is used in the "mov JKL,MNO" line.

I also tried trimming out carriage returns and extra spacing and that was no help.

What am I doing wrong?

Source code follows:

void trim(char* astr){
  while (astr[0]==' ' || astr[0]=='\r'|| astr[0]=='\t' || astr[0]=='\n'){
    strcpy(astr,astr+1);
  }
  int sz=strlen(astr)-2;
  while(astr[sz]==' ' || astr[sz]=='\r'|| astr[sz]=='\t' || astr[sz]=='\n'){
    astr[sz]='\0';sz--;
  }
}

int findlabel(char* lbl,char*fline){
    int par,isdec=0;
    char* semicolon=strcasestr(fline,";");
    char* qs=strcasestr(fline,"';'");
    if (semicolon && !qs){
      //strip everything after semicolon if not quoted
      memcpy(fline,fline,semicolon-fline);
      fline[semicolon-fline]='\0';
    }
    trim(fline);
    char* spc=strcasestr(fline," "); //Make sure there's a space inbetween text
    if (spc){
      strcpy(fline,spc+1); // toss out command
      trim(fline);
      for (par=1;par<=3;par++){
        char ilbl[2000];
        char* comma=strcasestr(fline,",");
        if (comma){
        //found comma so strip it and save parameter to ilbl
        memcpy(ilbl,fline,comma-fline);
        ilbl[comma-fline]='\0';
        strcpy(fline,comma+1);
        }else{
        //no comma so run this loop one more time with last part of file line as parameter
        strcpy(ilbl,fline);par=99;
        }
        trim(ilbl);
        if (strcasecmp(ilbl,lbl)==0){isdec=1;par=99;} //first param = #label
        if (ilbl[0]=='#'){
          strcpy(ilbl,ilbl+1);
          if (strcasecmp(ilbl,lbl)==0){isdec=1;par=99;} //first param = label
        }
      }
    }
    return isdec;
}
Tim Biegeleisen
  • 502,043
  • 27
  • 286
  • 360
Mike -- No longer here
  • 2,064
  • 1
  • 15
  • 37
  • 2
    `strcpy(ilbl,ilbl+1);` is undefined behavior. Strings used in `strcpy` may not overlap. BTW: the strcpy isn't need. Simply use `ilbl+1` in the `strcasecmp` – Support Ukraine Apr 21 '18 at 07:11
  • Seems to me that you only check for the label in one position. So in `mov JKL,MNO` your code doesn't seem to compare the `lbl` to both `JKL` and `MNO`. In general your code seem too complex. Why not directly search for the label in the string to start with? – Support Ukraine Apr 21 '18 at 07:21
  • Please indent your source code correctly! – chqrlie Apr 21 '18 at 14:13
  • @4386427 Its because I don't want to have a match come up if the label is mixed in with another label name. For example, if I defined MNO, and there are labels in my program named LMNOP I don't want the system to think MNO is found just because its within LMNOP. – Mike -- No longer here Apr 21 '18 at 14:53

1 Answers1

-1

What am I doing wrong?

Approaching the problem with a custom solution without using any known patterns generally used to solve it, as it is a known problem.

This is a parsing problem, so you would need a state machine to lex tokens into a symbol table, and then increase the symbol count whenever they are seen. At the end of the program, check symbols whose count is zero, and they are the unused ones.

struct symbol_s {
    char *name;
    size_t count;

    struct symbol_s *next;
};

This struct is a list of name and count pairs, which is enough to track the number of symbols seen.

One distinction here when parsing is the declaration (or first time use) of a symbol and their subsequent use. In your example, we can define the declaration as any symbol that is followed by an equ string. This can later be extended or modified to include other types, such as labels.

With all that, a rather clumsy implementation can be written as:

#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>


const char *source = " \
ABC equ 1h             \
GHI equ 2h             \
JKL equ 3h             \
TUV equ 6h             \
MNO equ 4h             \
PQR equ 5h             \
                       \
cjne A,#ABC,def        \
  mov GHI,#1h          \
  mov JKL,MNO          \
def:                   \
";

struct symbol_s {
    char *name;
    size_t count;

    struct symbol_s *next;
};

void symbol_append_child(struct symbol_s *head, struct symbol_s *next)
{
    struct symbol_s *it;

    assert(head != NULL);
    assert(next != NULL);

    it = head;

    while (it->next != NULL && (it = it->next)) { }

    it->next = next;
}

struct symbol_s *symbol_new(char *name)
{
    struct symbol_s *symbol;

    assert(name != NULL);

    symbol = calloc(1, sizeof(*symbol));
    symbol->name = strdup(name);

    return symbol;
}

void symbol_free(struct symbol_s *symbol)
{
    free(symbol->name);
    free(symbol);
}

int main()
{
    int cursor = 0;
    int c;
    struct symbol_s head = { 0 };

    while ((c = source[cursor++]) != 0) {
        if (isalpha(c)) {
            int begin_cursor = cursor - 1;

            while ((c = source[cursor++]) && isalnum(c)) {}

            if (isspace(c) && strncmp("equ", source + cursor, 3) == 0) {
                char buffer[512];
                size_t len = cursor - begin_cursor - 1;

                strncpy(buffer, source + begin_cursor, len);
                buffer[len] = 0;

                symbol_append_child(&head, symbol_new(buffer));
            } else {
                char buffer[512];
                size_t len = cursor - begin_cursor - 1;

                strncpy(buffer, source + begin_cursor, len);
                buffer[len] = 0;

                struct symbol_s *it;
                for (it = &head; it != NULL; it = it->next) {
                    if (it->name == NULL) {
                        continue;
                    }

                    if (strncmp(it->name, source + begin_cursor, len) == 0) {
                        it->count += 1;
                    }
                }
                continue;
            }
        }
    }

    // Print unused symbols
    struct symbol_s *it;
    for (it = &head; it != NULL; it = it->next) {
        if (it->name == NULL) {
            continue;
        }

        if (it->count == 0) {
            printf("Unused symbol: %s\n", it->name, it->count);
        }
    }

    // clean up
    struct symbol_s *prev;
    for (prev = NULL, it = &head; it != NULL; prev = it, it = it->next) {
        if (it->name == NULL) {
            continue;
        }

        if (prev != NULL && prev->name) {
            symbol_free(prev);
        }
    }
    symbol_free(prev);

    return 0;
}

Which prints:

Unused symbol: TUV
Unused symbol: PQR

For running example: https://ideone.com/b3KggJ

Although, as a reminder, you can do this more easily with regexp and a higher level language.

neuro_sys
  • 805
  • 7
  • 13