-2

I am trying to write a C++ program that assembles MIPS instructions. While debugging, it keeps throwing an error at line 74 of my main:

myassembler.add(lexed[i].labels[0], lexed[i].name, tokens, i);

my main is here:

#include <fstream>
#include <iostream>
#include <iomanip>
#include <memory>
#include <stdexcept>
#include <string>
#include <sstream>
#include <vector>

#include "exceptions.h"
#include "lexer.h"
#include "util.h"
#include "assembler.h"

/// Reads the entire contents of a file into a string.
/// @param name Path of the file to open.
/// @return Everything the file contains, as one string.
/// @throws std::runtime_error if the file cannot be opened.
std::string read_file(const std::string& name) {
  std::ifstream file(name);
  if (!file.is_open()) {
    throw std::runtime_error("Could not open file: " + name);
  }
  std::stringstream stream;
  stream << file.rdbuf();
  // str() already returns a temporary; wrapping it in std::move would only
  // prevent copy elision, so it is returned directly.
  return stream.str();
}

int main(int argc, char** argv) {
  // Adjusting -- argv[0] is always filename.
  --argc;
  ++argv;

  if (argc == 0) {
    std::cerr << "Need a file" << std::endl;
    return 1;
  }

  assembler myassembler;


  for (int i = 0; i < argc; ++i) {
    std::string asmName(argv[i]);

    if (!util::ends_with_subseq(asmName, std::string(".asm"))) {
      std::cerr << "Need a valid file name (that ends in .asm)" << std::endl;
      std::cerr << "(Bad name: " << asmName << ")" << std::endl;
      return 1;
    }

    // 4 is len(".asm")
    auto length = asmName.size() - string_length(".asm");
    std::string baseName(asmName.begin(), asmName.begin() + length);
    std::string objName = baseName + ".obj";
    try {
      auto text = read_file(asmName);
      try {
        auto lexed = lexer::analyze(text);      // Parses the entire file and returns a vector of instructions

        for (int i =0; i < (int)lexed.size(); i++){

            if(lexed[i].labels.size() > 0)      // Checking if there is a label in the current instruction
                std::cout << "label = " << lexed[i].labels[0] << "\n";      // Prints the label

            std::cout<< "instruction name = " << lexed[i].name<< "\n";      // Prints the name of instruction


            std::cout << "tokens = ";
            std::vector<lexer::token> tokens = lexed[i].args;   
            for(int j=0; j < (int)tokens.size(); j++){       // Prints all the tokens of this instruction like $t1, $t2, $t3
                if (tokens[j].type == lexer::token::Integer)
                    std::cout << tokens[j].integer() << " ";
                else
                    std::cout << tokens[j].string() << " ";
            }
            myassembler.add(lexed[i].labels[0], lexed[i].name, tokens, i);
            myassembler.p();
            std::cout << "\n\n\n";
        }

      } catch(const bad_asm& e) {
        std::stringstream error;
        error << "Cannot assemble the assembly code at line " << e.line;
        throw std::runtime_error(error.str());
        } catch(const bad_label& e) {
         std::stringstream error;
        error << "Undefined label " << e.what() << " at line " << e.line;
        throw std::runtime_error(error.str());
        }

    } catch (const std::runtime_error& err) {
      std::cout << err.what() << std::endl;
      return 1;
    }
  }
  /*getchar();*/
  return 0;
}

assembler.h:

#include "lexer.h"
#include <fstream>
#include <vector>
#include <string>

// One symbol-table entry: a label and the value recorded for it.
struct symbol
{
    std::string label = "";  // label text
    int slinenum = 0;        // recorded line value; initialized so a default-constructed entry is never indeterminate
};

// One pending relocation: an instruction whose operand is a label that still
// has to be resolved, together with the register numbers already decoded.
struct relocation
{
    std::string instruct = "";  // instruction mnemonic
    std::string label = "";     // label the instruction refers to
    int rlinenum = 0;           // line of the instruction; initialized so it is never indeterminate
    int rt = 0;                 // decoded rt register number
    int rs = 0;                 // decoded rs register number
};

// Table entry describing one opcode-selected (I-type) instruction.
struct opcode
{
    std::string instruct{};    // instruction mnemonic
    int opc{0};                // opcode field value
    bool isloadstore{false};   // set for load/store instructions
    int extType{0};            // NOTE(review): presumably the immediate-extension mode; confirm
    bool isbranch{false};      // set for branch instructions
};

// Table entry describing one funct-selected (R-type) instruction.
struct function
{
    std::string instruct{};  // instruction mnemonic
    int funct{0};            // funct field value
    bool isjr{false};        // set for jr
    bool isshift{false};     // set for shift instructions, which take a shift amount
};

// Maps a register name (for example "$zero") to its register number.
struct regs
{
    std::string name;  // register name as written in the assembly source
    int num = 0;       // register number; initialized so a default-constructed entry is never indeterminate
};

// Instruction class chosen while assembling: R (found in the function table),
// I (found in the opcode table), or neither.
enum instrtype
{
    R = 0,
    I = 1,
    neither = 2
};

// Assembler state and operations: lookup tables for opcodes, functions and
// registers, the symbol and relocation tables, and the assembled words.
class assembler
{
public:
    // Fills the opcode, function and register tables via oinit/finit/rinit.
    assembler();
    // Populate the opcode (ops), function (functions) and register (registers) tables.
    void oinit(void);
    void finit(void);
    void rinit(void);
    // Writes each assembled word to `file`, one per line.
    void printToFile(std::fstream &file);
    // Records a label in the symbol table.
    void savesymb(std::string label, int line);
    // Describes an instruction whose label operand must be resolved later.
    void saverel(std::string instr, std::string label, int line, int rt, int rs);
    // Decode the operands of an R-type instruction: the three-token form returns
    // {rd, rs, rt, shamt}; the one-token form returns the single register number.
    std::vector<int> formatr(std::string instr, lexer::token toke1, lexer::token toke2, lexer::token toke3, int line);
    int formatr(std::string instr, lexer::token toke, int line);
    // Decode the operands of an I-type instruction: the three-token form returns
    // {rt, rs, imm}; the two-token form returns {rt, imm}.
    std::vector<int> formati(std::string instr, lexer::token toke1, lexer::token toke2, lexer::token toke3, int line);
    std::vector<int> formati(std::string instr, lexer::token toke1, lexer::token toke2, int line);
    // Returns the number of the named register, or -1 when the name is unknown.
    int findnum(std::string regname);
    // Assembles one instruction and appends the resulting word to the results.
    void add(std::string label, std::string instr, const std::vector<lexer::token> &tokens, int linen);
    // Re-assembles the instructions recorded in the relocation table using the symbol table.
    void secAdd(void);
    // Pack the given fields into one instruction word; both return -1 when `instr` is not in the table.
    int rassemble(std::string instr, int rd, int rs, int rt, int shamt);
    int iassemble(std::string instr, int rt, int rs, int imm);
    // Prints all assembled words to standard output.
    void p();
private:
    std::vector<int> results;             // assembled words, one per add() call
    std::vector<symbol> symbtable;        // labels recorded by savesymb
    std::vector<relocation> reloctable;   // relocations read by secAdd
    std::vector<opcode> ops;              // opcode table (I-type)
    std::vector<function> functions;      // function table (R-type)
    std::vector<regs> registers;          // register name/number table
    instrtype type = neither;             // class of the instruction being assembled
};

and assembler.cpp:

// ECE 2500
// Project 1: myAssembler
// assembler.cpp
// Sheila Zhu

#include "lexer.h"
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include "assembler.h"

// Builds the three lookup tables (opcodes, functions, registers) up front so
// that every later lookup can rely on them being populated.
assembler::assembler()
{
    this->oinit();
    this->finit();
    this->rinit();
}

// Populates the opcode table `ops`. Fields that are not set explicitly keep
// the defaults declared in `opcode`.
void assembler::oinit()
{
    opcode myop;
    myop.instruct = "addi";
    myop.opc = 8;
    myop.extType = 1;
    ops.push_back(myop);
    // more of the same (remaining opcode entries are elided in this listing)
}

// Populates the function table `functions`. Fields that are not set explicitly
// keep the defaults declared in `function`.
void assembler::finit()
{
    function myfunc;
    myfunc.instruct = "add";
    myfunc.funct = 32;
    functions.push_back(myfunc);
    // more of the same (remaining function entries are elided in this listing)
}

// Populates the register table `registers` with name/number pairs.
void assembler::rinit()
{
    regs myreg;
    myreg.name = "$zero";
    myreg.num = 0;
    registers.push_back(myreg);
    // more of the same (remaining register entries are elided in this listing)
}

// Streams every assembled word in `results` to `file`, one per line, flushing
// after each line.
void assembler::printToFile(std::fstream &file)
{
    for (const int word : results)
    {
        file << word << std::endl;
    }
}

void assembler::savesymb(std::string label, int line)
{
    symbol symb;
    symb.label = label;
    symb.slinenum = line * 4;
    symbtable.push_back(symb);
}

void assembler::saverel(std::string instr, std::string label, int line, int rt, int rs)
{
    relocation re;
    re.instruct = instr;
    re.label = label;
    re.rlinenum = line;
    re.rt = rt;
    re.rs = rs;
}

int assembler::findnum(std::string regname)
{
    for (int i = 0; i < (int)registers.size(); i++)
    {
        if (regname == registers.at(i).name)
            return registers.at(i).num;
    }
    return -1;
}

std::vector<int> assembler::formatr(std::string instr, lexer::token toke1, lexer::token toke2, lexer::token toke3, int line)
{
    int rd = 0, rs = 0, rt = 0, shamt = 0;
    std::vector<int> x;
    function currf;
    for (int i = 0; i < (int)functions.size(); i++)
    {
        if (instr == functions.at(i).instruct)
            currf = functions.at(i);
    }
    try
    {
        if (currf.isshift)
        {
            if (toke1.type == lexer::token::Integer)
                throw 1;
            else
            {
                rd = findnum(toke1.string());
                if (rd == -1)
                    throw 2;
            }
            if (toke2.type == lexer::token::Integer)
                throw 1;
            else
            {
                rs = findnum(toke2.string());
                if (rs == -1)
                    throw 2;
            }
            if (toke3.type == lexer::token::Integer)
            {
                shamt = toke3.integer();
                if (shamt < 0)
                    throw 3;
            }
            else
                throw 1;
        }
        else
        {
            if (toke1.type == lexer::token::Integer)
                throw 1;
            else
            {
                rd = findnum(toke1.string());
                if (rd == -1)
                    throw 2;
            }
            if (toke2.type == lexer::token::Integer)
                throw 1;
            else
            {
                rs = findnum(toke2.string());
                if (rs == -1)
                    throw 2;
            }
            if (toke3.type == lexer::token::Integer)
                throw 1;
            else
            {
                rt = findnum(toke3.string());
                if (rt == -1)
                    throw 2;
            }
        }
    }
    catch (int e)
    {
        if (e == 1)
            std::cerr << "Wrong argument in line " << line << std::endl;
        else if (e == 2)
            std::cerr << "Invalid register name in line " << line << std::endl;
        else
            std::cerr << "Shift amount cannot be negative in line " << line << std::endl;
    }
    x.push_back(rd);
    x.push_back(rs);
    x.push_back(rt);
    x.push_back(shamt);
    return x;
}

// Decodes the single register operand of an R-type instruction.
// Returns the register number; 0 when the operand is an integer token, or -1
// when the register name is unknown. Both cases write a message to std::cerr.
int assembler::formatr(std::string instr, lexer::token toke, int line)
{
    if (toke.type == lexer::token::Integer)
    {
        std::cerr << "Wrong argument in line " << line << std::endl;
        return 0;
    }

    const int regnum = findnum(toke.string());
    if (regnum == -1)
    {
        std::cerr << "Invalid register name in line " << line << std::endl;
    }
    return regnum;
}

    std::vector<int> assembler::formati(std::string instr, lexer::token toke1, lexer::token toke2, lexer::token toke3, int line)
{
    int rt = 0, rs = 0, imm = 0;
    std::vector<int> x;
    opcode currop;
    for (int i = 0; i < (int)ops.size(); i++)
    {
        if (instr == ops.at(i).instruct)
            currop = ops.at(i);
    }
    try
    {
        if (currop.isbranch)
        {
            if (toke1.type == lexer::token::Integer)
                throw 1;
            else
            {
                rt = findnum(toke1.string());
                if (rt == -1)
                    throw 2;
            }
            if (toke2.type == lexer::token::Integer)
                throw 1;
            else
            {
                rs = findnum(toke2.string());
                if (rs == -1)
                    throw 2;
            }
            if (toke3.type == lexer::token::Integer)
                imm = toke3.integer();
            else
                saverel(instr, toke3.string(), line, rt, rs);
        }
        else if (currop.isloadstore)
        {
            if ((instr == "lbu") || (instr == "sb"))
            {
                if (toke2.type == lexer::token::String)
                    throw 1;
                else
                {
                    if (toke2.integer() < 0)
                        imm = (0xFFFF << 16) + (0xFF << 8) + toke2.integer();
                    else
                        imm = toke2.integer();
                }
                if (toke1.type == lexer::token::Integer)
                    throw 1;
                else
                {
                    rt = findnum(toke1.string());
                    if (rt == -1)
                        throw 2;
                }
                if (toke3.type == lexer::token::Integer)
                    throw 1;
                else
                {
                    rs = findnum(toke2.string());
                    if (rs == -1)
                        throw 2;
                }
            }
            else
            {
                if (toke2.type == lexer::token::String)
                    throw 1;
                else
                {
                    if (toke2.integer() < 0)
                        imm = (0xFFFF << 16) + toke2.integer();
                    else
                        imm = toke2.integer();
                }
                if (toke1.type == lexer::token::Integer)
                    throw 1;
                else
                {
                    rt = findnum(toke1.string());
                    if (rt == -1)
                        throw 2;
                }
                if (toke3.type == lexer::token::Integer)
                    throw 1;
                else
                {
                    rs = findnum(toke2.string());
                    if (rs == -1)
                        throw 2;
                }
            }
        }
        else
        {
            if ((instr == "andi") || (instr == "ori"))
            {
                if (toke1.type == lexer::token::Integer)
                    throw 1;
                else
                {
                    rt = findnum(toke1.string());
                    if (rt == -1)
                        throw 2;
                }
                if (toke2.type == lexer::token::Integer)
                    throw 1;
                else
                {
                    rs = findnum(toke2.string());
                    if (rs == -1)
                        throw 2;
                }
                if (toke3.type == lexer::token::Integer)
                    imm = toke3.integer();
                else
                    throw 1;
            }
            else
            {
                if (toke1.type == lexer::token::Integer)
                    throw 1;
                else
                {
                    rt = findnum(toke1.string());
                    if (rt == -1)
                        throw 2;
                }
                if (toke2.type == lexer::token::Integer)
                    throw 1;
                else
                {
                    rs = findnum(toke2.string());
                    if (rs == -1)
                        throw 2;
                }
                if (toke3.type == lexer::token::Integer)
                {
                    if (toke3.integer() < 0)
                        imm = (0xFFFF << 16) + toke2.integer();
                    else
                        imm = toke3.integer();
                }
                else
                    throw 1;
            }
        }
    }
    catch (int e)
    {
        if (e == 1)
            std::cerr << "Wrong argument in line " << line << std::endl;
        else
            std::cerr << "Invalid register name in line " << line << std::endl;
    }
    x.push_back(rt);
    x.push_back(rs);
    x.push_back(imm);
    return x;
}

// Decodes the two operands of an I-type instruction written "op rt, imm".
// Returns {rt, imm}. The first bad operand is reported on std::cerr and ends
// decoding: fields not yet decoded keep 0, and an unknown register leaves -1 in rt.
std::vector<int> assembler::formati(std::string instr, lexer::token toke1, lexer::token toke2, int line)
{
    int rt = 0, imm = 0;
    const char *problem = nullptr;

    if (toke1.type == lexer::token::Integer)
    {
        problem = "Wrong argument in line ";
    }
    else
    {
        rt = findnum(toke1.string());
        if (rt == -1)
            problem = "Invalid register name in line ";
    }

    // The immediate is only examined when the register operand was accepted.
    if (problem == nullptr)
    {
        if (toke2.type == lexer::token::String)
            problem = "Wrong argument in line ";
        else
            imm = toke2.integer();
    }

    if (problem != nullptr)
        std::cerr << problem << line << std::endl;

    return std::vector<int>{rt, imm};
}

// Assembles one instruction and appends the resulting word to `results`.
//   label - label attached to the instruction, or "" for none; a non-empty
//           label is recorded in the symbol table
//   instr - instruction mnemonic
//   token - the instruction's operands
//   linen - line number used for the symbol table and diagnostics
// A word is always appended. It is 0 when the mnemonic is unknown or the
// operand count is wrong; both cases are reported on std::cerr.
void assembler::add(std::string label, std::string instr, const std::vector<lexer::token> &token, int linen)
{
    int assembled = 0;
    std::vector<int> argh;

    if (label.length() > 0)
        savesymb(label, linen);

    // Classify from scratch. `type` is a member, so without this reset an
    // unknown mnemonic would inherit the previous instruction's class.
    type = neither;
    for (const function &candidate : functions)
    {
        if (instr == candidate.instruct)
            type = R;
    }
    // Checked second, so the opcode table wins if a mnemonic is in both.
    for (const opcode &candidate : ops)
    {
        if (instr == candidate.instruct)
            type = I;
    }

    // Reports an operand-count mismatch. Both instruction classes now use
    // std::cerr; the I-type path previously wrote to std::cout.
    auto reportArgCount = [linen]()
    {
        std::cerr << "Wrong number of arguments at line " << linen << std::endl;
    };

    if (type == R)
    {
        if (instr == "jr")
        {
            if (token.size() == 1)
            {
                const int rs = formatr(instr, token.at(0), linen);
                assembled = rassemble(instr, 0, rs, 0, 0);
            }
            else
                reportArgCount();
        }
        else
        {
            if (token.size() == 3)
            {
                // Operands are at indices 0, 1 and 2. The original passed at(2)
                // and at(3), which skips operand 1 and throws std::out_of_range.
                argh = formatr(instr, token.at(0), token.at(1), token.at(2), linen);
                assembled = rassemble(instr, argh[0], argh[1], argh[2], argh[3]);
            }
            else
                reportArgCount();
        }
    }
    else if (type == I)
    {
        if (instr == "lui")
        {
            if (token.size() == 2)
            {
                argh = formati(instr, token.at(0), token.at(1), linen);
                assembled = iassemble(instr, argh[0], 0, argh[1]);
            }
            else
                reportArgCount();
        }
        else
        {
            if (token.size() == 3)
            {
                argh = formati(instr, token.at(0), token.at(1), token.at(2), linen);
                assembled = iassemble(instr, argh[0], argh[1], argh[2]);
            }
            else
                reportArgCount();
        }
    }
    else
        std::cerr << "Instruction not recognized at line " << linen << std::endl;

    results.push_back(assembled);
}

void assembler::secAdd(void)
{
    std::vector<int>::iterator iter = results.begin();
    for (int i = 0; i < (int)reloctable.size(); i++)
    {
        for (unsigned int j = 0; j < symbtable.size(); j++)
        {
            if (reloctable.at(i).label == symbtable.at(j).label)
            {
                int assembled = 0;
                iter += (reloctable.at(i).rlinenum / 4);
                for (unsigned int k = 0; k < ops.size(); k++)
                {
                    if (reloctable.at(i).instruct == ops.at(k).instruct)
                        type = I;
                }
                if (type == I)
                    assembled = iassemble(reloctable.at(i).instruct, reloctable.at(i).rt, reloctable.at(i).rs, symbtable.at(i).slinenum);
                else
                    std::cerr << "Instruction not recognized at line " << reloctable.at(i).rlinenum << std::endl;
                results.erase(iter);
                results.insert(iter, assembled);
            }
        }
    }
}

int assembler::rassemble(std::string instr, int rd, int rs, int rt, int shamt)
{
    int func = 0;
    int code = 0;
    for (int i = 0; i < (int)functions.size(); i++)
    {
        if (instr == functions.at(i).instruct)
        {
            func = functions.at(i).funct;
            break;
        }
        else
        {
            if (i == (functions.size() - 1))
                return -1;
        }
    }
    code = (rs << 21) + (rt << 16) + (rd << 11) + (shamt << 6) + func;
    return code;
}

int assembler::iassemble(std::string instr, int rt, int rs, int imm)
{
    int op = 0;
    int code = 0;
    for (int i = 0; i < (int)ops.size(); i++)
    {
        if (instr == ops.at(i).instruct)
        {
            op = ops.at(i).opc;
            break;
        }
        else
        {
            if (i == (ops.size() - 1))
                return -1;
        }
    }
    code = (op << 26) + (rs << 21) + (rt << 16) + imm;
    return code;
}

// Prints every assembled word to standard output on a single line, each
// followed by a space, then ends the line.
void assembler::p()
{
    for (const int word : results)
    {
        std::cout << word << " ";
    }
    std::cout << std::endl;
}

When debugging, the tokens parameter triggers the error, and the this pointer in the vector code shows that the vector size changes to 0 at these lines:

#if _ITERATOR_DEBUG_LEVEL == 2
        if (size() <= _Pos)

What exactly is happening?

Sorry if my formatting is bad/wrong, etc., and please let me know if I should make any edits/provide more code.

Thanks in advance.

fuwafuwa
  • 11
  • 3
  • Do you see output from `std::cout << "label = " << lexed[i].labels[0] << "\n";` just before the error? Is it possible that `lexed[i].labels` is empty? – Orest Hera Oct 03 '15 at 21:37
  • Yes, I did see output. The pointer this was not empty, but changes to empty between the lines `#if _ITERATOR_DEBUG_LEVEL == 2 ` and `if (size() <= _Pos)` – fuwafuwa Oct 03 '15 at 21:48
  • It is not about the pointer. The pointer points to `labels` object. However, that object may be empty. Check `myassembler.add(lexed[i].labels.empty() ? "" : lexed[i].labels[0], lexed[i].name, tokens, i);` – Orest Hera Oct 03 '15 at 21:50
  • Oh, you're talking about the labels object! I forgot about that! Thanks, that might be it. – fuwafuwa Oct 03 '15 at 22:00

1 Answers1

0

The error is caused by accessing, by index, a vector element that does not exist. In your case lexed[i] should be valid, so the only possible culprit is an empty labels vector. Validate this vector before accessing its elements, for example:

myassembler.add(lexed[i].labels.empty() ? "" : lexed[i].labels[0],
    lexed[i].name, tokens, i);

Actually, there is one more bug: for very large lexed arrays the integer index may overflow. You should not cast the result of .size() to int. Instead, use the proper type for i:

for (size_t i = 0; i < lexed.size(); i++)
Orest Hera
  • 6,706
  • 2
  • 21
  • 35