0

this question follows the discussion of flex+bison output in a glib's hash container Let me repost it (last post remained unanswered after some discussions.) I want to parse a bibtex file using flex and bison, and will display those data using gtk library(in C). The lexer is

%{
#include "bib.tab.h"
%}

%%
[A-Za-z][A-Za-z0-9]*      { yylval.sval = strdup(yytext); return KEY; }
\"([^\"]|\\.)*\"|\{([^\"]|\\.)*\}  { yylval.sval = strdup(yytext); return VALUE; }
[ \t\n]                   ; /* ignore whitespace */
[{}@=,]                   { return *yytext; }
.                         { fprintf(stderr, "Unrecognized character %c in input\n", *yytext); }
%%

and the parser is:

%{
#include <stdio.h>
#include <glib.h>
#include <gtk/gtk.h>
#include <string.h>
#include <glib/gstdio.h>
#include <fcntl.h>
enum
{
  COL_BIB_KEY=0,
  COL_BIB_TYPE, COL_BIB_AUTHOR, COL_BIB_YEAR,
  NUM_COLS} ;
#define slen 1024
GHashTable* table;
GtkTreeIter siter;
GtkListStore *store;
%}

// Symbols.
%union
{
    char    *sval;
};
%token <sval> VALUE
%token <sval> KEY
%token OBRACE
%token EBRACE
%token QUOTE
%token SEMICOLON 

%start Input
%%
Input: 
     /* empty */ 
     | Input Entry ;  /* input is zero or more entires */
Entry: 
     '@' KEY '{' KEY ','{ g_hash_table_insert(table, g_strdup("TYPE"), g_strdup($2));
                  g_hash_table_insert(table, g_strdup("ID"), g_strdup($4));
          g_printf("%s:%s\n","KEY=>",g_hash_table_lookup(table,"TYPE"));
//                  g_printf("%s: %s\n", $2, $4);
              } 
     KeyVals '}' 
     ;
KeyVals: 
       /* empty */ 
       | KeyVals KeyVal ; /* zero or more keyvals */
KeyVal: 
      KEY '=' VALUE ',' { g_hash_table_insert(table, g_strdup($1), g_strdup($3));
//                          g_printf("%s: %s\n", $1, $3); 
              };

%%

int yyerror(char *s) {
  printf("yyerror : %s\n",s);
}

int main(int argc, char** argv) {
gtk_init(&argc, &argv);
GtkWidget  *window = gtk_window_new (GTK_WINDOW_TOPLEVEL);    
  GtkWidget *tree=gtk_tree_view_new();
  setup_tree(tree);

gtk_container_add (GTK_CONTAINER (window), tree);
store= gtk_list_store_new (NUM_COLS, 
      G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING, G_TYPE_STRING);
  table = g_hash_table_new(g_str_hash, g_str_equal);
gint i;
do{
   g_hash_table_remove_all (table);
   yyparse();
   parse_entry (table);
gtk_tree_view_set_model (GTK_TREE_VIEW (tree), GTK_TREE_MODEL (store));
g_object_unref (store);
  }
  while(!EOF);
  g_hash_table_destroy (table);
gtk_widget_show_all (window);
gtk_main ();
return 0;
}
void parse_entry (GHashTable *table)
{
  GHashTableIter iter;
  gchar *key, *val;
  char *keys[] = {"id", "type", "author", "year", "title", "publisher", "editor", 
    "volume", "number", "pages", "month", "note", "address", "edition", "journal",
    "series", "book", "chapter", "organization", NULL};
  char *vals[] = {NULL,  NULL,  NULL, NULL, NULL,
    NULL,  NULL,  NULL, NULL, NULL,
    NULL,  NULL,  NULL, NULL, NULL,
    NULL,    NULL,  NULL, NULL, NULL};

  gchar **kiter;
  gint i;
  g_hash_table_iter_init (&iter, table);
  while (g_hash_table_iter_next (&iter, (void **)&key, (void **)&val))
  {
    for (kiter = keys, i = 0; *kiter; kiter++, i++)
    {
      if (!g_ascii_strcasecmp(*kiter, key))
      {
    vals[i] = g_strndup(val,slen);
//    g_printf("%s:%s\n",keys[i],g_hash_table_lookup(table,keys[i]));
    g_printf("%d=>%s:%s\n",i,keys[i],vals[i]);
    break;
      }
    }
  }
    gtk_list_store_append (store, &siter);
    gtk_list_store_set (store, &siter,
      COL_BIB_TYPE,         vals[COL_BIB_TYPE],
      COL_BIB_KEY,      vals[COL_BIB_KEY],
      COL_BIB_AUTHOR,       vals[COL_BIB_AUTHOR],
      COL_BIB_YEAR,         vals[COL_BIB_YEAR],
      -1);
}

void setup_tree(GtkWidget *tree){
GtkCellRenderer *renderer;
GtkTreeViewColumn *column;

renderer = gtk_cell_renderer_text_new ();
column = gtk_tree_view_column_new_with_attributes
("Type", renderer, "text",COL_BIB_TYPE , NULL);
gtk_tree_view_append_column (GTK_TREE_VIEW (tree), column);
renderer = gtk_cell_renderer_text_new ();
column = gtk_tree_view_column_new_with_attributes
("Author", renderer, "text", COL_BIB_AUTHOR, NULL);
gtk_tree_view_append_column (GTK_TREE_VIEW (tree), column);
renderer = gtk_cell_renderer_text_new ();
column = gtk_tree_view_column_new_with_attributes
("Year", renderer, "text",COL_BIB_YEAR, NULL);
gtk_tree_view_append_column (GTK_TREE_VIEW (tree), column);
g_printf("HIIIIIIIIIi");
}

The problem is on populating the hash table, and not the listview(I enclosed the list store so that people can see my final goal and suggest improvements.) If we put the line

g_printf("%s:%s\n",$1,g_hash_table_lookup(table,$1));

at line number 50, it prints the hash table's content correctly, but if we want the content by uncommenting line number 105, then only the last entry is parsed. So, my guess is I am not processing the hash file correctly (line no 97-107 may be?)

The makefile is:

CC=gcc -g
FLEX=flex
BISON=bison
LIBS=lfl
PROG=parse

${PROG}:bib.y bib.l
    ${BISON} -d bib.y
    ${FLEX} -i bib.l
    ${CC} lex.yy.c bib.tab.c `pkg-config --cflags --libs glib-2.0``pkg-config --cflags --libs gtk+-3.0` -${LIBS} -o $@

clean:
    rm -f lex.yy.c bib.tab.c ${PROG}
    touch bib.l bib.y

and a sample bibtex file is:

@Book{a1,
    Title="ASR",
    Publisher="oxf",
    author = "a {\"m}ook, Rudra Banerjee",
    Year="2010",
    Address="UK",
    Edition="1",
}
@Booklet{ab19,
    Author="Rudra Banerjee and A. Mookerjee",
    Title="Fe{\"Ni}Mo",
    Editor="sm1",
    Title="sm2",
    Publisher="sm3",
    Volume="sm4",
    Number="sm5",
    Pages="sm6",
    Month="sm8",
    Note="sm9",
    Key="sm10",
    Year="1980",
    Address="osm1",
    Edition="osm2",
}

I will be grateful if someone shows me some way to populate the hashtable correctly. Please help.

Community
  • 1
  • 1
BaRud
  • 3,055
  • 7
  • 41
  • 89

1 Answers1

0

It looks like you're just dumping all the data into a single hash table. So the first entry will go into the hash table unders the keys TYPE, ID, Title, Publisher, etc. Then the second entry will overwrite those same keys and values (except it uses Author instead of author), leading to a mix of the two entries.

If you want to use a hashtable, I would expect you'd need a hash table for each entry, and a hastable of hashtables to map IDs to hashtables containing the info for that entry. Alternately, you could parse each entry into a list or some other container structure, and have a single hashtable mapping IDs to lists/containers.

You're also leaking memory very rapidly, as you allocate new memory for each KEY or VALUE and then duplicate the memory to put it into the hashtable.

Chris Dodd
  • 119,907
  • 13
  • 134
  • 226