My goal is to read a text file, split it into words, and then print each distinct word in alphabetical order together with its frequency.
Example:
Input: The house is on the ground on earth.
Output:
earth - 1
ground - 1
house - 1
is - 1
on - 2
the - 2
I have been able to open the file, read it line by line, tokenize each word, and convert the tokens to lowercase. I am stuck on grouping and alphabetizing the tokens.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void lower_string(char s[]);
/* Reads test.txt line by line, lower-cases each line, splits it into words
   on the delimiter set below, prints every word, and finally prints the
   total number of words seen. Exits with EXIT_FAILURE if the file cannot
   be opened. */
int main()
{
    FILE *file;
    /* Line buffer. A line longer than 99 characters is returned by fgets in
       several pieces, so a word straddling a piece boundary is split in two. */
    char ch[100];
    const char delim[] = " ,.;!?[\n]";
    char *token;
    /* Must start at zero: it is only ever incremented below, and reading an
       uninitialised automatic variable is undefined behaviour. */
    int tokenNum = 0;

    /* Path is hard-coded for testing so it does not have to be typed on
       every run. */
    file = fopen("test.txt", "r");

    /* Check if file opened successfully */
    if (file == NULL)
    {
        printf("\nUnable to open file.\n");
        printf("Please check if file exists and you have read privilege.\n");
        exit(EXIT_FAILURE);
    }

    while (fgets(ch, sizeof(ch), file) != NULL)
    {
        lower_string(ch);

        /* strtok (declared in <string.h>) overwrites delimiters in ch with
           '\0' and keeps its position between calls. */
        token = strtok(ch, delim);
        while (token != NULL)
        {
            printf("Token:%s\n", token);
            tokenNum++;
            token = strtok(NULL, delim);
        }
    }

    printf("%d\n", tokenNum); //total words testing

    /* Close files to release resources */
    fclose(file);
    return 0;
}
/* Converts every character of the NUL-terminated string s that lies in the
   range 'A'..'Z' to lower case, in place. All other characters are left
   untouched. */
void lower_string(char s[])
{
    const int case_gap = 32; /* distance from 'A' to 'a' in ASCII */
    char *p;

    for (p = s; *p != '\0'; ++p)
    {
        if (*p < 'A' || *p > 'Z')
        {
            continue; /* not an upper-case letter */
        }
        *p = *p + case_gap;
    }
}
I have been looking into building and manipulating an ordered linked list of integers and a binary search tree of integers. I'm having a hard time figuring out where I should begin to implement these features. So far I have been looking at the code below for an ordered linked list.
#include <stdio.h>
#include <stdlib.h>
//These structures are declared at file scope so they are available to all functions
//in the program.
//list_node_t: one node of the ordered, singly linked list.
typedef struct list_node_s
{ //defines structure of one node
int key; //key value - here an integer; the list is kept in ascending key order
int count; //number of times this key value was encountered in the input
struct list_node_s *restp; //pointer to the next node in the list; NULL at end of list (EOL)
} list_node_t;
typedef struct //defines the head (header record) of the list structure
{
list_node_t *headp; //pointer to the first node in the list; NULL if the list is empty
int size; //current number of nodes in the list
} ordered_list_t;
//Prototypes for the ordered-list operations defined later in this program
list_node_t * insert_in_order (list_node_t * old_listp, int new_key);
void insert (ordered_list_t * listp, int key);
int delete (ordered_list_t * listp, int target);
list_node_t * delete_ordered_node (list_node_t * listp, int target,int *is_deleted);
void print_list (ordered_list_t * listp);
#define SEND -999 //end-of-input sentinel: typing this value ends an input loop
/* Interactive driver: builds an ordered list from integers read on stdin,
   prints it, then processes deletion requests. Each input phase ends when
   the SEND sentinel is entered, or when scanf can no longer read an integer
   (end of input or non-numeric text). */
int main (void)
{
    int next_key;
    ordered_list_t my_list = {NULL, 0};

    printf("\n\nProgram to build, display and manipulate (delete) an Ordered Linked List \n");
    printf("\nAdapted from code in \"Problem Solving and Programming in C\" by J.R. Hanly and E.B. Koffman\n\n");
    printf ("enter integer keys - end list with %d\n", SEND);

    /* build list by in-order insertions.
       scanf must report exactly one converted item; on EOF or bad input it
       leaves next_key untouched, so without this check the loop would never
       terminate (or would read an uninitialised value on the first pass). */
    while (scanf ("%d", &next_key) == 1 && next_key != SEND)
    {
        insert (&my_list, next_key);
    }

    /* Display completed list */
    printf ("\nOrdered list as built:\n");
    print_list(&my_list);

    /* Process requested deletions */
    printf("enter key value for node to be removed from list or %d to end > ", SEND);
    while (scanf ("%d", &next_key) == 1 && next_key != SEND)
    {
        if (delete (&my_list, next_key))
        {
            printf ("%d deleted.\n New list:\n", next_key);
            print_list (&my_list);
        }
        else
        {
            printf ("No deletion. %d not found\n", next_key);
        }
        printf ("enter key value for node to be removed from list or %d to end > ", SEND);
    }
    return (0);
}
/* Prints the contents of a linked list: walks from the head node of *listp
   to the end, writing one "key = ...; count = ..." line per node, and then
   writes two newline characters. */
void print_list (ordered_list_t * listp)
{
    list_node_t *nodep = listp->headp;

    while (nodep != NULL)
    {
        printf ("key = %d; count = %d\n", nodep->key, nodep->count);
        nodep = nodep->restp;
    }
    printf ("\n\n");
}
//Inserts new_key into the ascending list that starts at old_listp and returns
//a pointer to the first node of the resulting list.
//If a node with new_key already exists, its count is incremented and no node
//is added; otherwise a new node with count 1 is linked in at the position
//that keeps the list sorted.
//If memory for a new node cannot be obtained, an error is printed to stderr
//and the program exits with EXIT_FAILURE.
list_node_t * insert_in_order (list_node_t * old_listp, int new_key)
{
    //linkp always points at the pointer that leads to the node under
    //inspection (first the local head pointer, then some node's restp), so a
    //new node can be spliced in without special-casing the head.
    list_node_t **linkp = &old_listp;

    //skip every node whose key is smaller than the new key
    while (*linkp != NULL && (*linkp)->key < new_key)
    {
        linkp = &(*linkp)->restp;
    }

    if (*linkp != NULL && (*linkp)->key == new_key)
    {
        //matching key: just count the additional occurrence
        (*linkp)->count++;
    }
    else
    {
        //end of list reached, or next node has a larger key: insert here
        list_node_t *nodep = malloc (sizeof *nodep);
        if (nodep == NULL)
        {
            fprintf (stderr, "insert_in_order: out of memory\n");
            exit (EXIT_FAILURE);
        }
        nodep->key = new_key;
        nodep->count = 1;
        nodep->restp = *linkp;
        *linkp = nodep;
    }
    return (old_listp);
}
//Inserts key into the ordered list *listp and keeps listp->size equal to the
//number of nodes in the list.
//A key that is already present only has its count raised by insert_in_order
//(no node is added), so size is increased only when the key is new.
void insert (ordered_list_t * listp, int key)
{
    const list_node_t *nodep = listp->headp;

    //the list is in ascending key order, so the search can stop at the first
    //node whose key is not smaller than the one being inserted
    while (nodep != NULL && nodep->key < key)
    {
        nodep = nodep->restp;
    }
    if (nodep == NULL || nodep->key != key)
    {
        ++(listp->size);
    }
    listp->headp = insert_in_order (listp->headp, key);
}
//Removes the first node holding the target key from the ordered list *listp.
//Returns 1 when such a node was found and removed, 0 when the target is not
//in the list. The list's size field is reduced by one on a successful
//removal so that it stays current.
int delete (ordered_list_t * listp, int target)
{
    int removed;
    list_node_t *new_headp;

    new_headp = delete_ordered_node (listp->headp, target, &removed);
    listp->headp = new_headp;

    if (removed != 0)
    {
        listp->size -= 1;
    }
    return (removed);
}
/* Deletes the first node containing the target key from the list whose head
   is listp and returns a pointer to the head of the modified list (which
   differs from listp only when the head node itself was removed).
   The memory of the removed node is freed. *is_deleted is set to 1 on
   success and to 0 on failure (no node with that key was found).
   A trace line is printed for every node that is examined.
*/
list_node_t * delete_ordered_node (list_node_t * listp, int target, int *is_deleted)
{
    /* linkp points at the pointer leading to the node being examined: first
       the local head pointer, afterwards the restp field of the previous node */
    list_node_t **linkp = &listp;

    for (;;)
    {
        list_node_t *currentp = *linkp;

        printf ("check for empty list; target: %d \n", target);
        if (currentp == NULL)
        {
            /* ran off the end (or list was empty): nothing to delete */
            *is_deleted = 0;
            break;
        }
        if (currentp->key == target)
        {
            /* found it: unlink the node, then release its memory */
            printf ("at first node; target: %d \n", target);
            *is_deleted = 1;
            *linkp = currentp->restp;
            free (currentp);
            break;
        }
        /* not this one: move on to the rest of the list */
        printf ("chase down list to find: %d \n", target);
        linkp = &currentp->restp;
    }
    return (listp);
}
I'm finding it hard to combine that list code with the tokens produced by strtok.
12/4 EDIT: added node definitions for a BST. Questions:
- I don't know whether the key needs to be tracked (I assume it will be useful for looking up specific words).
- Where/how would I add the logic to alphabetize the tree? (Study sources appreciated.)
- How do I pass each word through this tree?
#define WLENGTH 100 /* capacity of node.word, including the terminating '\0' */

//Base Node info: one node of the binary search tree
struct node
{
    char word[WLENGTH];        /* NUL-terminated word text */
    int key;                   /* integer key the tree is ordered by */
    int freq;                  /* number of times this key was inserted */
    struct node *left, *right; /* subtrees holding smaller / larger keys */
};

//Function to create a new node.
//wordn is copied into the node (truncated to WLENGTH - 1 characters if it is
//longer; NULL is stored as the empty string), item becomes the key and freqn
//the initial frequency. Both child pointers start out NULL.
//Returns the new node, or NULL if memory could not be allocated. The caller
//owns the node and releases it with free().
struct node *newNode(const char *wordn, int item, int freqn)
{
    struct node *temp = malloc(sizeof *temp);
    if (temp == NULL)
    {
        return NULL;
    }
    /* An array cannot be assigned to, so the text is copied; snprintf always
       NUL-terminates and never writes past the end of temp->word. */
    snprintf(temp->word, sizeof temp->word, "%s", wordn != NULL ? wordn : "");
    temp->key = item;
    temp->freq = freqn;
    temp->left = temp->right = NULL;
    return temp;
}

//Function to print the keys of the tree in ascending order
//(in-order traversal: left subtree, node, right subtree)
void inorder(struct node *root)
{
    if (root != NULL)
    {
        inorder(root->left);
        printf("%d ", root->key);
        inorder(root->right);
    }
}

/* Function to insert a key into the tree rooted at node.
   A key that is not yet present gets a new node with an empty word and a
   frequency of 1; a key that is already present has its frequency
   incremented. Returns the root of the (sub)tree, which is the new node when
   the tree was empty. If allocation fails the tree is left unchanged.
   NOTE(review): the ordering here is by the integer key; to keep the tree in
   alphabetical order the comparisons would have to be made on the word text
   (e.g. with strcmp) instead. */
struct node* insert(struct node* node, int key)
{
    /* If the tree is empty, return a new node */
    if (node == NULL)
    {
        return newNode("", key, 1);
    }

    /* Otherwise, recur down the tree */
    if (key < node->key)
    {
        node->left = insert(node->left, key);
    }
    else if (key > node->key)
    {
        node->right = insert(node->right, key);
    }
    else
    {
        /* same key seen again: count it instead of adding a node */
        node->freq++;
    }

    /* return the (unchanged) node pointer */
    return node;
}