1

ANSI c on OSX 10.13.6
Apple LLVM version 9.1.0 (clang-902.0.39.2) Target: x86_64-apple-darwin17.7.0 Thread model: posix

I'm learning c

This is a function that manually (character-by-character) adds two character strings representing large numbers (that exceed the unsigned long long or double size).

It functions fine with any two strings 14 or less characters long, but segmentation fault 11 with any strings greater than 14 chars.

Changing the string's memory allocation method seems to have no effect (I.e. from
char[15] addend1; // not a ptr
to
char *addend1 = (char *) malloc(sizeof(char) * (16) ); // pointer

One things that's curious, is that it seems to segfault on the ...
for (int j = maxlength - 1 ; j >= 0; j--)
... prior to accessing either of addend1 or addend2, but I'm not able to find an error there or change it to prevent the segfault.

Am I misreading where the error arises, or could it be related to the for loop?

Successful run (less than 15 chars)

maxlength = 14
char *sum = (char *) malloc(sizeof(char) * (maxlength + 1) ) ... DONE
for (int i = 0; i < (maxlength); i++) { sum[i] = '0'; } ... DONE
Start adding individual ints from end (right side) ...
13 ...12 ...11 ...10 ...9 ...8 ...7 ...6 ...5 ...4 ...3 ...2 ...1 ...0 ...main.sum = 28147497671064

UNSuccessful run (15 chars)

maxlength = 15
char *sum = (char *) malloc(sizeof(char) * (maxlength + 1) ) ... DONE
for (int i = 0; i < (maxlength); i++) { sum[i] = '0'; } ... DONE
Start adding individual ints from end (right side) ...
Segmentation fault: 11

MAIN.c

#include <stdio.h>
#include <stdlib.h>
#include "../../c-library/include/addViaStrings.h"

int main(void) {
    //  s[0] = 72; s[1] = 101; s[2] = 108; s[3] = 108; s[4] = 111; s[5] = 32; s[6] = 87; s[7] = 111; s[8] = 114; s[9] = 108; s[10] = 100; s[11] = 0;

    // WORKS
    // char s1[] = "14073748835532";
    // char s2[] = "14073748835532";

    // FAILS
    char s1[] = "140737488355328";
    char s2[] = "140737488355328";

    char *sum = addNumericStrings(&s1, &s2);
    printf("main.sum = %s\n", sum);
}

addViaStrings.h

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

char* addNumericStrings(char *s1, char *s2);
char leftPad(char *result, char *s, int maxlength);
int  findMaxLength(char *s1, char *s2);

char* addNumericStrings(char *s1, char *s2){
    // Find the length of the greater of the two
    int maxlength = findMaxLength(s1, s2);
    printf("maxlength = %d\n", maxlength); //333

    ///////////////////////////////////////////////
    // Using malloc instead of char[maxlength] seems to have NO EFFECT on the issue
    // char addend1[maxlength]; // not a pointer
    char *addend1 = (char *) malloc(sizeof(char) * (maxlength + 1) );
    addend1[maxlength + 1] = 0; // end flag

    // char addend2[maxlength]; // not a pointer
    char *addend2 = (char *) malloc(sizeof(char) * (maxlength + 1) );
    addend2[maxlength + 1] = 0; // end flag

    // Allocate sum pointer
    printf("char *sum = (char *) malloc(sizeof(char) * (maxlength + 1) ) ... "); //333
    char *sum = (char *) malloc(sizeof(char) * (maxlength + 1) );
    printf("DONE\n"); //333

    // General use vars
    int a1, a2, total;
    int carry = 0;

    // Prepare the strings for manual addition. Pad the left with char 0s
    leftPad(addend1, s1, maxlength);
    leftPad(addend2, s2, maxlength);

    // Buffer sum with zeros
    sum[maxlength + 1] = 0; // end flag
    printf("for (int i = 0; i < (maxlength); i++) { sum[i] = '0'; } ... "); //333
    for (int i = 0; i < (maxlength); i++) { sum[i] = '0'; } // Fill w/ 0s
    printf("DONE\n"); //333

    // Run the manual addition
    // Start adding individual ints from end (right side)
    printf("Start adding individual ints from end (right side) ...\n"); //333

    // maxlength -1 because(I think) the termination char takes 2 bytes
    // If I use (maxlength) instead of (maxlength -1) I get a weird 
    //  question mark char at the end of returnsum
    for (int j = maxlength - 1 ; j >= 0; j--) {

        ///////////////////////////////////////////
        // The segfault seems to happen BEFORE accessing addend1 or addend2
        printf("%d ...", j); // 333 This DOES NOT print
        ///////////////////////////////////////////
        a1 = addend1[j] - '0'; // Convert to int
        a2 = addend2[j] - '0'; // Convert to int
        total = (a1 + a2 + carry);
        carry = 0;
        if ( total >= 10){
            carry += 1;
            total -= 10;
        }
        sum[j + 1] = '0'+total; // convert to ascii value for numbers (adding 48)
    }
    sum[0] = '0' + carry; // add last carry to start of num always, even if 0
    // Before returning, truncate leading zeros
    char *returnsum = (char *) malloc(sizeof(char) * (strlen(sum) + 1) );
    int sum_i = 0;
    int returnsm_i = 0;
//    bool truncate = true; // Find out why this wont compile
    int truncate = 1; // true
    while (1){
        // if order is important here
        if (sum[sum_i] == '\0') { break; } // we're done
        if (sum[sum_i] == '0' && truncate == 1) { sum_i += 1; continue; } // 1 is true
        // if a num, Stop truncating 0s but DO continue adding numbers
        if (sum[sum_i] != '0') { truncate = 0; } // 0 is false
        returnsum[returnsm_i] = sum[sum_i];
        returnsm_i += 1;
        sum_i += 1;
    }
    return returnsum;
}

char leftPad(char *result, char *s, int maxlength){
    int slength = strlen(s);
    // buffer with zeros, not '\0's
    for (int i = (maxlength); i >= 0; i--){ result[i] = '0'; }
    // right fill result with s
    for (int j = 0; j <= slength; j++){
        int index = ((maxlength - slength) + j);
        result[index] = s[j];
    }
    result[maxlength + 1] = 0;

}

int findMaxLength(char *s1, char *s2){
//  int length1 = findEndLength(s1);
//  int length2 = findEndLength(s2);
    int length1 = strlen(s1);
    int length2 = strlen(s2);
    int maxlength;
    (length1 > length2) ? (maxlength = length1) : (maxlength = length2);
    return maxlength;
}
Thomas Dickey
  • 51,086
  • 7
  • 70
  • 105
RightmireM
  • 2,381
  • 2
  • 24
  • 42
  • What does `findMaxLength` do? Or `leftPad`? A [mcve] would be very useful here – Sami Kuhmonen Nov 03 '18 at 13:12
  • @SamiKuhmonen Thanks for the comment. I've added them to the OP. – RightmireM Nov 03 '18 at 13:28
  • 3
    `sum[maxlength + 1] = 0;` is wrong: although you alocate `maxlength + 1` characters, the last valid index is just `maxlength`. (Indices in C go from `0..n-1`, remember?) – Paul Ogilvie Nov 03 '18 at 13:28
  • @PaulOgilvie Thanks for the comment. I chose `maxlength + 1` because the actual number of digits in the string are 15 (maxlength) and then I use the 16th index for the termination char. However, if I change it to `sum[maxlength] = 0;`, I get the same `segfault 11` - but now it starts erroring out at adding two 14 char long strings instead of two 15 char long strings. Adding two strings between 1 and 13 chars still works however. – RightmireM Nov 03 '18 at 13:37
  • 3
    Guess you should use a debugger to step through the program and see what happens. I never see you terminate the strings with `'\0'`. Does your `leftPad` work? – Paul Ogilvie Nov 03 '18 at 13:57
  • @PaulOgilvie Good point. I updated the OP to add `addend1[maxlength + 1] = 0; // end flag` for addend1 and 2. Originally, I had used a `char addend1[]` so I hadn't needed to manually add a terminator (**Is that correct?**) Unfortunately, the change doesn't seem to have affected the results. The `leftpad` does seem to work, but I'll double-check it. – RightmireM Nov 03 '18 at 14:04
  • 2
    Again: indices in C go from `0..n-1`. So if you allocate `maxlength+1` characters, then _the last index is_ `(maxlenght+1)-1`, i.e. `maxlength`. `addend1[maxlength+1`] __does not exist__. – Paul Ogilvie Nov 03 '18 at 14:09
  • @PaulOgilvie Good point. Although, since I'm declaring using `malloc (maxlength + 1)` (**NOT** `char[maxlength]`) then doesn't that mean that I have "maxlength" for actual characters and use index (maxlength + 1) for the terminator? BTW Should we move this to chat? – RightmireM Nov 03 '18 at 14:15
  • 1
    You are correct that if you declare a `char` array without explicit length and provide a string literal initializer, then you will automatically get the initializer's terminator in your array, along with the rest of its contents. – John Bollinger Nov 03 '18 at 14:16
  • 2
    @RightmireM, the range of valid indexes does not depend on whether the array is allocated statically, automatically, or dynamically. If the length of the array is `arraylength` elements, then the valid indexes are `0`, `1`, ... `arraylength - 1`. – John Bollinger Nov 03 '18 at 14:19
  • @PaulOgilvie P.S. I added an explicit terminator to `leftPad()` in the form of `result[maxlength + 1] = 0;` ... but it doesn't change the behavior of the program (or the segfault) at all. – RightmireM Nov 03 '18 at 14:20
  • 2
    Therefore, whether you use `char[maxlength + 1]` or `malloc(maxlength + 1)`, the largest index you may use is `maxlength` (***not*** `maxlength + 1`). – John Bollinger Nov 03 '18 at 14:22
  • 2
    Dear Mr. Rightmire. We seem not able to get it into your head that the _size_ of an array is one more than the index of its last element and that a string requires _one more_ character for its terminating null character. So `"hello"` has 6 characters, `'h' 'e', 'l', 'l', 'o', '\0'`. `'h'` occupies position `0` and the `'\0'` occupies position `5`. – Paul Ogilvie Nov 03 '18 at 14:38
  • 1
    ... and that, therefore, the index of the terminator of `"hello"` is ***5***, not ***6***. That is, the index of a string's terminator is the same as that string's length. All of this follows from counting from 0 instead of from 1. – John Bollinger Nov 03 '18 at 14:49
  • @PaulOgilvie Hi Paul. No. I totally understand that :) It's the same in Python (my background). "H" in your example is `var[0]`, "e" is `var[1]`, etc. Nonetheless, the solution was, in fact, in the malloc of `sum`. It needed to be `maxlength + 2` ... so I guess, that was exactly your point. If you want to make that as an answer instead of comment, then I'll accept it for credit. Thanks! – RightmireM Nov 03 '18 at 14:52
  • For clarification, yes actually all the `maxlength + 1`s needed to be just `maxlength` and the `sum` variable needed to be `maxlength + 1` (one digit longer than the addends in case they carried a final 1). It was the failure of sum's allocation that caused the segfault. Thanks again for the help! – RightmireM Nov 03 '18 at 15:06
  • 1
    I am afraid you fixed a symptom, but not the cause. – Paul Ogilvie Nov 03 '18 at 15:09
  • 2
    First step: remove all dynamic allocation and use fixed sized buffers. Get the addition working. Next step: change to dynamic allocation. one by one. – wildplasser Nov 03 '18 at 16:18
  • @PaulOgilvie No. I believe the cause has also been resolved. The `addend`s have all had memory declared at `maxsize`, the sum (needing to be one character larger than the addends) have been declared at `maxsize + 1`. The addends' terminators (`\0`) are at `addends[maxsize - 1]`and the sum's terminator is at `sum[maxsize]`. What is still incorrect? – RightmireM Nov 03 '18 at 16:52

1 Answers1

0

The issue was I was trying to access the sum string as if it was one position longer than the addends strings, but I had declared it as the same length (I.e. maxlength + 1). So I was attempting to access one position past the actual sum array.

This was a somewhat hidden problem, because it was not until the length of sum needed to be greater than 15, that this access error stepped into disallowed memory space, resulting in a segfault.

Details

Because the sum of two addends could conceivably require at least one additional position in the array if the sums carried over a one (I.e. 999 + 999 = 1998), the sum string needs to be one array position longer than the addends.

If the addends were 3 digits long (length of array = 4) then the sum needed to be 4 digits long (array length = 5).

// Correct code if "maxlength" (number of actual digits in string) = 14
char *addend1 = (char *) malloc(sizeof(char) * (maxlength + 1) ); // +1 To include termination byte
char *addend2 = (char *) malloc(sizeof(char) * (maxlength + 1) ); // +1 To include termination byte
char *sum     = (char *) malloc(sizeof(char) * (maxlength + 2) ); // +2 To include termination byte, AND an extra char at the front

...so that the final actual digit character of sum is accessed via maxlength + 1

CORRECTED CODE

NOTE: Because calculating against maxlength as the number of digits (versus the length of the entire array including terminator) was confusing - as well as considered bad form, I have since learned - the following final code has been simplified to use more intuitive variables.

#include <stdio.h>
#include <stdlib.h>

char* addIntsAsStrings(char *s1, char *s2);

char* addIntsAsStrings(char *s1, char *s2){
    // Find the length of the greater of the two
    int length1 = strlen(s1);
    int length2 = strlen(s2);
    int addendDigits;
    (length1 > length2) ? (addendDigits = length1) : (addendDigits = length2);

    // We need separate strings of so they can be buffered with zeros
    // Create the string for the addends and buffer with zeros.
    char addend1[addendDigits + 1];
    char addend2[addendDigits + 1];
    for (int i = 0; i < (addendDigits) ; i++){ // "<" not "<="
        addend1[i] = '0'; // buffer w/ 0s
        addend2[i] = '0'; // buffer w/ 0s
    } // buffer w/ 0s
    addend1[addendDigits] = '\0'; // terminate

    // put s1 and s2 into buffered addends strings
    int s1_index = (strlen(s1) - 1);
    int s2_index = (strlen(s2) - 1);
    for (int i = (addendDigits - 1); i >= 0; i--){ //Start at back of addend
        if ( s1_index >= 0) { addend1[i] = s1[s1_index]; }
        if ( s2_index >= 0) { addend2[i] = s2[s2_index]; }
        s1_index -= 1;
        s2_index -= 1;
    }

    // Allocate sum pointer. The sum pointer needs to be ONE char
    //  longer than the addends, in the event that the addends need
    //  to carry a final one to the front. I.e. 999 + 999 = 1998
    int sumDigits = addendDigits + 1;
    char *sum = (char *) malloc(sizeof(char) * (sumDigits + 1) ); // +1 To include termination byte, AND an extra char at the front
    for (int i = 0; i < (sumDigits) ; i++){ // "<" not "<="
        sum[i] = '0'; // buffer w/ 0s
    }
    sum[sumDigits] = '\0';

    // Manual addition vars
    int a1, a2, total;
    int carry = 0;

    // Run the manual addition
    // Start adding individual ints from end (right side)
    for (int j = addendDigits - 1; j >= 0; j--) {
        a1 = addend1[j] - '0'; // Convert to int
        a2 = addend2[j] - '0'; // Convert to int
        total = (a1 + a2 + carry);
        carry = 0;
        if ( total >= 10){
            carry += 1;
            total -= 10;
        }
        // convert to ascii value for numbers (adding 48)
        sum[j + 1] = '0'+total; // sum[j + 1] because `sum`is always one index larger than the addends
    }
    sum[0] = '0' + carry; // add last carry to start of num always, even if 0

    // Before returning, truncate leading zeros
    char *returnsum = (char *) malloc(sizeof(char) * (strlen(sum) + 1) );
    int sum_i = 0;
    int returnsm_i = 0;
    int truncate = 1; // true
    while (1){
        // if order is important here
        if (sum[sum_i] == '\0') { break; } // we're done
        if (sum[sum_i] == '0' && truncate == 1) { sum_i += 1; continue; } // 1 is true
        // if a num, Stop truncating 0s but DO continue adding numbers
        if (sum[sum_i] != '0') { truncate = 0; } // 0 is false
        returnsum[returnsm_i] = sum[sum_i];
        returnsm_i += 1;
        sum_i += 1;
    }
    return returnsum;
}

Array Lengths

RightmireM
  • 2,381
  • 2
  • 24
  • 42