JavaScript algo: decompress a compressed string

Question

I wanted to write a function that takes a compressed string and outs the decompressed string.

A compressed string like a2b2c3 and the decompress string is aabbccc More examples would be

`a` -> `a`
`ab12` -> `abbbbbbbbbbbb`
`a3b2a2` -> `aaabbaa

I tried to implement it but it is really messy and buggy for compressed strings like ab12

function isNumeric(num) {
    if (num === '') return false
    if (num === null) return false
    return !isNaN(num)
  }
  

function decompress(compressedStr) {
    const array = compressedStr.split('')
    let prevChar, str = ''
    for(let i = 0; i < array.length; i++) {
        if(i === 0) {prevChar = array[i]}
        if(isNumeric(array[i])) {
            str += prevChar.repeat(Number(array[i]))
            prevChar = null
        } else {
            if(!prevChar) prevChar = array[i] 
            else {
                str += prevChar
                prevChar = array[i] 
            }
        }
    }

    return str
}

Now it works for a3b2a2 but it is buggy for cases like ab12 . Need help to rewrite this function to make it work.

What is `isNumeric`? Notice you'll need to scan more than one letter ahead for the whole number — Bergi, Oct 12 '21 at 20:51
@VLAZ the specific problem is that the function is buggy for inputs like `ab12` — Joji, Oct 12 '21 at 22:56
@Bergi oops I forgot to add that in the snippet. Fixed it. I am not good at regex though.. — Joji, Oct 12 '21 at 22:57
Here's a functional abomination that raises more questions than it answers: `str.split('').map(c=>(d=c-0,isNaN(d)?(a=c,b=1,e=0,f=c,a):(e=e*10+d,a=f.repeat(e-b),b=e,a))).join('')` — Wyck, Oct 21 '21 at 21:49

score 9 · Answer 1 · answered Oct 12 '21 at 20:53

9

You can use String#replace while capturing both the character and the number of times it is to be repeated.

function decompress(str) {
  return str.replace(/(\D)(\d+)/g, (_, g1, g2) => g1.repeat(g2));
}
console.log(decompress('a'));
console.log(decompress('ab12'));
console.log(decompress('a3b2a2'));

answered Oct 12 '21 at 20:53

Unmitigated

76,500
11
62
80

1

oh thanks. I am not good at Regex - how should I interpret it? also can I ask what is the time complexity for this solution? – Joji Oct 12 '21 at 22:58
@Joji Performance should not really be an issue unless you are dealing with extremely long strings. This just replaces all characters followed by digits with that character repeated that number of times. – Unmitigated Oct 12 '21 at 23:03
1

Hi it does. thanks for the answer. But I am also looking for a non regex solution... – Joji Oct 15 '21 at 18:16
@Joji I have added an alternative solution with a simple loop. – Unmitigated Oct 15 '21 at 21:10
fails on (malformed) strings that start with a digit `'1a'` or only digits`'12'` – pilchard Oct 21 '21 at 23:43
1

@pilchard What do you mean by it fails? For each of those strings, the return value is the original string. What do you think the result should be? It is never stated in the question. – Unmitigated Oct 21 '21 at 23:50
True, the question doesn't address how/if they would ever be handled, but it seems strange that those would be the only returned values with digits in them. – pilchard Oct 21 '21 at 23:53

x00 · Answer 2 · 2021-10-16T06:32:25.880

const is_digit = (ch) => '0' <= ch && ch <= '9'

function decompress(str) {
  if( str.length === 0 ) return ''

  if( is_digit(str[0]) ) return 'invalid input'

  const output = []
  let i = 0
  while(i !== str.length) {

    // collect non-digits into the `output`, stop at the end or at a digit
    while( is_digit(str[i]) === false ) {
      if( i === str.length ) return output.join('')
      output.push(str[i])
      ++i
    }

    if( i === str.length ) break

    // remember the `ch` with a span
    let ch = str[i-1]

    // parse the span
    let span = 0
    while( is_digit(str[i]) ) {
      if( i === str.length ) break
      span = span * 10 + Number(str[i])
      ++i
    }

    if( span === 0 ) {
      // don't forget about edge cases
      output.pop()
    } else {
      // span-1 because the first `ch` is already in the `output`
      for( let j = 0 ; j !== span-1 ; ++j ) {
        output.push(ch)
      }
    }
  }

  return output.join('')
}

// tests
[
  '',
  '12',   // edge case
  'a',
  'ab',
  'a0',   // another edge case
  'ab0',
  'a0b0',
  'a0b',
  'a1',   // yet another
  'a01',  // and another
  'ab1',
  'a1b1',
  'ab01',
  'ab12', // your favorite
  'a12b',
  'a3b2a2',
].forEach((test_case) => console.log('res:', decompress(test_case)))

This is not necessary an optimal solution in JS (there're a lot of magic in V8), so it needs to be tested. But it doesn't looks like the OP is writing a production code, but rather an exercise. So the main points here are

the code always goes forward and creates only the output, so it has an O(N) complexity in speed and memory
it doesn't use string concatenations (because under the hood... well you can not be sure, but you can assume that V8 creates new strings with each concatenation, and it can ruin the complexity)
it doesn't use Number.parseInt or something similar - because, again, that would require new strings to be created

Scott Sauyet · Answer 3 · 2021-10-20T19:55:41.360

I think the regex answer from user Unmitigated is your best bet. But if you want a non-regex solution, we can do this by iterating through the string one character at a time, updating the resulting string (r), the currently processing character (c) and the decimal digits (d) each time. reduce is good for this, with a {c, d, r} accumulator, and an individual character s input to the callback. It would look like this:

const decompress = ([...ss], {c, d, r} = 
  ss .reduce (
    ({c, d, r}, s) => '0' <= s && s <= '9'
      ? {c, d: d + s, r}
      : {c: s, d: '', r: r + c .repeat (+d || 1)},
    {c: '', d: '', r: ''}
  )
) =>  r + c .repeat (+d || 1);

['', 'a', 'ab12', 'a3b2a2', 'a3b2a2d', '1ab2c', 'ab3cd13ac'] .forEach (
  s => console .log (`"${s}": "${decompress (s)}"`)
)

Note that at the end of the reduce, we need to do a final calculation based on r, c, and d. There are alternatives to this, but I think they're all uglier.

The step-by-step values should make it clear, I hope:

decompress ('ab3cd13ac')
acc = {c: "",  d: "",   r: ""},                   s = "a"
acc = {c: "a", d: "",   r: ""},                   s = "b"
acc = {c: "b", d: "",   r: "a"},                  s = "3" 
acc = {c: "b", d: "3",  r: "a"},                  s = "c" 
acc = {c: "c", d: "",   r: "abbb"},               s = "d"
acc = {c: "d", d: "",   r: "abbbc"},              s = "1"
acc = {c: "d", d: "1",  r: "abbbc"},              s = "3" 
acc = {c: "d", d: "13", r: "abbbc"},              s = "a"
acc = {c: "a", d: "",   r: "abbbcddddddddddddd"}, s = "c"
acc = {c: "c", d: "",   r: "abbbcddddddddddddda"} ==> "abbbcdddddddddddddac"

The two uses of c .repeat (+d || 1) add a number of copies of the current character. If we have digits, we convert them to a number and then convert 0s to 1s. This means that we don't support plain 0s in the compressed string. That seems reasonable, but if you want to do so, you can simply replace both occurrences with c .repeat (d.length ? +d : 1).

Update

My note that there are solutions without that final calculation said that they are all uglier. I've thought a bit more, and this is not bad at all:

const decompress = ([...ss]) => ss .reduce (
  ({c, d, r}, s) => '0' <= s && s <= '9' 
    ? {c, d: d + s, r: r + c .repeat (+(d + s) - (d.length ? +d : 1))}
    : {c: s, d: '', r: r + s},
  {c: '', d: '', r: ''}
) .r;

['', 'a', 'ab12ef', 'a3b2a2', 'a3b2a2d', '1ab2c', 'ab3cd13ac'] .forEach (
  s => console .log (`"${s}": "${decompress (s)}"`)
)

Again, this solution does not support leading zeros in the counts.

The steps look like this:

decompress ('ab3cd13ac')
acc = {c: "",  d: "",   r: ""},                    s = "a"
acc = {c: "a", d: "",   r: "a"},                   s = "b"
acc = {c: "b", d: "",   r: "ab"},                  s = "3"
acc = {c: "b", d: "3",  r: "abbb"},                s = "c"
acc = {c: "c", d: "",   r: "abbbc"},               s = "d"
acc = {c: "d", d: "",   r: "abbbcd"},              s = "1"
acc = {c: "d", d: "1",  r: "abbbcd"},              s = "3"
acc = {c: "d", d: "13", r: "abbbcddddddddddddd"},  s = "a"
acc = {c: "a", d: "",   r: "abbbcddddddddddddda"}, s = "c"
acc = {c: "c", d: "",   r: "abbbcdddddddddddddac"} ==> take `r`

The tricky step is here:

acc = {c: "d", d: "1",  r: "abbbcd"},              s = "3"

We have a digit, '3', and so we look to the existing digits, now '1', convert both '13' and '1' to numbers, subtract them, giving 12 and so add twelve more 'd' characters.

If the next character after that had been '7', then we would do it again, subtracting 13 from 137 to get 124 and adding 124 more 'd' characters. In this manner, the r result always holds the appropriate result for the current string prefix, which is a nice feature. We could get leading zeroes to work by clamping the subtraction to always be at least 0, but there seems little need. If later decide we want that, it's easy enough to do.

If you really don't like the short variable names, we could also write

const decompress = ([...characters]) => characters .reduce (
  ({current, digits, result}, character) => '0' <= character && character <= '9' 
    ? {current, digits: digits + character, result: result + current .repeat (+(digits + character) - (digits.length ? +digits : 1))}
    : {current: character, digits: '', result: result + character},
  {current: '', digits: '', result: ''}
) .result;

but I find that a bit of a mouthful.

pilchard · Answer 4 · 2021-10-23T12:27:29.083

Iterating the string in reverse allows for simplification in the logic as one simply stores the number until a non-numeric is encountered.

Here accumulating the count as a number which in minor testing proved slightly faster than using a string but at the cost of slightly more convoluted code.

function decompress(str) {
  let num = 0, place = 0, res = '';
  for (let i = str.length - 1; i >= 0; i--) {
    const ch = str[i];
    if (isNaN(ch) || ch === ' ') {
      res = ch.repeat(place ? num : 1) + res;
      num = place = 0;
    } else num += ch * 10 ** place++;
  }

  return res;
}

console.log(decompress('a'));       //a
console.log(decompress('a0b0x'));   //x
console.log(decompress('ab12'));    //abbbbbbbbbbbb
console.log(decompress('a3b2a2'));  //aaabbaa
console.log(decompress('0a 3b2 a2')); //a   bb aa
console.log(decompress('a69546803').length);

Alternatively accumulating digits in a string to be coerced by repeat(), slightly slower in testing but concise code.

function decompress(str) {
  let num = '', res = '';
  for (let i = str.length - 1; i >= 0; i--) {
    const ch = str[i];
    if (isNaN(ch) || ch === ' ') {
      res = ch.repeat(num || 1) + res;
      num = '';
    } else num = ch + num;
  }

  return res;
}

console.log(decompress('a'));       //a
console.log(decompress('a0b0x'));   //x
console.log(decompress('ab12'));    //abbbbbbbbbbbb
console.log(decompress('a3b2a2'));  //aaabbaa
console.log(decompress('0a 3b2 a2')); //a   bb aa
console.log(decompress('a69546803').length);

You could also implement this as a generator function

const is_digit = (ch) => (typeof ch === 'string' && '0'.charCodeAt() <= ch.charCodeAt() && ch.charCodeAt() <= '9'.charCodeAt());

function* decompress(str) {
  let num = 0;
  for (let i = 0; i < str.length; i++) {
    const ch = str[i];
    if (!is_digit(ch)) {

      if (is_digit(str[i + 1])) {
        while (is_digit(str[i + 1])) {
          num = num * 10 + Number(str[++i]);
        }
      } else num = 1;

      for (; num > 0; num--) {
        yield ch;
      }
    }
  }
}

let str = '';
for (const char of decompress('a3b2a2')) {
  str += char;
}
console.log(str); // aaabbaa

console.log([...decompress('a')].join(''));         // a
console.log([...decompress('a0b0x')].join(''));     // x
console.log([...decompress('ab12')].join(''));      // abbbbbbbbbbbb
console.log([...decompress('a3b2a2')].join(''));    // aaabbaa
console.log([...decompress('0a 3b2 a2')].join('')); // a   bb aa

score 1 · Answer 5 · answered Oct 15 '21 at 21:07

Without regular expressions, you can directly loop through the characters until a non-digit character is reached, then consume all the digit characters after it.

function decompress(str) {
  let prevCh = '', num = '', res = '';
  function append() {
    if (prevCh)
      if (num) res += prevCh.repeat(num), num = '';
      else res += prevCh;
  }
  for (const ch of str) {
    if (isNaN(ch)) {
      append();
      prevCh = ch;
    } else num += ch;
  }
  append();
  return res;
}
console.log(decompress('a'));
console.log(decompress('ab12'));
console.log(decompress('a3b2a2'));

The best. But you forget about malformed strings like "a69546803" — 11_22_33, Oct 21 '21 at 20:04

J_K · Answer 6 · 2021-10-20T08:41:20.613

Not sure how to fix your solution, but here is one approach

let str = 'ab12'.split('')

function decompress(s) {
  for (let i = 1; i < s.length; i++) {
    if (!isNaN(s[i]) && !isNaN(s[i - 1])) {
      s[i - 1] = s[i - 1] + s[i]
      s.splice(i, 1)
    }
  }

  for (let i = 1; i < s.length; i++) {
    if (!isNaN(s[i])) {
      s[i - 1] = s[i - 1].repeat(Number(s[i]))
      s.splice(i, 1)
    }
  }
  return s.join('')
}


console.log(decompress(str))

JavaScript algo: decompress a compressed string

6 Answers6

Update