7

My application is storing multiple document types in the same bucket. I know it is not good practice but I have a limit on how many buckets I can create on my server and there is no way around it at the moment. The documents are prefixed with their types so when I get a document I just need to concatenate the prefix and the id to get the key and I can do a key lookup.

I need to create a report which gets information from more than one document type.

My map looks like this:

function(doc, meta) {

  var getStep = function(stepName, exit, mapper) {
    if (meta.id.indexOf(stepName) !== -1) {
      var hotelId = parseInt(meta.id.replace(stepName + '_', ''));
      if (hotelId > 0) {
        var result = {
          hotelId: hotelId,
          exit: exit
        };
        if (mapper !== undefined) {
          mapper(result);
        }
        return result;
      }
    }
    return null;
  };

  var photos = getStep('PHOTOS', 7);
  if (photos != null) {
    emit(photos.hotelId, photos);
  }
  var pricing = getStep('PICR', 5);
  if (pricing != null) {
    emit(pricing.hotelId, pricing);
  }
  var owner = getStep('OWNER', 1);
  if (owner != null) {
    emit(owner.hotelId, owner);
  }
  var amenity = getStep('AM', 4);
  if (amenity != null) {
    emit(amenity.hotelId, amenity);
  }
  var description = getStep('HDESC', 3, function(result) {
    result.description = doc.description;
    result.hotelRoomTypeId = doc.hotelRoomTypeId;
    result.starRating = doc.starRating;
  });
  if (description != null) {
    emit(description.hotelId, description);
  }
  var contact = getStep('DC', 3, function(result) {
    result.email = doc.emailAddress;
    result.contact = doc.mainContactName;
  });
  if (contact != null) {
    emit(contact.hotelId, contact);
  }
  var location = getStep('LOC', 2, function(result) {
    result.city = doc.cityName;
    result.zip = doc.postalCode;
    result.country = doc.countryName;
    result.street = doc.stateName + ', ' + doc.streetName;
  });
  if (location != null) {
    emit(location.hotelId, location);
  }
  var property = getStep('PRP', 1, function(result) {
    result.paymentMethodId = doc.paymentMethodId
  });
  if (property != null) {
    emit(property.hotelId, property);
  }
}

It generates this output:

"total_rows":...,"rows":[
{"id":"DC_1","key":1,"value":{"hotelId":1,"exit":3,"email":"test@example.com","contact":"Jeno"}},
{"id":"HDESC_1","key":1,"value":{"hotelId":1,"exit":3,"description":".","hotelRoomTypeId":0,"starRating":5}},
{"id":"LOC_1","key":1,"value":{"hotelId":1,"exit":2,"city":"Barcelona","zip":"1222","country":"Spain","street":"Catalonia, someplacenice"}},
{"id":"PRP_1","key":1,"value":{"hotelId":1,"exit":1}},
{"id":"PRP_2","key":2,"value":{"hotelId":2,"exit":1}},
{"id":"AM_3","key":3,"value":{"hotelId":3,"exit":4}},
{"id":"AM_4","key":4,"value":{"hotelId":4,"exit":4}},
{"id":"PHOTOS_4","key":4,"value":{"hotelId":4,"exit":7}},
{"id":"PRP_4","key":4,"value":{"hotelId":4,"exit":1}},
{"id":"AM_4","key":4,"value":{"hotelId":4,"exit":4}},
{"id":"PRP_4","key":4,"value":{"hotelId":4,"exit":1}},
{"id":"PHOTOS_5","key":5,"value":{"hotelId":5,"exit":7}}
...

]

I am trying to group the data by hotelId, which is the new key, and merge the fields into one document with a custom reducer. I am getting different errors depending on the return type, but all errors seem to indicate that there is a limit on how much data the reducer can return. If I change the return type from an object to an array used as an associative array, which works pretty much the same way, I get a better error.

function(key, values, rereduce) { 
  // Reduce function: on the first pass, folds the mapped rows into one
  // merged object per hotelId and returns them in a plain object keyed by
  // hotelId. On the rereduce pass it returns its input untouched.
  if (rereduce) {
    // NOTE(review): the values are handed back as-is, so the rereduce output
    // is an array while the first-pass output is an object. Presumably this
    // shape mismatch is what makes the result grow - confirm against the
    // Couchbase reduce/rereduce documentation.
    return values;
  } else {
    var results = {}; // Object!
    for (var i = 0; i < values.length; i++) {
      var row = values[i];
      // First row seen for this hotel: start from the default fields.
      if (!results[row.hotelId]) {
        results[row.hotelId] = {
          phone: '',
          exit: 1
        };
      }
      var result = results[row.hotelId];
      // Copy every field of the row over the accumulated object; later rows
      // overwrite earlier ones, including `exit`.
      for (var name in row) {
        result[name] = row[name];
      }
      // NOTE(review): this compares row.exit with itself, so the condition is
      // never true and the assignment below never runs. Presumably the intent
      // was to keep the highest exit seen so far - confirm.
      if (row.exit > row.exit) {
        result.exit = row.exit;
      }
    };

    return results;
  }
}

Gives me RangeError: Maximum call stack size exceeded

function(key, values, rereduce) { 
  // Reduce function: on the first pass, folds the mapped rows into one
  // merged object per hotelId and returns them in an array indexed by
  // hotelId. On the rereduce pass it returns its input untouched.
  if (rereduce) {
    // NOTE(review): the values are handed back as-is, so each rereduce
    // returns an array of earlier results instead of a merged result -
    // presumably related to the size error; confirm against the Couchbase
    // reduce/rereduce documentation.
    return values;
  } else {
    // hotelId is used as the array index, so any index without a matching
    // hotelId in `values` stays an empty slot.
    var results = []; // Array!
    for (var i = 0; i < values.length; i++) {
      var row = values[i];
      // First row seen for this hotel: start from the default fields.
      if (!results[row.hotelId]) {
        results[row.hotelId] = {
          phone: '',
          exit: 1
        };
      }
      var result = results[row.hotelId];
      // Copy every field of the row over the accumulated object; later rows
      // overwrite earlier ones, including `exit`.
      for (var name in row) {
        result[name] = row[name];
      }
      // NOTE(review): this compares row.exit with itself, so the condition is
      // never true and the assignment below never runs. Presumably the intent
      // was to keep the highest exit seen so far - confirm.
      if (row.exit > row.exit) {
        result.exit = row.exit;
      }
    };

    return results;
  }
}

Gives me reduction too large error

function(key, values, rereduce) { 
  // Pass-through reducer: both branches return the incoming `values` array
  // unchanged, so nothing is merged or shrunk on either pass.
  if (rereduce) {
    return values;
  } else {    
    return values;
  }
}

Gives me RangeError: Maximum call stack size exceeded

If I run:

function(key, values, rereduce) { 
  if (rereduce) {
    return values;
  } else {        
    return values.length;
  }
}

I get back:

[ 68, 72, 65, 66, 68, 68, 70, 114 ]

The JavaScript engine should be able to reduce arrays with a maximum size of 114, and the output data should be even smaller. Apparently there is a limit on how much data a reduce can return (max_kv_size_per_doc, which is 1 MB), and there is also a 10-second execution limit, but in my case it is something else. Is there a way to get around these limits by changing the algorithm, returning an array of arrays or something? Is there something I can do in the map or some tricks I can use in rereduce?

Jeno Laszlo
  • 2,023
  • 18
  • 36
  • a) having multiple document types in a bucket is not a bad practice; it's very common, and often recommended. – Matthew Groves Jul 10 '17 at 13:44
  • b) what version of Couchbase are you using? is N1QL not an option for you? – Matthew Groves Jul 10 '17 at 13:44
  • c) "Maximum call stack size exceeded" sounds like a JavaScript error; ideally you could debug through it and find out where it's coming from, but not sure how to do that – Matthew Groves Jul 10 '17 at 13:51
  • 1
    re: b) We use 3.0.1 but have a few 4.5.1 instances. I would definitely upvote a working N1QL solution but hoping for a map reduce one. – Jeno Laszlo Jul 10 '17 at 13:51
  • re: c) yeah. I got the raw JSON and I had no issues converting it with PowerShell – Jeno Laszlo Jul 10 '17 at 13:53
  • For N1QL, a series of UNION queries wrapped by an aggregation would work, I think. E.g `SELECT a.foo, SUM(b.bar) FROM ( ... unions ...) GROUP BY a.foo` – Matthew Groves Jul 10 '17 at 13:54
  • re: a) I think I read that having similar docs in a bucket is better if you want to add indexes, etc. I am not sure if it is related to the docs having the same schema or about concerns that certain indexes will degrade the performance of the whole bucket (even for stuff which wouldn't benefit from the indexes). I could be totally wrong on this – Jeno Laszlo Jul 10 '17 at 13:56
  • @Jeno, unfortunately it is not obvious what input parameters in your `function(key, values, rereduce)` are and how do you call this function. Could you please add relevant lines of javascript code to your question? – Kosh Jul 10 '17 at 20:06
  • @Kosh the function with the keys, values and rereduce argument is called the reducer. https://developer.couchbase.com/documentation/server/3.x/developer/dev-guide-3.0/reduce-rereduce.html – Jeno Laszlo Jul 10 '17 at 23:38
  • @Jeno, thanks for the link. I meant where in your code you call this function to execute and what you send as parameters. – Kosh Jul 10 '17 at 23:49
  • @KoshVery I run this script in a View: CB Console -> Views. Each view has a Map and a Reduce part. The view can be accessed by URL as well. The URL params are: ?stale=false&inclusive_end=false&connection_timeout=60000&limit=10&skip=0 It means it is a dev view; the limit doesn't affect the outcome because the reduce goes through all values. – Jeno Laszlo Jul 11 '17 at 03:37

1 Answer

3

I figured it out. It works if I use compound keys and group_level.

So if I change my map to return an array as key for hotel id and I set group_level = 1 then the values will be grouped for me as I initially expected:

function(doc, meta) {

  var getStep = function(stepName, exit, mapper) {
    if (meta.id.indexOf(stepName) !== -1) {
      var hotelId = parseInt(meta.id.replace(stepName + '_', ''));
      if (hotelId > 0) {
        var result = {
          hotelId: hotelId,
          exit: exit
        };
        if (mapper !== undefined) {
          mapper(result);
        }
        return result;
      }
    }
    return null;
  };

  var photos = getStep('PHOTOS', 7);
  if (photos != null) {
    emit([photos.hotelId], photos); // array as key
  }
  var pricing = getStep('PICR', 5); // array as key
  if (pricing != null) {
    emit([pricing.hotelId], pricing);
  }
  var owner = getStep('OWNER', 1); // array as key
  if (owner != null) {
    emit([owner.hotelId], owner);
  }
  var amenity = getStep('AM', 4); // array as key
  if (amenity != null) {
    emit([amenity.hotelId], amenity);
  }
  var description = getStep('HDESC', 3, function(result) {
    result.description = doc.description;
    result.hotelRoomTypeId = doc.hotelRoomTypeId;
    result.starRating = doc.starRating;
  });
  if (description != null) {
    emit([description.hotelId], description); // array as key
  }
  var contact = getStep('DC', 3, function(result) {
    result.email = doc.emailAddress;
    result.contact = doc.mainContactName;
  });
  if (contact != null) {
    emit([contact.hotelId], contact); // array as key
  }
  var location = getStep('LOC', 2, function(result) {
    result.city = doc.cityName;
    result.zip = doc.postalCode;
    result.country = doc.countryName;
    result.street = doc.stateName + ', ' + doc.streetName;
  });
  if (location != null) {
    emit([location.hotelId], location); // array as key
  }
  var property = getStep('PRP', 1, function(result) {
    result.paymentMethodId = doc.paymentMethodId
  });
  if (property != null) {
    emit([property.hotelId], property); // array as key
  } 
}

Then I need to set the group_level=1 and reduce=true. You can do it in the view editor or in the query string.

The last bit is the reduce:

function(key, values, rereduce) { 
  if (rereduce) {
    return values;
  } else {           
    var result = {};
    values.forEach(function(item){
        for(var name in item){
            result[name] = item[name];
        }
    });

    return result;
  }
}

The result will be merged by hotelId as expected :)

Jeno Laszlo
  • 2,023
  • 18
  • 36