I have been tearing my hair out (not much to begin with, though), trying to figure out a way to hit a web service and then parse and deserialize the JSON in chunks, straight into my objects, as the data arrives and without storing the entire document (500 MB+). Effectively, I am trying to connect libcurl and rapidjson using SAX-style parsing right into my classes.
Maybe I am thinking about this wrong, but my current line of thought is to add the received data to the JSON text stream inside the curl write callback. What I am struggling with is how to start the rapidjson parsing engine and then make it wait for the next block of data to come in from the curl write callback, halting before an incomplete token is processed until the next buffer has been received. I don't want to append buffers, because I want to free the previous buffer.
Has anyone done this before?
Here is some code to illustrate the concept...but obviously not complete (and probably all wrong!)
Here is the structure for passing information to curl_write so that it knows how to do a callback to the parser.
/// Context object handed to the curl write callback through CURLOPT_WRITEDATA.
///
/// All members are non-owning pointers; they default to null so that a
/// partially populated handler never carries indeterminate pointer values.
struct CurlHandler {
    std::stringstream *data = nullptr;  ///< Optional buffer for received text (may stay null).
    Reader *reader = nullptr;           ///< rapidjson reader that should consume the data.
    void *parsehandler = nullptr;       ///< Type-erased pointer to the SAX handler object.
};
Then before initiating the request, I populate the structure as follows:
rapidjson::Reader reader;
// Value-initialize so that the unused `data` pointer is null instead of indeterminate.
CurlHandler handler{};
ParseHandler parser;
handler.reader = &reader;
handler.parsehandler = &parser;
// This pointer is what the write callback receives as its last argument.
SetOption(CURLOPT_WRITEDATA, static_cast<void *>(&handler));
ParseHandler is a class that is needed by rapidjson to do SAX parsing. The idea was to call the rapidjson parser (somehow) during the curl_write:
/// libcurl write callback (CURLOPT_WRITEFUNCTION).
///
/// @param ptr            Start of the received chunk; it is NOT null-terminated.
/// @param size           Element size reported by libcurl.
/// @param nmemb          Number of elements; the chunk holds size * nmemb bytes.
/// @param CurlHandlerPtr The CURLOPT_WRITEDATA pointer (a CurlHandler*).
/// @return Number of bytes consumed. libcurl treats any value other than
///         size * nmemb as a write error and aborts the transfer, which is
///         why the previous constant `return 1` failed on every real chunk.
static size_t curl_write(void *ptr, size_t size, size_t nmemb, void *CurlHandlerPtr) {
    // TODO: cast CurlHandlerPtr to CurlHandler* and feed the bytes
    // [ptr, ptr + size * nmemb) to the JSON parser. Until that is wired in,
    // the chunk is acknowledged and dropped.
    (void)ptr;
    (void)CurlHandlerPtr;
    return size * nmemb;
}
I sure hope there is an easier answer than this.
Here is the full code for context:
#pragma once
#include <string>
#include <iostream>
#include "curl\curl.h"
#include "rapidjson\rapidjson.h"
#include "rapidjson\reader.h"
using namespace std;
using namespace rapidjson;
/// Context object handed to the curl write callback through CURLOPT_WRITEDATA.
///
/// All members are non-owning pointers; they default to null so that a
/// partially populated handler never carries indeterminate pointer values.
struct CurlHandler {
    std::stringstream *data = nullptr;  ///< Optional buffer for received text (may stay null).
    Reader *reader = nullptr;           ///< rapidjson reader that should consume the data.
    void *parsehandler = nullptr;       ///< Type-erased pointer to the SAX handler object.
};
/// libcurl write callback (CURLOPT_WRITEFUNCTION).
///
/// @param ptr            Start of the received chunk; it is NOT null-terminated.
/// @param size           Element size reported by libcurl.
/// @param nmemb          Number of elements; the chunk holds size * nmemb bytes.
/// @param CurlHandlerPtr The CURLOPT_WRITEDATA pointer (a CurlHandler*).
/// @return Number of bytes consumed. libcurl treats any value other than
///         size * nmemb as a write error and aborts the transfer, which is
///         why the previous constant `return 1` failed on every real chunk.
static size_t curl_write(void *ptr, size_t size, size_t nmemb, void *CurlHandlerPtr) {
    // TODO: cast CurlHandlerPtr to CurlHandler* and feed the bytes
    // [ptr, ptr + size * nmemb) to the JSON parser. Until that is wired in,
    // the chunk is acknowledged and dropped.
    (void)ptr;
    (void)CurlHandlerPtr;
    return size * nmemb;
}
/// SAX event handler for rapidjson that traces every event to std::cout.
///
/// Each callback prints the event name with its arguments and returns true
/// (per RapidJSON's handler contract, true means "keep parsing").
///
/// The callbacks are public: with the class-default private access the
/// reader could not invoke any of them.
class ParseHandler {
public:
    bool Null() { std::cout << "Null()" << '\n'; return true; }
    bool Bool(bool b) { std::cout << "Bool(" << std::boolalpha << b << ")" << '\n'; return true; }
    bool Int(int i) { std::cout << "Int(" << i << ")" << '\n'; return true; }
    bool Uint(unsigned u) { std::cout << "Uint(" << u << ")" << '\n'; return true; }
    bool Int64(int64_t i) { std::cout << "Int64(" << i << ")" << '\n'; return true; }
    bool Uint64(uint64_t u) { std::cout << "Uint64(" << u << ")" << '\n'; return true; }
    bool Double(double d) { std::cout << "Double(" << d << ")" << '\n'; return true; }

    /// Numbers delivered as raw text; part of the handler concept in newer
    /// RapidJSON releases. Harmless on versions that never call it.
    bool RawNumber(const char* str, SizeType length, bool copy) {
        return Trace("RawNumber(", str, length, copy);
    }

    bool String(const char* str, SizeType length, bool copy) {
        return Trace("String(", str, length, copy);
    }

    bool StartObject() { std::cout << "StartObject()" << '\n'; return true; }

    bool Key(const char* str, SizeType length, bool copy) {
        return Trace("Key(", str, length, copy);
    }

    bool EndObject(SizeType memberCount) { std::cout << "EndObject(" << memberCount << ")" << '\n'; return true; }
    bool StartArray() { std::cout << "StartArray()" << '\n'; return true; }
    bool EndArray(SizeType elementCount) { std::cout << "EndArray(" << elementCount << ")" << '\n'; return true; }

private:
    /// Prints `<prefix><text>, <length>, <copy>)` and returns true.
    /// Writes exactly `length` bytes so that text containing an embedded NUL
    /// (e.g. from a "\u0000" escape) is not cut short.
    bool Trace(const char* prefix, const char* str, SizeType length, bool copy) {
        std::cout << prefix;
        std::cout.write(str, static_cast<std::streamsize>(length));
        std::cout << ", " << length << ", " << std::boolalpha << copy << ")" << '\n';
        return true;
    }
};
class StreamTest {
private:
string _username;
string _password;
CURL *_curl;
CURLcode _error;
public:
StreamTest() {
curl_global_init(CURL_GLOBAL_ALL);
_curl = curl_easy_init();
};
~StreamTest() {
curl_global_cleanup();
};
int Initialize(string username, string password) {
_username = username;
_password = password;
return 0;
}
int Get(std::string url) {
CURLcode result;
rapidjson::Reader reader;
CurlHandler handler;
ParseHandler parser;
handler.reader = &reader;
handler.parsehandler = &parser;
std::string s = _username; s += ":"; s += _password;
SetOption(CURLOPT_USERPWD, s.c_str());
SetOption(CURLOPT_URL, url.c_str());
SetOption(CURLOPT_WRITEFUNCTION, curl_write);
SetOption(CURLOPT_WRITEDATA, (void *) &handler);
result = curl_easy_perform(_curl);
return 0;
}
template <typename ValueType>
StreamTest& SetOption(CURLoption opt, ValueType val) {
if(_curl && _error == CURLE_OK) {
_error = curl_easy_setopt(_curl, opt, val);
}
return *this;
}
};
/// Entry point: issues one authenticated GET against the local test service,
/// reports a failure on stderr, then waits for a key press.
/// @return 0 when the request succeeded, 1 otherwise.
int _tmain(int argc, _TCHAR* argv[]) {
    (void)argc;
    (void)argv;

    StreamTest http;
    http.Initialize("guest", "passwd");

    const int status = http.Get("http://localhost/GetData");
    if (status != 0) {
        std::cerr << "GET failed with status " << status << std::endl;
    }

    std::cin.get();  // keep the console window open until the user presses Enter
    return status == 0 ? 0 : 1;
}
EDIT:
Ideally the http.Get function in _tmain would pass in a reference to an object that is to be deserialized into. The object would receive all the SAX events from rapidjson in a callback of sorts.