We would like to upload about 30k entities to the datastore in one go, while also creating documents from strings associated with these entities.
This is to allow for partial search on strings which the datastore is not suited for.
However, we haven't been able to find any resources or documentation on how to bulk upload documents using the search api functionality.
How do we go about this?
We tried using the bulkloader, which keeps giving the following error
google.appengine.ext.db.KindError: No implementation for kind 'Prototype'
This was because we were trying to upload ndb models, but the error suggests that the bulkloader was defaulting to the older db API.
We tried to hack our way around it by defining the class as a db Model and uploading it. This works, and the data is uploaded to the datastore; however, the `_post_put_hook` is never called.
Here's the code:
#models.py
import datetime
from google.appengine.ext import db
from google.appengine.tools import bulkloader
class PrototypeE(db.Model):
    """Datastore model for a prototype entity, mirrored into a Search API index.

    NOTE(review): `_post_put_hook` is an ndb feature — google.appengine.ext.db
    never invokes it, which is why no documents are created when entities are
    written through the db-based bulkloader. To get the hook called, the model
    must be an ndb.Model (or the document must be indexed explicitly after the
    put). Confirm against the ndb model-hooks documentation.
    """
    # `site_list` / `gen_list` must be defined at module level before this
    # class body runs — not shown in this listing; TODO confirm.
    p_id = db.StringProperty(indexed=True, required=True)
    p_name = db.StringProperty(required=True)
    p_val = db.IntegerProperty(required=True)
    p_lnk = db.StringProperty(required=True)
    p_src = db.StringProperty(choices=site_list)
    p_create_time = db.DateTimeProperty(auto_now_add=True)   # set once on create
    p_update_time = db.DateTimeProperty(auto_now=True)       # refreshed on every put
    p_gen = db.StringProperty(choices=gen_list)
    p_img = db.StringProperty()
    p_cat = db.StringProperty()
    p_brd = db.StringProperty()
    p_keys = db.StringProperty()

    def _post_put_hook(self, future):
        """Index this entity in the 'Store_Doc' search index after a put.

        `future` resolves to the entity's key (ndb semantics); its id becomes
        the document id so datastore entity and search document stay paired.
        """
        doc_key = future.get_result()
        doc_id = doc_key.id()
        # BUG FIX: the original read `self.p_price`, which is not a property of
        # this model — the numeric property is `p_val` (see above). Using
        # p_price would raise AttributeError the first time the hook ran.
        doc = search.Document(
            doc_id=unicode(doc_id),
            fields=[
                search.TextField(name="keywords", value=self.p_keys),
                search.NumberField(name="value", value=self.p_val),
            ])
        logging.info(doc)
        try:
            index = search.Index(name="Store_Doc")
            index.put(doc)  # write the document into the search index
        except search.Error:
            # Best-effort indexing: log and continue so the datastore put
            # itself is not rolled back by a search-side failure.
            logging.exception('Doc put failed')
And the loader:
#proto_loader.py
import datetime
from google.appengine.ext import ndb
from google.appengine.tools import bulkloader
import models
class ProtoLoader(bulkloader.Loader):
    """Bulkloader configuration mapping CSV columns onto PrototypeE fields."""

    # (property name, converter) pairs, in CSV column order.
    _FIELD_SPEC = (
        ('p_id', str),
        ('p_name', str),
        ('p_val', int),
        ('p_lnk', str),
        ('p_src', str),
        ('p_gen', str),
        ('p_img', str),
        ('p_cat', str),
        ('p_brd', str),
        ('p_keys', str),
    )

    def __init__(self):
        # Register the converter spec against the 'PrototypeE' kind.
        bulkloader.Loader.__init__(self, 'PrototypeE', list(self._FIELD_SPEC))


loaders = [ProtoLoader]
This succeeds in uploading the data to the datastore, but the hook is not called and no documents are created.
Do we need to edit the bulkloader file to get around this issue?
UPDATE: As mentioned earlier, the reason we attempted mixing ndb and db is that we get the following error when defining the class as an ndb.Model throughout
Traceback (most recent call last):
File "appcfg.py", line 126, in <module>
run_file(__file__, globals())
File "appcfg.py", line 122, in run_file
execfile(_PATHS.script_file(script_name), globals_)
File "/home/stw/Google/google_appengine/google/appengine/tools/appcfg.py", line 5220, in <module>
main(sys.argv)
File "/home/stw/Google/google_appengine/google/appengine/tools/appcfg.py", line 5211, in main
result = AppCfgApp(argv).Run()
File "/home/stw/Google/google_appengine/google/appengine/tools/appcfg.py", line 2886, in Run
self.action(self)
File "/home/stw/Google/google_appengine/google/appengine/tools/appcfg.py", line 4890, in __call__
return method()
File "/home/stw/Google/google_appengine/google/appengine/tools/appcfg.py", line 4693, in PerformUpload
run_fn(args)
File "/home/stw/Google/google_appengine/google/appengine/tools/appcfg.py", line 4574, in RunBulkloader
sys.exit(bulkloader.Run(arg_dict))
File "/home/stw/Google/google_appengine/google/appengine/tools/bulkloader.py", line 4408, in Run
return _PerformBulkload(arg_dict)
File "/home/stw/Google/google_appengine/google/appengine/tools/bulkloader.py", line 4219, in _PerformBulkload
LoadConfig(config_file)
File "/home/stw/Google/google_appengine/google/appengine/tools/bulkloader.py", line 3886, in LoadConfig
Loader.RegisterLoader(cls())
File "proto_loader.py", line 40, in __init__
('p_keys',str)
File "/home/stw/Google/google_appengine/google/appengine/tools/bulkloader.py", line 2687, in __init__
GetImplementationClass(kind)
File "/home/stw/Google/google_appengine/google/appengine/tools/bulkloader.py", line 957, in GetImplementationClass
implementation_class = db.class_for_kind(kind_or_class_key)
File "/home/stw/Google/google_appengine/google/appengine/ext/db/__init__.py", line 296, in class_for_kind
raise KindError('No implementation for kind \'%s\'' % kind)
google.appengine.ext.db.KindError: No implementation for kind 'PrototypeE'
As the error indicates, bulkloader assumes a db Class and checks with db.class_for_kind which results in an error when using ndb