I have set up a Jupyter Notebook that calls my description column within a table from a PostgreSQL database and apply a Machine learning model from the Ibm watson studio API to this data.
I was able to correctly get a response back with a prediction, but the problem is that my datas are all being displayed and read as a single object instead of an individual object for each row.
My goal is to apply the model to each of these description rows but as you can see below the prediction is applied to the column itself, not to the rows:
{
"collection": [
{
"top_class": "hot",
"text": "{\"description\":{\"0\":\"Lorem ipsum sjvh hcx bftiyf, hufcil, igfgvjuoigv gvj ifcil ,ghn fgbcggtc yfctgg h vgchbvju.\",\"1\":\"Lorem ajjgvc wiufcfboitf iujcvbnb hjnkjc ivjhn oikgjvn uhnhgv 09iuvhb oiuvh boiuhb mkjhv mkiuhygv m,khbgv mkjhgv mkjhgv.\",\"2\":\"Lorem aiv ibveikb jvk igvcib ok blnb v hb b hb bnjb bhb bhn bn vf vbgfc vbgv nbhgv bb nb nbh nj mjhbv mkjhbv nmjhgbv nmkn\",\"3\":\"Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb vcibs j dvx\",\"4\":\"Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb vcibs j dvx\",\"5\":\"Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb vcibs j dvx\",\"6\":\"Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb vcibs j dvx\",\"7\":\"Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb vcibs j dvx\",\"8\":\"Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb vcibs j dvx\",\"9\":\"Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb vcibs j dvx\",\"10\":\"lorem sivbnogc hbiuygv bnjiuygv bmkjygv nmjhgv.\"}}",
"classes": [
{
"confidence": 0.40859634691282776,
"class_name": "hot"
},
{
"confidence": 0.2325080584859929,
"class_name": "cold"
}
]
}
],
"classifier_id": "7818d2s519-nlc-1311",
"url": "https://gateway.watsonplatform.net/natural-language-classifier/api/v1/classifiers/7818d2s519-nlc-1311"
}
And for clarity, the expected output should look like this:
{
"classifier_id": "7818d2s519-nlc-1311",
"url": "https://gateway.watsonplatform.net/natural-language-classifier/api/v1/classifiers/7818d2s519-nlc-1311",
"collection": [
{
"text": "Lorem aiv ibveikb jvk igvcib ok blnb v hb b hb bnjb bhb bhn bn vf vbgfc vbgv nbhgv bb nb nbh nj mjhbv mkjhbv nmjhgbv nmkn.",
"top_class": "cold",
"classes": [
{
"confidence": 0.89084859929,
"class_name": "cold"
},
{
"confidence": 0.23250805848,
"class_name": "hot"
}
]
},
{
"text": "Lorem ajjgvc wiufcfboitf iujcvbnb hjnkjc ivjhn oikgjvn uhnhgv 09iuvhb oiuvh boiuhb mkjhv mkiuhygv m,khbgv mkjhgv mkjhgv.",
"top_class": "hot",
"classes": [
{
"confidence": 0.1084859929,
"class_name": "cold"
},
{
"confidence": 0.99250805848,
"class_name": "hot"
}
]
},
{
"text": "Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb vcibs j dvx.",
"top_class": "cold",
"classes": [
{
"confidence": 0.7084859929,
"class_name": "cold"
},
{
"confidence": 0.19250805848,
"class_name": "hot"
}
]
}
etc.....
This is my python code in the Notebook:
from watson_developer_cloud import NaturalLanguageClassifierV1
import pandas as pd
import psycopg2
import json
# connect to the database
conn_string = 'host={} port={} dbname={} user={} password={}'.format('119.203.10.242', 5432, 'mydb', 'locq', 'Mypass***')
conn_cbedce9523454e8e9fd3fb55d4c1a52e = psycopg2.connect(conn_string)
# select the description column
data_df_1 = pd.read_sql('SELECT description from public."search_product"', con=conn_cbedce9523454e8e9fd3fb55d4c1a52e)
# connect to the Watson Studio API
natural_language_classifier = NaturalLanguageClassifierV1(
iam_apikey='F76ugy8hv1s3sr87buhb7564vb7************'
)
# apply the model to the datas
classes = natural_language_classifier.classify_collection('7818d2s519-nlc-1311', [{'text':data_df_1.to_json()}]).get_result()
# print the result in json
print(json.dumps(classes, indent=2))
And, an example of what the data structure look like:
Id description
0 Lorem ipsum sjvh hcx bftiyf, hufcil, igfgvju...
1 Lorem ajjgvc wiufcfboitf iujcvbnb hjnkjc ivjh...
2 Lorem aiv ibveikb jvk igvcib ok blnb v hb b h...
3 Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb...
4 Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb...
5 Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb...
6 Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb...
7 Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb...
8 Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb...
9 Lorem jsvc smc cbd ciecdbbc d vd bcvdvbj obcvb...
10 Lorem sivbnogc hbiuygv bnjiuygv bmkjygv nmjhgv...
Is there any ways I can achieve that with some python code? Or is there a way to call the rows within the column instead of the column itself?
Edit
Having applied @Peter solution, the data is correctly formatted but I get this error now:
Full output for clarity:
---------------------------------------------------------------------------
JSONDecodeError Traceback (most recent call last)
<ipython-input-114-9d8e7cf98a41> in <module>()
1 import json
2
----> 3 classes = natural_language_classifier.classify_collection('7818d2s519-nlc-1311', reshaped).get_result()
4
5 print(json.dumps(classes, indent=2))
/opt/conda/envs/DSX-Python35/lib/python3.5/site-packages/watson_developer_cloud/natural_language_classifier_v1.py in classify_collection(self, classifier_id, collection, **kwargs)
152 if collection is None:
153 raise ValueError('collection must be provided')
--> 154 collection = [self._convert_model(x, ClassifyInput) for x in collection]
155
156 headers = {}
/opt/conda/envs/DSX-Python35/lib/python3.5/site-packages/watson_developer_cloud/natural_language_classifier_v1.py in <listcomp>(.0)
152 if collection is None:
153 raise ValueError('collection must be provided')
--> 154 collection = [self._convert_model(x, ClassifyInput) for x in collection]
155
156 headers = {}
/opt/conda/envs/DSX-Python35/lib/python3.5/site-packages/watson_developer_cloud/watson_service.py in _convert_model(val, classname)
461 if classname is not None and not hasattr(val, "_from_dict"):
462 if isinstance(val, str):
--> 463 val = json_import.loads(val)
464 val = classname._from_dict(dict(val))
465 if hasattr(val, "_to_dict"):
/opt/conda/envs/DSX-Python35/lib/python3.5/json/__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
317 parse_int is None and parse_float is None and
318 parse_constant is None and object_pairs_hook is None and not kw):
--> 319 return _default_decoder.decode(s)
320 if cls is None:
321 cls = JSONDecoder
/opt/conda/envs/DSX-Python35/lib/python3.5/json/decoder.py in decode(self, s, _w)
337
338 """
--> 339 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
340 end = _w(s, end).end()
341 if end != len(s):
/opt/conda/envs/DSX-Python35/lib/python3.5/json/decoder.py in raw_decode(self, s, idx)
353 """
354 try:
--> 355 obj, end = self.scan_once(s, idx)
356 except StopIteration as err:
357 raise JSONDecodeError("Expecting value", s, err.value) from None
JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)