I am making queries to DBpedia to obtain and filter data, for this, I made the following script:
from rdflib import Graph
from SPARQLWrapper import SPARQLWrapper, JSON, N3
from pprint import pprint
import pandas as pd
import pyvis
from pyvis.network import Network
import networkx as nx
import json
sparql = SPARQLWrapper('https://dbpedia.org/sparql')
def detallePelicula(titulo):
sparql.setQuery('''
SELECT ?name ?director ?country ?starring ?abstract
WHERE { dbr:'''+titulo+''' dbp:name ?name .
dbr:'''+titulo+''' dbo:director ?director .
dbr:'''+titulo+''' dbp:country ?country .
dbr:'''+titulo+''' dbp:starring ?starring .
dbr:'''+titulo+''' dbo:abstract ?abstract .
FILTER (lang(?abstract) = 'en')
}''')
sparql.setReturnFormat(JSON)
qres = sparql.query().convert()
diccionarios = []
for res in qres["results"]["bindings"]:
dic ={
"name":res["name"]["value"],
"director":res["director"]["value"].split("/")[-1],
"country":res["country"]["value"],
"abstract":res["abstract"]["value"],
"starring":res["starring"]["value"].split("/")[-1]
}
diccionarios.append(dic)
print(len(diccionarios))
dics_string = json.dumps(diccionarios)
a_json = json.loads(dics_string)
dataframe = pd.DataFrame.from_dict(a_json)
dataframe.drop_duplicates()
return dataframe
When passing information to the parameter in the following way detallePelicula('Toy_Story_3')
, no error occurs, but when doing it in the following way detallePelicula('Grease_(film)')
the following error is generated:
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
File ~/jupyter_notebook/jupynotebook_env/lib/python3.10/site-packages/SPARQLWrapper/Wrapper.py:926, in SPARQLWrapper._query(self)
925 else:
--> 926 response = urlopener(request)
927 return response, self.returnFormat
File /usr/lib64/python3.10/urllib/request.py:216, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
215 opener = _opener
--> 216 return opener.open(url, data, timeout)
File /usr/lib64/python3.10/urllib/request.py:525, in OpenerDirector.open(self, fullurl, data, timeout)
524 meth = getattr(processor, meth_name)
--> 525 response = meth(req, response)
527 return response
File /usr/lib64/python3.10/urllib/request.py:634, in HTTPErrorProcessor.http_response(self, request, response)
633 if not (200 <= code < 300):
--> 634 response = self.parent.error(
635 'http', request, response, code, msg, hdrs)
637 return response
File /usr/lib64/python3.10/urllib/request.py:563, in OpenerDirector.error(self, proto, *args)
562 args = (dict, 'default', 'http_error_default') + orig_args
--> 563 return self._call_chain(*args)
File /usr/lib64/python3.10/urllib/request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
495 func = getattr(handler, meth_name)
--> 496 result = func(*args)
497 if result is not None:
File /usr/lib64/python3.10/urllib/request.py:643, in HTTPDefaultErrorHandler.http_error_default(self, req, fp, code, msg, hdrs)
642 def http_error_default(self, req, fp, code, msg, hdrs):
--> 643 raise HTTPError(req.full_url, code, msg, hdrs, fp)
HTTPError: HTTP Error 400: Bad Request
During handling of the above exception, another exception occurred:
QueryBadFormed Traceback (most recent call last)
Cell In [11], line 1
----> 1 detallePelicula('Grease_(film)')
Cell In [7], line 14, in detallePelicula(titulo)
3 sparql.setQuery('''
4 SELECT ?name ?director ?country ?starring ?abstract
5 WHERE { dbr:'''+titulo+''' dbp:name ?name .
(...)
11 FILTER (lang(?abstract) = 'en')
12 }''')
13 sparql.setReturnFormat(JSON)
---> 14 qres = sparql.query().convert()
16 diccionarios = []
17 for res in qres["results"]["bindings"]:
File ~/jupyter_notebook/jupynotebook_env/lib/python3.10/site-packages/SPARQLWrapper/Wrapper.py:960, in SPARQLWrapper.query(self)
942 def query(self) -> "QueryResult":
943 """
944 Execute the query.
945 Exceptions can be raised if either the URI is wrong or the HTTP sends back an error (this is also the
(...)
958 :rtype: :class:`QueryResult` instance
959 """
--> 960 return QueryResult(self._query())
File ~/jupyter_notebook/jupynotebook_env/lib/python3.10/site-packages/SPARQLWrapper/Wrapper.py:930, in SPARQLWrapper._query(self)
928 except urllib.error.HTTPError as e:
929 if e.code == 400:
--> 930 raise QueryBadFormed(e.read())
931 elif e.code == 404:
932 raise EndPointNotFound(e.read())
QueryBadFormed: QueryBadFormed: A bad request has been sent to the endpoint: probably the SPARQL query is badly formed.
Response:
b"Virtuoso 37000 Error SP030: SPARQL compiler, line 4: syntax error at 'film' before ')'\n\nSPARQL query:\n#output-format:application/sparql-results+json\n\n SELECT ?name ?director ?country ?starring ?abstract\n WHERE { dbr:Grease_(film) dbp:name ?name .\n dbr:Grease_(film) dbo:director ?director .\n dbr:Grease_(film) dbp:country ?country .\n dbr:Grease_(film) dbp:starring ?starring .\n dbr:Grease_(film) dbo:abstract ?abstract .\n\n FILTER (lang(?abstract) = 'en')\n }"
I understand that the problem may be that the parentheses are not part of the URL composition, but I'm really not sure about it. I would appreciate in advance if someone can advise me to understand the error and obtain a possible solution, thank you very much for your answers.