0

I am making queries to DBpedia to obtain and filter data, for this, I made the following script:

from rdflib import Graph
from SPARQLWrapper import SPARQLWrapper, JSON, N3
from pprint import pprint
import pandas as pd

import pyvis
from pyvis.network import Network
import networkx as nx
import json

sparql = SPARQLWrapper('https://dbpedia.org/sparql')
def detallePelicula(titulo):
    sparql.setQuery('''
        SELECT ?name ?director ?country ?starring ?abstract
        WHERE { dbr:'''+titulo+''' dbp:name ?name .
                dbr:'''+titulo+''' dbo:director ?director .
                dbr:'''+titulo+''' dbp:country ?country .
                dbr:'''+titulo+''' dbp:starring ?starring .
                dbr:'''+titulo+''' dbo:abstract ?abstract .

                FILTER (lang(?abstract) = 'en')
        }''')
    sparql.setReturnFormat(JSON)
    qres = sparql.query().convert()
    
    diccionarios = []
    for res in qres["results"]["bindings"]:
        dic ={
            "name":res["name"]["value"],
            "director":res["director"]["value"].split("/")[-1],
            "country":res["country"]["value"],
            "abstract":res["abstract"]["value"],
            "starring":res["starring"]["value"].split("/")[-1]
        }
        diccionarios.append(dic)
    print(len(diccionarios))
    
    dics_string = json.dumps(diccionarios)  
    a_json = json.loads(dics_string)
    dataframe = pd.DataFrame.from_dict(a_json)
    dataframe.drop_duplicates()
    
return dataframe

When passing information to the parameter in the following way detallePelicula('Toy_Story_3'), no error occurs, but when doing it in the following way detallePelicula('Grease_(film)') the following error is generated:

---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
File ~/jupyter_notebook/jupynotebook_env/lib/python3.10/site-packages/SPARQLWrapper/Wrapper.py:926, in SPARQLWrapper._query(self)
    925 else:
--> 926     response = urlopener(request)
    927 return response, self.returnFormat

File /usr/lib64/python3.10/urllib/request.py:216, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    215     opener = _opener
--> 216 return opener.open(url, data, timeout)

File /usr/lib64/python3.10/urllib/request.py:525, in OpenerDirector.open(self, fullurl, data, timeout)
    524     meth = getattr(processor, meth_name)
--> 525     response = meth(req, response)
    527 return response

File /usr/lib64/python3.10/urllib/request.py:634, in HTTPErrorProcessor.http_response(self, request, response)
    633 if not (200 <= code < 300):
--> 634     response = self.parent.error(
    635         'http', request, response, code, msg, hdrs)
    637 return response

File /usr/lib64/python3.10/urllib/request.py:563, in OpenerDirector.error(self, proto, *args)
    562 args = (dict, 'default', 'http_error_default') + orig_args
--> 563 return self._call_chain(*args)

File /usr/lib64/python3.10/urllib/request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
    495 func = getattr(handler, meth_name)
--> 496 result = func(*args)
    497 if result is not None:

File /usr/lib64/python3.10/urllib/request.py:643, in HTTPDefaultErrorHandler.http_error_default(self, req, fp, code, msg, hdrs)
    642 def http_error_default(self, req, fp, code, msg, hdrs):
--> 643     raise HTTPError(req.full_url, code, msg, hdrs, fp)

HTTPError: HTTP Error 400: Bad Request

During handling of the above exception, another exception occurred:

QueryBadFormed                            Traceback (most recent call last)
Cell In [11], line 1
----> 1 detallePelicula('Grease_(film)')

Cell In [7], line 14, in detallePelicula(titulo)
      3 sparql.setQuery('''
      4     SELECT ?name ?director ?country ?starring ?abstract
      5     WHERE { dbr:'''+titulo+''' dbp:name ?name .
   (...)
     11             FILTER (lang(?abstract) = 'en')
     12     }''')
     13 sparql.setReturnFormat(JSON)
---> 14 qres = sparql.query().convert()
     16 diccionarios = []
     17 for res in qres["results"]["bindings"]:

File ~/jupyter_notebook/jupynotebook_env/lib/python3.10/site-packages/SPARQLWrapper/Wrapper.py:960, in SPARQLWrapper.query(self)
    942 def query(self) -> "QueryResult":
    943     """
    944     Execute the query.
    945     Exceptions can be raised if either the URI is wrong or the HTTP sends back an error (this is also the
   (...)
    958     :rtype: :class:`QueryResult` instance
    959     """
--> 960     return QueryResult(self._query())

File ~/jupyter_notebook/jupynotebook_env/lib/python3.10/site-packages/SPARQLWrapper/Wrapper.py:930, in SPARQLWrapper._query(self)
    928 except urllib.error.HTTPError as e:
    929     if e.code == 400:
--> 930         raise QueryBadFormed(e.read())
    931     elif e.code == 404:
    932         raise EndPointNotFound(e.read())

QueryBadFormed: QueryBadFormed: A bad request has been sent to the endpoint: probably the SPARQL query is badly formed. 

Response:
b"Virtuoso 37000 Error SP030: SPARQL compiler, line 4: syntax error at 'film' before ')'\n\nSPARQL query:\n#output-format:application/sparql-results+json\n\n        SELECT ?name ?director ?country ?starring ?abstract\n        WHERE { dbr:Grease_(film) dbp:name ?name .\n                dbr:Grease_(film) dbo:director ?director .\n                dbr:Grease_(film) dbp:country ?country .\n                dbr:Grease_(film) dbp:starring ?starring .\n                dbr:Grease_(film) dbo:abstract ?abstract .\n\n                FILTER (lang(?abstract) = 'en')\n        }"

I understand that the problem may be that the parentheses are not part of the URL composition, but I'm really not sure about it. I would appreciate in advance if someone can advise me to understand the error and obtain a possible solution, thank you very much for your answers.

XDarkestX
  • 1
  • 3

0 Answers0