0

I'm running the same code in a standalone Python script and in a Flask API. The goal is to add RDF triples to an rdflib Graph. When I run it as a command-line script it works; when I run the same code inside the Flask API, some of the logic appears to be ignored. The code is:

FLASK

class Helper(object):
    """Attach ``belongsTo`` triples linking denoted nodes to text spans.

    The graph passed to each method must answer the SPARQL queries below.
    NOTE(review): the ``ns2``, ``ns6`` and ``rst`` prefixes are not declared
    in the query strings; presumably they resolve through namespaces bound
    on the graph -- confirm against the data being loaded.
    """

    def get_fred_offset(self, s):
        """Return the second ``_``-separated field of ``s``.

        Raises:
            IndexError: if ``s`` contains no underscore.
        """
        return s.split('_')[1]

    def get_denoted_offset(self, g):
        """Return a list of ``(denoted, offset)`` tuples from ``g``.

        ``denoted`` is the first column of each result row, unchanged.
        ``offset`` is the second column, stripped of surrounding whitespace
        and reduced to its second ``_``-separated field (still a string).
        """
        query = """ SELECT ?denoted ?offset WHERE { ?offset a ns2:PointerRange ; ns6:denotes ?denoted . } """
        return [
            (row[0], self.get_fred_offset(row[1].strip()))
            for row in g.query(query)
        ]

    def get_span_boundaries(self, g):
        """Return a list of ``(textspan, start, end)`` tuples from ``g``.

        ``start`` and ``end`` are the whitespace-stripped string forms of
        the second and third result columns.
        """
        query = " SELECT  ?textspan ?start ?end WHERE { ?textspan rst:startOffset ?start ; rst:endOffset ?end . }"
        return [
            (row[0], row[1].strip(), row[2].strip())
            for row in g.query(query)
        ]

    @staticmethod
    def _offset_in_span(offset, start, end):
        """Return True when ``start <= offset < end`` holds numerically.

        The values arrive as strings, and comparing strings is
        lexicographic (``"5" <= "10"`` is False), so they are converted to
        integers first. A value that is not an integer cannot be placed in
        a span, so such a triple is reported as not matching.
        """
        try:
            return int(start) <= int(offset) < int(end)
        except (TypeError, ValueError):
            return False

    def bridge(self, g):
        """Add ``(denoted, belongsTo, textspan)`` triples to ``g``.

        A triple is added for every denoted node whose offset lies in the
        half-open interval ``[start, end)`` of a text span. The graph is
        modified in place and also returned.
        """
        denoteds = self.get_denoted_offset(g)
        spans = self.get_span_boundaries(g)
        for denoted, offset in denoteds:
            for span, start, end in spans:
                if self._offset_in_span(offset, start, end):
                    g.add((denoted, URIRef('belongsTo'), span))
        return g

endpoint

@app.route("/bridge", methods=["POST"])
@cross_origin()
def merge():
    """Load the stored ``bridge_graph`` file, bridge it, and return it as N3.

    The file is read from ``TMP_FOLDER`` through ``filehandler.open_file``,
    parsed as N3 into a fresh graph, enriched by ``Helper.bridge`` and
    serialized back to N3 for the response body.
    """
    fh = filehandler.open_file(TMP_FOLDER, 'bridge_graph', m='r')
    try:
        # Joining the lines once avoids repeated string concatenation.
        data = "".join(fh)
    finally:
        # Release the handle even when reading fails.
        # NOTE(review): assumes open_file returns a file-like object that
        # provides close() -- confirm against filehandler.
        fh.close()

    g = Graph()
    g.parse(data=data, format='n3')
    Helper().bridge(g)

    return g.serialize(format='n3')

Python.py script

g = Graph()
# Read the whole N3 document at once; the context manager closes the file
# even if reading raises.
with open("bridge_graph", 'r') as fh:
    data = fh.read()
g.parse(data=data, format='n3')


def get_f_offset(s):
    """Return the second ``_``-separated field of ``s``.

    Raises IndexError when ``s`` contains no underscore.
    """
    fields = s.split('_')
    return fields[1]

def get_denoted_offset(g):
    """Return a list of ``(denoted, offset)`` tuples from graph ``g``.

    ``denoted`` is the first column of each result row; ``offset`` is the
    second column, whitespace-stripped and passed through ``get_f_offset``.
    """
    query = """ SELECT ?denoted ?offset WHERE { ?offset a ns2:PointerRange ; ns6:denotes ?denoted . } """
    rows = g.query(query)
    return [(row[0], get_f_offset(row[1].strip())) for row in rows]

def get_span_boundaries(g):
    """Return a list of ``(textspan, start, end)`` tuples from graph ``g``.

    ``start`` and ``end`` are the whitespace-stripped string forms of the
    second and third result columns. The query itself restricts results to
    spans satisfying ``?start <= 5 && 5 < ?end``.
    """
    query = " SELECT  ?textspan ?start ?end WHERE { ?textspan rst:startOffset ?start ; rst:endOffset ?end . FILTER (?start <= 5 && 5 < ?end) }"
    return [
        (textspan, start.strip(), end.strip())
        for textspan, start, end in (
            (row[0], row[1], row[2]) for row in g.query(query)
        )
    ]

def bridge(g):
    """Add ``(denoted, belongsTo, textspan)`` triples to ``g`` in place.

    A triple is added for every denoted node whose offset lies in the
    half-open interval ``[start, end)`` of a text span. Each match is also
    printed for inspection.
    """
    denoteds = get_denoted_offset(g)
    spans = get_span_boundaries(g)
    for d in denoteds:
        for s in spans:
            # The offsets are strings; comparing them directly would be
            # lexicographic ("5" <= "10" is False), so compare as integers.
            try:
                start, offset, end = int(s[1]), int(d[1]), int(s[2])
            except ValueError:
                # A non-integer offset cannot be placed inside a span.
                continue
            if start <= offset < end:
                print(s[1], d[1], s[2])
                print(d[0], 'belongsTo', s[0])
                g.add((d[0], URIRef('belongsTo'), s[0]))  # <=============== THISLINE

bridge(g)

# Call form of print: valid under both Python 2 and Python 3.
print(g.serialize(format='n3'))

In one case the line marked THISLINE adds the triples; in the other case it does not.

Nja
  • 439
  • 6
  • 17
  • You are using python2, you should move to python3. In your get_span_boundaries you don't use the same query. – Victor Mar 09 '20 at 19:53

2 Answers2

0

Following Victor's comment, rdflib will likely be dropping Python 2 support altogether by July.

Also, I suggest using exactly the same code for the script as for the Helper() class: add an `if __name__ == '__main__':` block to your Helper() class file so you can run it directly and check whether the issues you see come from the Flask implementation rather than from the differences in your rdflib code.

Nicholas Car
  • 1,164
  • 4
  • 7
0

The issue is that you have to specify the datatype of the Literal used in your comparison. Instead of comparing integers (or numbers in general), the rdflib SPARQL parser compares strings, which produces unexpected behaviour. I now believe this is not an rdflib problem but a SPARQL one, since SPARQL requires strict type declarations. Adding: g.query(query, initBindings={?start : Literal^^xsd:integer}) solved the issue!

Dharman
  • 30,962
  • 25
  • 85
  • 135