2

I have a master xml file with a list like this:

<listPerson>
    <person xml:id="pe0001">
        <persName>
            <surname>Anderson</surname>
            [...]
       </persName>
    </person>
    <person xml:id="pe0002">
        <persName>
            <surname>Smith</surname>
            [...]
        </persName>
    </person>
    <person xml:id="pe0004">
        <persName>
            <surname>Another</surname>
            [...]
        </persName>
    </person>
</listPerson>

I have an html form which call an app in the app.xql and insert a new <person> record inside the master xml file. If there's a gap in the ID sequence (e.g ID pe0003 above) I'd like eXist-db to return that ID and 'fill the gap', otherwise just output the latest available ID (i.e. pe0005). I'm already accomplishing this last thing with:

declare function app:addPers($node as node(), $model as map(*)) {

    let $peid := doc('masterfile.xml')//tei:listPerson/tei:person[@xml:id][last()]/@xml:id
    let $idnumber := xs:decimal(substring-after($peid, 'pe'))
    let $newidnumber := (sum($idnumber + 1))
    let $newpeid := concat('pe0', $newidnumber)

    return

<html stuff>

}

What I'd like to do now is having an XQuery/Xpath code which detects when there's a gap in the sequence and acts accordingly. This is what I've done so far:

[app.xql]

declare function app:addPers($node as node(), $model as map(*)) {

let $seqpe := doc('masterfile.xml')//tei:listPerson/tei:person[@xml:id]/@xml:id
        let $peid := 
        for $item at $pos in $seqpe
            let $item := xs:decimal(substring-after($seqpe, 'pe'))
            return if ($item[$pos + 1] - $item[$pos] != 1) then 
            doc('masterfile.xml')//tei:listPerson/tei:person[@xml:id][$item]/@xml:id
        else 
        doc('masterfile.xml')//tei:listPerson/tei:person[@xml:id][last()]/@xml:id

        let $newidnumber := (sum($peid + 1))
        let $newpeid := concat('pe0', $newidnumber)
 return 

<html stuff>

}

This returns a err:FORG0001 cannot construct xs:decimal from "" error. What am I doing wrong?

Update

This is another test I made, which returns a err:XPDY0002 Undefined context sequence for 'following-sibling::tei:person error:

let $seqpe := doc('masterfile.xml')//tei:listPerson/tei:person
         let $peid := 
         for $item in $seqpe
             return if ((xs:decimal(substring-after(following-sibling::tei:person/@xml:id, 'pe'))) - (xs:decimal(substring-after($item/@xml:id, 'pe'))) ne 1) then 
             doc('masterfile.xml')//tei:listPerson/tei:person[@xml:id eq $item/@xml:id]/@xml:id
         else 
         doc('masterfile.xml')//tei:listPerson/tei:person[@xml:id][last()]/@xml:id

     let $newidnumber := (sum($peid + 1))
     let $newpeid := concat('pe0', $newidnumber)

SECOND UPDATE

As far as returning the last ID is concerned, both this code:

(let $idnext :=

  for $person in doc('/db/apps/app-ct/data/indices/pedb.xml')//tei:listPerson/tei:person[position() ne last()]
  where local:get-id($person/@xml:id) ne (local:get-id($person/following-sibling::tei:person[1]/@xml:id) - 1)
return 
    if (empty($idnext)) then
    (local:get-id(listPerson/person[last()]/@xml:id) + 1)
    else (local:get-id($person/@xml:id) + 1)
let $newpeid := 
if (fn:string-length($idnext) = 1) then
   concat('pe000', $idnext) else if
   (fn:string-length($idnext) = 2) then 
   concat('pe00', $idnext) else if 
   (fn:string-length($idnext) = 3) then 
   concat('pe0', $idnext) else 
   concat('pe', $idnext)

return

<html stuff>)[1]

and this:

    (let $idnext :=

      for $person in doc('/db/apps/app-ct/data/indices/pedb.xml')//tei:listPerson/tei:person[position() ne last()]
      where local:get-id($person/@xml:id) ne (local:get-id($person/following-sibling::tei:person[1]/@xml:id) - 1)
      return local:get-id($person/@xml:id) + 1
    return 
        if (empty($idnext)) then
        (local:get-id(listPerson/person[last()]/@xml:id) + 1)
        else ($idnext),
    let $newpeid := 
    if (fn:string-length($idnext) = 1) then
       concat('pe000', $idnext) else if 
       (fn:string-length($idnext) = 2) then 
       concat('pe00', $idnext) else if 
       (fn:string-length($idnext) = 3) then 
       concat('pe0', $idnext) else 
       concat('pe', $idnext)

    return

<html stuff>)[1]

return a err:XPDY0002 variable '$idnext' is not set. error.

Third and final update

Below is the code which does precisely what I want, i.e. returning the first available ID, wheter it's within a gap or not.

let $id_gap :=

        (for $person in doc('myfile.xml')//tei:listPerson/tei:person[position() ne last()]
        where local:get-id($person/@xml:id) ne (local:get-id($person/following-sibling::tei:person[1]/@xml:id) - 1)
        return (local:get-id($person/@xml:id) + 1))[1]

        let $idnext :=
        if (empty($id_gap))
        then (local:get-id(doc('myfile.xml')//tei:listPerson/tei:person[last()]/@xml:id) + 1)
        else ($id_gap)

        let $newpeid := 
         if (fn:string-length($idnext) = 1) then
            concat('pe000', $idnext) else if 
            (fn:string-length($idnext) = 2) then 
            concat('pe00', $idnext) else if 
            (fn:string-length($idnext) = 3) then 
            concat('pe0', $idnext) else 
            concat('pe', $idnext)

       return

  <html code>
HBMCS
  • 686
  • 5
  • 25
  • Do the `xml:id` always start with `1`, as in the example? Can there be multiple gaps and do you need to return all of them? Also, can there be gaps of several ids e.g. `.....`? Which values do you want to return in that case? – Martin Honnen Feb 20 '18 at 15:59
  • Hi Martin, it's only one file with all the ids in it, and the xml:id always starts with `pe` followed by 4 digits, as in example. Yes, there might be several gaps: in that case I want it to return the first gaps available. For instance, if there's a gap between `pe0005` and `pe0009` I want it to return `pe0006`. – HBMCS Feb 20 '18 at 16:04

2 Answers2

2

I tried it like this:

declare function local:get-id($xml-id as xs:string) as xs:integer {
    xs:integer(replace($xml-id, '[^0-9]+', ''))
};

for $person in (listPerson/person)[position() ne last()]
where local:get-id($person/@xml:id) ne (local:get-id($person/following-sibling::person[1]/@xml:id) - 1)
return local:get-id($person/@xml:id) + 1

and at http://xqueryfiddle.liberty-development.net/nbUY4kh for the sample input

<listPerson>
    <person xml:id="pe0001">
        <persName>
            <surname>Anderson</surname>
            [...]
       </persName>
    </person>
    <person xml:id="pe0003">
        <persName>
            <surname>Smith</surname>
            [...]
        </persName>
    </person>
    <person xml:id="pe0004">
        <persName>
            <surname>Another</surname>
            [...]
        </persName>
    </person>
    <person xml:id="pe0005">
        <persName>
            <surname>Another</surname>
            [...]
        </persName>
    </person>
    <person xml:id="pe0006">
        <persName>
            <surname>Another</surname>
            [...]
        </persName>
    </person>
    <person xml:id="pe0008">
        <persName>
            <surname>Another</surname>
            [...]
        </persName>
    </person>
    <person xml:id="pe0009">
        <persName>
            <surname>Another</surname>
            [...]
        </persName>
    </person>
    <person xml:id="pe0010">
        <persName>
            <surname>Another</surname>
            [...]
        </persName>
    </person>
    <person xml:id="pe0014">
        <persName>
            <surname>Another</surname>
            [...]
        </persName>
    </person>
</listPerson>

it gives

2
7
11

It might also be doable by a window clause although I am not sure Exist-Db supports that.

As for returning a new id if there are no gaps, I am not sure whether there is a more elegant or compact solution but I guess a simple check

let $new-ids :=
    for $person in (listPerson/person)[position() ne last()]
    where local:get-id($person/@xml:id) ne (local:get-id($person/following-sibling::person[1]/@xml:id) - 1)
    return local:get-id($person/@xml:id) + 1
return
    if (empty($new-ids))
    then local:get-id(listPerson/person[last()]/@xml:id) + 1
    else $new-ids

implements your verbal description: http://xqueryfiddle.liberty-development.net/nbUY4kh/2

Martin Honnen
  • 160,499
  • 6
  • 90
  • 110
  • This is exactly what I was looking for, thanks so much! – HBMCS Feb 20 '18 at 16:56
  • Martin, you're correct that eXist doesn't yet support the window clause, but I expect it will be included in a forthcoming update. Each release supports more functions and syntax from XQuery 3.x. – Joe Wicentowski Feb 20 '18 at 22:19
  • @Martin Honnen, I've tried several ways of making the code work when there are _no_ gaps, with no success. Of course if I take the '[position() ne last()]' out the 'where' clause will fail due to no second parameter passed when we are at the end of the list. Is there a was of inserting a 'fallback', so to speak or, even better, to detect when we've reached the end of the list and return the next available ID? – HBMCS Mar 02 '18 at 14:33
  • @HBMCS, you might want to ask an additional question on that if the above edit does not help. – Martin Honnen Mar 03 '18 at 10:35
  • @Martin Honnen thanks, I'll do. This was part of my original question ("I'd like eXist-db to return that ID and 'fill the gap', otherwise just output the latest available ID (i.e. pe0005)."), though I appreciate that I accepted your answer before checking that your code did everything I wanted. – HBMCS Mar 03 '18 at 12:13
  • @Martin Honnen apologies, for some reasons I wasn't seeing your edited answer, I had to refresh the cache. I've updated my original question with the code I am using and the error I now get. Thanks! – HBMCS Mar 03 '18 at 12:53
  • If you use `for $person in doc('/db/apps/app-ct/data/indices/pedb.xml')//tei:listPerson/tei:person[position() ne last()]` to access your data then of course in the expression `local:get-id(listPerson/person[last()]/@xml:id)` you also need `local:get-id(doc('/db/apps/app-ct/data/indices/pedb.xml')//tei:listPerson/tei:person[last()]/@xml:id) ` – Martin Honnen Mar 03 '18 at 13:11
  • @Martin Honnen It was still telling me the '$idnext' is not set. After many, many trials I finally managed to do what I wanted by introducing another variable passed to the following 'if'. I'm updating my original question with the final working code. – HBMCS Mar 03 '18 at 14:54
1

Another approach:

(for $key in (1 to 9999)!format-number(., '0000')
 where empty($persons[@xml:id=$key])
 return $key)[1]

Gets the first number NNNN in the range 1 to 9999 for which there is no element in $persons with xml:id equal to peNNNN.

Michael Kay
  • 156,231
  • 11
  • 92
  • 164
  • Wow, this looks very elegant! Do you mean something like: `declare function app:addPerson($node as node(), $model as map(*)) { for $person in doc('masterfile.xml')//tei:listPerson/tei:person let $newpeid := (1 to 9999)[empty($place[@xml:id='pl'||format-number(., '0000')])][1]` ? It returns a 'Type mismatch error' (function: format-number($value as xs:numeric?, $picture as xs:string) xs:string. Required type: xs:numeric, got element()). Anyway, Martin's solution silently fails if all the gaps are filled and the query needs to return the last+1. Would your approach fix that? – HBMCS Feb 21 '18 at 11:03
  • Sorry, the "." selects the wrong thing with nested predicates. Rewrote as a FLWOR. – Michael Kay Feb 21 '18 at 12:31
  • Thanks Michael. For some reasons your query takes around 6 seconds to evaluate; the `where current_id ne (next_id -1) ` evaluates almost immediately, so I'll have to stick with that. – HBMCS Feb 21 '18 at 14:44
  • The query is very dependent on the optimizer finding a good evaluation strategy so it's quite likely to vary from one implementation to another. – Michael Kay Feb 21 '18 at 16:50