1

After the issue in the following Stackoverflow is fixed, I have another problem when I try to make a join as below. The last query takes about 250ms while the first two take only 16ms. Is there a better way to perform join between two items?

Note: You can find the test data from this link.

  let $PlGeTys :=
  /root/PlGeTys/PlGeTy[
    isOfPlCt/@href=/root/PlCts/PlCt[
      environment='AIR'
    ]/@id
  ]

let $PlSpTys :=
  /root/PlSpTys/PlSpTy[
    isOfPlGeTy/@href=$PlGeTys/@id
  ]

for  $PlGeTy in  $PlGeTys,
 $PlSpTy in  $PlSpTys
 where $PlSpTy/isOfPlGeTy/@href=$PlGeTy/@id
 return <done>{$PlGeTy, $PlSpTy }</done>

Here is the query info:

Compiling:
- applying attribute index for $PlGeTys_0/@*:id
- rewriting where clause(s)
Query:
let $PlGeTys := /root/PlGeTys/PlGeTy[ isOfPlCt/@href=/root/PlCts/PlCt[ environment='AIR' ]/@id ] let $PlSpTys := /root/PlSpTys/PlSpTy[ isOfPlGeTy/@href=$PlGeTys/@id ] for $PlGeTy in $PlGeTys, $PlSpTy in $PlSpTys where $PlSpTy/isOfPlGeTy/@href=$PlGeTy/@id return "done"
Optimized Query:
let $PlGeTys_0 := db:open-pre("Output6",0)/*:root/*:PlGeTys/*:PlGeTy[(*:isOfPlCt/@*:href = root()/*:root/*:PlCts/*:PlCt[(*:environment = "AIR")]/@*:id)] let $PlSpTys_1 := db:attribute("Output6", $PlGeTys_0/@*:id)/self::*:href/parent::*:isOfPlGeTy/parent::*:PlSpTy for $PlGeTy_2 in $PlGeTys_0 for $PlSpTy_3 in ($PlSpTys_1)[(isOfPlGeTy/@href = $PlGeTy_2/@*:id)] return "done"
Result:
- Hit(s): 3642 Items
- Updated: 0 Items
- Printed: 18209 Bytes
- Read Locking: local [Output6]
- Write Locking: none
Timing:
- Parsing: 0.77 ms
- Compiling: 0.47 ms
- Evaluating: 215.71 ms
- Printing: 0.17 ms
- Total Time: 217.11 ms
Query plan:
<QueryPlan compiled="true">
  <GFLWOR>
    <Let>
      <Var name="$PlGeTys" id="0"/>
      <IterPath>
        <DBNode name="Output6" pre="0"/>
        <IterStep axis="child" test="*:root"/>
        <IterStep axis="child" test="*:PlGeTys"/>
        <IterStep axis="child" test="*:PlGeTy">
          <CmpG op="=">
            <CachedPath>
              <IterStep axis="child" test="*:isOfPlCt"/>
              <IterStep axis="attribute" test="*:href"/>
            </CachedPath>
            <IterPath>
              <Root/>
              <IterStep axis="child" test="*:root"/>
              <IterStep axis="child" test="*:PlCts"/>
              <IterStep axis="child" test="*:PlCt">
                <CmpG op="=">
                  <CachedPath>
                    <IterStep axis="child" test="*:environment"/>
                  </CachedPath>
                  <Str value="AIR" type="xs:string"/>
                </CmpG>
              </IterStep>
              <IterStep axis="attribute" test="*:id"/>
            </IterPath>
          </CmpG>
        </IterStep>
      </IterPath>
    </Let>
    <Let>
      <Var name="$PlSpTys" id="1"/>
      <CachedPath>
        <ValueAccess data="Output6" type="ATTRIBUTE">
          <CachedPath>
            <VarRef>
              <Var name="$PlGeTys" id="0"/>
            </VarRef>
            <IterStep axis="attribute" test="*:id"/>
          </CachedPath>
        </ValueAccess>
        <IterStep axis="self" test="*:href"/>
        <IterStep axis="parent" test="*:isOfPlGeTy"/>
        <IterStep axis="parent" test="*:PlSpTy"/>
      </CachedPath>
    </Let>
    <For>
      <Var name="$PlGeTy" id="2"/>
      <VarRef>
        <Var name="$PlGeTys" id="0"/>
      </VarRef>
    </For>
    <For>
      <Var name="$PlSpTy" id="3"/>
      <IterFilter>
        <VarRef>
          <Var name="$PlSpTys" id="1"/>
        </VarRef>
        <CmpG op="=">
          <CachedPath>
            <IterStep axis="child" test="isOfPlGeTy"/>
            <IterStep axis="attribute" test="href"/>
          </CachedPath>
          <IterPath>
            <VarRef>
              <Var name="$PlGeTy" id="2"/>
            </VarRef>
            <IterStep axis="attribute" test="*:id"/>
          </IterPath>
        </CmpG>
      </IterFilter>
    </For>
    <Str value="done" type="xs:string"/>
  </GFLWOR>
</QueryPlan>

Update two:

The issue above is fixed. But when I want to use it in a local function as below, it takes about 700 ms. Am I doing something wrong?

declare function local:result($root as element(root)) as element()* {
  let $PlCts := $root/PlCts/PlCt[environment = 'AIR']/@id
  for $PlGeTy in $root/PlGeTys/PlGeTy[isOfPlCt/@href = $PlCts],
      $PlSpTy in $root/PlSpTys/PlSpTy[isOfPlGeTy/@href = $PlGeTy/@id]
  return <done>{ $PlGeTy, $PlSpTy }</done>
};

let $result := local:result(/root)
return $result

Query info;

Compiling:
- removing redundant element()* cast.
- inlining local:result#1
- inlining $root_5 as element(root)
- simplifying flwor expression
Query:
declare function local:result($root as element(root)) as element()* { let $PlCts := $root/PlCts/PlCt[environment = 'AIR']/@id for $PlGeTy in $root/PlGeTys/PlGeTy[isOfPlCt/@href = $PlCts], $PlSpTy in $root/PlSpTys/PlSpTy[isOfPlGeTy/@href = $PlGeTy/@id] return <done>{ $PlGeTy, $PlSpTy }</done> }; let $result := local:result(/root) return $result
Optimized Query:
let $PlCts_6 := ((: element(root), true :) db:open-pre("Output6",0)/*:root)/PlCts/PlCt[(environment = "AIR")]/@id let $result_4 := for $PlGeTy_7 in ((: element(root), true :) db:open-pre("Output6",0)/*:root)/PlGeTys/PlGeTy[(isOfPlCt/@href = $PlCts_6)] for $PlSpTy_8 in ((: element(root), true :) db:open-pre("Output6",0)/*:root)/PlSpTys/PlSpTy[(isOfPlGeTy/@href = $PlGeTy_7/@id)] return element done { (($PlGeTy_7, $PlSpTy_8)) } return $result_4
Result:
- Hit(s): 3642 Items
- Updated: 0 Items
- Printed: 553 KB
- Read Locking: local [Output6]
- Write Locking: none
Timing:
- Parsing: 1.41 ms
- Compiling: 2.9 ms
- Evaluating: 581.5 ms
- Printing: 8.34 ms
- Total Time: 594.15 ms
Query plan:
<QueryPlan compiled="true">
  <GFLWOR>
    <Let>
      <Var name="$PlCts" id="6"/>
      <IterPath>
        <TypeCheck type="element(root)" function="true">
          <IterPath>
            <DBNode name="Output6" pre="0"/>
            <IterStep axis="child" test="*:root"/>
          </IterPath>
        </TypeCheck>
        <IterStep axis="child" test="PlCts"/>
        <IterStep axis="child" test="PlCt">
          <CmpG op="=">
            <CachedPath>
              <IterStep axis="child" test="environment"/>
            </CachedPath>
            <Str value="AIR" type="xs:string"/>
          </CmpG>
        </IterStep>
        <IterStep axis="attribute" test="id"/>
      </IterPath>
    </Let>
    <Let>
      <Var name="$result" id="4"/>
      <GFLWOR>
        <For>
          <Var name="$PlGeTy" id="7"/>
          <IterPath>
            <TypeCheck type="element(root)" function="true">
              <IterPath>
                <DBNode name="Output6" pre="0"/>
                <IterStep axis="child" test="*:root"/>
              </IterPath>
            </TypeCheck>
            <IterStep axis="child" test="PlGeTys"/>
            <IterStep axis="child" test="PlGeTy">
              <CmpG op="=">
                <CachedPath>
                  <IterStep axis="child" test="isOfPlCt"/>
                  <IterStep axis="attribute" test="href"/>
                </CachedPath>
                <VarRef>
                  <Var name="$PlCts" id="6"/>
                </VarRef>
              </CmpG>
            </IterStep>
          </IterPath>
        </For>
        <For>
          <Var name="$PlSpTy" id="8"/>
          <IterPath>
            <TypeCheck type="element(root)" function="true">
              <IterPath>
                <DBNode name="Output6" pre="0"/>
                <IterStep axis="child" test="*:root"/>
              </IterPath>
            </TypeCheck>
            <IterStep axis="child" test="PlSpTys"/>
            <IterStep axis="child" test="PlSpTy">
              <CmpG op="=">
                <CachedPath>
                  <IterStep axis="child" test="isOfPlGeTy"/>
                  <IterStep axis="attribute" test="href"/>
                </CachedPath>
                <IterPath>
                  <VarRef>
                    <Var name="$PlGeTy" id="7"/>
                  </VarRef>
                  <IterStep axis="attribute" test="id"/>
                </IterPath>
              </CmpG>
            </IterStep>
          </IterPath>
        </For>
        <CElem>
          <QNm value="done" type="xs:QName"/>
          <List>
            <VarRef>
              <Var name="$PlGeTy" id="7"/>
            </VarRef>
            <VarRef>
              <Var name="$PlSpTy" id="8"/>
            </VarRef>
          </List>
        </CElem>
      </GFLWOR>
    </Let>
    <VarRef>
      <Var name="$result" id="4"/>
    </VarRef>
  </GFLWOR>
</QueryPlan>

Update three: Now, I have another issue when I extend the query with the additional "Cont" items, The query execution lasts about 600 ms. But without "Cont" items, it takes only 35 ms. Do you any suggestion on optimizing this query?

let $PlCts := /root/PlCts/PlCt[environment = 'AIR']/@id

for $PlGeTy in /root/PlGeTys/PlGeTy[isOfPlCt/@href = $PlCts],
    $PlSpTy in /root/PlSpTys/PlSpTy[isOfPlGeTy/@href = $PlGeTy/@id],
    $Cont in /root/Conts/Cont[@id=$PlSpTy/isOfCont/@href]
return <done>{ $PlGeTy, $PlSpTy, $Cont }</done>

This is the link of the latest XML data for testing.

The query info:

Compiling:
- applying text index for "AIR"
- applying attribute index for $PlCts_0
- applying attribute index for $PlGeTy_1/@id
- applying attribute index for $PlSpTy_2/isOfCont/@href
- inlining $PlCts_0
Query:
let $PlCts := /root/PlCts/PlCt[environment = 'AIR']/@id for $PlGeTy in /root/PlGeTys/PlGeTy[isOfPlCt/@href = $PlCts], $PlSpTy in /root/PlSpTys/PlSpTy[isOfPlGeTy/@href = $PlGeTy/@id], $Cont in /root/Conts/Cont[@id=$PlSpTy/isOfCont/@href] return <done>{ $PlGeTy, $PlSpTy, $Cont }</done>
Optimized Query:
for $PlGeTy_1 in db:attribute("Output7", db:text("Output7", "AIR")/parent::*:environment/parent::*:PlCt/@*:id)/self::*:href/parent::*:isOfPlCt/parent::*:PlGeTy for $PlSpTy_2 in db:attribute("Output7", $PlGeTy_1/@id)/self::*:href/parent::*:isOfPlGeTy/parent::*:PlSpTy for $Cont_3 in db:attribute("Output7", $PlSpTy_2/isOfCont/@href)/self::*:id/parent::*:Cont return element done { (($PlGeTy_1, $PlSpTy_2, $Cont_3)) }
Result:
- Hit(s): 3642 Items
- Updated: 0 Items
- Printed: 1159 KB
- Read Locking: local [Output7]
- Write Locking: none
Timing:
- Parsing: 0.39 ms
- Compiling: 0.68 ms
- Evaluating: 585.29 ms
- Printing: 14.36 ms
- Total Time: 600.72 ms
Query plan:
<QueryPlan compiled="true">
  <GFLWOR>
    <For>
      <Var name="$PlGeTy" id="1"/>
      <CachedPath>
        <ValueAccess data="Output7" type="ATTRIBUTE">
          <CachedPath>
            <ValueAccess data="Output7" type="TEXT" name="*:environment">
              <Str value="AIR" type="xs:string"/>
            </ValueAccess>
            <IterStep axis="parent" test="*:PlCt"/>
            <IterStep axis="attribute" test="*:id"/>
          </CachedPath>
        </ValueAccess>
        <IterStep axis="self" test="*:href"/>
        <IterStep axis="parent" test="*:isOfPlCt"/>
        <IterStep axis="parent" test="*:PlGeTy"/>
      </CachedPath>
    </For>
    <For>
      <Var name="$PlSpTy" id="2"/>
      <CachedPath>
        <ValueAccess data="Output7" type="ATTRIBUTE">
          <IterPath>
            <VarRef>
              <Var name="$PlGeTy" id="1"/>
            </VarRef>
            <IterStep axis="attribute" test="id"/>
          </IterPath>
        </ValueAccess>
        <IterStep axis="self" test="*:href"/>
        <IterStep axis="parent" test="*:isOfPlGeTy"/>
        <IterStep axis="parent" test="*:PlSpTy"/>
      </CachedPath>
    </For>
    <For>
      <Var name="$Cont" id="3"/>
      <CachedPath>
        <ValueAccess data="Output7" type="ATTRIBUTE">
          <IterPath>
            <VarRef>
              <Var name="$PlSpTy" id="2"/>
            </VarRef>
            <IterStep axis="child" test="isOfCont"/>
            <IterStep axis="attribute" test="href"/>
          </IterPath>
        </ValueAccess>
        <IterStep axis="self" test="*:id"/>
        <IterStep axis="parent" test="*:Cont"/>
      </CachedPath>
    </For>
    <CElem>
      <QNm value="done" type="xs:QName"/>
      <List>
        <VarRef>
          <Var name="$PlGeTy" id="1"/>
        </VarRef>
        <VarRef>
          <Var name="$PlSpTy" id="2"/>
        </VarRef>
        <VarRef>
          <Var name="$Cont" id="3"/>
        </VarRef>
      </List>
    </CElem>
  </GFLWOR>
</QueryPlan>
Community
  • 1
  • 1

1 Answers1

1

In those for loops, the results of the two "large" XPath expressions are already cached by the optimizer. Furthermore, you're actually comparing the href/id attributes twice.

Cleaning up the query removing this double effort reduces the execution time by about 90%.

let $PlCts := /root/PlCts/PlCt[environment = 'AIR']/@id
for $PlGeTy in /root/PlGeTys/PlGeTy[isOfPlCt/@href = $PlCts],
    $PlSpTy in /root/PlSpTys/PlSpTy[isOfPlGeTy/@href = $PlGeTy/@id]
return <done>{ $PlGeTy, $PlSpTy }</done>
Jens Erat
  • 37,523
  • 16
  • 80
  • 96
  • In return part, I will also need to get the data from PlGeTy. Is it possible with your solution? – user1587140 Jun 16 '15 at 13:07
  • No, you'd need to search it again (well, you already have the ID, this might still be faster because the attribute will be retrieved using the attribute index, which is pretty fast). Do you also need information from `$PlSpTys`? If not, you could loop over this instead. Please include such requirements in the question, as it limits appropriate answers. – Jens Erat Jun 16 '15 at 13:13
  • I have updated the "return" part. Sorry for confusion. @JensErat – user1587140 Jun 16 '15 at 13:15
  • Actually, the cleaned up query which at the same time is more general (with respect to having both variables and being able to perform arbitrary further analysis with them) performs even better than the one I had before. – Jens Erat Jun 16 '15 at 13:47
  • After applying your solution to the my real data, the query time decreased to 60ms from 6 seconds. Thank you very much;) @JensErat – user1587140 Jun 17 '15 at 05:10
  • BTW, when I put this query into a local function, the query takes about 700ms. Could you have a look at the Update two? @JensErat – user1587140 Jun 17 '15 at 07:39
  • I guess this is because the path expressions don't start at the root element any more, but at some arbitrary one. I don't know the BaseX query optimizer and compiler well enough to give a bold statement here, though; the BaseX mailing list might be a better place to ask this query optimizer specific question. – Jens Erat Jun 17 '15 at 08:27
  • I sent the question to the BaseX team. When I get solution, I will update the topic. – user1587140 Jun 17 '15 at 09:19
  • Is there anybody there who can help on the issue for **Update three**? @JensErat – user1587140 Jun 17 '15 at 11:19
  • 2
    This is no discussion forum, repeatedly changing a question and/or adding follow-up questions is not very welcome here. Anyway, you should have a look at cost estimation and consider whether this is not actually expected runtime. – Jens Erat Jun 17 '15 at 11:26
  • I am very new in Stackoverflow and also XQuery. I will keep it in my mind. – user1587140 Jun 17 '15 at 11:54