1

I have created two documents in Solr:

<doc>
  <float name="score">1.00711</float>
  <str name="_id">ejn01:2560000000075596</str>
  <str name="title">Journal of neurology research</str>
</doc>
<doc>
  <float name="score">1.00711</float>
  <str name="_id">ejn01:954925518616</str>
  <str name="title">Journal of neurology</str>
</doc>

The field "title" has the following definition in schema.xml:

<!-- Field type "utf8text": a solr.TextField whose index-time and query-time
     analyzers are configured with the same tokenizer and filter chain.
     omitNorms="false" means norms are not omitted for fields of this type. -->
<fieldType name="utf8text" class="solr.TextField" positionIncrementGap="100" omitNorms="false">
  <!-- Index-time analysis chain. -->
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="1024"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ASCIIFoldingFilterFactory"/>
    <!-- Synonyms are read from index_synonyms.txt with expand="true".
         NOTE(review): ignoreCase="false" is set although this filter runs after
         the lowercase filter; confirm the entries in index_synonyms.txt are lowercase. -->
    <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" format="solr" ignoreCase="false" expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
  </analyzer>
  <!-- Query-time analysis chain; same tokenizer, filters and synonyms file as the index-time chain. -->
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory" maxTokenLength="1024"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ASCIIFoldingFilterFactory"/>
    <!-- NOTE(review): the same synonyms file is also expanded at query time; confirm
         that applying synonym expansion on both sides is intended. -->
    <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" format="solr" ignoreCase="false" expand="true" tokenizerFactory="solr.WhitespaceTokenizerFactory"/>
  </analyzer>
</fieldType>

If I use the query "journal of neurology", both documents get the same score, although the second document is a more exact match. Supplying a phrase query does not fix the issue. I also see that the computed fieldNorm is "0.5" for both documents. Does this have something to do with the loss of precision when the length norm is stored in a single byte?

These are all the supplied parameters (defaults in solrconfig.xml):

<str name="lowercaseOperators">false</str>
<str name="mm">-10%</str>
<str name="pf">author^3 title^2</str>
<str name="sort">score desc</str>
<arr name="bq">
  <str>source:ser01^10</str>
  <str>source:ejn01^10</str>
  <str>(*:* -type:article)^999</str>
</arr>
<str name="echoParams">all</str>
<str name="df">all</str>
<str name="tie">0</str>
<str name="qf">
author^15 title^10 subject^1 summary^1 library^1 location^1 publisher^1 place_published^1 issn^1 isbn^1
</str>
<str name="q.alt">*:*</str>
<str name="ps">2</str>
<str name="defType">edismax</str>
<str name="q">journal of neurology</str>
<str name="echoParams">all</str>
<str name="sort">score desc</str>

Looking at the computation of the score, I see no difference at all between them:

<str name="ejn01:2560000000075596">
1.0071099 = (MATCH) sum of:
  0.0053001107 = (MATCH) sum of:
    0.0017667036 = (MATCH) max of:
      0.0017667036 = (MATCH) weight(title:journal^10.0 in 0), product of:
        0.005943145 = queryWeight(title:journal^10.0), product of:
          10.0 = boost
          0.5945349 = idf(docFreq=2, maxDocs=2)
          9.996294E-4 = queryNorm
        0.29726744 = (MATCH) fieldWeight(title:journal in 0), product of:
          1.0 = tf(termFreq(title:journal)=1)
          0.5945349 = idf(docFreq=2, maxDocs=2)
          0.5 = fieldNorm(field=title, doc=0)
    0.0017667036 = (MATCH) max of:
      0.0017667036 = (MATCH) weight(title:of^10.0 in 0), product of:
        0.005943145 = queryWeight(title:of^10.0), product of:
          10.0 = boost
          0.5945349 = idf(docFreq=2, maxDocs=2)
          9.996294E-4 = queryNorm
        0.29726744 = (MATCH) fieldWeight(title:of in 0), product of:
          1.0 = tf(termFreq(title:of)=1)
          0.5945349 = idf(docFreq=2, maxDocs=2)
          0.5 = fieldNorm(field=title, doc=0)
    0.0017667036 = (MATCH) max of:
      0.0017667036 = (MATCH) weight(title:neurology^10.0 in 0), product of:
        0.005943145 = queryWeight(title:neurology^10.0), product of:
          10.0 = boost
          0.5945349 = idf(docFreq=2, maxDocs=2)
          9.996294E-4 = queryNorm
        0.29726744 = (MATCH) fieldWeight(title:neurology in 0), product of:
          1.0 = tf(termFreq(title:neurology)=1)
          0.5945349 = idf(docFreq=2, maxDocs=2)
          0.5 = fieldNorm(field=title, doc=0)
  0.0031800664 = (MATCH) max of:
    0.0031800664 = (MATCH) weight(title:"journal of neurology"~2^2.0 in 0), product of:
      0.0035658872 = queryWeight(title:"journal of neurology"~2^2.0), product of:
        2.0 = boost
        1.7836046 = idf(title: journal=2 of=2 neurology=2)
        9.996294E-4 = queryNorm
      0.8918023 = fieldWeight(title:"journal of neurology" in 0), product of:
        1.0 = tf(phraseFreq=1.0)
        1.7836046 = idf(title: journal=2 of=2 neurology=2)
        0.5 = fieldNorm(field=title, doc=0)
  0.99862975 = (MATCH) sum of:
    0.99862975 = (MATCH) MatchAllDocsQuery, product of:
      0.99862975 = queryNorm
</str>
<str name="ejn01:954925518616">
1.0071099 = (MATCH) sum of:
  0.0053001107 = (MATCH) sum of:
    0.0017667036 = (MATCH) max of:
      0.0017667036 = (MATCH) weight(title:journal^10.0 in 1), product of:
        0.005943145 = queryWeight(title:journal^10.0), product of:
          10.0 = boost
          0.5945349 = idf(docFreq=2, maxDocs=2)
          9.996294E-4 = queryNorm
        0.29726744 = (MATCH) fieldWeight(title:journal in 1), product of:
          1.0 = tf(termFreq(title:journal)=1)
          0.5945349 = idf(docFreq=2, maxDocs=2)
          0.5 = fieldNorm(field=title, doc=1)
    0.0017667036 = (MATCH) max of:
      0.0017667036 = (MATCH) weight(title:of^10.0 in 1), product of:
        0.005943145 = queryWeight(title:of^10.0), product of:
          10.0 = boost
          0.5945349 = idf(docFreq=2, maxDocs=2)
          9.996294E-4 = queryNorm
        0.29726744 = (MATCH) fieldWeight(title:of in 1), product of:
          1.0 = tf(termFreq(title:of)=1)
          0.5945349 = idf(docFreq=2, maxDocs=2)
          0.5 = fieldNorm(field=title, doc=1)
    0.0017667036 = (MATCH) max of:
      0.0017667036 = (MATCH) weight(title:neurology^10.0 in 1), product of:
        0.005943145 = queryWeight(title:neurology^10.0), product of:
          10.0 = boost
          0.5945349 = idf(docFreq=2, maxDocs=2)
          9.996294E-4 = queryNorm
        0.29726744 = (MATCH) fieldWeight(title:neurology in 1), product of:
          1.0 = tf(termFreq(title:neurology)=1)
          0.5945349 = idf(docFreq=2, maxDocs=2)
          0.5 = fieldNorm(field=title, doc=1)
  0.0031800664 = (MATCH) max of:
    0.0031800664 = (MATCH) weight(title:"journal of neurology"~2^2.0 in 1), product of:
      0.0035658872 = queryWeight(title:"journal of neurology"~2^2.0), product of:
        2.0 = boost
        1.7836046 = idf(title: journal=2 of=2 neurology=2)
        9.996294E-4 = queryNorm
      0.8918023 = fieldWeight(title:"journal of neurology" in 1), product of:
        1.0 = tf(phraseFreq=1.0)
        1.7836046 = idf(title: journal=2 of=2 neurology=2)
        0.5 = fieldNorm(field=title, doc=1)
  0.99862975 = (MATCH) sum of:
    0.99862975 = (MATCH) MatchAllDocsQuery, product of:
      0.99862975 = queryNorm
</str>

Any idea why the fieldNorm is the same for both documents?

Thanks in advance!

0 Answers0