1

I'm using Solr 5.2.1 and I have a field "url" that needs to be unique. I have followed https://wiki.apache.org/solr/Deduplication, but I can still update the index with the same url many times; Solr deduplication does not stop that from happening.

I have tried with overwriteDupes=false and overwriteDupes=true, but to no avail.

I am sure something is wrong, but I just can't figure out what.

solrconfig.xml

<?xml version="1.0" encoding="UTF-8" ?>

<!-- Lucene compatibility level requested for this core.
     NOTE(review): the surrounding text says Solr 5.2.1 is in use while this
     asks for LUCENE_42 behaviour; confirm the mismatch is intentional. -->
<luceneMatchVersion>LUCENE_42</luceneMatchVersion>
<!-- Directory implementation: taken from the solr.directoryFactory property,
     falling back to solr.StandardDirectoryFactory when it is not set. -->
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.StandardDirectoryFactory}" />

<!-- Codec factory that lets the schema choose per-field codecs. -->
<codecFactory name="CodecFactory" class="solr.SchemaCodecFactory" />

<!-- These directories are resolved relative to this XML file.
     They pull in the extraction contrib libraries and the solr-cell jar. -->
<lib dir="../../../solr/contrib/extraction/lib" />
<lib dir="../../../solr/dist/" regex="solr-cell-\d.*\.jar" />

<!-- Default query handler (default="true"). -->
<requestHandler name="standard" class="solr.StandardRequestHandler" default="true" />

<!-- Update handler. Unless a request overrides it, every update sent to
     /update is run through the update processor chain named "dedupe".
     A chain with exactly that name must be defined in this file. -->
<requestHandler name="/update" class="solr.UpdateRequestHandler">
    <lst name="defaults">
        <str name="update.chain">dedupe</str>
    </lst>
</requestHandler>

<!-- Registers the admin handlers under /admin/. -->
<requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
<!-- Default query for the admin section: match all documents. -->
<admin>
    <defaultQuery>*:*</defaultQuery>
</admin>

<!--
<requestHandler name="/update/extract" class="solr.extraction.ExtractingRequestHandler">
    <lst name="defaults">
        <str name="captureAttr">true</str>
        <str name="fmap.content">text</str>
        <str name="lowernames">true</str>
    </lst>
</requestHandler>

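-->

<updateRequestProcessorChain name="dedupe">
    <processor class="solr.processor.SignatureUpdateProcessorFactory">
        <bool name="enabled">true</bool>
        <str name="signatureField">id</str>
        <bool name="overwriteDupes">true</bool>
        <str name="fields">url</str>
        <str name="signatureClass">solr.processor.Lookup3Signature</str>
        <!--<str name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>-->
    </processor>
    <processor class="solr.LogUpdateProcessorFactory" />
    <processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>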

schema.xml

<?xml version="1.0" ?>

<!-- Field types available to the field declarations of this schema.
     Element names are case-sensitive in XML, so both entries use the same
     "fieldType" spelling. -->
<types>
    <!-- Plain string type backed by solr.StrField. -->
    <fieldType name="string" class="solr.StrField" />
    <!-- UUID type backed by solr.UUIDField; indexed by default. -->
    <fieldType name="uuid" class="solr.UUIDField" indexed="true" />
</types>

<!-- Field declarations.
     NOTE(review): no <uniqueKey> element is visible in this excerpt; confirm
     whether one is declared, since the dedupe chain writes its signature
     into a field of this schema. -->
<fields>

    <!-- Single-valued, indexed and stored string. -->
    <field name="id" type="string" indexed="true" stored="true" multiValued="false" />
    <!-- These fields set no indexed/stored attributes of their own and rely on
         whatever the "string" type provides. -->
    <field name="url" type="string" />
    <field name="title" type="string" />
    <field name="summary" type="string" />
    <field name="description" type="string" />
    <!-- Catch-all pattern: any field name not declared explicitly is accepted
         as a multi-valued, indexed and stored string. -->
    <dynamicField name="*" type="string" multiValued="true" indexed="true" stored="true" />
    <!-- Copies the content of every field into "fulltext". -->
    <copyField source="*" dest="fulltext" />
    <!-- Destination of the copyField above; multi-valued because it receives
         values from many source fields. -->
    <field name="fulltext" type="string" multiValued="true" />
</fields>

<!-- Field searched when a query does not name a field explicitly. -->
<defaultSearchField>fulltext</defaultSearchField>

<!-- Query terms are combined with OR unless the query specifies an operator. -->
<solrQueryParser defaultOperator="OR" />

1 Answers1

0

I got it working by copying the data_driven_schema_configs folder and modifying the schema.xml and solrconfig.xml according to https://wiki.apache.org/solr/Deduplication