(edit: solr-6.6.0 on ubuntu)
I'm trying to use Solr's DataImportHandler to index a MySQL database, which includes blob entries with RTF files. For this I'm using the FieldStreamDataSource, as specified in these answers:
How do I index Rich Format Documents in Blobs
Unsupported type Exception on Importing Documents from Database
While all other non-Blob fields get indexed, I keep getting the following Java runtime error from the FieldStreamDataSource.getData() method, as it appears in the Solr log files:
Caused by: java.lang.RuntimeException: unsupported type : class java.lang.String at org.apache.solr.handler.dataimport.FieldStreamDataSource.getData(FieldStreamDataSource.java:77) at org.apache.solr.handler.dataimport.FieldStreamDataSource.getData(FieldStreamDataSource.java:47) at org.apache.solr.handler.dataimport.DebugLogger$2.getData(DebugLogger.java:187)
This refers to these lines of the Java class (see arrows '<==='):
@Override
public InputStream getData(String query) {
Object o = wrapper.getVariableResolver().resolve(dataField);
if (o == null) {
throw new DataImportHandlerException(SEVERE, "No field available for name : " + dataField);
} else if (o instanceof Blob) { // <========= XXX
Blob blob = (Blob) o;
try {
return blob.getBinaryStream();
} catch (SQLException sqle) {
LOG.info("Unable to get data from BLOB");
return null;
}
} else if (o instanceof byte[]) {
byte[] bytes = (byte[]) o;
return new ByteArrayInputStream(bytes);
} else {
throw new RuntimeException("unsupported type : " + o.getClass()); // <========= XXX
}
Which should mean that the getData() method is not getting a Blob type, but a String.
I know it has been suggested in many other threads that this error arises when the blob value is null in the database:
Unsupported type Exception on Importing Documents from Database
Solr DIH Throwing Error Unsupported Type Class Java.Lang.String
However, that is not the case here, as I have only 5 entries in this test DB, none of which has a null value. Also, I am checking the debug output from the Solr DIH interface, which shows that the Blob value was indeed retrieved from the DB (see arrows '<==='):
"verbose-output": [
"entity:reports1",
[
"document#1",
[
"query",
"SELECT id, institute, exam_date, age, acc, pacs FROM reports",
"time-taken",
"0:0:0.8",
null,
"----------- row #1-------------",
"id",
"1",
"institute",
"RADIOLOGY",
"age",
"68",
"acc",
"165184654",
"pacs",
"233215",
"exame_date",
"2016-02-05T00:00:00Z",
null,
"---------------------------------------------",
"entity:reports2",
[
"query",
"SELECT report FROM reports WHERE id='1'",
"time-taken",
"0:0:0.6",
null,
"----------- row #1-------------",
"report", // <========= COLUMN NAME RETURNED FROM THE SQL SELECT
"e1xydGYxXGFkZWZ[...]", // <========= VALUE RETURNED FROM THE SQL SELECT
null,
"---------------------------------------------",
"entity:report",
[
"query",
"report",
"EXCEPTION", // <========== EXCEPTION THROWN
"java.lang.RuntimeException: unsupported type : class java.lang.String\n\tat org.apache.solr.handler.dataimport.FieldStreamDataSource.getData(FieldStreamDataSource.java:77)\n\tat org.apache.solr.handler.dataimport.FieldStreamDataSource.getData(FieldStreamDataSource.java:47)\n\tat org.apache.solr.handler.dataimport.DebugLogger$2.getData(DebugLogger.java:187)\n\tat org.apache.solr.handler.dataimport.TikaEntityProcessor.nextRow(TikaEntityProcessor.java:128)\n\tat org.apache.solr.handler.dataimport.EntityProcessorWrapper.nextRow(EntityProcessorWrapper.java:267)\n\tat org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:475)\n\tat org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:516)\n\tat org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:516)\n\tat org.apache.solr.handler.dataimport.DocBuilder.buildDocument(DocBuilder.java:414)\n\tat org.apache.solr.handler.dataimport.DocBuilder.doFullDump(DocBuilder.java:329)\n\tat org.apache.solr.handler.dataimport.DocBuilder.execute(DocBuilder.java:232)\n\tat org.apache.solr.handler.dataimport.DataImporter.doFullImport(DataImporter.java:415)\n\tat org.apache.solr.handler.dataimport.DataImporter.runCmd(DataImporter.java:474)\n\tat org.apache.solr.handler.dataimport.DataImportHandler.handleRequestBody(DataImportHandler.java:180)\n\tat org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173)\n\tat org.apache.solr.core.SolrCore.execute(SolrCore.java:2477)\n\tat org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723)\n\tat org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529)\n\tat org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361)\n\tat org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305)\n\tat org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691)\n\tat 
org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)\n\tat org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)\n\tat org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226)\n\tat org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180)\n\tat org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512)\n\tat org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185)\n\tat org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112)\n\tat org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)\n\tat org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213)\n\tat org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119)\n\tat org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134)\n\tat org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)\n\tat org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134)\n\tat org.eclipse.jetty.server.Server.handle(Server.java:534)\n\tat org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320)\n\tat org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251)\n\tat org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273)\n\tat org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95)\n\tat org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93)\n\tat org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303)\n\tat org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.produceConsume(ExecuteProduceConsume.java:148)\n\tat 
org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.run(ExecuteProduceConsume.java:136)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:671)\n\tat org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:589)\n\tat java.lang.Thread.run(Thread.java:748)\n",
"time-taken",
"0:0:0.0"
]
]
],
Here is my data-config.xml
<dataConfig>
<dataSource
name="db"
type="JdbcDataSource"
driver="com.mysql.jdbc.Driver"
url="jdbc:mysql://localhost:3306/RIS"
user="root"
password="********"/>
<dataSource name="fieldStream" type="FieldStreamDataSource"/>
<document>
<entity
name="reports1"
query="SELECT id, institute, exam_date, age, acc, pacs FROM reports"
dataSource="db"
>
<field column="id" name="id"/>
<field column="institute" name="institute"/>
<field column="exam_date" name="exam_date"/>
<field column="age" name="age"/>
<field column="acc" name="acc"/>
<field column="pacs" name="pacs"/>
<entity
name="reports2"
query="SELECT report FROM reports WHERE id='${reports1.id}'"
dataSource="db"
>
<entity
name="report"
dataSource="fieldStream"
processor="TikaEntityProcessor"
url="report"
dataField="reports2.REPORT"
format="text"
onError="continue">
<field column="text" name="report"/>
</entity>
</entity>
</entity>
</document>
</dataConfig>
So it appears to me that the value retrieved from the blob column of the DB is being passed or recognized as a String, and not as a Blob type. Can anyone help me see if I'm doing something wrong? I have searched everywhere and can't find the solution. :/
Many thanks