0

I am trying to implement full-text search over my JSON documents. I want to search by title.

My json is as follows:

{  
   "release":{  
      "genres":{  
         "genre":"Electronic"
      },
      "identifiers":{  
         "identifier":[  
            {  
               "description":"A-Side",
               "value":"MPO SK 032 A1 G PHRUPMASTERGENERAL T27 LONDON",
               "type":"Matrix / Runout"
            },
            {  
               "description":"B-Side",
               "value":"MPO SK 032 B1",
               "type":"Matrix / Runout"
            },
            {  
               "description":"C-Side",
               "value":"MPO SK 032 C1",
               "type":"Matrix / Runout"
            },
            {  
               "description":"D-Side",
               "value":"MPO SK 032 D1",
               "type":"Matrix / Runout"
            }
         ]
      },
      "status":"Accepted",
      "videos":{  
         "video":[  
            {  
               "title":"The Persuader (Jesper Dahlbäck) - Östermalm",
               "duration":290,
               "description":"The Persuader (Jesper Dahlbäck) - Östermalm",
               "src":"http://www.youtube.com/watch?v=AHuQWcylaU4",
               "embed":true
            },
            {  
               "title":"The Persuader - Vasastaden",
               "duration":380,
               "description":"The Persuader - Vasastaden",
               "src":"http://www.youtube.com/watch?v=5rA8CTKKEP4",
               "embed":true
            },
            {  
               "title":"The Persuader-Stockholm-Sodermalm",
               "duration":335,
               "description":"The Persuader-Stockholm-Sodermalm",
               "src":"http://www.youtube.com/watch?v=QVdDhOnoR8k",
               "embed":true
            },
            {  
               "title":"The Persuader - Norrmalm",
               "duration":289,
               "description":"The Persuader - Norrmalm",
               "src":"http://www.youtube.com/watch?v=hy47qgyJeG0",
               "embed":true
            }
         ]
      },
      "labels":{  
         "label":{  
            "catno":"SK032",
            "name":"Svek"
         }
      },
      "companies":{  
         "company":[  
            {  
               "id":271046,
               "catno":"",
               "name":"The Globe Studios",
               "entity_type_name":"Recorded At",
               "resource_url":"http://api.discogs.com/labels/271046",
               "entity_type":23
            },
            {  
               "id":56025,
               "catno":"",
               "name":"MPO",
               "entity_type_name":"Pressed By",
               "resource_url":"http://api.discogs.com/labels/56025",
               "entity_type":17
            }
         ]
      },
      "styles":{  
         "style":"Deep House"
      },
      "formats":{  
         "format":{  
            "text":"",
            "name":"Vinyl",
            "qty":2,
            "descriptions":{  
               "description":[  
                  "12\"",
                  "33 ⅓ RPM"
               ]
            }
         }
      },
      "country":"Sweden",
      "id":1,
      "released":"1999-03-00",
      "artists":{  
         "artist":{  
            "id":1,
            "anv":"",
            "name":"Persuader, The",
            "role":"",
            "tracks":"",
            "join":""
         }
      },
      "title":"Stockholm",
      "master_id":5427,
      "tracklist":{  
         "track":[  
            {  
               "position":"A",
               "duration":"4:45",
               "title":"Östermalm"
            },
            {  
               "position":"B1",
               "duration":"6:11",
               "title":"Vasastaden"
            },
            {  
               "position":"B2",
               "duration":"2:49",
               "title":"Kungsholmen"
            },
            {  
               "position":"C1",
               "duration":"5:38",
               "title":"Södermalm"
            },
            {  
               "position":"C2",
               "duration":"4:52",
               "title":"Norrmalm"
            },
            {  
               "position":"D",
               "duration":"5:16",
               "title":"Gamla Stan"
            }
         ]
      },
      "data_quality":"Complete and Correct",
      "extraartists":{  
         "artist":{  
            "id":239,
            "anv":"",
            "name":"Jesper Dahlbäck",
            "role":"Music By [All Tracks By]",
            "tracks":"",
            "join":""
         }
      },
      "notes":"The song titles are the names of Stockholm's districts."
   }
}

I have indexed the above document in Elasticsearch using the following command:

# Register a CouchDB river that feeds the "smalldiscogs" database into the
# "smalldiscogs" index. The JSON body is wrapped in single quotes so that the
# double quotes inside it reach the server unchanged (doubling them inside a
# double-quoted shell string strips them and yields invalid JSON).
curl -X PUT "http://127.0.0.1:9200/_river/smalldiscogs/_meta" -d '
{
    "type": "couchdb",
    "couchdb": {
        "host": "localhost",
        "port": 5984,
        "db": "smalldiscogs",
        "filter": null
    },
    "index": {
        "index": "smalldiscogs",
        "type": "smalldiscogs",
        "bulk_size": "100",
        "bulk_timeout": "10000ms"
    }
}'

My question is: how can I index only the title field? Please help.

  • Do you refer to the top-level `title` field, the ones in `videos.video.title` or the ones in `tracklist.track.title`? – Val Jun 18 '15 at 03:51
  • Oops. Sorry about that. I was referring to release.title. In this case, it would be "Stockholm". Just below artists tag. –  Jun 18 '15 at 07:32

1 Answer

1

Since you're using the elasticsearch-river-couchdb plugin, you can configure the river with a groovy script that will remove all the fields but the ones you specify.

An example is given in the official documentation of the plugin, and it simply amounts to adding the following `script` to the `couchdb` object:

# Register the CouchDB river with a script that keeps only release.title.
# The double quotes around "release" are backslash-escaped because the script
# is itself a JSON string value; left unescaped they would terminate the
# string early and make the request body invalid JSON.
curl -XPUT 'http://127.0.0.1:9200/_river/smalldiscogs/_meta' -d '
{
    "type": "couchdb",
    "couchdb": {
        "host": "localhost",
        "port": 5984,
        "db": "smalldiscogs",
        "filter": null,
        "script": "var title = ctx.doc.release.title; ctx.doc.remove(\"release\"); ctx.doc.title = title;"
    },
    "index": {
        "index": "smalldiscogs",
        "type": "smalldiscogs",
        "bulk_size": "100",
        "bulk_timeout": "10000ms"
    }
}'

The script looks like this and will simply remove from the document all the fields except the title one:

// Reduce the document to a single top-level "title" field taken from release.title.
var title = ctx.doc.release.title;     // remember the title
ctx.doc.remove("release");             // drop the entire "release" object
ctx.doc.title = title;                 // re-add only the title field
Val
  • 207,596
  • 13
  • 358
  • 360