3

The official Solr Java API has a deleteByQuery operation where we can delete documents that satisfy a query. The AWS CloudSearch SDK doesn't seem to have matching functionality. Am I just not seeing the deleteByQuery equivalent, or is this something we'll need to roll our own?

Something like this:

// Step 1: find the documents that match the query. A single search returns only
// one page of hits (CloudSearch defaults to 10 per request), so raise the size or
// page through the results with setSize/setStart when more matches are expected.
SearchRequest searchRequest = new SearchRequest();
searchRequest.setQuery(queryString);
searchRequest.setReturn("id,version");
SearchResult searchResult = awsCloudSearch.search(searchRequest);

// Step 2: build one "delete" operation per hit.
JSONArray docs = new JSONArray();
for (Hit hit : searchResult.getHits().getHit()) {
    JSONObject doc = new JSONObject();
    doc.put("id", hit.getId());
    // is version necessary?
    doc.put("version", hit.getFields().get("version").get(0));
    doc.put("type", "delete");
    docs.put(doc);
}

// Step 3: upload the batch. Nothing is sent when the query matched no documents,
// because there is nothing to delete.
UploadDocumentsResult uploadResult = null;
if (docs.length() > 0) {
    // Encode once so the declared content length matches the bytes actually sent.
    byte[] payload = docs.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
    UploadDocumentsRequest uploadDocumentsRequest = new UploadDocumentsRequest();
    uploadDocumentsRequest.setDocuments(new java.io.ByteArrayInputStream(payload));
    // The document service needs to know the batch format and its size up front.
    uploadDocumentsRequest.setContentType("application/json");
    uploadDocumentsRequest.setContentLength((long) payload.length);
    uploadResult = awsCloudSearch.uploadDocuments(uploadDocumentsRequest);
}

Is this reasonable? Is there an easier way?

Eric
  • 283
  • 5
  • 12

3 Answers

2

You're correct that CloudSearch doesn't have an equivalent to deleteByQuery. Your approach looks like the next best thing.

And no, version is not necessary -- it was removed with the CloudSearch 2013-01-01 API (aka v2).

alexroussos
  • 2,671
  • 1
  • 25
  • 38
0

CloudSearch doesn't provide delete-by-query. It supports deletion in a slightly different way: build a JSON object for each document that contains only the document id (of the document to be deleted) and specifies the operation type as delete. These JSON objects can be batched together, but the batch size has to be less than 5 MB.

The following class supports this functionality: after connecting to the domain, you just pass its deleteDocs method the array of ids to be deleted:

/**
 * Small wrapper around the AWS SDK CloudSearch domain client that can search a
 * domain and upload batches of delete operations to it.
 */
class AWS_CS
{
    /** Domain client created by connect(); stays unset until connect() succeeds. */
    protected $client;

    /**
     * Creates the domain client for $domain and stores it in $this->client.
     *
     * Failures are reported by echoing the exception message (best effort);
     * in that case $this->client is left as it was.
     *
     * @param string $domain Name of the CloudSearch domain to talk to.
     */
    function connect($domain)
    {
        try{
            $csClient = CloudSearchClient::factory(array(
                            'key'          => 'YOUR_KEY',
                            'secret'      => 'YOUR_SECRET',
                            'region'     =>  'us-east-1'

                        ));

            $this->client = $csClient->getDomainClient(
                        $domain,
                        array(
                            'credentials' => $csClient->getCredentials(),
                            'scheme' => 'HTTPS'
                        )
                    );
        }
        catch(Exception $ex){
            echo "Exception: ";
            echo $ex->getMessage();
        }
    }

    /**
     * Connects to $domain and runs a Lucene-syntax query against it.
     *
     * @param string $queryStr Query in Lucene syntax.
     * @param string $domain   Name of the CloudSearch domain to search.
     * @return string JSON encoding of the "hits" section of the response
     *                (at most 100 hits are requested).
     */
    function search($queryStr, $domain){

        $this->connect($domain);

        $result = $this->client->search(array(
            'query' => $queryStr,
            'queryParser' => 'lucene',
            'size' => 100,
            'return' => '_score,_all_fields'
            ))->toArray();

        return json_encode($result['hits']);
    }

    /**
     * Uploads one batch containing an operation (by default "delete") for each id.
     *
     * @param array       $idArray   Document ids to operate on.
     * @param string      $operation Operation type written into every batch entry.
     * @param string|null $domain    Optional domain to connect to first; when omitted,
     *                               connect() must already have been called.
     * @return int Character length of the uploaded JSON batch when the service
     *             reports success, otherwise 0 (also 0 when $idArray is empty).
     * @throws RuntimeException When no domain client is available.
     */
    function deleteDocs($idArray, $operation = 'delete', $domain = null){

        if ($domain !== null) {
            $this->connect($domain);
        }
        if (!$this->client) {
            throw new RuntimeException('Not connected: call connect() or pass $domain to deleteDocs().');
        }
        if (empty($idArray)) {
            // Nothing to delete, so do not send an empty batch.
            return 0;
        }

        $batch = array();
        foreach($idArray as $id){
            $batch[] = array(
                        'type'        => $operation,
                        'id'        => $id);
        }
        $jsonObj = json_encode($batch, JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_QUOT | JSON_HEX_AMP);

        // Keep the response: the status check below needs it.
        $result = $this->client->uploadDocuments(array(
                        'documents'     => $jsonObj,
                        'contentType'     =>'application/json'
                    ));
        print_r($result);

        return $result['status'] == 'success' ? mb_strlen($jsonObj) : 0;
    }
}
0

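Modified for C#: deleting uploaded documents in CloudSearch. The first method searches for the matching documents and builds a batch of delete operations from their ids; the second method uploads that batch to the domain's endpoint.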

 /// <summary>
 /// Deletes the indexed documents whose <c>resourcename</c> field matches
 /// <paramref name="location"/>: searches for them, then uploads one delete
 /// operation per hit.
 /// </summary>
 /// <param name="location">Value to match against the <c>resourcename</c> field.</param>
 /// <exception cref="System.ArgumentException"><paramref name="location"/> is null or empty.</exception>
 public void DeleteUploadedDocuments(string location)
    {
        if (string.IsNullOrEmpty(location))
        {
            throw new System.ArgumentException("A resource location is required.", "location");
        }

        // At most 10000 hits are requested per call; call again if more documents can match.
        // NOTE(review): location is inserted into the query verbatim; escape it if it can
        // contain quotes or Lucene special characters.
        SearchRequest searchRequest = new SearchRequest
        {
            Query = "resourcename:'" + location + "'",
            QueryParser = QueryParser.Lucene,
            Size = 10000
        };
        searchClient = new AmazonCloudSearchDomainClient(
            ConfigurationManager.AppSettings["awsAccessKeyId"],
            ConfigurationManager.AppSettings["awsSecretAccessKey"],
            new AmazonCloudSearchDomainConfig { ServiceURL = ConfigurationManager.AppSettings["CloudSearchEndPoint"] });

        SearchResponse searchResponse = searchClient.Search(searchRequest);

        // A delete operation only needs the document id and the operation type.
        JArray docs = new JArray();
        foreach (Hit hit in searchResponse.Hits.Hit)
        {
            JObject doc = new JObject();
            doc.Add("id", hit.Id);
            doc.Add("type", "delete");
            docs.Add(doc);
        }

        if (docs.Count == 0)
        {
            // Nothing matched, so there is no batch to upload.
            return;
        }

        UpdateIndexDocument<JArray>(docs, ConfigurationManager.AppSettings["CloudSearchEndPoint"]);
    }

    /// <summary>
    /// Serializes <paramref name="document"/> to JSON and uploads it as a document
    /// batch to the CloudSearch endpoint at <paramref name="DocumentUrl"/>.
    /// </summary>
    /// <typeparam name="T">Type of the batch object; it is serialized with Json.NET.</typeparam>
    /// <param name="document">The batch to upload.</param>
    /// <param name="DocumentUrl">Service URL of the CloudSearch domain endpoint.</param>
    public void UpdateIndexDocument<T>(T document, string DocumentUrl)
    {
        AmazonCloudSearchDomainConfig config = new AmazonCloudSearchDomainConfig { ServiceURL = DocumentUrl };

        // Both the client and the stream are disposable; release them as soon as the upload finishes.
        using (AmazonCloudSearchDomainClient searchClient = new AmazonCloudSearchDomainClient(
            ConfigurationManager.AppSettings["awsAccessKeyId"],
            ConfigurationManager.AppSettings["awsSecretAccessKey"],
            config))
        using (Stream stream = GenerateStreamFromString(JsonConvert.SerializeObject(document)))
        {
            UploadDocumentsRequest upload = new UploadDocumentsRequest()
            {
                ContentType = "application/json",
                Documents = stream
            };
            searchClient.UploadDocuments(upload);
        }
    }
Meena
  • 1
  • As it’s currently written, your answer is unclear. Please [edit] to add additional details that will help others understand how this addresses the question asked. You can find more information on how to write good answers [in the help center](/help/how-to-answer). – Community Sep 27 '21 at 15:48