0

I am trying to use the Google Indexing API to index my site's URLs via Python. I don't know much about coding, so I copied this code from a website.

From what I found when searching for this error, it seems there may be something wrong with my CSV format.

My CSV is formatted like this: "url," followed by a new row (the second column is empty).

The code is as follows:

from oauth2client.service_account import ServiceAccountCredentials
import httplib2
import json

import pandas as pd

# Service-account setup for the Indexing API; prerequisites are described at:
# https://developers.google.com/search/apis/indexing-api/v3/prereqs#header_2
# Path to the service-account JSON key file.
JSON_KEY_FILE = "/content/astute-tractor-329613-1baed60ec1c0.json"
# OAuth scope requested when the credentials are built.
SCOPES = ["https://www.googleapis.com/auth/indexing"]

# Build the credentials from the key file for the scopes above.
credentials = ServiceAccountCredentials.from_json_keyfile_name(JSON_KEY_FILE, scopes=SCOPES)
# `http` is whatever credentials.authorize() returns for a fresh httplib2.Http();
# it is the object later handed to indexURL to send the requests.
http = credentials.authorize(httplib2.Http())

def indexURL(urls, http):
    """Send a URL_UPDATED notification to the Indexing API for each URL.

    Args:
        urls: An iterable of URL strings (for example a list or a pandas
            Series). A single string is treated as one URL instead of being
            iterated character by character. Entries that are not strings
            (such as the NaN pandas yields for empty CSV cells) or that are
            blank after stripping are skipped.
        http: An object exposing an httplib2-style
            ``request(uri, method=..., body=..., headers=...)`` method that
            returns a ``(response, content)`` pair with ``content`` as bytes.

    Returns:
        None. The outcome of every notification is printed.
    """
    ENDPOINT = "https://indexing.googleapis.com/v3/urlNotifications:publish"
    HEADERS = {"Content-Type": "application/json"}

    # A bare string is itself iterable; wrap it so it is sent as one URL.
    if isinstance(urls, str):
        urls = [urls]

    for u in urls:
        # Empty CSV cells arrive as float NaN (or None), which have no strip().
        if not isinstance(u, str):
            continue
        url = u.strip()
        if not url:
            continue

        payload = json.dumps({"url": url, "type": "URL_UPDATED"})
        response, raw = http.request(
            ENDPOINT, method="POST", body=payload, headers=HEADERS
        )

        try:
            result = json.loads(raw.decode())
        except ValueError:
            # Covers both undecodable bytes and a body that is not JSON;
            # report it and keep going so one bad reply does not stop the batch.
            print("Error(HTTP {}): non-JSON response for {}".format(
                getattr(response, "status", "unknown"), url))
            continue

        if "error" in result:
            err = result["error"]
            print("Error({} - {}): {}".format(
                err.get("code"), err.get("status"), err.get("message")))
            continue

        meta = result.get("urlNotificationMetadata", {})
        latest = meta.get("latestUpdate", {})
        print("urlNotificationMetadata.url: {}".format(meta.get("url")))
        print("urlNotificationMetadata.latestUpdate.url: {}".format(latest.get("url")))
        print("urlNotificationMetadata.latestUpdate.type: {}".format(latest.get("type")))
        print("urlNotificationMetadata.latestUpdate.notifyTime: {}".format(latest.get("notifyTime")))

"""
data.csv has 2 columns: URL and date.
I just need the URL column.
"""
csv = pd.read_csv("/content/indekseerida-1.csv")
csv[["URL"]].apply(lambda x: indexURL(x, http))

It gives me these errors:

KeyError                                  Traceback (most recent call last)
<ipython-input-8-f2bea693a148> in <module>()
     44 """
     45 csv = pd.read_csv("/content/indekseerida-1.csv")
---> 46 csv[["URL"]].apply(lambda x: indexURL(x, http))

/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py in __getitem__(self, key)
   2910             if is_iterator(key):
   2911                 key = list(key)
-> 2912             indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1]
   2913 
   2914         # take() does not accept boolean indexers

/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
   1252             keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
   1253 
-> 1254         self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
   1255         return keyarr, indexer
   1256 

/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing)
   1296             if missing == len(indexer):
   1297                 axis_name = self.obj._get_axis_name(axis)
-> 1298                 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
   1299 
   1300             # We (temporarily) allow for some missing keys with .loc, except in

KeyError: "None of [Index(['URL'], dtype='object')] are in the [columns]"
Allar
  • 85
  • 9

1 Answer

0

Just to fix the error, you can replace

csv[["URL"]].apply(lambda x: indexURL(x, http))

with

# NOTE(review): csv["URL"] still requires a column named "URL"; if the CSV has
# no such header this lookup raises KeyError as well - confirm the header.
# NOTE(review): map() hands indexURL one value of the column at a time, while
# indexURL loops over its first argument; unless indexURL treats a plain
# string as a single URL, each URL is iterated character by character.
indexURLNew = lambda x: indexURL(x, http)
result_iterator = map(indexURLNew, csv["URL"])
# indexURL only prints and has no return value, so this prints a list of None.
print(list(result_iterator))

result_iterator is an iterator from which you can get the results.

Vibhor Verma
  • 161
  • 1
  • 4
  • 13