I am evaluating Airbyte to ingest data from multiple sources, one of them is a servicesnow API, I developed a connector using Airbyte CDK.
I am trying to implement incremental streams or slides to improve data recovery performance. since pulling everything in a single block takes a lot of resources and the aws instance crashes.
I followed other sources and set state_checkpoint_interval but it keeps bringing back all the logs in one go, it should be 100 by 100 I think so.
Does anyone have experience working with it?
I followed this documentation but I do not have results: Incremental-Stream https://docs.airbyte.com/connector-development/cdk-python/incremental-stream
Stream-slices https://docs.airbyte.com/connector-development/cdk-python/stream-slices
My source code:
class ServicesnowApi(HttpStream):
url_base = "https://xxx.service-now.com/api/now/v1/"
# Set this as a noop.
primary_key = None
# Save the state every 100 records
state_checkpoint_interval = 100
page_size = 100
cursor_field = "sys_updated_on"
def __init__(self, limit: str, sys_created_from: str, **kwargs):
super().__init__(**kwargs)
# Here's where we set the variable from our input to pass it down to the source.
self.limit = limit
self.sys_created_from = sys_created_from
def path(self, **kwargs) -> str:
# This defines the path to the endpoint that we want to hit.
limit = self.limit
sys_created_from = self.sys_created_from
return f"table/incident?sysparm_offset=0&sysparm_limit={limit}&sysparm_query=sys_created_on>={sys_created_from} 00:00^active=ISNOTEMPTY"
def request_params(
self,
stream_state: Mapping[str, Any],
stream_slice: Mapping[str, Any] = None,
next_page_token: Mapping[str, Any] = None,
) -> MutableMapping[str, Any]:
limit = self.limit
sys_created_from = self.sys_created_from
return {"limit": limit, "sys_created_from":sys_created_from}
def parse_response(
self,
response: requests.Response,
stream_state: Mapping[str, Any],
stream_slice: Mapping[str, Any] = None,
next_page_token: Mapping[str, Any] = None,
) -> Iterable[Mapping]:
result = response.json()['result']
return result
def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
return None