Create a Microsoft Purview scan with the Python SDK, scoped to a specific folder path of an ADLS Gen2 resource.
You can use the Python SDK code below to create and run a scan that is limited to a
specific scope (folder path) of an ADLS Gen2 data source registered in Microsoft Purview.
Code:
import uuid
from azure.purview.scanning import PurviewScanningClient
from azure.purview.administration.account import PurviewAccountClient
from azure.identity import DefaultAzureCredential
from azure.core.exceptions import HttpResponseError
# Placeholders to fill in before running the script.

# Name of the data source as registered in Microsoft Purview.
ds_name = "<name of your registered data source>"
# Name under which the scan is created (or updated if it already exists).
scan_name = "<name of the scan you want to define>"
# Purview account name; it is interpolated into the client endpoint URLs.
reference_name_purview = "<name of your Microsoft Purview account>"
# Friendly (display) name of the collection; the script later matches it
# case-insensitively against each collection's "friendlyName".
collection_name = "<name of the collection where you will be creating the scan>"
def get_credentials():
    """Build the Azure credential used by the Purview clients.

    Returns:
        A newly constructed ``DefaultAzureCredential``.
    """
    return DefaultAzureCredential()
def get_purview_client():
    """Create the scanning client for the configured Purview account.

    The endpoint is derived from the module-level ``reference_name_purview``.

    Returns:
        A ``PurviewScanningClient`` bound to the account's scan endpoint,
        constructed with ``logging_enable=True``.
    """
    credential = get_credentials()
    scan_endpoint = f"https://{reference_name_purview}.scan.purview.azure.com"
    return PurviewScanningClient(
        endpoint=scan_endpoint,
        credential=credential,
        logging_enable=True,
    )
def get_admin_client():
    """Create the account (administration) client for the Purview account.

    The endpoint is derived from the module-level ``reference_name_purview``.

    Returns:
        A ``PurviewAccountClient`` bound to the account endpoint,
        constructed with ``logging_enable=True``.
    """
    credential = get_credentials()
    account_endpoint = f"https://{reference_name_purview}.purview.azure.com/"
    return PurviewAccountClient(
        endpoint=account_endpoint,
        credential=credential,
        logging_enable=True,
    )
# --- Step 1: resolve the collection's friendly name to its reference name ---
try:
    admin_client = get_admin_client()
except ValueError as e:
    # Every later step needs the admin client; stop cleanly instead of
    # falling through to a NameError on `admin_client`.
    print(e)
    raise SystemExit(1)

# Lower-case the requested name once, so the comparison target does not
# change when `collection_name` is overwritten with the resolved name.
requested_collection = collection_name.lower()
for collection in admin_client.collections.list_collections():
    if collection["friendlyName"].lower() == requested_collection:
        collection_name = collection["name"]
        break  # first match wins; no need to inspect the remaining collections
# If nothing matched, `collection_name` is used below exactly as configured.

# --- Step 2: describe the scan, scoped to one file system / directory path ---
body_input = {
    "kind": "AdlsGen2Msi",
    "properties": {
        "scanRulesetName": "AdlsGen2",
        "scanRulesetType": "System",
        "collection": {
            "referenceName": collection_name,
            "type": "CollectionReference",
        },
        # Example scope values; replace with your own file system and path.
        "resourceSet": {
            "kind": "AdlsGen2ResourceSet",
            "properties": {
                "fileSystem": "filesystem1",
                "directory": "directory1/day.csv",
            },
        },
    },
}

# --- Step 3: create (or update) the scan definition ---
# The client is built outside the try block so that `client` is always bound
# by the time the scan is started in step 4.
client = get_purview_client()
try:
    response = client.scans.create_or_update(
        data_source_name=ds_name,
        scan_name=scan_name,
        body=body_input,
    )
except HttpResponseError as e:
    # Do not try to run a scan whose definition was rejected by the service.
    print(e)
    raise SystemExit(1)
print(response)
print(f"Scan {scan_name} successfully created or updated")

# --- Step 4: trigger a run of the scan ---
run_id = uuid.uuid4()  # unique id for this scan run
try:
    response = client.scan_result.run_scan(
        data_source_name=ds_name,
        scan_name=scan_name,
        run_id=run_id,
    )
except HttpResponseError as e:
    print(e)
else:
    print(response)
    print(f"Scan {scan_name} successfully started")
Output:
{'properties': {'scanRulesetName': 'AdlsGen2', 'scanRulesetType': 'System', 'collection': {'lastModifiedAt': '2023-07-12T07:19:49.5851Z', 'referenceName': 'xxxx', 'type': 'CollectionReference'}, 'createdAt': '2023-07-12T07:19:49.5851Z', 'lastModifiedAt': '2023-07-12T07:19:49.5851Z'}, 'kind': 'AdlsGen2Msi', 'creationType': 'Manual', 'id': 'datasources/demodatalake567/scans/scan326', 'name': 'scan326'}
Scan scan326 successfully created or updated
{'scanResultId': 'xxxx', 'startTime': '2023-07-12T07:19:55.1233949Z', 'endTime': None, 'status': 'Accepted', 'error': None}
Scan scan326 successfully started

Portal:

Reference:
Tutorial: How to use Microsoft Purview Python SDK - Microsoft Purview | Microsoft Learn