3

As per the Microsoft documentation:

Connect to Azure Data Lake Storage Gen2 by using an account key:

def initialize_storage_account(storage_account_name, storage_account_key):
    """Create the shared Data Lake service client from an account key.

    Builds the ``https://<account>.dfs.core.windows.net`` endpoint for
    *storage_account_name* and stores the resulting ``DataLakeServiceClient``
    in the module-level ``service_client`` global, authenticated with
    *storage_account_key*.

    Any exception raised while building the client is printed instead of
    propagated; in that case this call does not assign ``service_client``.
    """
    global service_client

    try:
        endpoint = f"https://{storage_account_name}.dfs.core.windows.net"
        service_client = DataLakeServiceClient(
            account_url=endpoint,
            credential=storage_account_key,
        )
    except Exception as e:
        print(e)

Upload a file to a directory:

def upload_file_to_directory():
    """Upload a local CSV file to ``my-directory/filter/my_csv_write.csv``.

    Reads the hard-coded local file, creates ``my_csv_write.csv`` inside the
    ``my-directory/filter`` directory of the ``my-file-system`` file system,
    appends the content at offset 0 and flushes it so the data is committed.

    Relies on the module-level ``service_client`` having been set by
    ``initialize_storage_account``.  Any exception is printed rather than
    propagated.
    """
    try:
        # Read the local file first, so a missing or unreadable source does
        # not leave a freshly created, empty file behind in the storage account.
        # Binary mode matters: the length passed to append_data/flush_data
        # must be a byte count, and len() of a text-mode string counts
        # characters (wrong for any non-ASCII content).  The context manager
        # also guarantees the handle is closed.
        with open("C:\\Users\\my_csv_read.csv", "rb") as local_file:
            file_contents = local_file.read()

        file_system_client = service_client.get_file_system_client(file_system="my-file-system")
        directory_client = file_system_client.get_directory_client("my-directory/filter")
        file_client = directory_client.create_file("my_csv_write.csv")

        # create_file already left an empty file in place, so there is nothing
        # to append or flush when the local file is empty.
        if file_contents:
            file_client.append_data(data=file_contents, offset=0, length=len(file_contents))
            file_client.flush_data(len(file_contents))

        print("File uploaded")

    except Exception as e:
        print(e)

I can upload a file from my local machine to Azure storage with this function, and it works.

But what I want to do is read a file from Azure storage and write it back to Azure storage. This is what I tried:

def read_and_write_to_directory():
    """Attempt to copy ``my_csv_read.csv`` into ``my-directory/filter``.

    Intended to read ``my-directory/my_csv_read.csv`` from the
    ``my-file-system`` file system and write its content to
    ``my-directory/filter/my_csv_write.csv``.  As written, the attempt fails
    at the ``open()`` call; the exception is caught and printed.
    """
    try:
        file_system_client = service_client.get_file_system_client(file_system="my-file-system")

        directory_client_read = file_system_client.get_directory_client("my-directory")
        directory_client_write = file_system_client.get_directory_client("my-directory/filter")

        file_client_read = directory_client_read.get_file_client("my_csv_read.csv")
        # NOTE: this is the failing line.  The built-in open() expects a str,
        # bytes or os.PathLike path, but file_client_read is a
        # DataLakeFileClient, so a TypeError is raised here and the lines
        # below never run.
        file_path = open(file_client_read,'r')
        file_contents = file_path.read()
        

        file_client_write = directory_client_write.create_file("my_csv_write.csv")
        # NOTE(review): the working upload sample passes offset/length to
        # append_data and then calls flush_data; neither happens here --
        # confirm against the SDK reference.
        file_client_write.append_data(file_contents, overwrite=True)
        
    
    except Exception as e:
        # Errors are only printed, not re-raised.
        print(e)

But it does not work. It fails with this error:

expected str, bytes or os.PathLike object, not DataLakeFileClient

So what is the correct way to read a file from Azure Data Lake Storage and write it back to Azure Data Lake Storage?

Ecstasy
  • 1,866
  • 1
  • 9
  • 17
Sohel Reza
  • 281
  • 1
  • 6
  • 23

1 Answer

0

It's probably a little late, but I stumbled onto this while looking into a different issue. I think you need to download the file content first, as described here, and then write it, as described here. So your code would look like this:

def read_and_write_to_directory():
    """Copy ``my-directory/my_csv_read.csv`` to ``my-directory/filter/my_csv_write.csv``.

    Downloads the source file's bytes from the ``my-file-system`` file
    system through its file client, creates the destination file, appends
    the bytes at offset 0 and flushes them so the write is committed.

    Relies on the module-level ``service_client`` having been set by
    ``initialize_storage_account``.  Any exception is printed rather than
    propagated.
    """
    try:
        file_system_client = service_client.get_file_system_client(file_system="my-file-system")

        directory_client_read = file_system_client.get_directory_client("my-directory")
        directory_client_write = file_system_client.get_directory_client("my-directory/filter")

        file_client_read = directory_client_read.get_file_client("my_csv_read.csv")

        # The built-in open() only accepts local paths; remote content has to
        # be fetched through the file client instead.
        download = file_client_read.download_file()
        downloaded_bytes = download.readall()

        file_client_write = directory_client_write.create_file("my_csv_write.csv")

        # create_file already left an empty file in place, so there is nothing
        # to append or flush when the source is empty.
        if downloaded_bytes:
            # Same append-then-flush sequence as the upload sample: the data
            # is written at offset 0 and only committed once flush_data is
            # called with the total length.
            file_client_write.append_data(
                data=downloaded_bytes, offset=0, length=len(downloaded_bytes)
            )
            file_client_write.flush_data(len(downloaded_bytes))

    except Exception as e:
        print(e)
Maus
  • 87
  • 10