1

I have a simple function that inserts a Python dictionary into DuckDB. How can I insert it into my table without creating a temporary file?

def save_to_duckdb(data):
    # Connect to the Duckdb database
    conn = duckdb.connect('nodes_log_duck.db')
    # Get the table name from the "name" field in the dictionary
    table_name = data.get('name')
    # Create a temp file
    file_name = table_name + str(int(time.time()))
    with open( file_name,"w") as file:
        json.dump(data,file)
    # Create the table if it doesn't exist
    conn.execute(f" CREATE TABLE IF NOT EXISTS {table_name} as SELECT * FROM read_json_auto({file_name});")

    # Insert the dictionary data into the table
    conn.execute(f"INSERT INTO {table_name} FROM (SELECT * FROM read_json_auto({file_name}))")

    # Commit the changes to the database and close the connection
    conn.commit()
    conn.close()
Geo-7
  • 127
  • 9

1 Answers1

0

It seems there is no way to insert a Python dictionary into DuckDB 0.8.1. I use Polars DataFrame for this, and based on a GitHub discussion in the DuckDB repository, someone suggested using fsspec, which works fine. Although using read_json with fsspec creates better data types for DuckDB tables.

**fsspec**

def save_to_duckdb(data, db_name):
    with duckdb.connect(db_name) as conn:
    
        # Get the table name from the "name" field in the dictionary
        table_name = data.get('name')
        if not table_name:
            return

        # Create a memory filesystem and write the dictionary data to it
        with fsspec.filesystem('memory').open(f'{table_name}.json', 'w') as file:
            file.write(json.dumps(data))

        # Register the memory filesystem and create the table
        conn.register_filesystem(fsspec.filesystem('memory'))
        conn.execute(f"CREATE TABLE IF NOT EXISTS {table_name} AS SELECT * FROM read_json_auto('memory://{table_name}.json')")

        # Insert the data into the table
        conn.execute(f"INSERT INTO {table_name} SELECT * FROM read_json_auto('memory://{table_name}.json')")

**Plors**

def save_to_duckdb(data, db_name):
    # Get the table name from the "name" field in the dictionary
    table_name = data.get('name')
    if table_name is None:
        return

    # Create a polars DataFrame from the data dictionary
    df = pl.DataFrame(data)

    # Connect to the Duckdb database and insert the DataFrame into the database
    with duckdb.connect(db_name) as con:
        con.execute(f"CREATE TABLE IF NOT EXISTS {table_name} AS SELECT * FROM df")
        con.execute(f"INSERT INTO {table_name} SELECT * FROM df")
        con.commit()

"Notice that this code inserts data into the database twice for the first time."

Geo-7
  • 127
  • 9