2

I have a predefined PostgreSQL database from an Apache OFBiz installation. The database has numerous foreign key constraints. I am trying to write a Python program to copy the data from a production database into staging or development databases.

The last step is to clear some private data out of my data-set that should not be seen by developers.

I have the reflection set up as follows:

def reflectSourceTables():
    """Create the automap base for the source metadata and prepare it.

    Stores the new base in the module-level ``Base``, resets the
    module-level ``baseNum`` to 0, and then prepares the base with the
    custom relationship naming and generation callbacks.
    """
    global Base, baseNum
    Base = automap_base(metadata=smeta)
    baseNum = 0
    Base.prepare(
        name_for_collection_relationship=_name_for_collection_relationship,
        name_for_scalar_relationship=_name_for_scalar_relationship,
        generate_relationship=_generate_relationship,
    )

The relationship naming and generation callbacks passed to `Base.prepare()` are defined as follows:

def _name_for_scalar_relationship(base, local_cls, referred_cls, constraint):
    """Return the attribute name for a scalar (many-to-one) relationship.

    For a named constraint the result is
    ``<referred class name, lowercased>.<column names joined by "_">_scalar_<n>``,
    where ``n`` is the module-level ``baseNum`` after being incremented.
    An unnamed constraint is delegated to ``name_for_scalar_relationship``.
    """
    global baseNum
    if not constraint.name:
        # No constraint name to work with: keep the default behaviour.
        return name_for_scalar_relationship(base, local_cls, referred_cls, constraint)

    baseNum += 1
    column_part = '_'.join(column.name for column in constraint.columns)
    return '{0}.{1}_scalar_{2}'.format(
        referred_cls.__name__.lower(), column_part, baseNum
    )

def _name_for_collection_relationship(base, local_cls, referred_cls, constraint):
    """Return the attribute name for a collection (one-to-many) relationship.

    For a named constraint the result is
    ``<referred class name, lowercased>.<column names joined by "_">_collection_<n>``,
    where ``n`` is the module-level ``baseNum`` after being incremented.
    An unnamed constraint is delegated to ``name_for_collection_relationship``.
    """
    global baseNum
    if not constraint.name:
        # No constraint name to work with: keep the default behaviour.
        return name_for_collection_relationship(base, local_cls, referred_cls, constraint)

    baseNum += 1
    column_part = '_'.join(column.name for column in constraint.columns)
    return '{0}.{1}_collection_{2}'.format(
        referred_cls.__name__.lower(), column_part, baseNum
    )

def _generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw):
    """Build a relationship, adding cascade options on one-to-many sides.

    One-to-many relationships get ``cascade='all, delete-orphan'`` and
    ``passive_deletes=True``; every other direction is passed through to
    ``generate_relationship`` with its keyword arguments untouched.
    """
    if direction is interfaces.ONETOMANY:
        kw.update(cascade='all, delete-orphan', passive_deletes=True)

    return generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw)

I am able to see the relationship between tables with the following code:

def getTableList(smeta):
    """Return the tables to process, expanded with the tables they relate to.

    When ``args.tables`` is empty, ``smeta.sorted_tables`` is returned
    unchanged. Otherwise each requested name is validated against the
    source metadata (unknown names are logged and skipped) and the list is
    then extended with every table reachable through the mapped
    relationships of the tables already in it.

    :param smeta: reflected metadata of the source database.
    :return: ``smeta.sorted_tables`` or a list of table-name strings.
    """
    if not args.tables:
        return smeta.sorted_tables

    tableList = []
    for table in args.tables:
        # Test membership before indexing: smeta.tables[table] raises
        # KeyError for an unknown name, which would prevent the log
        # message below from ever being written.
        if table in smeta.tables and smeta.tables[table] in smeta.sorted_tables:
            tableList.append(str(smeta.tables[table]))
        else:
            log('Table {0} does not exist on source'.format(table))

    # tableList deliberately grows while it is being iterated, so tables
    # pulled in through a relationship get their own relationships
    # followed as well.
    for table in tableList:
        for relationship in getattr(Base.classes, str(table)).__mapper__.relationships:
            # The related table name is the text between the first and the
            # last dot of str(relationship); names without two dots carry
            # no table name and are skipped instead of crashing.
            match = re.search(r'\.(.*)\.', str(relationship))
            if match is None:
                continue
            tableName = match.group(1)
            if tableName and tableName not in tableList:
                tableList.append(tableName)

    return tableList
    

But my hope was that the following delete code would also remove the related child records:

def cleanData():
    """Delete the vouchers and invoices of every party in ``partyIds``.

    Opens a session bound to ``db2`` and, per party id, bulk-deletes the
    matching ``voucher`` rows (by ``party_id``) and ``invoice`` rows (by
    ``party_id_from``), committing once per party.
    """
    log("Clean Data")
    session_factory = sessionmaker()
    destSess = session_factory(bind=db2)

    for partyId in partyIds:
        log("Cleaning data for {0}".format(partyId))
        voucher_cls = Base.classes.voucher
        invoice_cls = Base.classes.invoice
        # NOTE: Query.delete() issues a bulk DELETE statement; it does not
        # load the rows, so ORM relationship cascades are not applied here.
        destSess.query(voucher_cls).filter_by(party_id=partyId).delete(
            synchronize_session=False
        )
        destSess.query(invoice_cls).filter_by(party_id_from=partyId).delete(
            synchronize_session=False
        )
        destSess.commit()

The delete does delete vouchers and invoices, but does not delete child invoice_item records.

My database setup does not include cascade delete functionality for foreign keys, but I was hoping that I could have the ORM provide the functionality.

Ideally this code would delete the children of a voucher or invoice.

Edit

New relationship generation is as follows:

def _generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw):
    """Build a relationship with delete cascades on the collection sides.

    One-to-many and many-to-many relationships get
    ``cascade='all, delete, delete-orphan'`` and ``passive_deletes=False``;
    many-to-one relationships are marked ``viewonly``. The keyword
    arguments are then handed to ``generate_relationship``.
    """
    ## Write this to include cascade delete see: https://docs.sqlalchemy.org/en/latest/orm/extensions/automap.html#custom-relationship-arguments
    if direction is interfaces.ONETOMANY or direction is interfaces.MANYTOMANY:
        kw['cascade'] = 'all, delete, delete-orphan'
        kw['passive_deletes'] = False

    if direction is interfaces.MANYTOONE:
        kw['viewonly'] = True

    return generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw)

Delete code changed to:

# Per-party delete step (uses partyId and destSess from the enclosing code).
# The rows are loaded and removed one by one with Session.delete(), rather
# than with a bulk Query.delete(), so the delete goes through the ORM.
voucher = Base.classes.voucher
invoice = Base.classes.invoice

# Invoices issued by the party.
invoiceDelete = destSess.query(invoice).filter_by(party_id_from=partyId)
rs = invoiceDelete.all()
for result in rs:
    destSess.delete(result)

# Vouchers belonging to the party.
voucherDelete = destSess.query(voucher).filter_by(party_id=partyId)
rs = voucherDelete.all()
for result in rs:
    destSess.delete(result)

destSess.commit()

This results in the following error:

sqlalchemy.exc.IntegrityError: (psycopg2.IntegrityError) update or delete on table "invoice_item" violates foreign key constraint "invoice_imat_itm" on table "invoice_item_attribute"
DETAIL:  Key (invoice_id, invoice_item_seq_id)=(19439, 00001) is still referenced from table "invoice_item_attribute".
 [SQL: 'DELETE FROM invoice_item WHERE invoice_item.invoice_id = %(invoice_id)s AND invoice_item.invoice_item_seq_id = %(invoice_item_seq_id)s'] [parameters: ({'invoice_id': '19439', 'invoice_item_seq_id': '00001'}, {'invoice_id': '33674', 'invoice_item_seq_id': '00001'}, {'invoice_id': '49384', 'invoice_item_seq_id': '00001'}, {'invoice_id': '58135', 'invoice_item_seq_id': '00001'}, {'invoice_id': '83457', 'invoice_item_seq_id': '00001'})] (Background on this error at: http://sqlalche.me/e/gkpj)
Community
  • 1
  • 1
Urgazhi
  • 31
  • 7

1 Answer

1

The following configuration allowed me to delete children records of Vouchers and Invoices within my database.

Relationship auto-mapper set up as follows:

def _generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw):
    """Build a relationship configured for recursive child deletion.

    One-to-many relationships get ``cascade='all, delete, delete-orphan'``,
    ``passive_deletes=False`` and ``lazy='immediate'``; many-to-one and
    many-to-many relationships are marked ``viewonly``. The keyword
    arguments are then handed to ``generate_relationship``.
    """
    ## Write this to include cascade delete see: https://docs.sqlalchemy.org/en/latest/orm/extensions/automap.html#custom-relationship-arguments
    if direction is interfaces.ONETOMANY:
        kw.update(
            cascade='all, delete, delete-orphan',
            passive_deletes=False,
            lazy='immediate',
        )
    elif direction is interfaces.MANYTOONE or direction is interfaces.MANYTOMANY:
        kw['viewonly'] = True

    return generate_relationship(base, direction, return_fn, attrname, local_cls, referred_cls, **kw)

Delete Code is:

def cleanData():
    """Delete the invoices and vouchers of every party in ``partyIds``.

    For each party id the matching ``invoice`` rows (by ``party_id_from``)
    and then the matching ``voucher`` rows (by ``party_id``) are loaded;
    each row has its children removed with ``deleteChildren`` before the
    row itself is deleted. A commit follows each of the two groups.
    """
    # The function uses destSess throughout but did not create it; open
    # the session on the destination engine here so the function is
    # self-contained.
    destSess = sessionmaker()(bind=db2)

    invoice = Base.classes.invoice
    voucher = Base.classes.voucher

    for partyId in partyIds:
        log("Cleaning data for {0}".format(partyId))

        for result in destSess.query(invoice).filter_by(party_id_from=partyId).all():
            # Children first, so no child row still references the parent.
            deleteChildren(result, destSess)
            destSess.delete(result)
        destSess.commit()

        for result in destSess.query(voucher).filter_by(party_id=partyId).all():
            deleteChildren(result, destSess)
            destSess.delete(result)
        destSess.commit()

def deleteChildren(result, destSess):
    """Recursively delete the one-to-many children of ``result``.

    Every one-to-many relationship of the mapped object is followed. A
    child that has relationships of its own is processed recursively
    first, so the deepest rows are removed before their parents. Each
    child delete is committed immediately.

    :param result: mapped instance whose children should be removed.
    :param destSess: session used for the deletes and commits.
    """
    for relationship in result.__mapper__.relationships:
        if relationship.direction is not interfaces.ONETOMANY:
            continue

        # relationship.key is the mapped attribute name, so it can be read
        # directly instead of being parsed out of str(relationship).
        # Iterate over a snapshot because commits happen inside the loop.
        for child in list(getattr(result, relationship.key)):
            if child.__mapper__.relationships:
                deleteChildren(child, destSess)
                destSess.commit()
            destSess.delete(child)
            destSess.commit()

To explain, I get the records related to my party id that I want to remove from the database, and using a recursive method get the children from my defined relationship using eager fetching. If that child record has children, I call the same method. When I run out of one to many relationships to follow, I remove the record and return to its parent, deleting that record as well.

Urgazhi
  • 31
  • 7