Is there a sure-fire way to check that the class of an object is a sub-class of the desired super?
For Example, in a migration script that I'm writing, I have to convert objects of a given type to dictionaries in a given manner to ensure two-way compatability of the data.
This is best summed up like so:
- Serializable
- User
- Status
- Issue
- Test
- Set
- Step
- Cycle
However, when I'm recursively checking objects after depickling, I receive a Test object that yields the following results:
Testing data object type:
type(data)
{type}< class'__main.Test' >
Testing Class type:
type(Test())
{type}< class'__main.Test' >
Testing object type against class type:
type(Test()) == type(data)
{bool}False
Testing if object isinstance() of Class:
isinstance(data, Test)
{bool}False
Testing if Class isinstance() of Super Class:
isinstance(Test(), Serializable)
{bool}True
Testing isinstance() of Super Class::
isinstance(data, Serializable)
{bool}False
Interestingly, it doesn't appear to have any such problem prior to pickling as it handles the creation of dictionary and integrity hash just fine. This only crops up with depickled objects in both Pickle and Dill.
For Context, here's the code in it's native environment - the DataCache object that is pickled:
class DataCache(object):
_hash=""
_data = None
@staticmethod
def genHash(data):
dataDict = DataCache.dictify(data)
datahash = json.dumps(dataDict, sort_keys=True)
return hashlib.sha256(datahash).digest()
@staticmethod
def dictify(data):
if isinstance(data,list):
datahash = []
for item in data:
datahash.append(DataCache.dictify(item))
elif isinstance(data,(dict, collections.OrderedDict)):
datahash = collections.OrderedDict()
for key,value in datahash.iteritems():
datahash[key]= DataCache.dictify(value)
elif isinstance(data, Serializable):
datahash = data.toDict()
else:
datahash = data
return datahash
def __init__(self, restoreDict = {}):
if restoreDict:
self.__dict__.update(restoreDict)
def __getinitargs__(self):
return (self.__dict__)
def set(self, data):
self._hash = DataCache.genHash(data)
self._data = data
def verify(self):
dataHash = DataCache.genHash(self._data)
return (self._hash == dataHash)
def get(self):
return self._data
Finally, I know there's arguments for using JSON for readability in storage, I needed Pickle's ability to convert straight to and from Objects without specifying the object type myself. (thanks to the nesting, it's not really feasible)
Am I going mad here or does pickling do something to the class definitions?
EDIT:
Minimal Implementation:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import requests
from aenum import Enum
import json # _tricks
import base64
import argparse
import os
import sys
import datetime
import dill
import hashlib
import collections
class Serializable(object):
def __init__(self, initDict={}):
if initDict:
self.__dict__.update(initDict)
def __str__(self):
return str(self.sortSelf())
def sortSelf(self):
return collections.OrderedDict(sorted(self.__dict__.items()))
def toDict(self):
return self.__dict__
def fromDict(self, dict):
# Not using __dict__.update(...) to avoid polluting objects with the excess data
varMap = self.__dict__
if dict and varMap:
for key in varMap:
if (key in dict):
varMap[key] = dict[key]
self.__dict__.update(varMap)
return self
return None
class Issue(Serializable):
def __init__(self, initDict={}):
self.id = 0
self.key = ""
self.fields = {}
if initDict:
self.__dict__.update(initDict)
Serializable.__init__(self)
def fieldToDict(self, obj, key, type):
if key in obj:
result = obj[key]
else:
return None
if result is None:
return None
if isinstance(result, type):
return result.toDict()
return result
def fromDict(self, jsonDict):
super(Issue, self).fromDict(jsonDict)
self.fields["issuetype"] = IssueType().fromDict(self.fields["issuetype"])
self.fields["assignee"] = User().fromDict(self.fields["assignee"])
self.fields["creator"] = User().fromDict(self.fields["creator"])
self.fields["reporter"] = User().fromDict(self.fields["reporter"])
return self
def toDict(self):
result = super(Issue, self).toDict()
blankKeys = []
for fieldName, fieldValue in self.fields.iteritems():
if fieldValue is None:
blankKeys.append(fieldName)
if blankKeys:
for key in blankKeys:
self.fields.pop(key, None)
result["fields"]["issuetype"] = self.fieldToDict(result["fields"], "issuetype", IssueType)
result["fields"]["creator"] = self.fieldToDict(result["fields"], "creator", User)
result["fields"]["reporter"] = self.fieldToDict(result["fields"], "reporter", User)
result["fields"]["assignee"] = self.fieldToDict(result["fields"], "assignee", User)
return result
class IssueType(Serializable):
def __init__(self):
self.id = 0
self.name = ""
def toDict(self):
return {"id": str(self.id)}
class Project(Serializable):
def __init__(self):
Serializable.__init__(self)
self.id = 0
self.name = ""
self.key = ""
class Cycle(Serializable):
def __init__(self):
self.id = 0
self.name = ""
self.totalExecutions = 0
self.endDate = ""
self.description = ""
self.totalExecuted = 0
self.started = ""
self.versionName = ""
self.projectKey = ""
self.versionId = 0
self.environment = ""
self.totalCycleExecutions = 0
self.build = ""
self.ended = ""
self.name = ""
self.modifiedBy = ""
self.projectId = 0
self.startDate = ""
self.executionSummaries = {'executionSummary': []}
class Step(Serializable):
def __init__(self):
self.id = ""
self.orderId = 0
self.step = ""
self.data = ""
self.result = ""
self.attachmentsMap = {}
def toDict(self):
dict = {}
dict["step"] = self.step
dict["data"] = self.data
dict["result"] = self.result
dict["attachments"] = []
return dict
class Status(Serializable):
def __init__(self):
self.id = 0
self.name = ""
self.description = ""
self.isFinal = True
self.color = ""
self.isNative = True
self.statusCount = 0
self.statusPercent = 0.0
class User(Serializable):
def __init__(self):
self.displayName = ""
self.name = ""
self.emailAddress = ""
self.key = ""
self.active = False
self.timeZone = ""
class Execution(Serializable):
def __init__(self):
self.id = 0
self.orderId = 0
self.cycleId = -1
self.cycleName = ""
self.issueId = 0
self.issueKey = 0
self.projectKey = ""
self.comment = ""
self.versionId = 0,
self.versionName = "",
self.executedOn = ""
self.creationDate = ""
self.executedByUserName = ""
self.assigneeUserName = ""
self.status = {}
self.executionStatus = ""
def fromDict(self, jsonDict):
super(Execution, self).fromDict(jsonDict)
self.status = Status().fromDict(self.status)
# This is already listed as Execution Status, need to associate and convert!
return self
def toDict(self):
result = super(Execution, self).toDict()
result['status'] = result['status'].toDict()
return result
class ExecutionContainer(Serializable):
def __init__(self):
self.executions = []
def fromDict(self, jsonDict):
super(ExecutionContainer, self).fromDict(jsonDict)
self.executions = []
for executionDict in jsonDict["executions"]:
self.executions.append(Execution().fromDict(executionDict))
return self
class Test(Issue):
def __init__(self, initDict={}):
if initDict:
self.__dict__.update(initDict)
Issue.__init__(self)
def toDict(self):
result = super(Test, self).toDict()
stepField = "CustomField_0001"
if result["fields"][stepField]:
steps = []
for step in result["fields"][stepField]["steps"]:
steps.append(step.toDict())
result["fields"][stepField] = steps
return result
def fromDict(self, jsonDict):
super(Test, self).fromDict(jsonDict)
stepField = "CustomField_0001"
steps = []
if stepField in self.fields:
for step in self.fields[stepField]["steps"]:
steps.append(Step().fromDict(step))
self.fields[stepField] = {"steps": steps}
return self
class Set(Issue):
def __init__(self, initDict={}):
self.__dict__.update(initDict)
Issue.__init__(self)
class DataCache(object):
_hash = ""
_data = None
@staticmethod
def genHash(data):
dataDict = DataCache.dictify(data)
datahash = json.dumps(dataDict, sort_keys=True)
return hashlib.sha256(datahash).digest()
@staticmethod
def dictify(data):
if isinstance(data, list):
datahash = []
for item in data:
datahash.append(DataCache.dictify(item))
elif isinstance(data, (dict, collections.OrderedDict)):
datahash = collections.OrderedDict()
for key, value in datahash.iteritems():
datahash[key] = DataCache.dictify(value)
elif isinstance(data, Serializable):
datahash = data.toDict()
else:
datahash = data
return datahash
def __init__(self, restoreDict={}):
if restoreDict:
self.__dict__.update(restoreDict)
def __getinitargs__(self):
return (self.__dict__)
def set(self, data):
self._hash = DataCache.genHash(data)
self._data = data
def verify(self):
dataHash = DataCache.genHash(self._data)
return (self._hash == dataHash)
def get(self):
return self._data
def saveCache(name, projectKey, object):
filePath = "migration_caches/{projectKey}".format(projectKey=projectKey)
if not os.path.exists(path=filePath):
os.makedirs(filePath)
cache = DataCache()
cache.set(object)
targetFile = open("{path}/{name}".format(name=name, path=filePath), 'wb')
dill.dump(obj=cache, file=targetFile)
targetFile.close()
def loadCache(name, projectKey):
filePath = "migration_caches/{projectKey}/{name}".format(name=name, projectKey=projectKey)
result = False
try:
targetFile = open(filePath, 'rb')
try:
cache = dill.load(targetFile)
if isinstance(cache, DataCache):
if cache.verify():
result = cache.get()
except EOFError:
# except BaseException:
print ("Failed to load cache from file: {filePath}\n".format(filePath=filePath))
except IOError:
("Failed to load cache file at: {filePath}\n".format(filePath=filePath))
targetFile.close()
return result
testIssue = Test().fromDict({"id": 1000,
"key": "TEST",
"fields": {
"issuetype": {
"id": 1,
"name": "TestIssue"
},
"assignee": "Minothor",
"reporter": "Minothor",
"creator": "Minothor",
}
})
saveCache("Test", "TestProj", testIssue)
result = loadCache("Test", "TestProj")
EDIT 2
The script in it's current form, now seems to work correctly with vanilla Pickle, (initially switched to Dill due to a similar issue, which was solved by the switch).
However, if you are here with this issue and require Dill's features, then as Mike noted in the comments - it's possible to change the settings in dill.settings
to have Dill behave pickle referenced items only with joblib
mode, effectively mirroring pickle's standard pickling behaviour.