4

I want to store metadata for my ML models in pydantic. Is there a proper way to access a field's type? I know you can do BaseModel.__fields__['my_field'].type_, but I assume there's a better way.

I want to make it so that if a BaseModel fails to instantiate, it is very clear what data is required to create the missing fields and which methods to use. Something like this:

from pydantic import BaseModel
import pandas as pd

# basic model
class Metadata(BaseModel):
    # First version of the model: the field is a plain float here. The class
    # is declared again further down in this snippet with a custom field type.
    peaks_per_day: float


class PeaksPerDayType(float):
    """Float subclass whose class attributes describe how to obtain its value."""

    # Label passed to get_data() as its data_type argument.
    data_type = "foo"
    # Passed to get_data() as its required_data argument (a 180-day span).
    data_required = pd.Timedelta("180D")

    @classmethod
    def determine(cls, data):
        """Construct an instance of this type from ``data``."""
        value = cls(data)
        return value

# use our custom float
class Metadata(BaseModel):
    # Annotated with the float subclass so that its class-level attributes
    # (data_required, data_type, determine) can be reached via the type hint.
    peaks_per_day: PeaksPerDayType

def get_data(data_type, required_data):
    """Stub fetcher: ignores both arguments and returns a fixed one-item list."""
    # A real implementation would get enough of the appropriate data type.
    placeholder = [1]
    return placeholder


# Initial data we have
# Sketch of the desired workflow: try to build the model, collect the fields
# that failed, derive each one from its type's class attributes, then build
# the model again with the derived values.
metadata_json = {}
try:
    metadata = Metadata(**metadata_json)
    # peaks per day is missing
except Exception as e:
    # Keep a reference: `e` itself is unbound once the except block ends.
    # NOTE(review): error_msg is only bound when instantiation raises; the
    # lines below rely on that.
    error_msg = e

# Presumably a pydantic ValidationError, whose errors() yields dicts with a
# 'loc' tuple -- the broad except above would let other exception types
# reach this line and fail here. TODO confirm.
missing_fields = error_msg.errors()
# Keep only the first element of each error's location.
missing_fields = [missing_field['loc'][0] for missing_field in missing_fields]

# For each missing field use its type hint to find what data is required to
# determine it and access the method to determine the value

new_data = {}
for missing_field in missing_fields:
    # NOTE(review): `Metadata[missing_field]` looks like illustrative
    # pseudo-code for "the type of this field" -- it is the access the
    # question is asking how to achieve, not a known API. TODO confirm.
    req_data = Metadata[missing_field].data_required
    data_type = Metadata[missing_field].data_type
    data = get_data(data_type=data_type, required_data=req_data)

    new_data[missing_field] = Metadata[missing_field].determine(data)

# Second attempt, now with the derived values merged in.
metadata = Metadata(**metadata_json, **new_data)
this_josh
  • 333
  • 2
  • 11

1 Answer

5

If you don't need to handle nested classes, this should work:

from pydantic import BaseModel, ValidationError

import typing

class PeaksPerDayType(float):
    """Float subclass whose class attributes describe how to obtain its value."""

    # Label passed to get_data() as its first argument.
    data_type = "foo"
    # Passed to get_data() as its second argument.
    data_required = 123.22

    @classmethod
    def determine(cls, data):
        """Construct an instance of this type from ``data``."""
        value = cls(data)
        return value

# use our custom float
class Metadata(BaseModel):
    # This annotation is what typing.get_type_hints(Metadata), used later in
    # this snippet, maps the field name to.
    peaks_per_day: PeaksPerDayType

def get_data(data_type, required_data):
    """Stub fetcher: hands back ``required_data`` unchanged; ``data_type`` is unused."""
    # A real implementation would get enough of the appropriate data type.
    result = required_data
    return result

# Try to build the model; if that fails only because fields are absent,
# fill them in from the attributes on each field's type and build it again.
metadata_json = {}
try:
    Metadata(**metadata_json)
except ValidationError as exc:
    # Resolve every annotated field of the model to its type up front.
    field_to_type = typing.get_type_hints(Metadata)

    problems = exc.errors()
    # Only absent fields can be filled in below; for any other kind of
    # validation failure, re-raise the error currently being handled.
    if any(problem['type'] != 'value_error.missing' for problem in problems):
        raise
    missing_fields = [problem['loc'][0] for problem in problems]

    # Produce each missing value from the attributes carried by its type.
    new_data = {
        name: get_data(
            field_to_type[name].data_type,
            field_to_type[name].data_required,
        )
        for name in missing_fields
    }

    print(Metadata(**metadata_json, **new_data))

peaks_per_day=123.22

I'm not really sure what the point of data_type or get_data is, but I assume it's some internal logic that you want to add.

Ron Serruya
  • 3,988
  • 1
  • 16
  • 26
  • Thank you, that works exactly as expected and uses a lot less internal stuff than my solution. Your assumption is correct. – this_josh Aug 31 '21 at 09:22
  • 1
    The key part that answers the title question is: `typing.get_type_hints(Metadata)` where `Metadata` is your Pydantic model. – supermodo Jan 21 '22 at 08:41