10

Given a dataclass instance, I would like print() or str() to only list the non-default field values. This is useful when the dataclass has many fields and only a few are changed.

@dataclasses.dataclass
class X:
  """Example dataclass in which every field declares a default value."""
  a: int = 1
  b: bool = False
  c: float = 2.0

# Only `b` is overridden; `a` and `c` keep their declared defaults.
x = X(b=True)
print(x)  # Desired output: X(b=True)
Hugues
  • 2,865
  • 1
  • 27
  • 39
  • Just out of curiosity, what do you want to see with `x = X(b=True, a=1)`? You have explicitly set it to the default value there. In other words, do you just want to see the fields that are not the default or do you want to see the fields that were set? – paxdiablo Mar 04 '22 at 04:42
  • It might be useful to see all the fields that were set explicitly. However, it doesn't seem possible to detect fields that were set explicitly to their default values -- unless `__init__` were modified to record this? – Hugues Mar 04 '22 at 17:19

2 Answers

9

The solution is to add a custom __str__() function:

@dataclasses.dataclass
class X:
  """Example dataclass whose str() lists only the fields that differ from their defaults."""
  a: int = 1
  b: bool = False
  c: float = 2.0

  def __str__(self):
    """Render as ``ClassName(name=value, ...)``, omitting fields equal to their default."""
    shown = []
    for spec in dataclasses.fields(self):
      current = getattr(self, spec.name)
      # Keep only values that compare unequal to the declared default.
      if current != spec.default:
        shown.append(f'{spec.name}={current!r}')
    joined = ', '.join(shown)
    return f'{type(self).__name__}({joined})'

x = X(b=True)
# print() and str() both go through the custom __str__.
print(x)        # X(b=True)
print(str(x))   # X(b=True)
# repr() is not overridden, so the dataclass-generated __repr__ still lists every field.
print(repr(x))  # X(a=1, b=True, c=2.0)
# In an f-string, a bare {x} and {x!s} use __str__, while {x!r} uses __repr__.
print(f'{x}, {x!s}, {x!r}')  # X(b=True), X(b=True), X(a=1, b=True, c=2.0)

This can also be achieved using a decorator:

def terse_str(cls):  # Decorator for class.
  """Class decorator that installs a ``__str__`` listing only non-default fields.

  Field metadata is read with ``dataclasses.fields`` each time ``__str__`` is
  called, so the decorator may be applied either below or above
  ``@dataclasses.dataclass``.

  Args:
    cls: The class to modify in place.

  Returns:
    The same class object, with ``__str__`` replaced.
  """

  def __str__(self):
    """Returns a string containing only the non-default field values."""
    parts = []
    for field in dataclasses.fields(self):
      value = getattr(self, field.name)
      # A field declared with default_factory has `default` set to MISSING,
      # so the value to compare against must come from the factory itself.
      if field.default_factory is not dataclasses.MISSING:
        default = field.default_factory()
      else:
        default = field.default
      if value != default:
        # Use repr() so strings are quoted, matching the dataclass __repr__.
        parts.append(f'{field.name}={value!r}')
    return f'{type(self).__name__}({", ".join(parts)})'

  cls.__str__ = __str__
  return cls

# Decorators apply bottom-up: terse_str installs __str__ first, then
# dataclasses.dataclass processes the class.
@dataclasses.dataclass
@terse_str
class X:
  """Example dataclass that gets its terse ``__str__`` from the ``terse_str`` decorator."""
  a: int = 1
  b: bool = False
  c: float = 2.0
Hugues
  • 2,865
  • 1
  • 27
  • 39
2

One improvement I would suggest is to call dataclasses.fields only once and cache the default values obtained from it. This helps performance because, as written, __str__ calls dataclasses.fields and re-reads every field's default each time it is invoked.

Here's a simple example using a metaclass approach.

Note that I've also modified it slightly so that it handles fields that define a default_factory (for instance, fields of a mutable type).

from __future__ import annotations
import dataclasses


# adapted from `dataclasses` module
def _create_fn(name, args, body, *, globals=None, locals=None):
    if locals is None:
        locals = {}
    args = ','.join(args)
    body = '\n'.join(f'  {b}' for b in body)
    # Compute the text of the entire function.
    txt = f' def {name}({args}):\n{body}'
    local_vars = ', '.join(locals.keys())
    txt = f"def __create_fn__({local_vars}):\n{txt}\n return {name}"
    ns = {}
    exec(txt, globals, ns)
    return ns['__create_fn__'](**locals)


def terse_str(cls_name, bases, cls_dict):  # Metaclass for class
    """Class factory, used via ``metaclass=``, that installs a terse, cached ``__str__``.

    The installed ``__str__`` is a lazy bootstrap. On first use it reads
    ``dataclasses.fields`` once, generates a specialised ``__str__`` with the
    default values bound in, and stores it on the instance's class so that
    later calls skip the field introspection.

    Args:
        cls_name: Name of the class being created.
        bases: Tuple of base classes.
        cls_dict: Class namespace; ``__str__`` is added to it in place.

    Returns:
        The new class, built with ``type(cls_name, bases, cls_dict)``.
    """

    def __str__(self):
        """Generate, install and call the specialised ``__str__`` for ``type(self)``."""
        # Work from the runtime class rather than the closed-over `cls_name`:
        # subclasses inherit this bootstrap and must be rendered under their
        # own name and with their own fields.
        cls = type(self)
        cls_fields: tuple[dataclasses.Field, ...] = dataclasses.fields(self)
        # `_cls` and `_lazy_str` let the generated function notice that it was
        # reached through inheritance and hand over to a fresh bootstrap, which
        # then installs a function specialised for that subclass.
        _locals = {'_cls': cls, '_lazy_str': __str__}
        _body_lines = [
            'if type(self) is not _cls:',
            ' return _lazy_str(self)',
            'lines=[]',
        ]
        for f in cls_fields:
            name = f.name
            dflt_name = f'_dflt_{name}'
            dflt_factory = f.default_factory
            if dflt_factory is not dataclasses.MISSING:
                # The factory result is only compared against, never handed
                # out, so a single cached instance is sufficient.
                _locals[dflt_name] = dflt_factory()
            else:
                _locals[dflt_name] = f.default
            _body_lines.append(f'value=self.{name}')
            _body_lines.append(f'if value != {dflt_name}:')
            _body_lines.append(f' lines.append(f"{name}={{value!r}}")')
        _body_lines.append(f'return f\'{cls.__name__}({{", ".join(lines)}})\'')
        generated = _create_fn('__str__', ('self', ), _body_lines, locals=_locals)
        # set the __str__ with the cached `dataclass.fields`
        setattr(cls, '__str__', generated)
        # on initial run, compute and return __str__()
        return generated(self)

    cls_dict['__str__'] = __str__
    return type(cls_name, bases, cls_dict)


# terse_str is a plain function used as the metaclass: it adds __str__ to the
# class namespace and then builds the class with type().
@dataclasses.dataclass
class X(metaclass=terse_str):
    """Example dataclass with scalar defaults and one ``default_factory`` field."""
    a: int = 1
    b: bool = False
    c: float = 2.0
    # Mutable default, so it is supplied through a factory rather than a literal.
    d: list[int] = dataclasses.field(default_factory=lambda: [1, 2, 3])


x1 = X(b=True)
# `b` is passed explicitly but equals its default, so it is omitted from the
# output; `d` differs from the factory default [1, 2, 3], so it is listed.
x2 = X(b=False, c=3, d=[1, 2])

print(x1)    # X(b=True)
print(x2)    # X(c=3, d=[1, 2])

Finally, here's a quick and dirty test to confirm that caching is actually beneficial for repeated calls to str() or print:

import dataclasses
from timeit import timeit


def terse_str(cls):  # Decorator for class.
    """Class decorator that installs a ``__str__`` listing only non-default fields.

    Field metadata is read with ``dataclasses.fields`` on every ``__str__``
    call (no caching).

    Args:
        cls: The class to modify in place.

    Returns:
        The same class object, with ``__str__`` replaced.
    """

    def __str__(self):
        """Returns a string containing only the non-default field values."""
        parts = []
        for field in dataclasses.fields(self):
            value = getattr(self, field.name)
            # A field declared with default_factory has `default` set to
            # MISSING, so the comparison value must come from the factory.
            if field.default_factory is not dataclasses.MISSING:
                default = field.default_factory()
            else:
                default = field.default
            if value != default:
                # Use repr() so strings are quoted, as the cached variant does.
                parts.append(f'{field.name}={value!r}')
        return f'{type(self).__name__}({", ".join(parts)})'

    cls.__str__ = __str__
    return cls


# adapted from `dataclasses` module
def _create_fn(name, args, body, *, globals=None, locals=None):
    if locals is None:
        locals = {}
    args = ','.join(args)
    body = '\n'.join(f'  {b}' for b in body)
    # Compute the text of the entire function.
    txt = f' def {name}({args}):\n{body}'
    local_vars = ', '.join(locals.keys())
    txt = f"def __create_fn__({local_vars}):\n{txt}\n return {name}"
    ns = {}
    exec(txt, globals, ns)
    return ns['__create_fn__'](**locals)


def terse_str_meta(cls_name, bases, cls_dict):  # Metaclass for class
    """Class factory, used via ``metaclass=``, that installs a terse, cached ``__str__``.

    The installed ``__str__`` is a lazy bootstrap. On first use it reads
    ``dataclasses.fields`` once, generates a specialised ``__str__`` with the
    default values bound in, and stores it on the instance's class so that
    later calls skip the field introspection.

    Args:
        cls_name: Name of the class being created.
        bases: Tuple of base classes.
        cls_dict: Class namespace; ``__str__`` is added to it in place.

    Returns:
        The new class, built with ``type(cls_name, bases, cls_dict)``.
    """

    def __str__(self):
        """Generate, install and call the specialised ``__str__`` for ``type(self)``."""
        # Work from the runtime class rather than the closed-over `cls_name`:
        # subclasses inherit this bootstrap and must be rendered under their
        # own name and with their own fields.
        cls = type(self)
        cls_fields: tuple[dataclasses.Field, ...] = dataclasses.fields(self)
        # `_cls` and `_lazy_str` let the generated function notice that it was
        # reached through inheritance and hand over to a fresh bootstrap, which
        # then installs a function specialised for that subclass.
        _locals = {'_cls': cls, '_lazy_str': __str__}
        _body_lines = [
            'if type(self) is not _cls:',
            ' return _lazy_str(self)',
            'lines=[]',
        ]
        for f in cls_fields:
            name = f.name
            dflt_name = f'_dflt_{name}'
            dflt_factory = f.default_factory
            if dflt_factory is not dataclasses.MISSING:
                # The factory result is only compared against, never handed
                # out, so a single cached instance is sufficient.
                _locals[dflt_name] = dflt_factory()
            else:
                _locals[dflt_name] = f.default
            _body_lines.append(f'value=self.{name}')
            _body_lines.append(f'if value != {dflt_name}:')
            _body_lines.append(f' lines.append(f"{name}={{value!r}}")')
        _body_lines.append(f'return f\'{cls.__name__}({{", ".join(lines)}})\'')
        generated = _create_fn('__str__', ('self', ), _body_lines, locals=_locals)
        # set the __str__ with the cached `dataclass.fields`
        setattr(cls, '__str__', generated)
        # on initial run, compute and return __str__()
        return generated(self)

    cls_dict['__str__'] = __str__
    return type(cls_name, bases, cls_dict)


# Benchmark baseline: __str__ comes from the terse_str decorator, which calls
# dataclasses.fields() on every invocation.
@dataclasses.dataclass
@terse_str
class X:
    """Example dataclass using the uncached, decorator-installed ``__str__``."""
    a: int = 1
    b: bool = False
    c: float = 2.0


# Same fields as X, so the two timings compare like for like; only the way
# __str__ is produced differs.
@dataclasses.dataclass
class X_Cached(metaclass=terse_str_meta):
    """Example dataclass using the generated ``__str__`` installed by ``terse_str_meta``."""
    a: int = 1
    b: bool = False
    c: float = 2.0


# Each timed statement builds a fresh instance and converts it to a string, so
# the figures include construction cost as well as __str__. timeit runs the
# statement 1,000,000 times by default and reports the total in seconds.
print(f"Simple:  {timeit('str(X(b=True))', globals=globals()):.3f}")
print(f"Cached:  {timeit('str(X_Cached(b=True))', globals=globals()):.3f}")

# Sanity check: print what each variant actually renders.
print()
print(X(b=True))
print(X_Cached(b=True))

Results:

Simple:  1.038
Cached:  0.289
rv.kvetch
  • 9,940
  • 3
  • 24
  • 53
  • 1
    Very nice handling of `default_factory` and useful caching of the field information for speedup. The `!r` formatting of the field value is nice -- I added that to the first answer. I had not seen the `metaclass` feature before; it is powerful; however it may not be as composable as a decorator. – Hugues Mar 04 '22 at 18:33
  • Yep, agreed, metaclasses are quite a nice feature. I've generally turned to use them, since Pycharm for the longest time hasn't had good support such as retaining autocompletion when using decorators. It looks like in a recent update they've managed to fix that though. That said, moving forward I'd probably suggest decorators as I agree it overall looks a little neater than with a metaclass approach. – rv.kvetch Mar 04 '22 at 20:41