1

Our applications use SLF4J's MDC and Logback's JSON encoder to write log lines as JSON. These lines are then processed by a log shipping pipeline and written to ElasticSearch as documents. They have logback.xml file that looks like this:

<configuration>
  <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
    <encoder class="net.logstash.logback.encoder.LogstashEncoder">
      <provider class="net.logstash.logback.composite.loggingevent.ArgumentsJsonProvider"/>
    </encoder>
  </appender>

  <root level="INFO">
    <appender-ref ref="STDOUT" />
  </root>
</configuration>

And the MDC is used like this:

MDC.put("stringVal", "val");
MDC.put("decimalVal", "1.23");
MDC.put("longVal", "123498559348792879267942876");
logger.info("foo");

The problem is that the interface for the MDC is void put(String key, String val), such that all values must be strings or autoboxable to strings. This, then, produces the following log:

{"@timestamp":"2021-08-23T16:32:04.231+01:00","@version":1,"message":"foo","logger_name":"MDCTest","thread_name":"main","level":"INFO","level_value":20000,"stringVal":"val","decimalVal":"1.23","longVal":"123498559348792879267942876"}

The string typed decimalVal and longVal then get picked up as string-typed in Elasticsearch using an automatic type mapping and we then cannot perform numeric operations on them.

In this case these numeric values are largely coming from the abuse of logs to ship metrics, but this conflation is being dealt with elsewhere.

I would prefer to not force developers to have to update the Elasticsearch Index Template or write configuration for the log shipping pipeline when they add more logs and so I have been looking for a way of doing this automatically. I have produced an implementation that works by replacing the internals of the Logback Encoder and Formatter in order to switch the MdcJsonProvider, the class that's responsible for serialising the MDC to JSON. This feels both extremely brittle and non-performant.

Is there a more elegant method of doing this, or a different way to get the same effect? I have looked at ch.qos.logback.contrib.jackson.JacksonJsonFormatter but I would still need to list numeric MDC properties in the logback file which I'm trying to avoid.

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;

import com.fasterxml.jackson.core.JsonGenerator;
import com.google.common.collect.Maps;

import ch.qos.logback.classic.spi.ILoggingEvent;
import ch.qos.logback.core.spi.ContextAware;
import net.logstash.logback.LogstashFormatter;
import net.logstash.logback.composite.CompositeJsonFormatter;
import net.logstash.logback.composite.JsonProvider;
import net.logstash.logback.composite.JsonWritingUtils;
import net.logstash.logback.composite.loggingevent.MdcJsonProvider;
import net.logstash.logback.encoder.LogstashEncoder;

/**
 * This class is a hack.
 *
 * It exists because Logback's MDC interface is a map from String to String, and this cannot
 * be changed. Instead, we must modify the logger itself to check if any of the String values
 * appear numeric and then log them as such.
 *
 * This is necessary since we ship the documents directly to ElasticSearch. As such, be warned
 * that you may have mapping conflict issues if the type for any given key fluctuates or if you
 * occasionally write strings that are fully numeric.
 *
 * This class implements an Encoder, a Formatter, and a JsonProvider that work together. The
 * encoder specifies the formatter, and the formatter works by swapping out the MdcJsonProvider
 * implementation normally used to serialise the MDC values with another implementation with tries
 * to log them as numbers.
 *
 * Using this class has a cost. It will result in more short-lived object creation, so it is not
 * suitable for high frequency logging.
 */
public class JsonMdcTypePreservingEncoder extends LogstashEncoder {
    protected CompositeJsonFormatter<ILoggingEvent> createFormatter() {
        return new JsonMdcTypePreservingFormatter(this);
    }

    protected JsonMdcTypePreservingFormatter getFormatter() {
        return (JsonMdcTypePreservingFormatter) super.getFormatter();
    }

    /**
     * This class exists to remove the default MdcJsonProvider, responsible for logging the MDC
     * section as JSON, and swap it out for the custom one.
     */
    public static class JsonMdcTypePreservingFormatter extends LogstashFormatter {
        public JsonMdcTypePreservingFormatter(ContextAware declaredOrigin) {
            super(declaredOrigin);

            Optional<JsonProvider<ILoggingEvent>> oldProvider = getProviders().getProviders()
                    .stream()
                    .filter(o -> o.getClass() == MdcJsonProvider.class)
                    .findFirst();

            if (oldProvider.isPresent()) {
                getProviders().removeProvider(oldProvider.get());
                getProviders().addProvider(new TypePreservingMdcJsonProvider());
            }
        }

        /**
         * This class contains a duplicate of MdcJsonProvider.writeTo but with a small change.
         * Instead of taking the MDC Map<String, String> and logging it, it produces a modified
         * Map<String, Object> that potentially contains BigDecimal or BigInteger types alongside
         * Strings and serialises those instead.
         *
         * The only new code in this method is the call to getStringObjectMap.
         */
        public static class TypePreservingMdcJsonProvider extends MdcJsonProvider {
            private Map<String, Object> convertedProperties = Maps.newHashMap();

            @Override
            public void writeTo(JsonGenerator generator, ILoggingEvent event) throws IOException {
                Map<String, String> mdcProperties = event.getMDCPropertyMap();

                if (mdcProperties != null && !mdcProperties.isEmpty()) {
                    if (getFieldName() != null) {
                        generator.writeObjectFieldStart(getFieldName());
                    }

                    if (!getIncludeMdcKeyNames().isEmpty()) {
                        mdcProperties = new HashMap(mdcProperties);
                        ((Map) mdcProperties).keySet().retainAll(getIncludeMdcKeyNames());
                    }

                    if (!getExcludeMdcKeyNames().isEmpty()) {
                        mdcProperties = new HashMap(mdcProperties);
                        ((Map) mdcProperties).keySet().removeAll(getExcludeMdcKeyNames());
                    }

                    Map<String, Object> convertedProperties = getStringObjectMap(mdcProperties);

                    JsonWritingUtils.writeMapEntries(generator, convertedProperties);
                    if (getFieldName() != null) {
                        generator.writeEndObject();
                    }
                }
            }

            private Map<String, Object> getStringObjectMap(Map<String, String> mdcProperties) {
                convertedProperties.clear();

                for (String key : mdcProperties.keySet()) {
                    String value = mdcProperties.get(key);

                    // If the majority of MDC values are not numbers, this prevents unnecessary
                    // parsing steps but adds a character-wise inspection per value.
                    try {
                        BigInteger parsed = new BigInteger(value);
                        convertedProperties.put(key, parsed);
                    } catch (NumberFormatException e) {
                        try {
                            BigDecimal parsed = new BigDecimal(vlue);
                            convertedProperties.put(key, parsed);
                        } catch (NumberFormatException f) {
                            // No-op
                        }
                    }

                    if (!convertedProperties.containsKey(key)) {
                        convertedProperties.put(key, value);
                    }
                }
                return convertedProperties;
            }
        }
    }
}

Thanks for any advice!

Martin Foot
  • 3,594
  • 3
  • 28
  • 35

0 Answers0