14
/// <summary>
/// Abstraction over the creation of a <see cref="Configuration"/> object.
/// </summary>
public interface ICsvProductReaderConfigurationFactory
{
    /// <summary>
    /// Builds a configuration instance.
    /// </summary>
    /// <returns>The <see cref="Configuration"/> produced by the implementation.</returns>
    Configuration Build();
}

/// <summary>
/// Creates a tab-delimited, header-aware <see cref="Configuration"/> that logs a warning
/// whenever a missing field or bad data is reported, and registers the
/// <see cref="ProductDto"/> class map on it.
/// </summary>
public class CsvProductReaderConfigurationFactory : ICsvProductReaderConfigurationFactory
{
    private readonly ClassMap<ProductDto> productMap;

    /// <summary>
    /// Obtains the class map from <paramref name="classMapProvider"/> once, at construction time.
    /// </summary>
    /// <param name="classMapProvider">Provider that supplies the <see cref="ProductDto"/> class map.</param>
    public CsvProductReaderConfigurationFactory(IProductDtoClassMapProvider classMapProvider)
    {
        productMap = classMapProvider.Get();
    }

    /// <summary>
    /// Builds a fresh configuration on every call.
    /// </summary>
    /// <returns>A new <see cref="Configuration"/> with the class map registered.</returns>
    public Configuration Build()
    {
        var config = new Configuration();

        config.Delimiter = "\t";
        config.HasHeaderRecord = true;
        config.IgnoreQuotes = true;

        // The callbacks only log; they do not alter what the reader returns.
        config.MissingFieldFound = (rows, fieldIndex, readingContext) =>
        {
            Log.Warn($"Missing Field Found at line {readingContext.Row}\r\n" +
                     $"Field at index {fieldIndex} does not exist\r\n" +
                     $"Raw record: {readingContext.RawRecord}");
        };

        config.BadDataFound = context =>
        {
            Log.Warn($"Bad data found at row {context.Row}\r\n" +
                     $"Raw data: {context.RawRecord}");
        };

        config.RegisterClassMap(productMap);
        return config;
    }
}


/// <summary>
/// Abstraction for reading <see cref="ProductDto"/> records from a file.
/// </summary>
public interface ICvsProductReader
{
    /// <summary>
    /// Reads the products stored in the given file.
    /// </summary>
    /// <param name="filePath">Path of the file to read.</param>
    /// <returns>The <see cref="ProductDto"/> records read from the file.</returns>
    IEnumerable<ProductDto> GetAll(string filePath);
}

/// <summary>
/// Reads <see cref="ProductDto"/> records from a file through CsvHelper, using the
/// configuration supplied by an <see cref="ICsvProductReaderConfigurationFactory"/>.
/// </summary>
public class CvsProductReader : ICvsProductReader
{
    private readonly ICsvProductReaderConfigurationFactory configurationFactory;

    /// <summary>
    /// Stores the factory used to build the reader configuration on each call to <see cref="GetAll"/>.
    /// </summary>
    /// <param name="csvProductReaderConfigurationFactory">Factory that builds the reader configuration.</param>
    public CvsProductReader(ICsvProductReaderConfigurationFactory csvProductReaderConfigurationFactory)
    {
        configurationFactory = csvProductReaderConfigurationFactory;
    }

    /// <summary>
    /// Opens <paramref name="filePath"/>, reads every record and returns them as an array.
    /// </summary>
    /// <param name="filePath">Path of the file to read.</param>
    /// <returns>All records, fully materialised before the underlying readers are disposed.</returns>
    public IEnumerable<ProductDto> GetAll(string filePath)
    {
        var configuration = configurationFactory.Build();

        using (var fileReader = new StreamReader(filePath))
        {
            using (var csv = new CsvReader(fileReader, configuration))
            {
                // Materialise inside the using blocks: GetRecords is consumed here,
                // while the readers are still open.
                ProductDto[] products = csv.GetRecords<ProductDto>().ToArray();
                return products;
            }
        }
    }
}

The MissingFieldFound callback is invoked when a missing field is found, but it cannot affect the result.

I was wondering if it's possible to configure CsvHelper to skip rows with missing fields.

Drag and Drop
  • 2,672
  • 3
  • 25
  • 37
Makrushin Evgenii
  • 953
  • 2
  • 9
  • 20

5 Answers5

13

There is nothing wrong with the way you did it. Here is an MCVE showing a complete example:

// Records that were read without any callback firing, and the raw text of rows that
// triggered BadDataFound or MissingFieldFound.
var acceptedRecords = new List<Test>();
var rejectedRawRows = new List<string>();

using (var stream = new MemoryStream())
using (var writer = new StreamWriter(stream))
using (var reader = new StreamReader(stream))
using (var csv = new CsvReader(reader))
{
    // Set by either callback while the current row is being read; cleared after each row.
    var currentRowRejected = false;

    csv.Configuration.BadDataFound = context =>
    {
        currentRowRejected = true;
        rejectedRawRows.Add(context.RawRecord);
    };

    csv.Configuration.MissingFieldFound = (headerNames, index, context) =>
    {
        currentRowRejected = true;
        rejectedRawRows.Add(context.RawRecord);
    };

    // In-memory sample data: a header followed by three data rows. The first two are
    // deliberately malformed (stray quotes / only one field); presumably they are the
    // rows meant to trigger the callbacks above.
    writer.WriteLine("FirstName,LastName");
    writer.WriteLine("\"Jon\"hn\"\",\"Doe\"");
    writer.WriteLine("\"JaneDoe\"");
    writer.WriteLine("\"Jane\",\"Doe\"");
    writer.Flush();
    stream.Position = 0;

    while (csv.Read())
    {
        // GetRecord is always called so the callbacks get a chance to run for this row.
        var record = csv.GetRecord<Test>();

        if (currentRowRejected)
        {
            currentRowRejected = false;
            continue;
        }

        acceptedRecords.Add(record);
    }
}

acceptedRecords.Dump();
rejectedRawRows.Dump();
Drag and Drop
  • 2,672
  • 3
  • 25
  • 37
  • Your solution works well in my case. But I do not see a way to adopt it in a form that does not violate the SRP. – Makrushin Evgenii Oct 01 '18 at 11:42
  • 1
    @BanyRule It seems really hard to make it SOLID, sorry for the regression . `ShouldSkipRecord` could have Something that make it skip missing or bad data. For now `GetRecords` does not support this. You can make it a feature request : https://github.com/JoshClose/CsvHelper/issues. – Drag and Drop Oct 01 '18 at 12:05
  • This seems more like a workaround for when you read rows one by one yourself. For example in a case of using `CsvDataReader` and `SqlBulkCopy` you just provide reader instance to `SqlBulkCopy` (as it implements `IDataReader`) and don't handle reading yourself. – Laurynas Lazauskas Sep 22 '20 at 07:59
  • @LaurynasLazauskas, sorry, it's been years, so I may need a little more context to understand. But you can use GetRecord**s** (with an S) and set `Configuration.BadDataFound` and `MissingFieldFound` to null to get the default handling with no manual `csv.Read()`. There is no reason that rows with missing fields should be ignored; `Configuration.MissingFieldFound` allows you to set more complex rules where you check which field is missing in order to decide the appropriate way to handle it: throw, insert into another collection, log an error, etc. – Drag and Drop Sep 22 '20 at 08:39
  • I am using `CsvHelper.CsvDataReader`. There is no `GetRecords` or `GetRecord`. This object is then passed down to `System.Data.SqlClient.SqlBulkCopy.WriteToServerAsync`. When there is a case of a missing field it still get read as a row with default values and fails. In this scenario I would like to be able to configure CsvHelper to skip rows in `MissingFieldFound`. But I guess this should rather be expressed in GitHub issues than here. – Laurynas Lazauskas Sep 22 '20 at 09:11
  • If you can formulate an [MRE] with a simple CSV, it's a valid question here too. But you should do both, both site use the same mark down so copy past and ad a link from gitbug to the respective SO question. I will advocate for SO as main question as it's rules allow better question that most of gitbug brain fart. – Drag and Drop Sep 22 '20 at 09:26
7

With the newer version of CsvHelper that I have (24.0.1), the code below works for setting MissingFieldFound:

' Open the CSV file as text. Nothing in this snippet disposes textReader or csv;
' the surrounding code is expected to do so when reading is finished.
Dim textReader As TextReader = File.OpenText(filename)

Dim config = New CsvHelper.Configuration.CsvConfiguration(System.Globalization.CultureInfo.InvariantCulture)

config.Delimiter = ","
' Clearing the callback turns off the missing-field notification.
config.MissingFieldFound = Nothing
' TrimOptions is an enum, not a Boolean: assigning True does not compile under
' Option Strict On and silently becomes -1 (every flag set) under Option Strict Off.
config.TrimOptions = CsvHelper.Configuration.TrimOptions.Trim
' Clearing the callback turns off header validation.
config.HeaderValidated = Nothing

Dim csv = New CsvReader(textReader, config)
Abumoosa
  • 71
  • 1
  • 2
6

You can also configure how missing fields are handled by using attributes:

// NOTE(review): per the CsvHelper attribute documentation, [Optional] lets the member be
// skipped when no matching field is found while reading — confirm for your version.
[Optional]
public string Field { get; set; }

or

// NOTE(review): per the CsvHelper attribute documentation, [Ignore] excludes the member
// from reading and writing — confirm for your version.
// NOTE(review): the name `Filed` looks like a typo for `Field`; left unchanged here.
[Ignore]

public int Filed {get; set;}

Jay
  • 147
  • 2
  • 4
3

Another way to skip MissingFieldFound rows is by using ShouldSkipRecord and comparing header record length to row record length.

// The header row must have been read before its length can be used. Skip these two
// calls if that has already happened (CsvDataReader loads it automatically).
csv.Read();
csv.ReadHeader();

// Skip every row whose number of fields differs from the number of header fields.
var headerFieldCount = csv.Context.HeaderRecord.Length;
csv.Configuration.ShouldSkipRecord = fields => !(fields.Length == headerFieldCount);

This is particularly useful if you do not control the reading code (ex. when using CsvDataReader with SqlBulkCopy).

Laurynas Lazauskas
  • 2,855
  • 2
  • 21
  • 26
0

Abumoosa's answer works for me except that was in VB. Here's the equivalent in csharp:

var config = new CsvHelper.Configuration.CsvConfiguration(System.Globalization.CultureInfo.InvariantCulture);
// NOTE(review): a null callback is expected to switch off missing-field reporting,
// so rows with missing fields are read rather than rejected — confirm for your CsvHelper version.
config.MissingFieldFound = null;

using (var reader = new StreamReader("taskList.csv"))
using (var csv = new CsvReader(reader, config))
{
    // Consume the header row before iterating over the data rows.
    csv.Read();
    csv.ReadHeader();

    while (csv.Read())
    {
        //get the record
    }
}
bobt
  • 411
  • 3
  • 8