20

So we have had a heated debate at work as to which DataAccess route to take: DataTable or DataReader.

DISCLAIMER I am on the DataReader side and these results have shaken my world.

We ended up writing some benchmarks to test the speed differences. It was generally agreed that a DataReader is faster, but we wanted to see how much faster.

The results surprised us. The DataTable was consistently faster than the DataReader. Approaching twice as fast sometimes.

So I turn to you, members of SO. Why, when most of the documentation and even Microsoft state that a DataReader is faster, are our tests showing otherwise?

And now for the code:

The test harness:

    /// <summary>
    /// Benchmarks the DataTable-based DAL against the DataReader/List-based DAL
    /// and reports both elapsed times (in milliseconds) in a message box.
    /// </summary>
    private void button1_Click(object sender, EventArgs e)
    {
        System.Diagnostics.Stopwatch sw = new System.Diagnostics.Stopwatch();

        // NOTE(review): DateTime.Parse with no culture is locale-sensitive;
        // "01/01/1900" happens to be unambiguous, but an explicit
        // CultureInfo.InvariantCulture would be safer.
        DateTime date = DateTime.Parse("01/01/1900");

        sw.Start();
        // Original loop started at 1 (999 iterations); start at 0 so the
        // harness really performs the 1000 iterations the results claim.
        for (int i = 0; i < 1000; i++)
        {
            using (DataTable aDataTable = ArtifactBusinessModel.BusinessLogic.ArtifactBL.RetrieveDTModified(date))
            {
            }
        }
        sw.Stop();
        // ElapsedMilliseconds is milliseconds, not seconds — name accordingly.
        long dataTableTotalMilliseconds = sw.ElapsedMilliseconds;

        sw.Restart();

        for (int i = 0; i < 1000; i++)
        {
            List<ArtifactBusinessModel.Entities.ArtifactString> aList = ArtifactBusinessModel.BusinessLogic.ArtifactBL.RetrieveModified(date);
        }
        sw.Stop();
        long listTotalMilliseconds = sw.ElapsedMilliseconds;

        MessageBox.Show(String.Format("list:{0}, table:{1}", listTotalMilliseconds, dataTableTotalMilliseconds));
    }

This is the DAL for the DataReader:

        /// <summary>
        /// Retrieves every artifact modified on or after the given date via a
        /// SqlDataReader, mapping each row to an <c>ArtifactString</c>.
        /// </summary>
        /// <param name="modifiedLast">Lower bound passed to the stored procedure's date filter.</param>
        /// <returns>The mapped artifacts; empty when no rows match.</returns>
        /// <exception cref="ApplicationException">Wraps any non-ApplicationException failure after logging it.</exception>
        internal static List<ArtifactString> RetrieveByModifiedDate(DateTime modifiedLast)
        {
            var results = new List<ArtifactString>();

            try
            {
                using (SqlConnection conn = SecuredResource.GetSqlConnection("Artifacts"))
                using (var command = new SqlCommand("[cache].[Artifacts_SEL_ByModifiedDate]", conn))
                {
                    command.CommandType = CommandType.StoredProcedure;
                    command.Parameters.Add(new SqlParameter("@LastModifiedDate", modifiedLast));

                    using (SqlDataReader reader = command.ExecuteReader())
                    {
                        // Resolve each column ordinal exactly once, up front,
                        // so the per-row loop never pays for a name lookup.
                        int ordFormNumber = reader.GetOrdinal("FormNumber");
                        int ordFormOwner = reader.GetOrdinal("FormOwner");
                        int ordDescription = reader.GetOrdinal("Description");
                        int ordDescriptionLong = reader.GetOrdinal("DescriptionLong");
                        int ordThumbnailURL = reader.GetOrdinal("ThumbnailURL");
                        int ordOnlineSampleURL = reader.GetOrdinal("OnlineSampleURL");
                        int ordLastModifiedMetaData = reader.GetOrdinal("LastModifiedMetaData");
                        int ordLastModifiedArtifactFile = reader.GetOrdinal("LastModifiedArtifactFile");
                        int ordLastModifiedThumbnail = reader.GetOrdinal("LastModifiedThumbnail");
                        int ordEffectiveDate = reader.GetOrdinal("EffectiveDate");
                        int ordViewability = reader.GetOrdinal("Viewability");
                        int ordFormType = reader.GetOrdinal("FormType");
                        int ordInventoryType = reader.GetOrdinal("InventoryType");
                        int ordCreateDate = reader.GetOrdinal("CreateDate");

                        while (reader.Read())
                        {
                            var artifact = new ArtifactString();
                            ArtifactDAL.Map(ordFormNumber, ordFormOwner, ordDescription, ordDescriptionLong, ordFormType, ordInventoryType, ordCreateDate, ordThumbnailURL, ordOnlineSampleURL, ordLastModifiedMetaData, ordLastModifiedArtifactFile, ordLastModifiedThumbnail, ordEffectiveDate, ordViewability, reader, artifact);
                            results.Add(artifact);
                        }
                    }
                }
            }
            catch (ApplicationException)
            {
                // Already logged and wrapped lower down — just let it bubble up.
                throw;
            }
            catch (Exception e)
            {
                string errMsg = String.Format("Error in ArtifactDAL.RetrieveByModifiedDate. Date: {0}", modifiedLast);
                Logging.Log(Severity.Error, errMsg, e);
                throw new ApplicationException(errMsg, e);
            }

            return results;
        }
    /// <summary>
    /// Populates <paramref name="entity"/> from the current row of
    /// <paramref name="dr"/>, using the pre-resolved column ordinals.
    /// </summary>
    /// <remarks>
    /// Every ordinal must be non-null for the column it maps; a null ordinal
    /// throws InvalidOperationException at the corresponding <c>.Value</c> access.
    /// NOTE(review): most properties are stored as strings via ToString(),
    /// apparently by design of ArtifactString — only CreateDate is a DateTime.
    /// </remarks>
    internal static void Map(int? formNumberOrdinal, int? formOwnerOrdinal, int? descriptionOrdinal, int? descriptionLongOrdinal, int? formTypeOrdinal, int? inventoryTypeOrdinal, int? createDateOrdinal,
        int? thumbnailURLOrdinal, int? onlineSampleURLOrdinal, int? lastModifiedMetaDataOrdinal, int? lastModifiedArtifactFileOrdinal, int? lastModifiedThumbnailOrdinal,
        int? effectiveDateOrdinal, int? viewabilityOrdinal, IDataReader dr, ArtifactString entity)
    {
        entity.FormNumber = dr[formNumberOrdinal.Value].ToString();
        entity.FormOwner = dr[formOwnerOrdinal.Value].ToString();
        entity.Description = dr[descriptionOrdinal.Value].ToString();
        entity.DescriptionLong = dr[descriptionLongOrdinal.Value].ToString();
        entity.FormType = dr[formTypeOrdinal.Value].ToString();
        entity.InventoryType = dr[inventoryTypeOrdinal.Value].ToString();
        // Was DateTime.Parse(dr[...].ToString()): that string round-trip is
        // culture-sensitive and drops sub-second precision. Convert.ToDateTime
        // returns the boxed DateTime unchanged when the column is already a
        // DateTime, and still parses string-typed values.
        entity.CreateDate = Convert.ToDateTime(dr[createDateOrdinal.Value]);
        entity.ThumbnailURL = dr[thumbnailURLOrdinal.Value].ToString();
        entity.OnlineSampleURL = dr[onlineSampleURLOrdinal.Value].ToString();
        entity.LastModifiedMetaData = dr[lastModifiedMetaDataOrdinal.Value].ToString();
        entity.LastModifiedArtifactFile = dr[lastModifiedArtifactFileOrdinal.Value].ToString();
        entity.LastModifiedThumbnail = dr[lastModifiedThumbnailOrdinal.Value].ToString();
        entity.EffectiveDate = dr[effectiveDateOrdinal.Value].ToString();
        entity.Viewability = dr[viewabilityOrdinal.Value].ToString();
    }

This is the DAL for the DataTable:

        /// <summary>
        /// Retrieves every artifact modified on or after the given date into a
        /// DataTable via a SqlDataAdapter.
        /// </summary>
        /// <param name="modifiedLast">Lower bound passed to the stored procedure's date filter.</param>
        /// <returns>A DataTable named "Artifacts" holding the result set; empty when no rows match.</returns>
        /// <exception cref="ApplicationException">Wraps any non-ApplicationException failure after logging it.</exception>
        internal static DataTable RetrieveDTByModifiedDate(DateTime modifiedLast)
        {
            var table = new DataTable("Artifacts");

            try
            {
                using (SqlConnection conn = SecuredResource.GetSqlConnection("Artifacts"))
                using (var command = new SqlCommand("[cache].[Artifacts_SEL_ByModifiedDate]", conn))
                using (var adapter = new SqlDataAdapter(command))
                {
                    command.CommandType = CommandType.StoredProcedure;
                    command.Parameters.Add(new SqlParameter("@LastModifiedDate", modifiedLast));

                    adapter.Fill(table);
                }
            }
            catch (ApplicationException)
            {
                // Already logged and wrapped lower down — just let it bubble up.
                throw;
            }
            catch (Exception e)
            {
                string errMsg = String.Format("Error in ArtifactDAL.RetrieveByModifiedDate. Date: {0}", modifiedLast);
                Logging.Log(Severity.Error, errMsg, e);
                throw new ApplicationException(errMsg, e);
            }

            return table;
        }

The results:

For 10 iterations within the Test Harness

For 10 iterations within the test harness

For 1000 iterations within the Test Harness

enter image description here

These results are the second run, to mitigate the differences due to creating the connection.

Shai Cohen
  • 6,074
  • 4
  • 31
  • 54
  • 3
    The end results are different. One gives you a DataTable and one gives you a List. For all I know DataTable stored everything unparsed and will parse it as you read (I actually have no clue how DataTables store their data internally, I always suspected it was XML-ish) I do know that DataTables waste memory where Readers don't. – MatthewMartin Nov 30 '12 at 17:54
  • Did you run the comparison inside or outside a debugger? A debugger will often slow down your own code in ways that it doesn't for already compiled framework code, even in release mode. – Bryce Wagner Jun 20 '16 at 13:35
  • The other thing I'm really surprised nobody has mentioned is that DataTable loads an entire row at a time, using GetValues(object[]), whereas your code is loading each field separately. There's actually some overhead with each individual call, and it's possible that overhead is large enough to make DataTable load faster. – Bryce Wagner Jun 20 '16 at 13:37

4 Answers4

27

I see three issues:

  1. the way you use a DataReader negates its big single-item-in-memory advantage by converting it to a list,
  2. you're running the benchmark in an environment that differs significantly from production in a way that favors the DataTable, and
  3. you're spending time converting DataReader record to Artifact objects that is not duplicated in the DataTable code.

The main advantage of a DataReader is that you don't have to load everything into memory at once. This should be a huge advantage for DataReader in web apps, where memory, rather than cpu, is often the bottleneck, but by adding each row to a generic list you've negated this. That also means that even after you change your code to only use one record at a time, the difference might not show up on your benchmarks because you're running them on a system with lot of free memory, which will favor the DataTable. Also, the DataReader version is spending time parsing the results into Artifact objects that the DataTable has not done yet.

To fix the DataReader usage issue, change List<ArtifactString> to IEnumerable<ArtifactString> everywhere, and in your DataReader DAL change this line:

artifactList.Add(artifact);

to this:

yield return artifact;

This means you also need to add code that iterates over the results to your DataReader test harness to keep things fair.

I'm not sure how to adjust the benchmark to create a more typical scenario that is fair to both DataTable and DataReader, except to build two versions of your page, and serve up each version for an hour under a similar production-level load so that we have real memory pressure... do some real A/B testing. Also, make sure you cover converting the DataTable rows to Artifacts... and if the argument is that you need to do this for a DataReader, but not for a DataTable, that is just plain wrong.

Igor Pashchuk
  • 2,455
  • 2
  • 22
  • 29
Joel Coehoorn
  • 399,467
  • 113
  • 570
  • 794
  • I disagree that this is "using the DataReader wrong". It's quite common for a DAL that uses a DataReader to return a list of entities. Lazy enumeration has its place - e.g. if the BLL is calculating aggregates, but it's not the only way to skin a cat. – Joe Nov 30 '12 at 18:20
  • 2
    @Joe - maybe it's not "wrong", but if this is your "right", you have negated most of the normal datareader advantages, and there are better ways to write your DAL. However, I have rephrased that. – Joel Coehoorn Nov 30 '12 at 18:21
  • +1 Very interesting points Joel, thank you. Could you please expand a little more on _"the normal datareader advantages"_ and _"better ways to write your DAL"_? – Shai Cohen Nov 30 '12 at 18:27
  • 2
    @ShaiCohen both are covered in the answer: the big advantage is memory pressure, and the better way is to use IEnumerable rather than List all the way from DAL through to presentation. This isn't as much about lazy or just-in-time enumeration as it is about avoiding ever bringing a complete result set to RAM at all. – Joel Coehoorn Nov 30 '12 at 18:32
  • Whilst there is some good info in this answer the statement "you're running the benchmark in an environment that differs significantly from production" doesn't hold true for most applications I've been involved in. If you're passing datareaders up to the next level of your app to consume then sure you might get good performance but you're losing the benefits of strong typing and object oriented programming. In most applications I've seen read from the datareader into objects as effectively as this is less error prone to consume up the stack. – Alan Macdonald Jan 30 '15 at 09:17
  • @AlanMacdonald The "environment" I'm talking about isn't the code... it's the load on the server. The test load typically has lots of free memory compared to product. As for creating strongly typed objects... _yes_, please do that when using a DataReader. But you don't have to read the records into a list to create those objects. It's still possible to preserve the one-object-in-memory at a time feature of a datareader through the use of Iterator blocks and IEnumerable. – Joel Coehoorn Apr 15 '15 at 16:21
2

SqlDataAdapter.Fill calls SqlCommand.ExecuteReader with CommandBehavior.SequentialAccess set. Maybe that's enough to make the difference.

As an aside, I see your IDbReader implementation caches the ordinals of each field for performance reasons. An alternative to this approach is to use the DbEnumerator class.

DbEnumerator caches a field name -> ordinal dictionary internally, so gives you much of the performance benefit of using ordinals with the simplicity of using field names:

foreach(IDataRecord record in new DbEnumerator(reader))
{
    artifactList.Add(new ArtifactString() {
        FormNumber = (int) record["FormNumber"],
        FormOwner = (int) record["FormOwner"],
        ...
    });
}

or even:

return new DbEnumerator(reader)
    .Select(record => new ArtifactString() {
        FormNumber = (int) record["FormNumber"],
        FormOwner = (int) record["FormOwner"],
        ...
      })
    .ToList();
Joe
  • 122,218
  • 32
  • 205
  • 338
2

2 things could be slowing you down.

First, I wouldn't do a "find ordinal by name" for each column, if you're interested in performance. Note, the "layout" class below takes care of this lookup. And the layout provides better readability, instead of using "0", "1", "2", etc. And it allows me to code to an Interface (IDataReader) instead of the Concrete.

Second. You're using the ".Value" property. (and I would think this does make a difference)

You'll get better results (IMHO) if you use the concrete datatype "getters".

GetString, GetDateTime, GetInt32, etc,etc.

Here is my typical IDataReader to DTO/POCO code.

[Serializable]
// Plain DTO holding one row of the employee search result.
// Serializable so it can cross remoting/caching boundaries if needed.
public partial class Employee
{
    public int EmployeeKey { get; set; }                   
    public string LastName { get; set; }                   
    public string FirstName { get; set; }   
    public DateTime HireDate  { get; set; }  
}

[Serializable]
// Named collection type for Employee rows; no behavior of its own — exists
// only to give the collection a serializable, strongly-typed identity.
public class EmployeeCollection : List<Employee>
{
}   

/// <summary>
/// Column ordinals for the employee-search result set, so the reader code can
/// use fast positional access while staying readable. If the stored procedure's
/// column order changes, this is the single place to update.
/// </summary>
internal static class EmployeeSearchResultsLayouts
{
    // const instead of static readonly: these are true compile-time constants,
    // and the class is internal, so cross-assembly const-inlining is not a concern.
    public const int EMPLOYEE_KEY = 0;
    public const int LAST_NAME = 1;
    public const int FIRST_NAME = 2;
    public const int HIRE_DATE = 3;
}


    /// <summary>
    /// Materializes an EmployeeCollection from a search-result reader.
    /// Rows whose EMPLOYEE_KEY column is NULL are skipped; a NULL name or
    /// hire-date column leaves the corresponding property at its default.
    /// </summary>
    /// <param name="dataReader">Open reader positioned before the first row; always closed (best-effort) before this method returns.</param>
    /// <returns>The mapped employees; empty when the reader yields no usable rows.</returns>
    public EmployeeCollection SerializeEmployeeSearchForCollection(IDataReader dataReader)
    {
        EmployeeCollection returnCollection = new EmployeeCollection();
        try
        {
            // Removed from the original: a throwaway Employee allocated before
            // the loop and the unused "FYI" locals (FieldCount, row counter).
            while (dataReader.Read())
            {
                // A NULL key cannot identify an employee — skip the row.
                if (dataReader.IsDBNull(EmployeeSearchResultsLayouts.EMPLOYEE_KEY))
                {
                    continue;
                }

                Employee item = new Employee()
                {
                    EmployeeKey = dataReader.GetInt32(EmployeeSearchResultsLayouts.EMPLOYEE_KEY)
                };

                if (!dataReader.IsDBNull(EmployeeSearchResultsLayouts.LAST_NAME))
                {
                    item.LastName = dataReader.GetString(EmployeeSearchResultsLayouts.LAST_NAME);
                }

                if (!dataReader.IsDBNull(EmployeeSearchResultsLayouts.FIRST_NAME))
                {
                    item.FirstName = dataReader.GetString(EmployeeSearchResultsLayouts.FIRST_NAME);
                }

                if (!dataReader.IsDBNull(EmployeeSearchResultsLayouts.HIRE_DATE))
                {
                    item.HireDate = dataReader.GetDateTime(EmployeeSearchResultsLayouts.HIRE_DATE);
                }

                returnCollection.Add(item);
            }

            return returnCollection;
        }
        //no catch here... see  http://blogs.msdn.com/brada/archive/2004/12/03/274718.aspx
        finally
        {
            if (dataReader != null)
            {
                try
                {
                    // Best-effort close: a failure here must not mask the
                    // result or any exception already propagating.
                    dataReader.Close();
                }
                catch
                {
                }
            }
        }
    }
Shai Cohen
  • 6,074
  • 4
  • 31
  • 54
granadaCoder
  • 26,328
  • 10
  • 113
  • 146
  • +1 for the GetValue(). I agree and I can't, for the life of me, figure out why I did. :). Although, I don't wholly agree with your statment re "find ordinal by name". Since this is only done once per call, the impact is minimal. In fact, I once did a test and the difference between calling by ordinal and by name was negligent at best. – Shai Cohen Apr 09 '13 at 18:39
  • Yeah, I did see you did the "get ordinals just once", which was good. I just like to tweak out every last bit when I can. And with the "Layouts" , I get the readability. And if the positions change, I only have one place to update them. To-may-toes, Toe-mat-toes I guess. – granadaCoder Apr 09 '13 at 18:58
0

I don't think it will account for all the difference, but try something like this to eliminate some of the extra variables and function calls:

// Inline mapping: build each entity straight from the reader's name indexer,
// avoiding the separate ordinal lookups and the Map() call per row.
// (Fragment — assumes `command` and `artifactList` from the question's DAL.)
using (SqlDataReader reader = command.ExecuteReader())
{
    while (reader.Read())
    {
        artifactList.Add(new ArtifactString
        {
            FormNumber = reader["FormNumber"].ToString(),
            //etc
        });
     }
}
Geoff
  • 9,340
  • 7
  • 38
  • 48