5

I'm currently learning Linq to Sql and Im very surprised by the performance of selecting data. I'm retreving joined data from few tables. I select about 40k of rows. Mapping this data to objects using ADO times about 35s, using NHbiernate times about 130s and what is suspicious using Linq To Sql only 3,5s. Additionally I would like to write that I'm using immediately loading which looks like:

THESIS th = new THESIS(connectionString);
DataLoadOptions dlo = new DataLoadOptions();
dlo.LoadWith<NumericFormula>(x => x.RPN);
dlo.LoadWith<RPN>(x => x.RPNDetails);
dlo.LoadWith<RPNDetail>(x => x.Parameter);
th.LoadOptions = dlo;
th.Log = Console.Out;

Looking to the logs when I'm iterating I can't see that Linq To Sql generate some additional queries to database.

I'm very surprised by huge differences in performance and I wonder that maybe I don't understand something.

Could someone explain me why it works so fast? To measure time I'm using Stopwatch class.

ADO.NET Code:

public static List<NumericFormulaDO> SelectAllNumericFormulas()
{
    var nFormulas = new List<NumericFormulaDO>();

    string queryString = @"
        SELECT *
        FROM    NumericFormula nf 
                Left Join Unit u on u.Unit_Id = nf.Unit_Id 
                Left Join UnitType ut on ut.UnitType_Id = u.UnitType_Id 
                Join RPN r on r.RPN_Id = nf.RPN_Id 
                Join RPNDetails rd on rd.RPN_Id = r.RPN_Id 
                Join Parameter par on par.Parameter_Id = rd.Parameter_Id where nf.NumericFormula_Id<=10000";

    using (var connection = new SqlConnection(connectionString))
    {
        var command = new SqlCommand(queryString, connection);
        connection.Open();
        using (var reader = command.ExecuteReader())
        {
            while (reader.Read())
            {
                var det = new RPNDetailsDO();
                det.RPNDetails_Id = Int32.Parse(reader["RPNDetails_Id"].ToString());
                det.RPN_Id = Int32.Parse(reader["RPN_Id"].ToString());
                det.Identifier = reader["Identifier"].ToString();
                det.Parameter.Architecture = reader["Architecture"].ToString();
                det.Parameter.Code = reader["Code"].ToString();
                det.Parameter.Description = reader["Description"].ToString();
                det.Parameter.Parameter_Id = Int32.Parse(reader["Parameter_Id"].ToString());
                det.Parameter.ParameterType = reader["ParameterType"].ToString();
                det.Parameter.QualityDeviationLevel = reader["QualityDeviationLevel"].ToString();

                if (nFormulas.Count > 0)
                {
                     if (nFormulas.Any(x => x.RPN.RPN_Id == Int32.Parse(reader["RPN_Id"].ToString())))
                     {
                         nFormulas.First(x=>x.RPN.RPN_Id == Int32.Parse(reader["RPN_Id"].ToString())).RPN.RPNDetails.Add(det);
                     }
                     else
                     {
                         NumericFormulaDO nFormula = CreatingNumericFormulaDO(reader, det);
                         nFormulas.Add(nFormula);
                         //System.Diagnostics.Trace.WriteLine(nFormulas.Count.ToString());
                     }
                }
                else
                {
                     NumericFormulaDO nFormula = CreatingNumericFormulaDO(reader, det);
                     nFormulas.Add(nFormula);
                     //System.Diagnostics.Trace.WriteLine(nFormulas.Count.ToString());
                }
            }
        }
    }

    return nFormulas;
}

private static NumericFormulaDO CreatingNumericFormulaDO(SqlDataReader reader, RPNDetailsDO det)
{
    var nFormula = new NumericFormulaDO();
    nFormula.CalculateDuringLoad = Boolean.Parse(reader["CalculateDuringLoad"].ToString());
    nFormula.NumericFormula_Id = Int32.Parse(reader["NumericFormula_Id"].ToString());
    nFormula.RPN.RPN_Id = Int32.Parse(reader["RPN_Id"].ToString());
    nFormula.RPN.Formula = reader["Formula"].ToString();
    nFormula.Unit.Name = reader["Name"].ToString();

    if (reader["Unit_Id"] != DBNull.Value)
    {
        nFormula.Unit.Unit_Id = Int32.Parse(reader["Unit_Id"].ToString());
        nFormula.Unit.UnitType.Type = reader["Type"].ToString();
        nFormula.Unit.UnitType.UnitType_Id = Int32.Parse(reader["UnitType_Id"].ToString());
    }
    nFormula.RPN.RPNDetails.Add(det);
    return nFormula;
}

LINQ to SQL Code:

THESIS th = new THESIS(connectionString);
DataLoadOptions dlo = new DataLoadOptions();
dlo.LoadWith<NumericFormula>(x => x.RPN);
dlo.LoadWith<RPN>(x => x.RPNDetails);
dlo.LoadWith<RPNDetail>(x => x.Parameter);
th.LoadOptions = dlo;
th.Log = Console.Out;
var nFormulas = 
    th.NumericFormulas.ToList<NumericFormula>();

NHibernate Code:

IQueryable<NumericFormulaDO> nFormulas =
    session.Query<NumericFormulaDO>()
        .Where(x=>x.NumericFormula_Id <=10000);

List<NumericFormulaDO> nForList =
    new List<NumericFormulaDO>();

nForList = nFormulas.ToList<NumericFormulaDO>();

Related to your comments you can see that in ADO I'm using SqlReader and in LINQ I try to use immediate execution.

Of course it is possible that my mapping "algorithm" in ADO part it's not very good but NHibernate is much more slow than ADO (4x slower) so I wonder if for sure is everything alright in LINQ to SQL part because I think in NHibernate is everything good and after all is much more slow than little confusing ADO part.

Thank you guys for responses.

Steven
  • 166,672
  • 24
  • 332
  • 435
Piotr Czarnecki
  • 1,688
  • 3
  • 14
  • 22
  • 2
    Can you post your ADO code, so we can see if you are doing something wrong? – Aleksandar Vucetic Feb 18 '12 at 13:53
  • 4
    Did you actually retrieve the data using LINQ-to-SQL, or did you just construct a query that, when executed, would retrieve the data (deferred execution)? – dtb Feb 18 '12 at 13:53
  • It will be helpful to compare ADO.NET, LINQ to SQL and NHibernate if you could post all your query codes. –  Feb 18 '12 at 14:03
  • 1
    You are not doing the same thing with ADO.NET execute reader and with linqtosql. When using reader, you have that whole `if(nFormulas.Count >0)` part...it probably takes some time...try modifying your queryString to achieve the same effect. – Aleksandar Vucetic Feb 18 '12 at 14:11
  • i suspect that what dtb said is what is happening, EF is using ADO.NET so it can't be faster than it self unless some caching has been involved which means you make a query but you never hit the db since EF uses Lazy Loading by default and will only query if you try to access the result set. – Joakim Feb 18 '12 at 14:12
  • Make sure you are comparing the same operations by running sql profiler. Make sure that the same number of queries are being run against the database. – Davin Tryon Feb 18 '12 at 14:17
  • In addition I would like to say that this time results are for the whole action not only for execute query. When I run query strictly on the database (for example in SQL Management Studio I get result in 3 seconds). My question is more focusing on very fast mapping by Linq To Sql, beacuse if I'm not wrong when I execute Linq To Sql Code wich I attached I've got List of objects in which I can make some actions. And it is about 50x faster than NHibernate this is the reason why I'm very confused :P – Piotr Czarnecki Feb 18 '12 at 14:34
  • 3
    @PiotrCzarnecki - Your ADO code is extremely inefficient. Yield the advice from the [MSDN Documentation](http://msdn.microsoft.com/en-us/library/system.data.sqlclient.sqldatareader.getordinal.aspx): *Because ordinal-based lookups are more efficient than named lookups, it is inefficient to call GetOrdinal within a loop. Save time by calling GetOrdinal once and assigning the results to an integer variable for use within the loop.* – M.Babcock Feb 18 '12 at 14:40
  • @M.Babcock thank you very much !!! I'll try it for sure But maybe some idea why this sample of Linq To Sql code is much more fast (about 50x) than NHibernate. I understand that in ADO code my mapping could be not very good. Thank you for responses guys. Any other ideas, advices? – Piotr Czarnecki Feb 18 '12 at 14:46
  • Why don't you compare execution plans? Also, you should attach a profiler or hit break in the debugger a few times to find the hot-spots. – usr Feb 18 '12 at 15:38

2 Answers2

7

LINQ-to-SQL consumes ADO.NET and has additional overheads, so no: it shouldn't be faster unless it isn't doing the same work. There was mention of access via ordinals vs names, but frankly that affects micro-seconds, not seconds. It won't explain an order of magnitude change.

The only way to answer this is to trace what LINQ-to-SQL is doing. Fortunately this is simple - you can just do:

dbContext.Log = Console.Out;

which will write the TSQL is executes to the console. There are two options then:

  1. you discover the TSQL isn't doing the same thing (maybe it isn't eager-loading)
  2. you discover the TSQL is valid (=doing the same), but has a better plan - in which case... "borrow" it :p

Once you have the TSQL to compare, test that side-by-side, so you are testing the same work. If you want the convenience without the overheads, I'd look at "dapper" - takes away the boring grunt-work of mapping readers to objects, but very optimised.

Marc Gravell
  • 1,026,079
  • 266
  • 2,566
  • 2,900
  • thank you for response I borrowed generated query by linq and yes you've got right that this query doing not exactly the same thing. But when I execute that query and my old ADO query this generated by linq times longer. – Piotr Czarnecki Feb 18 '12 at 18:51
  • I understand this situation in way that execution query maybe is faster in ADO but the whole mapping thing (iterating using reader and assigning values) is very time consuming. For simple selection (and mapping "algorithm") ADO is faster (without joining etc.). I still wonder why Linq To SQL maps so fast. Like I said earlier I understand that my ADO code can be not well but Linq in this sample is much more faster than NHibernate. Any other ideas? – Piotr Czarnecki Feb 18 '12 at 18:51
  • @Piotr oh, I wish it **did** map fast. Actually, it doesn't - which is exactly **why** we (Stack Exchange) wrote "dapper". LINQ-to-SQL is very slow at materialization, by comparison. And yes, NHibernate is slow (relatively) at this too (the "tax" for such a flexible/abstracted model). – Marc Gravell Feb 18 '12 at 19:04
  • Hello guys I profiled ADO application and profiler showed that 87% of time is spending on .Any() method and 10% on .First() method. So I think this is the reason why it takes such a long time. But it is hard to omit some additional conditions, inner loops or inner linq to objects expressions during iterating on Reader in complicate query result. I tried option with additional nested connection and two query strings to avoid linq to objects methods. But method with while(reader2.Read()) nested in other while(reader.Read()) loop times longer. – Piotr Czarnecki Feb 18 '12 at 22:01
  • Instead of using both Any and First, consider FirstOrDefault() and then checking the result for != null, I bet it saves you a ton of time. Also, Parse(ToString) is *very* inefficient. (See: http://fxcopcontrib.codeplex.com/wikipage?title=DoNotUseParseAndTostringToConvertTypes) And instead of iterating through the NumericFormula list each and every time to do the Any and the First, consider using a Dictionary so that you can efficiently look up the existing items using the id etc... These are things where Linq-2-sql is efficient. – jessehouwing Feb 20 '12 at 19:27
0

Rewritten ADO.NET code based on above remarks, this should be a lot faster. You could still improve by using the ordinal value instead of the column names and by reading the fields in exactly the same order as in the query, but those are micro optimizations.

I've also removed a couple of duplications. You might also want to check how to improve the performance of typecasting and conversions, as the Parse(ToString) route is very inefficient and can cause very strange issues when running with systems running in different languages. There's also a chance of dataloss when doing these conversions when decimal, float or doubles are involved, as not all of their values can be translated to strings correctly (or can't roundtrip back).

public static List<NumericFormulaDO> SelectAllNumericFormulas()
{
    var nFormulas = new Dictionary<int, NumericFormulaDO>();

    string queryString = @"
    SELECT *
    FROM    NumericFormula nf 
            Left Join Unit u on u.Unit_Id = nf.Unit_Id 
            Left Join UnitType ut on ut.UnitType_Id = u.UnitType_Id 
            Join RPN r on r.RPN_Id = nf.RPN_Id 
            Join RPNDetails rd on rd.RPN_Id = r.RPN_Id 
            Join Parameter par on par.Parameter_Id = rd.Parameter_Id where nf.NumericFormula_Id<=10000";

    using (var connection = new SqlConnection(connectionString))
    {  
        connection.Open();
        using (var command = new SqlCommand(queryString, connection));
        using (var reader = command.ExecuteReader())
        {
            while (reader.Read())
            {
                var det = new RPNDetailsDO();
                det.RPNDetails_Id = (int) reader.GetValue("RPNDetails_Id");
                det.RPN_Id = (int) reader.GetValue("RPN_Id");
                det.Identifier = (string) reader.GetValue("Identifier");
                det.Parameter.Architecture = (string)reader.GetValue("Architecture");
                det.Parameter.Code = (string)reader.GetValue("Code");
                det.Parameter.Description = (string)reader.GetValue("Description");
                det.Parameter.Parameter_Id = (int) reader.GetValue("Parameter_Id");
                det.Parameter.ParameterType = (string)reader.GetValue("ParameterType");
                det.Parameter.QualityDeviationLevel = (string)reader.GetValue("QualityDeviationLevel");

                NumericFormulaDO parent = null;
                if (!nFormulas.TryGetValue((int)reader.GetValue("RPN_Id"), out parent)
                {
                    parent = CreatingNumericFormulaDO(reader, det);
                    nFormulas.Add(parent.RPN.RPNID, parent);
                }
                else
                {
                    parent.RPN.RPNDetails.Add(det);
                }
            }
        }
    }

    return nFormulas.Values.ToList();
}
jessehouwing
  • 106,458
  • 22
  • 256
  • 341