1

Current project, broke head over this problem:

Client Repository:

public class ClientRepository
{
    // Members
    private masterDataContext _db;

    // Constructor
    public ClientRepository()
    {
        _db = new masterDataContext();
    }

    public IEnumerable<ClientName> GetCorporateClientNames()
    {
        return _db.corporate_client_tbs.Select(o => new ClientName { id = o.id, name = o.company_name }).AsEnumerable();
    }

    public IEnumerable<ClientName> GetRetailClientNames()
    {
        return _db.retail_client_tbs.Select(o => new ClientName { id = o.id, name = o.name }).AsEnumerable();
    }

    // Define return type
    public class ClientName
    {
        public int id { get; set; }
        public string name { get; set; }
    }
}

Now in the Controller I have the following:

public ActionResult Index()
{
    var _visits = _db.GetAllServiceVisits();
    return View(_visits);
}

Which takes approximately 4 seconds to load the view with the 200 odd rows currently present.

I want to add a property "client" to the visit model which contains the name of the client. The name of the client will come from one of two different tables which is fetched from one of two arrays of type "ClientName".

This is the first approach, which used LINQ :

public ActionResult Index()
{
    private ClientRepository _cr = new ClientRepository();
    var _retailclients = _cr.GetRetailClientNames().ToArray();
    var _corporateclients = _cr.GetCorporateClientNames().ToArray();
    var _visits = _db.GetAllServiceVisits();

    var _temp = _visits.Select(o => new ServiceVisitViewModel
        {
            service_visit = o,
            client = (o.client_type ? _corporateclients.Where(p => p.id == o.client_id).First().name : _retailclients.Where(p => p.id == o.client_id).First().name)
        }).ToArray();

    return View(_temp);
}

This is the second approach, using plain 'ol C# :

public ActionResult Index()
{
    private ClientRepository _cr = new ClientRepository();
    var _retailclients = _cr.GetRetailClientNames().ToArray();
    var _corporateclients = _cr.GetCorporateClientNames().ToArray();
    var _visits = _db.GetAllServiceVisits();

    List<ServiceVisitViewModel> _temp = new List<ServiceVisitViewModel>();
    foreach (service_visit_tb v in _visits)
    {
        _temp.Add(new ServiceVisitViewModel { service_visit = v, client = (v.client_type ? _corporateclients.Where(p => p.id == v.client_id).First().name : _retailclients.Where(p => p.id == v.client_id).First().name) });
    }

    return View(_temp);
}

The second approach is about 8 - 10 times faster based on my tests.

The only difference I can see is the .Select statement.

Can someone please tell me if I have done something wrong in the first approach or in the alternative, why the first approach is so !@#$ing slow?!

Edit: The _db.GetAllServiceVisits() definition is as follows:

public IEnumerable<service_visit_tb> GetAllServiceVisits()
{
    var _visits = _db.service_visit_tbs;
    return _visits.AsEnumerable();
}

End Edit

Second Edit: I have removed this line from every entry in the log:

-- Context: SqlProvider(Sql2008) Model: AttributedMetaModel Build: 4.0.30319.1

The Context Log is as follows:

// This query is to fetch all the clients of Type One (corresponding to _cr.GetRetailClientNames() )
SELECT [t0].[id], [t0].[name]
FROM [genii].[retail_client_tb] AS [t0]

// This query is to fetch all the clients of Type Two (corresponding to _cr.GetCorporateClientNames() )
SELECT [t0].[id], [t0].[company_name] AS [name]
FROM [genii].[corporate_client_tb] AS [t0]

// This is the main query (loading roughly 250 records) which fetchs all Visits
SELECT [t0].[id], [t0].[client_type], [t0].[client_id], [t0].[machine_type], [t0].[machineID], [t0].[visit_type], [t0].[scheduledon], [t0].[arrivedon], [t0].[completedon], [t0].[reported_problem], [t0].[diagnosed_problem], [t0].[action_taken], [t0].[visit_status], [t0].[engineer_id], [t0].[reference_id], [t0].[addedby], [t0].[addedon], [t0].[modifiedby], [t0].[modifiedon]
FROM [genii].[service_visit_tb] AS [t0]

// These next queries are not being manually called by me, I assume they are being
// called when the Razor view is compiled since I am calling the name value of a linked table as such:
// @item.service_visit.engineer_tb.name
SELECT [t0].[id], [t0].[type]
FROM [genii].[visit_type_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [8]

SELECT [t0].[id], [t0].[status]
FROM [genii].[visit_status_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [1]

SELECT [t0].[id], [t0].[name]
FROM [genii].[engineer_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [3]

SELECT [t0].[id], [t0].[type]
FROM [genii].[visit_type_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [11]

SELECT [t0].[id], [t0].[name]
FROM [genii].[engineer_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [2]

SELECT [t0].[id], [t0].[type]
FROM [genii].[visit_type_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [7]

SELECT [t0].[id], [t0].[type]
FROM [genii].[visit_type_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [2]

SELECT [t0].[id], [t0].[type]
FROM [genii].[visit_type_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [6]

SELECT [t0].[id], [t0].[type]
FROM [genii].[visit_type_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [3]

SELECT [t0].[id], [t0].[name]
FROM [genii].[engineer_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [5]

SELECT [t0].[id], [t0].[name]
FROM [genii].[engineer_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [4]

SELECT [t0].[id], [t0].[status]
FROM [genii].[visit_status_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [8]

SELECT [t0].[id], [t0].[status]
FROM [genii].[visit_status_tb] AS [t0]
WHERE [t0].[id] = @p0
-- @p0: Input Int (Size = -1; Prec = 0; Scale = 0) [2]

Addendum to the question: Is there a better way to pull this data? I always presumed (never checked) that the foreign key based data access that LINQ context gave me was as good as it gets, but seeing these additional queries, I'm not so sure anymore!

Will post speeds for the second part of execution later today (its a long weekend here in Mumbai but we are working right through)

End Edit

Third Edit (I am considering response from webserver to client, since all computation / fetching / binding / etc etc should be accounted for.)

Method One : 6.85 seconds (Call from 3 tables and then do casting into View-Model using C#)

public IEnumerable<service_visit_tb> GetAllServiceVisits()
{
    var _visits = _db.service_visit_tbs;
    _db.Log = new DebuggerWriter();
    return _visits.AsEnumerable();
}

public ActionResult Index()
{
    var _retailclients = _cr.GetRetailClientNames().ToArray();
    var _corporateclients = _cr.GetCorporateClientNames().ToArray();
    var _visits = _db.GetAllServiceVisits();
    List<ServiceVisitViewModel> _temp = new List<ServiceVisitViewModel>();
    foreach (service_visit_tb v in _visits)
    {
        _temp.Add(new ServiceVisitViewModel { service_visit = v, client = (v.client_type ? _corporateclients.Where(p => p.id == v.client_id).First().name : _retailclients.Where(p => p.id == v.client_id).First().name) });
    //}
    return View(_temp);
}

Method Two : 8.59 seconds (Call from 3 tables and then do casting into View-Model using LINQ)

public IEnumerable<service_visit_tb> GetAllServiceVisits()
{
    var _visits = _db.service_visit_tbs;
    _db.Log = new DebuggerWriter();
    return _visits.AsEnumerable();
}

public ActionResult Index()
{
    var _retailclients = _cr.GetRetailClientNames().ToArray();
    var _corporateclients = _cr.GetCorporateClientNames().ToArray();
    var _visits = _db.GetAllServiceVisits();
    var _temp = _visits.Select(o => new ServiceVisitViewModel
    {
        service_visit = o,
        client = (o.client_type ? _corporateclients.Where(p => p.id == o.client_id).First().name : _retailclients.Where(p => p.id == o.client_id).First().name)
    });
    return View(_temp);
}

Method Three : 5.76 seconds (Everything in a single LINQ query - executed on Database)

public IEnumerable<ServiceVisitViewModel> GetAllServiceVisitsNew()
{
    var _visits = _db.service_visit_tbs.Select(o => new ServiceVisitViewModel
    {
        service_visit = o,
        client = (o.client_type ? _db.corporate_client_tbs.Where(c=> c.id == o.client_id).First().company_name : _db.retail_client_tbs.Where(c=> c.id == o.client_id).First().name)
    });
    _db.Log = new DebuggerWriter();
    return _visits;
}

public ActionResult Index()
{
    var _visits = _db.GetAllServiceVisitsNew();
    return View(_visits());
}

Guess that decides it. Thanks to everyone for the help. I am marking Jon as correct answer since his approach of doing everything on database side is what brought home the bacon. Many thanks to anyone and everyone who took the trouble to reply.

End Edit

vvohra87
  • 5,594
  • 4
  • 22
  • 34
  • 1
    Looks odd, `.Select` itself should not add anything big. Such big difference indicates that the database queries differ. Can you run SQL Profiler and check whether the two alternatives generates the same SQL workload? – Albin Sunnanbo Aug 13 '11 at 15:19

2 Answers2

9

The Select statement makes a huge difference here - because it's changing what's being done at the database. (I'm assuming _db.GetAllServiceVisits() returns an IQueryable<T>.)

You're fetching all the retail and corporate clients into memory already because you're using ToArray. Your Select call will (I suspect) then be sending all that data back to the database so that the Select and Where can be performed there. I suspect if you look in SQL profiler, it'll be a pretty odd query.

If you force everything to be done client-side, it should be bassically the same as your latter approach. You can do that easily, with:

var _visits = _db.GetAllServiceVisits().ToList();

... however, that means you're pulling all of three tables from your database each time you hit this page. That doesn't sound like a good plan to me.

However, it would be better if you could do everything in the database without fetching all the retail and corportate clients into memory first.

That may be as simple as changing your repository methods to return IQueryable<T> instead of IEnumerable<T>, and removing the calls to AsEnumerable.

Jon Skeet
  • 1,421,763
  • 867
  • 9,128
  • 9,194
  • I tried doing everything in the database using LINQ and lambda functions but it failed miserably. Basically the complex ternary operator was not getting converted to SQL properly. And, to add to the puzzle - my _visits is a List(). I found a wonderful answer by you on another question which gave me the idea to split up lambda's before and after execution of the SQL! – vvohra87 Aug 13 '11 at 15:59
  • When I was pulling everything from the database, it was taking well over 60 seconds to run. And FYI - the server is MS SQL 2008, the network is 10 Mbps (in case it's relevant). – vvohra87 Aug 13 '11 at 16:07
  • @Varun: If `_visits` *is* a list then you've *already* grabbed everything from the database, basically. I can't see why using `Select` would be slower than looping... – Jon Skeet Aug 13 '11 at 17:18
  • Which is exactly why I never thought it would be. Left to myself I would have never tried any other approach. A colleague who works in php and ruby was taking a look at asked me to do it in regular c# without LINQ (he doesn't know LINQ). So I did and now I am honestly baffled. – vvohra87 Aug 14 '11 at 07:43
  • @Varun: Can you verify that nothing is hitting the database after you've called `GetAllServiceVisits`? (Put some logging in your context so you can see all queries executed.) Is the code you've shown *exactly* the code you've run? Which bit of code did you time? (It would be good to use a stopwatch *just* for the `Select`/`foreach` part.) – Jon Skeet Aug 14 '11 at 07:46
  • (Of course, a much better solution would be to try to do all of this work in the database, but it's interesting to investigate why the `Select` is taking so long... it really shouldn't be.) – Jon Skeet Aug 14 '11 at 07:50
  • Since you asked me to log - I did. Also I created another question which I would appreciate some input on [Logging LINQ-to-SQL Context](http://stackoverflow.com/questions/7055794/logging-linq-to-sql-context-execution-in-vs-2010). Also, is there something fundamentally wrong with the deisgn approach I have taken here? – vvohra87 Aug 14 '11 at 08:43
  • @Varun: Well, the fact that you're pulling all the data from the database (from those three tables) is fundamentally a problem IMO. Will look forward to hearing the results of the logging. – Jon Skeet Aug 14 '11 at 08:48
  • The logging results I posted in the edit of the question!! Or do you mean times? And is there any way I can get in touch with you to resolve the "fundamental problem"? I want to learn how to do it better. – vvohra87 Aug 14 '11 at 08:54
2

Wow. You do a lot of looping there. Each Where inside the loop ends up looping an array to find an item, every iteration in the loop.

Make dictionaries of the clients, so that you can look them up qickly. That should give you a dramatic increase in speed:

public ActionResult Index()
{
    private ClientRepository _cr = new ClientRepository();
    var _retailclients = _cr.GetRetailClientNames().ToDictionary(c => c.id);
    var _corporateclients = _cr.GetCorporateClientNames().ToDictionary(c => c.id);
    var _visits = _db.GetAllServiceVisits();

    var _temp = _visits.Select(o => new ServiceVisitViewModel
        {
            service_visit = o,
            client = (o.client_type ? _corporateclients[o.client_id].name : _retailclients[o.client_id].name)
        }).ToArray();

    return View(_temp);
}
Guffa
  • 687,336
  • 108
  • 737
  • 1,005
  • Interesting, I have not used dictionaries too much but it makes sense, I never thought of it here since I was earlier trying to dynamically pull the client details while looping through visits, instead of storing them locally in an array before hand. But will do this and let you know the results. – vvohra87 Aug 13 '11 at 16:00