There is one of the single most common wasting - sorting the whole sequence just to get the biggest item. In linq2sql this sin would be optimized away, but in linq2objects it is hardly the case. You should definitely get used to have some MaxBy in your tool set and use it every time you need the biggest item.
Just as a side note, you use Random in a wrong way. You should not create new instance of this class for every random number. Just create one instance and keep pulling numbers from that.
You should also avoid multiple calculation of the same thing multiple times. I would not even call this an optimalization. It is just writing code that does not waste, you should do it by habit most of the time I think.
If you need maximum speed, you have to sacrifice everything else. Throw out abstration layers, unroll everything, cut corners. If it is really needed for that 1% of the time critical code, it can be done, but it would not be pretty. It should definitely not be done frequently. You have been warned.
I implemented two ways to do that join. The first one uses hash dictionary to do that. It removes repeating of calculations and produces less iterators, as it calculates all subsums in one go. Note that .GroupBy.ToDictionary is roughly equal to .ToLookup, but Dictionary provides TryGetValue while lookup always return empty sequence. The second approach uses merge join. It sorts both sequences and merges them. You can conveniently fuse the merging and subsum calculation. It turns out this is the fastest approach I found here. And if you could get the sequences already sorted, it would be even faster.
I made slight change, the function returns list of named class instances, not anonymous, so I can verify the results. For mjwills code I had to add ToList, which mostly eliminated the speedup he provided. And the results are here
JoinTestClassOrig avg[ms]: 1161 / rounds[ms]: 1158, 1160, 1158, 1170
JoinTestClassHash_mjwills avg[ms]: 1158 / rounds[ms]: 1169, 1152, 1162, 1151
JoinTestClassHashMe avg[ms]: 857 / rounds[ms]: 865, 841, 840, 883
JoinTestClassMergeMe avg[ms]: 633 / rounds[ms]: 632, 631, 634, 636
and the code
class Program
{
static void Main(string[] args)
{
var testCases = new List<TestClass>()
{
new JoinTestClassOrig(),
new JoinTestClassHash_mjwills(),
new JoinTestClassHashMe(),
new JoinTestClassMergeMe(),
};
Stopwatch stopwatch = new Stopwatch();
List<List<ProductPurchaseOutp>> results = new List<List<ProductPurchaseOutp>>();
for (int i = 0; i < 5; i++)
{
foreach (var testClass in testCases)
{
testClass.GenerateRandomObjects(1);
GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce;
GC.Collect();
GC.WaitForFullGCComplete();
stopwatch.Restart();
var res = (testClass.ComputeMatches());
stopwatch.Stop();
if (i > 0)
{
results.Add(res);
testClass.results.Add(stopwatch.ElapsedMilliseconds);
}
}
}
Console.WriteLine("Checking results...");
int cnt = results
.Select(r => r
.OrderBy(o => o.ProductID)
.ThenBy(o => o.ROQ)
.ThenBy(o => o.RPQ)
.ThenBy(o => o.RUQ)
.ThenBy(o => o.RV)
.ToList()
)
.Aggregate((a, b) =>
{
for (int i = 0; i < a.Count; ++i)
{
if (!a[i].IsEqualTo(b[i])) { throw new Exception("Sequences are not equal"); }
}
return a;
}).Where(m => m.WAP != 0).Count();
Console.WriteLine("Count: " + cnt.ToString());
foreach (var test in testCases)
{
Console.WriteLine(test.name() + " avg[ms]: " + (int)test.results.Average() + " / rounds[ms]: " + string.Join(", ", test.results));
}
Console.ReadLine();
}
}
public abstract class TestClass
{
protected List<ProductPurchase> ListOne = new List<ProductPurchase>();
protected List<ProductPurchase> ListTwo = new List<ProductPurchase>();
public List<long> results = new List<long>();
public void GenerateRandomObjects(int seed)
{
Random rnd = new Random(seed);
ListOne.Clear();
ListTwo.Clear();
for (int i = 0; i < 500000; i++)
{
ListOne.Add(new ProductPurchase
{
ProductID = rnd.Next(1, 300000),
Price = rnd.Next(1, 100),
Quantity = rnd.Next(1, 30),
GlobalQuantity = rnd.Next(1, 5000)
});
ListTwo.Add(new ProductPurchase
{
ProductID = rnd.Next(1, 300000),
Price = rnd.Next(1, 100),
Quantity = rnd.Next(1, 30),
GlobalQuantity = rnd.Next(1, 10000)
});
}
}
public abstract List<ProductPurchaseOutp> ComputeMatches();
public string name()
{
return this.GetType().Name;
}
}
public class JoinTestClassOrig : TestClass
{
public override List<ProductPurchaseOutp> ComputeMatches()
{
var matched = (from listOne in ListOne
join listTwo in ListTwo on listOne.ProductID equals listTwo.ProductID into matches
select new ProductPurchaseOutp
{
ProductID = listOne.ProductID,
ROQ = listOne.Price,
RUQ = listOne.Quantity,
RPQ = listOne.GlobalQuantity,
RV = listOne.Price * listOne.Quantity * listOne.GlobalQuantity,
BMPProduct = matches.OrderBy(m => m.Price).FirstOrDefault(),
WAP = (matches.IsEmpty() || matches.Sum(m => m.Quantity * m.GlobalQuantity) == 0) ? 0 : matches.Sum(m => m.Quantity * m.GlobalQuantity * m.Price) / matches.Sum(m => m.Quantity * m.GlobalQuantity)
})
.ToList();
return matched;
}
}
public class JoinTestClassHash_mjwills : TestClass
{
public override List<ProductPurchaseOutp> ComputeMatches()
{
var dicTwo = ListTwo.ToLookup(z => z.ProductID);
var matched = (from listOne in ListOne
let matches = dicTwo[listOne.ProductID]
let sum = matches.Sum(m => m.Quantity * m.GlobalQuantity)
select new ProductPurchaseOutp
{
ProductID = listOne.ProductID,
ROQ = listOne.Price,
RUQ = listOne.Quantity,
RPQ = listOne.GlobalQuantity,
RV = listOne.Price * listOne.Quantity * listOne.GlobalQuantity,
BMPProduct = matches.OrderBy(m => m.Price).FirstOrDefault(),
WAP = sum == 0 ? 0 : matches.Sum(m => m.Quantity * m.GlobalQuantity * m.Price) / sum
});
return matched.ToList();
}
}
public class JoinTestClassMergeMe : TestClass
{
private IEnumerable<ProductPurchaseOutp> matched()
{
var l1 = ListOne
.OrderBy(p => p.ProductID);
var l2 = ListTwo
.OrderBy(p => p.ProductID);
bool eo2 = false;
using (var en1 = l1.GetEnumerator())
using (var en2 = l2.GetEnumerator())
{
if (!en1.MoveNext()) { yield break; }
var cur1 = en1.Current;
ProductPurchase cur2 = null;
if (en2.MoveNext())
{
cur2 = en2.Current;
}
else
{
eo2 = true;
}
do
{
int ID = cur1.ProductID;
long qsum = 0;
decimal psum = 0;
decimal min = decimal.MaxValue;
decimal wap = 0;
ProductPurchase minp = null;
while (!eo2 && cur2.ProductID <= ID)
{
if (cur2.ProductID == ID)
{
long quantity = (long)cur2.Quantity * cur2.GlobalQuantity;
var price = cur2.Price;
qsum += quantity;
psum += quantity * price;
if (price < min)
{
minp = cur2;
min = price;
}
}
if (en2.MoveNext())
{
cur2 = en2.Current;
}
else
{
eo2 = true;
}
};
if (qsum != 0)
{
wap = psum / qsum;
}
do
{
yield return new ProductPurchaseOutp()
{
ProductID = cur1.ProductID,
ROQ = cur1.Price,
RUQ = cur1.Quantity,
RPQ = cur1.GlobalQuantity,
RV = cur1.Price * cur1.Quantity * cur1.GlobalQuantity,
BMPProduct = minp,
WAP = wap
};
} while (en1.MoveNext() && (cur1 = en1.Current).ProductID == ID);
if (cur1.ProductID == ID) { yield break; }
} while (true);
}
}
public override List<ProductPurchaseOutp> ComputeMatches()
{
return matched().ToList();
}
}
public class JoinTestClassHashMe : TestClass
{
public override List<ProductPurchaseOutp> ComputeMatches()
{
var l2 = ListTwo
.GroupBy(l => l.ProductID)
.ToDictionary(p => p.Key);
return ListOne
.Select(listOne =>
{
decimal wap = 0;
ProductPurchase minp = null;
if (l2.TryGetValue(listOne.ProductID, out var matches))
{
long qsum = 0;
decimal psum = 0;
decimal min = decimal.MaxValue;
foreach (var m in matches)
{
long quantity = (long)m.Quantity * m.GlobalQuantity;
var price = m.Price;
qsum += quantity;
psum += quantity * price;
if (price < min)
{
minp = m;
min = price;
}
}
if (qsum != 0)
{
wap = psum / qsum;
}
}
return new ProductPurchaseOutp
{
ProductID = listOne.ProductID,
ROQ = listOne.Price,
RUQ = listOne.Quantity,
RPQ = listOne.GlobalQuantity,
RV = listOne.Price * listOne.Quantity * listOne.GlobalQuantity,
BMPProduct = minp,
WAP = wap
};
})
.ToList();
}
}
public static class Extensions
{
public static bool IsEmpty<T>(this IEnumerable<T> source)
{
return !source.Any();
}
}
public class ProductPurchase
{
public int ProductID
{
get;
set;
}
public decimal Price
{
get;
set;
}
public int Quantity
{
get;
set;
}
public int GlobalQuantity
{
get;
set;
}
}
public class ProductPurchaseOutp
{
public int ProductID
{
get;
set;
}
public decimal ROQ
{
get;
set;
}
public int RUQ
{
get;
set;
}
public int RPQ
{
get;
set;
}
public decimal RV
{
get;
set;
}
public ProductPurchase BMPProduct { get; set; }
public decimal WAP { get; set; }
public bool IsEqualTo(ProductPurchaseOutp b)
{
return this.ProductID == b.ProductID
&& this.ROQ == b.ROQ
&& this.RPQ == b.RPQ
&& this.RUQ == b.RUQ
&& this.RV == b.RV
&& this.WAP == b.WAP
&& (this.BMPProduct == null && b.BMPProduct == null ||
this.BMPProduct != null && b.BMPProduct != null
&& this.BMPProduct.GlobalQuantity == b.BMPProduct.GlobalQuantity
&& this.BMPProduct.Price == b.BMPProduct.Price
&& this.BMPProduct.ProductID == b.BMPProduct.ProductID
&& this.BMPProduct.Quantity == b.BMPProduct.Quantity);
}
}