在开发过程中,我们经常使用LINQ的ToList()方法,将迭代器转换为具体的List对象。为了更好地了解该方法的工作原理,我们从源码的角度对其进行分析。
ToList作为IEnumerable的扩展方法,可以帮助我们将IEnumerable转换为List。
源码GIT地址:
/// <summary>
/// Converts <paramref name="source"/> into a <see cref="List{T}"/>.
/// </summary>
/// <typeparam name="TSource">Element type of the sequence.</typeparam>
/// <param name="source">The sequence to materialize; a null value is reported through ThrowHelper.</param>
/// <returns>
/// The list built by the source itself when it implements IIListProvider,
/// otherwise a new list constructed directly from <paramref name="source"/>.
/// </returns>
public static List<TSource> ToList<TSource>(this IEnumerable<TSource> source)
{
    if (source == null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
    }

    // A source that implements IIListProvider builds the list itself.
    if (source is IIListProvider<TSource> provider)
    {
        return provider.ToList();
    }

    // Every other source is handed to the List<T> constructor.
    return new List<TSource>(source);
}
ToList本身的代码很简单:先判断参数source是否实现了IIListProvider接口。如果实现了该接口,则调用该接口的ToList方法进行转换;如果未实现该接口,则直接调用List的构造方法,创建一个新的List并返回。
/// <summary>
/// Contract for a sequence that can produce an array, a list, or a count of its
/// elements itself. <c>ToList</c> above checks for this interface and, when the
/// source implements it, delegates list creation to the source.
/// </summary>
/// <typeparam name="TElement">Element type of the sequence.</typeparam>
internal interface IIListProvider<TElement> : IEnumerable<TElement>
{
/// <summary>Produces the elements as an array.</summary>
TElement[] ToArray();
/// <summary>Produces the elements as a <see cref="List{T}"/>.</summary>
List<TElement> ToList();
/// <summary>Reports the number of elements.</summary>
/// <param name="onlyIfCheap">
/// NOTE(review): presumably asks the implementation to skip counting when that
/// would require enumerating the sequence; confirm against the runtime source.
/// </param>
int GetCount(bool onlyIfCheap);
}
IIListProvider接口定义了三个方法,其中包括ToList方法。
对于一个常见的LINQ操作xx.Where().Select().ToList(), 假设xx是一个IEnumerable, 我们希望可以一次遍历xx,实现过滤,投影和转List的操作。不希望在完成Select操作后,重新遍历xx进行List的转化。
在LINQ的源码中,我们可以找到许多文件名包含SpeedOpt的文件,例如Select.SpeedOpt.cs、SelectMany.SpeedOpt.cs等等。这些文件中的迭代器类实现了IIListProvider接口的ToList方法,从而将ToList操作合并到前面的操作中,避免多次遍历数据源。
下面我们就分析一下,如何将ToList操作和ToList前面的方法进行合并。
我们使用如下数据进行分析:
// Sample data shared by the query examples that follow.
var studentList = new List<Student>
{
    new Student("x001", "Tom", "CN-1", 90),
    new Student("x002", "Jack", "CN-1", 88),
    new Student("x003", "Mary", "CN-2", 87),
    new Student("x004", "Frank", "CN-2", 97)
};
Student和Teacher类代码见附录。
// Project every student to an anonymous { Name, Math } object and materialize the result.
var list = studentList
    .Select(student => new { student.Name, Math = student.MathResult })
    .ToList();
对于Select().ToList(),它的执行流程如下:
private sealed partial class SelectListIterator<TSource, TResult> : IPartition<TResult>
{
    /// <summary>
    /// Builds the projected list in one indexed pass over the source.
    /// </summary>
    /// <returns>A list holding the selector result for each source element, in source order.</returns>
    public List<TResult> ToList()
    {
        // The count is read once and also used to pre-size the result list.
        int total = _source.Count;
        var projected = new List<TResult>(total);

        int index = 0;
        while (index < total)
        {
            var current = _source[index];
            projected.Add(_selector(current));
            index++;
        }

        return projected;
    }
}
// Keep only the students whose math result is at least 90 and materialize the result.
var list = studentList
    .Where(student => student.MathResult >= 90)
    .ToList();
对于Where().ToList(),它的执行流程如下:
private sealed partial class WhereListIterator<TSource> : Iterator<TSource>, IIListProvider<TSource>
{
    /// <summary>
    /// Filters the source and builds the result list in one indexed pass.
    /// </summary>
    /// <returns>A list of the source elements accepted by the predicate, in source order.</returns>
    public List<TSource> ToList()
    {
        var matches = new List<TSource>();

        // The source count is re-read on every iteration.
        for (int index = 0; index < _source.Count; index++)
        {
            TSource candidate = _source[index];
            if (!_predicate(candidate))
            {
                continue;
            }

            matches.Add(candidate);
        }

        return matches;
    }
}
对于Where().Select().ToList(),它的执行流程如下:
// Filter by math result, project to an anonymous { Name, Math } object, then materialize.
var list = studentList
    .Where(student => student.MathResult >= 90)
    .Select(student => new { student.Name, Math = student.MathResult })
    .ToList();
private sealed partial class WhereSelectListIterator<TSource, TResult> : IIListProvider<TResult>
{
    /// <summary>
    /// Filters and projects the source in one indexed pass.
    /// </summary>
    /// <returns>A list holding the selector result for each element accepted by the predicate, in source order.</returns>
    public List<TResult> ToList()
    {
        var projected = new List<TResult>();

        // The source count is re-read on every iteration.
        for (int index = 0; index < _source.Count; index++)
        {
            TSource candidate = _source[index];
            if (!_predicate(candidate))
            {
                continue;
            }

            // The selector runs only for elements that passed the predicate.
            projected.Add(_selector(candidate));
        }

        return projected;
    }
}
// Second group of students, assigned to teacher2 below.
var studentList1 = new List<Student>
{
    new Student("x005", "Henry", "CN-1", 90),
    new Student("x006", "Lance", "CN-1", 88),
    new Student("x007", "Steven", "CN-2", 87),
    new Student("x008", "Carl", "CN-2", 97)
};

// Each teacher owns one of the two student lists.
var teacher1 = new Teacher { Id = "t001", Name = "Jane", Students = studentList };
var teacher2 = new Teacher { Id = "t002", Name = "David", Students = studentList1 };

var teachers = new List<Teacher> { teacher1, teacher2 };

// Flatten every teacher's students into a single list.
// NOTE(review): SelectMany2 is not declared in this file; presumably a local copy of SelectMany. Confirm.
var students = teachers.SelectMany2(teacher => teacher.Students).ToList();
对于SelectMany().ToList(),它的执行流程如下:
private sealed partial class SelectManySingleSelectorIterator<TSource, TResult> : IIListProvider<TResult>
{
    /// <summary>
    /// Flattens the selector results for all source elements into one list.
    /// </summary>
    /// <returns>A list containing the elements of every inner sequence, appended in source order.</returns>
    public List<TResult> ToList()
    {
        var flattened = new List<TResult>();

        foreach (TSource outer in _source)
        {
            // Each source element yields an inner sequence, appended as a whole.
            IEnumerable<TResult> inner = _selector(outer);
            flattened.AddRange(inner);
        }

        return flattened;
    }
}
/// <summary>
/// Compares <see cref="Student"/> instances by their <c>Id</c> only.
/// </summary>
public class StudentEqualityComparer : IEqualityComparer<Student>
{
    /// <summary>
    /// Determines whether two students have the same <c>Id</c>.
    /// </summary>
    /// <param name="b1">First student; may be null.</param>
    /// <param name="b2">Second student; may be null.</param>
    /// <returns>
    /// <c>true</c> when both arguments are the same reference (including both null)
    /// or when their ids are equal; otherwise <c>false</c>.
    /// </returns>
    public bool Equals(Student b1, Student b2)
    {
        if (ReferenceEquals(b1, b2))
        {
            return true;
        }

        if (b1 is null || b2 is null)
        {
            return false;
        }

        // The static string.Equals is an ordinal comparison that tolerates null ids,
        // unlike the instance call b1.Id.Equals(...), which throws on a null id.
        return string.Equals(b1.Id, b2.Id);
    }

    /// <summary>
    /// Returns a hash code derived from the student's <c>Id</c>.
    /// </summary>
    /// <param name="bx">The student to hash; may be null.</param>
    /// <returns>The hash code of the id, or 0 when the student or its id is null.</returns>
    public int GetHashCode(Student bx)
    {
        // Returning 0 for null keeps this consistent with Equals, which treats two
        // null students (or two null ids) as equal.
        return bx?.Id?.GetHashCode() ?? 0;
    }
}
// Sample list in which the last two entries share the id "x007".
List<Student> studentList1 = new List<Student>()
{
    new Student("x005", "Henry", "CN-1", 90),
    new Student("x006", "Lance", "CN-1", 88),
    new Student("x007", "Steven", "CN-2", 87),
    new Student("x007", "Carl", "CN-2", 97),
};

// Query studentList1, the list that actually holds the duplicate id, and call
// ToList() so the Distinct-then-ToList path is exercised.
// NOTE(review): Distinct2 is not declared in this file; presumably a local copy of Distinct. Confirm.
var stuList = studentList1.Distinct2(new StudentEqualityComparer()).ToList();
对于Distinct().ToList(),它的执行流程如下:
private sealed partial class DistinctIterator<TSource> : IIListProvider<TSource>
{
    /// <summary>
    /// Builds the list of distinct elements by way of a hash set.
    /// </summary>
    /// <returns>The list that Enumerable.HashSetToList produces from the set.</returns>
    public List<TSource> ToList()
    {
        // The set is built from the source with the iterator's comparer.
        var unique = new HashSet<TSource>(_source, _comparer);
        return Enumerable.HashSetToList(unique);
    }
}
/// <summary>
/// A student record used by the sample queries.
/// </summary>
public class Student
{
    /// <summary>
    /// Creates an empty student so that object-initializer syntax keeps working.
    /// </summary>
    public Student()
    {
    }

    /// <summary>
    /// Creates a fully populated student, as the sample data does with
    /// <c>new Student("x001", "Tom", "CN-1", 90)</c>.
    /// </summary>
    /// <param name="id">Student identifier.</param>
    /// <param name="name">Student name.</param>
    /// <param name="classroom">Classroom the student belongs to.</param>
    /// <param name="mathResult">Math result of the student.</param>
    public Student(string id, string name, string classroom, int mathResult)
    {
        Id = id;
        Name = name;
        Classroom = classroom;
        MathResult = mathResult;
    }

    /// <summary>Student identifier.</summary>
    public string Id { get; set; }

    /// <summary>Student name.</summary>
    public string Name { get; set; }

    /// <summary>Classroom the student belongs to.</summary>
    public string Classroom { get; set; }

    /// <summary>Math result of the student.</summary>
    public int MathResult { get; set; }
}
/// <summary>
/// A teacher together with the students assigned to them.
/// </summary>
public class Teacher
{
    /// <summary>Teacher identifier.</summary>
    public string Id { get; set; }

    /// <summary>Teacher name.</summary>
    public string Name { get; set; }

    /// <summary>
    /// Students assigned to this teacher. Defaults to an empty list rather than null,
    /// so flattening with <c>t => t.Students</c> works for a teacher created without one.
    /// </summary>
    public List<Student> Students { get; set; } = new List<Student>();
}