在 .NET 开发过程中,无论是Web还是Form或是其他领域的开发,Where作为IEnumerable的扩展方法都十分常用。本文对Where方法的关键源码进行简要分析,以方便大家日后更好地使用该方法。
本文分析的源码来自 https://github.com/dotnet/runtime.git
Where方法的过滤功能主要通过迭代器实现,其源代码包含7个迭代器。按照功能划分如下:
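类名或方法名 | 代码位置 | 基本描述 |
---|---|---|
`public static IEnumerable<TSource> Where<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate)` | Where.cs | 对外提供的扩展方法,参数是一个返回值为bool类型的委托。 |
`public static IEnumerable<TSource> Where<TSource>(this IEnumerable<TSource> source, Func<TSource, int, bool> predicate)` | Where.cs | 对外提供的扩展方法,参数是一个返回值为bool类型的委托,集合中每项的索引参与委托运算。 |
`private static IEnumerable<TSource> WhereIterator<TSource>(IEnumerable<TSource> source, Func<TSource, int, bool> predicate)` | Where.cs | 迭代器,仅用于带索引的Where扩展方法 |
`private sealed partial class WhereEnumerableIterator<TSource> : Iterator<TSource>` | Where.cs | 迭代器,用于Enumerable作为容器的数据迭代 |
`internal sealed partial class WhereArrayIterator<TSource> : Iterator<TSource>` | Where.cs | 迭代器,用于Array作为容器的数据迭代 |
`private sealed partial class WhereListIterator<TSource> : Iterator<TSource>` | Where.cs | 迭代器,用于List作为容器的数据迭代 |
`private sealed partial class WhereSelectArrayIterator<TSource, TResult> : Iterator<TResult>` | Where.cs | 迭代器,用于Array容器中的Where().Select()的情况 |
`private sealed partial class WhereSelectListIterator<TSource, TResult> : Iterator<TResult>` | Where.cs | 迭代器,用于List容器中的Where().Select()的情况 |
`private sealed partial class WhereSelectEnumerableIterator<TSource, TResult> : Iterator<TResult>` | Where.cs | 迭代器,用于Enumerable容器中的Where().Select()的情况 |
`internal static class ThrowHelper` | ThrowHelper.cs | 所有Linq方法的异常管理类 |
`internal abstract class Iterator<TSource> : IEnumerable<TSource>, IEnumerator<TSource>` | Iterator.cs | 迭代器的基类 |
`public static Func<TSource, bool> CombinePredicates<TSource>(Func<TSource, bool> predicate1, Func<TSource, bool> predicate2)` | Utilities.cs | 如果有多个Where语句,将每个Where的过滤条件按照And方式合并 |
`public static Func<TSource, TResult> CombineSelectors<TSource, TMiddle, TResult>(Func<TSource, TMiddle> selector1, Func<TMiddle, TResult> selector2)` | Utilities.cs | 如果Where语句后面有多个Select,将每个Select中的Selector进行合并 |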
Enumerable迭代器、List迭代器和Array迭代器在代码实现上相差不大,都是通过设计模式中的迭代器模式实现具体的功能。区别是Enumerable迭代器和List迭代器都是调用容器自身的迭代器实现逐个元素的比较和过滤,而Array迭代器是通过数组索引操作实现元素比较和过滤。
Iterator类是WhereEnumerableIterator , WhereSelectEnumerableIterator, WhereListIterator , WhereSelectListIterator, WhereArrayIterator 和WhereSelectArrayIterator的基类。
using System.Collections;
using System.Collections.Generic;
namespace System.Linq
{
public static partial class Enumerable
{
/// <summary>
/// A base class for enumerables that are loaded on-demand.
/// </summary>
/// <typeparam name="TSource">The type of each item to yield.</typeparam>
/// <remarks>
/// <para>
/// The value an iterator represents is immutable: the operation behind it cannot be changed.
/// </para>
/// <para>
/// The same object also acts as its own enumerator, so its enumeration state may change
/// while it is being walked.
/// </para>
/// <para>
/// State that defines the iterator's value therefore belongs in readonly fields, while state
/// tied to an enumeration in progress (such as the currently yielded item) belongs in
/// non-readonly fields.
/// </para>
/// </remarks>
internal abstract class Iterator<TSource> : IEnumerable<TSource>, IEnumerator<TSource>
{
    // Managed thread id captured by the constructor; compared again in GetEnumerator.
    private readonly int _threadId;

    // Enumeration state: 0 after construction, 1 once GetEnumerator has handed the
    // instance out, -1 after Dispose. Derived classes use further values of their own.
    internal int _state;

    // Backing store for Current; reset to default by Dispose.
    internal TSource _current = default!;

    /// <summary>
    /// Initializes a new instance and records the id of the constructing thread.
    /// </summary>
    protected Iterator() => _threadId = Environment.CurrentManagedThreadId;

    /// <summary>
    /// The item currently yielded by this iterator.
    /// </summary>
    public TSource Current
    {
        get { return _current; }
    }

    /// <summary>
    /// Makes a shallow copy of this iterator.
    /// </summary>
    /// <remarks>
    /// <see cref="GetEnumerator"/> calls this whenever the instance cannot serve as its own
    /// enumerator: its state is no longer 0, or the request comes from a thread other than
    /// the one that created it.
    /// </remarks>
    public abstract Iterator<TSource> Clone();

    /// <summary>
    /// Puts this iterator in a state whereby no further enumeration will take place.
    /// </summary>
    /// <remarks>
    /// Derived classes should override this method if they need to clean up mutable state
    /// they hold onto (for example, disposing other enumerators).
    /// </remarks>
    public virtual void Dispose()
    {
        _state = -1;
        _current = default!;
    }

    /// <summary>
    /// Gets the enumerator used to yield values from this iterator.
    /// </summary>
    /// <remarks>
    /// When the state is still 0 and the caller runs on the thread that created this
    /// iterator, the iterator itself is returned; otherwise a shallow copy obtained from
    /// <see cref="Clone"/> is returned. Either way the returned object's state is set to 1.
    /// </remarks>
    public IEnumerator<TSource> GetEnumerator()
    {
        Iterator<TSource> enumerator;
        if (_state == 0 && _threadId == Environment.CurrentManagedThreadId)
        {
            enumerator = this;
        }
        else
        {
            enumerator = Clone();
        }

        enumerator._state = 1;
        return enumerator;
    }

    /// <summary>
    /// Retrieves the next item in this iterator and yields it via <see cref="Current"/>.
    /// </summary>
    /// <returns><c>true</c> if there was another value to be yielded; otherwise, <c>false</c>.</returns>
    public abstract bool MoveNext();

    /// <summary>
    /// Returns an enumerable that maps each item in this iterator based on a selector.
    /// </summary>
    /// <typeparam name="TResult">The type of the mapped items.</typeparam>
    /// <param name="selector">The selector used to map each item.</param>
    public virtual IEnumerable<TResult> Select<TResult>(Func<TSource, TResult> selector) =>
        new SelectEnumerableIterator<TSource, TResult>(this, selector);

    /// <summary>
    /// Returns an enumerable that filters each item in this iterator based on a predicate.
    /// </summary>
    /// <param name="predicate">The predicate used to filter each item.</param>
    public virtual IEnumerable<TSource> Where(Func<TSource, bool> predicate) =>
        new WhereEnumerableIterator<TSource>(this, predicate);

    object? IEnumerator.Current => Current;

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();

    // Resetting is not supported; the call is forwarded to ThrowHelper.
    void IEnumerator.Reset() => ThrowHelper.ThrowNotSupportedException();
}
}
}
基类迭代器最核心的操作是定义迭代器与线程的关系,并且定义迭代器的相关操作。
/// <summary>
/// Filters a sequence based on a predicate, choosing an iterator suited to the runtime
/// type of <paramref name="source"/>.
/// </summary>
/// <typeparam name="TSource">The type of the elements of <paramref name="source"/>.</typeparam>
/// <param name="source">The sequence to filter. A null value is reported through ThrowHelper.ThrowArgumentNullException.</param>
/// <param name="predicate">The test applied to each element. A null value is reported through ThrowHelper.ThrowArgumentNullException.</param>
/// <returns>A lazily evaluated sequence; nothing is filtered until it is enumerated.</returns>
public static IEnumerable<TSource> Where<TSource>(this IEnumerable<TSource> source, Func<TSource, bool> predicate)
{
    if (source is null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
    }

    if (predicate is null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
    }

    // Cases are tried top to bottom, so an existing Iterator wins over array/list handling.
    switch (source)
    {
        case Iterator<TSource> iterator:
            // Let the iterator decide how to add the filter (derived classes may override Where).
            return iterator.Where(predicate);

        case TSource[] array:
            if (array.Length == 0)
            {
                // Nothing to filter: no iterator is allocated for an empty array.
                return Empty<TSource>();
            }

            return new WhereArrayIterator<TSource>(array, predicate);

        case List<TSource> list:
            return new WhereListIterator<TSource>(list, predicate);

        default:
            return new WhereEnumerableIterator<TSource>(source, predicate);
    }
}
/// <summary>
/// An iterator that filters each item of a <see cref="List{T}"/>.
/// </summary>
/// <typeparam name="TSource">The type of the source list.</typeparam>
private sealed partial class WhereListIterator<TSource> : Iterator<TSource>
{
    private readonly List<TSource> _source;
    private readonly Func<TSource, bool> _predicate;

    // Enumeration state: obtained from the list on the first MoveNext call.
    private List<TSource>.Enumerator _enumerator;

    public WhereListIterator(List<TSource> source, Func<TSource, bool> predicate)
    {
        Debug.Assert(source != null);
        Debug.Assert(predicate != null);

        _source = source;
        _predicate = predicate;
    }

    /// <summary>
    /// Creates a new iterator over the same list with the same predicate.
    /// </summary>
    public override Iterator<TSource> Clone()
    {
        return new WhereListIterator<TSource>(_source, _predicate);
    }

    /// <summary>
    /// Advances to the next list element accepted by the predicate.
    /// </summary>
    /// <returns><c>true</c> when an accepted element was stored in the current-item field; otherwise <c>false</c>.</returns>
    public override bool MoveNext()
    {
        // State 1: first call after GetEnumerator; acquire the list enumerator.
        if (_state == 1)
        {
            _enumerator = _source.GetEnumerator();
            _state = 2;
        }

        // State 2: enumeration in progress. Any other state yields nothing.
        if (_state == 2)
        {
            while (_enumerator.MoveNext())
            {
                TSource candidate = _enumerator.Current;
                if (!_predicate(candidate))
                {
                    continue;
                }

                _current = candidate;
                return true;
            }

            // The list is exhausted: dispose so later calls report false.
            Dispose();
        }

        return false;
    }

    /// <summary>
    /// Fuses a following Select into a single filter-then-map iterator over the same list.
    /// </summary>
    public override IEnumerable<TResult> Select<TResult>(Func<TSource, TResult> selector)
    {
        return new WhereSelectListIterator<TSource, TResult>(_source, _predicate, selector);
    }

    /// <summary>
    /// Fuses a following Where by combining both predicates into one iterator over the same list.
    /// </summary>
    public override IEnumerable<TSource> Where(Func<TSource, bool> predicate)
    {
        Func<TSource, bool> combined = CombinePredicates(_predicate, predicate);
        return new WhereListIterator<TSource>(_source, combined);
    }
}
该迭代器将Where和Select两个操作合并,通过一个迭代器实现。
/// <summary>
/// An iterator that filters, then maps, each item of a <see cref="List{T}"/>.
/// </summary>
/// <typeparam name="TSource">The type of the source list.</typeparam>
/// <typeparam name="TResult">The type of the mapped items.</typeparam>
private sealed partial class WhereSelectListIterator<TSource, TResult> : Iterator<TResult>
{
    private readonly List<TSource> _source;
    private readonly Func<TSource, bool> _predicate;
    private readonly Func<TSource, TResult> _selector;

    // Enumeration state: obtained from the list on the first MoveNext call.
    private List<TSource>.Enumerator _enumerator;

    public WhereSelectListIterator(List<TSource> source, Func<TSource, bool> predicate, Func<TSource, TResult> selector)
    {
        Debug.Assert(source != null);
        Debug.Assert(predicate != null);
        Debug.Assert(selector != null);

        _source = source;
        _predicate = predicate;
        _selector = selector;
    }

    /// <summary>
    /// Creates a new iterator over the same list with the same predicate and selector.
    /// </summary>
    public override Iterator<TResult> Clone()
    {
        return new WhereSelectListIterator<TSource, TResult>(_source, _predicate, _selector);
    }

    /// <summary>
    /// Advances to the next list element accepted by the predicate and stores its mapped value.
    /// </summary>
    /// <returns><c>true</c> when a mapped value was stored in the current-item field; otherwise <c>false</c>.</returns>
    public override bool MoveNext()
    {
        // State 1: first call after GetEnumerator; acquire the list enumerator.
        if (_state == 1)
        {
            _enumerator = _source.GetEnumerator();
            _state = 2;
        }

        // State 2: enumeration in progress. Any other state yields nothing.
        if (_state == 2)
        {
            while (_enumerator.MoveNext())
            {
                TSource candidate = _enumerator.Current;
                if (!_predicate(candidate))
                {
                    continue;
                }

                // The selector only runs for elements that passed the filter.
                _current = _selector(candidate);
                return true;
            }

            // The list is exhausted: dispose so later calls report false.
            Dispose();
        }

        return false;
    }

    /// <summary>
    /// Fuses a following Select by composing the selectors into one iterator over the same list.
    /// </summary>
    public override IEnumerable<TResult2> Select<TResult2>(Func<TResult, TResult2> selector)
    {
        Func<TSource, TResult2> composed = CombineSelectors(_selector, selector);
        return new WhereSelectListIterator<TSource, TResult2>(_source, _predicate, composed);
    }
}
也就是说,形如Where(filter1).Where(filter2)…Where(filtern).Select(selector1)…Select(selectorn)的调用,无论有多少个Where或多少个Select,在实际执行的时候,最后只有一个迭代器WhereSelectListIterator实例参与运算。所有Where中的条件都会按照And的方式合并,所有Select中的转换器都会按照selectorn(selectorn-1(…selector1(x)))的方式进行合并。
上述结论对于Array和Enumerable容器同样适用,WhereEnumerableIterator ,WhereSelectEnumerableIterator, WhereArrayIterator ,WhereSelectArrayIterator源码与上述对应的迭代器对应代码类似,不再赘述。
上述结论不适用于Where的过滤方法需要索引参与运算的情况。
/// <summary>
/// Filters a sequence based on a predicate that also receives each element's index.
/// </summary>
/// <typeparam name="TSource">The type of the elements of <paramref name="source"/>.</typeparam>
/// <param name="source">The sequence to filter. A null value is reported through ThrowHelper.ThrowArgumentNullException.</param>
/// <param name="predicate">The test applied to each element and its index. A null value is reported through ThrowHelper.ThrowArgumentNullException.</param>
/// <returns>The sequence produced by WhereIterator for the validated arguments.</returns>
public static IEnumerable<TSource> Where<TSource>(this IEnumerable<TSource> source, Func<TSource, int, bool> predicate)
{
    // Arguments are validated here, before the filtering work is handed to WhereIterator.
    if (source is null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
    }

    if (predicate is null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.predicate);
    }

    IEnumerable<TSource> filtered = WhereIterator(source, predicate);
    return filtered;
}
/// <summary>
/// Lazily yields the elements of <paramref name="source"/> for which
/// <paramref name="predicate"/> returns true, passing each element's position to the predicate.
/// </summary>
/// <typeparam name="TSource">The type of the elements of <paramref name="source"/>.</typeparam>
/// <param name="source">The sequence to walk.</param>
/// <param name="predicate">Receives an element and its zero-based position in <paramref name="source"/>.</param>
private static IEnumerable<TSource> WhereIterator<TSource>(IEnumerable<TSource> source, Func<TSource, int, bool> predicate)
{
    // Starts at -1 so the first element is seen with position 0.
    int position = -1;

    using (IEnumerator<TSource> walker = source.GetEnumerator())
    {
        while (walker.MoveNext())
        {
            TSource element = walker.Current;

            // Overflow-checked increment: going past int.MaxValue throws instead of wrapping.
            position = checked(position + 1);

            if (predicate(element, position))
            {
                yield return element;
            }
        }
    }
}
假设我有一个非空的List对象branchList,如果调用branchList.Where(s => s.BranchStatus == BranchStatus.Open).Select(s => new {s.Name, s.Address})查询分行信息,为什么不加ToList()或foreach就不能获取具体数据?
该调用只会返回一个WhereSelectListIterator实例(延迟执行);只有通过ToList()或foreach循环调用迭代器的MoveNext方法,才会真正启动迭代并获取数据。
在调用Where方法之前是否有必要进行List非空检查和List内元素个数不为0的检查?
假设我有一个非空的List对象branchList,如果调用branchList.Where(s => s.BranchStatus == BranchStatus.Open).Where(s => s.City == "Beijing").Select(s => new {s.Name, s.Address}),查询正常运营并且坐落在北京的分行信息,并返回分行的名称和地址,具体执行流程是什么?
在Where扩展方法中,如果是一个和Where不相关的迭代器调用,例如DefaultIfEmptyIterator 也继承自 Iterator类,对于XX.DefaultIfEmpty().Where()的调用方式,由于类DefaultIfEmptyIterator并不包含Where方法, 所以一旦执行到Where扩展方法后, 下面Where扩展方法中的代码是否会报错?
if (source is Iterator<TSource> iterator)
{
return iterator.Where(predicate);
}
程序不会报错。DefaultIfEmptyIterator迭代器或其它Linq中的迭代器都继承自Iterator<TSource>,在Iterator<TSource>中已经定义了虚方法Where,派生类可以选择重写该方法,也可以不重写该方法。此处(iterator.Where(predicate))中的Where方法则是来自基类Iterator<TSource>的Where方法,返回的是默认的WhereEnumerableIterator迭代器。
/// <summary>
/// Combines two predicates into one that requires both to hold.
/// </summary>
/// <typeparam name="TSource">The type of the predicate argument.</typeparam>
/// <param name="predicate1">The first predicate to run.</param>
/// <param name="predicate2">The second predicate to run.</param>
/// <returns>
/// A new predicate that evaluates to <c>true</c> only if both predicates return <c>true</c>.
/// When <paramref name="predicate1"/> returns <c>false</c>, <paramref name="predicate2"/> is not run.
/// </returns>
public static Func<TSource, bool> CombinePredicates<TSource>(Func<TSource, bool> predicate1, Func<TSource, bool> predicate2)
{
    return item =>
    {
        // Short-circuit: the second predicate is skipped once the first one rejects the item.
        if (!predicate1(item))
        {
            return false;
        }

        return predicate2(item);
    };
}
/// <summary>
/// Combines two selectors into one that applies them in sequence.
/// </summary>
/// <typeparam name="TSource">The type of the first selector's argument.</typeparam>
/// <typeparam name="TMiddle">The type of the second selector's argument.</typeparam>
/// <typeparam name="TResult">The type of the second selector's return value.</typeparam>
/// <param name="selector1">The first selector to run.</param>
/// <param name="selector2">The second selector to run.</param>
/// <returns>
/// A new selector that feeds the output of <paramref name="selector1"/> into <paramref name="selector2"/>.
/// </returns>
public static Func<TSource, TResult> CombineSelectors<TSource, TMiddle, TResult>(Func<TSource, TMiddle> selector1, Func<TMiddle, TResult> selector2)
{
    return input =>
    {
        TMiddle intermediate = selector1(input);
        return selector2(intermediate);
    };
}
/// <summary>
/// Projects each element of a sequence into a new form, choosing an iterator suited to the
/// runtime type of <paramref name="source"/>.
/// </summary>
/// <typeparam name="TSource">The type of the elements of <paramref name="source"/>.</typeparam>
/// <typeparam name="TResult">The type of the value returned by <paramref name="selector"/>.</typeparam>
/// <param name="source">The sequence to project. A null value is reported through ThrowHelper.ThrowArgumentNullException.</param>
/// <param name="selector">The transform applied to each element. A null value is reported through ThrowHelper.ThrowArgumentNullException.</param>
/// <returns>A lazily evaluated sequence of projected values.</returns>
public static IEnumerable<TResult> Select<TSource, TResult>(
this IEnumerable<TSource> source, Func<TSource, TResult> selector)
{
    if (source is null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
    }

    if (selector is null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.selector);
    }

    // An existing iterator decides for itself how to add the projection
    // (derived classes may override Select).
    if (source is Iterator<TSource> iterator)
    {
        return iterator.Select(selector);
    }

    if (source is IList<TSource> ilist)
    {
        // Arrays and List<T> are tested before the general IList<T> fallback.
        switch (source)
        {
            case TSource[] array:
                if (array.Length == 0)
                {
                    // Nothing to project: no iterator is allocated for an empty array.
                    return Empty<TResult>();
                }

                return new SelectArrayIterator<TSource, TResult>(array, selector);

            case List<TSource> list:
                return new SelectListIterator<TSource, TResult>(list, selector);

            default:
                return new SelectIListIterator<TSource, TResult>(ilist, selector);
        }
    }

    if (source is IPartition<TSource> partition)
    {
        // The helper reports its iterator through the ref argument; a null result
        // means it produced none, so the general path below is used.
        IEnumerable<TResult>? partitionResult = null;
        CreateSelectIPartitionIterator(selector, partition, ref partitionResult);
        if (partitionResult != null)
        {
            return partitionResult;
        }
    }

    return new SelectEnumerableIterator<TSource, TResult>(source, selector);
}