Distinct方法是LINQ代码库中的一个重要扩展方法,它可以帮助我们将数组或其他集合中的重复元素过滤掉。
为了更好地使用该方法,我们从源码角度分析一下该方法,从而更好地了解其是如何将重复元素过滤,如何解决多线程同时读取的问题。
/// <summary>
/// Returns the distinct elements of <paramref name="source"/> by forwarding to the
/// comparer-taking overload with a <see langword="null"/> comparer.
/// </summary>
/// <typeparam name="TSource">The element type of the sequence.</typeparam>
/// <param name="source">The sequence to remove duplicates from.</param>
/// <returns>Whatever the two-argument overload returns for a null comparer.</returns>
public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source)
{
    return Distinct(source, null);
}
/// <summary>
/// Returns a lazily evaluated sequence containing the distinct elements of
/// <paramref name="source"/>, using <paramref name="comparer"/> to decide equality.
/// </summary>
/// <typeparam name="TSource">The element type of the sequence.</typeparam>
/// <param name="source">The sequence to remove duplicates from.</param>
/// <param name="comparer">The comparer handed to the iterator; may be <see langword="null"/>.</param>
/// <returns>A new <c>DistinctIterator</c> wrapping the source and comparer.</returns>
public static IEnumerable<TSource> Distinct<TSource>(this IEnumerable<TSource> source, IEqualityComparer<TSource>? comparer)
{
    // The null check runs at call time; the source itself is not enumerated here.
    if (source is null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
    }

    IEnumerable<TSource> distinct = new DistinctIterator<TSource>(source, comparer);
    return distinct;
}
/// <summary>
/// Iterator that yields each element of a source sequence only the first time it is seen.
/// Seen elements are tracked in a <see cref="HashSet{T}"/> built with the supplied comparer.
/// </summary>
/// <typeparam name="TSource">The element type of the source sequence.</typeparam>
private sealed partial class DistinctIterator<TSource> : Iterator<TSource>
{
    private readonly IEnumerable<TSource> _source;
    private readonly IEqualityComparer<TSource>? _comparer;

    // Both fields are created on the first MoveNext call and cleared again by Dispose.
    private HashSet<TSource>? _set;
    private IEnumerator<TSource>? _enumerator;

    /// <summary>Captures the source sequence and the (optional) comparer.</summary>
    /// <param name="source">The sequence to de-duplicate; must not be null.</param>
    /// <param name="comparer">The comparer passed to the internal set; may be null.</param>
    public DistinctIterator(IEnumerable<TSource> source, IEqualityComparer<TSource>? comparer)
    {
        Debug.Assert(source != null);
        _source = source;
        _comparer = comparer;
    }

    /// <summary>Creates a fresh iterator over the same source and comparer.</summary>
    public override Iterator<TSource> Clone() => new DistinctIterator<TSource>(_source, _comparer);

    /// <summary>
    /// Advances to the next not-yet-seen element.
    /// State 1 starts the enumeration and yields the first element; state 2 keeps
    /// pulling from the source until an element is successfully added to the set.
    /// </summary>
    /// <returns><c>true</c> if a new distinct element is available; otherwise <c>false</c>.</returns>
    public override bool MoveNext()
    {
        if (_state == 1)
        {
            _enumerator = _source.GetEnumerator();
            if (!_enumerator.MoveNext())
            {
                // Empty source: release the enumerator and finish immediately.
                Dispose();
                return false;
            }

            // The first element is always distinct, so it seeds the set.
            TSource first = _enumerator.Current;
            _set = new HashSet<TSource>(DefaultInternalSetCapacity, _comparer);
            _set.Add(first);
            _current = first;
            _state = 2;
            return true;
        }

        if (_state == 2)
        {
            Debug.Assert(_enumerator != null);
            Debug.Assert(_set != null);

            while (_enumerator.MoveNext())
            {
                TSource candidate = _enumerator.Current;

                // Add returns false for elements already in the set; skip those.
                if (!_set.Add(candidate))
                {
                    continue;
                }

                _current = candidate;
                return true;
            }
        }

        // Source exhausted, or the iterator is in any other state.
        Dispose();
        return false;
    }

    /// <summary>
    /// Disposes the source enumerator (if one was created), drops the set,
    /// and then lets the base class reset its own state.
    /// </summary>
    public override void Dispose()
    {
        IEnumerator<TSource>? active = _enumerator;
        if (active != null)
        {
            active.Dispose();
            _enumerator = null;
            _set = null;
        }

        base.Dispose();
    }
}
}
}
结合DistinctIterator类的代码,如果我们通过foreach循环,逐个读取过滤后的元素,代码如下,其中Student类和比较器的代码见附录:
// Build the de-duplicated query; the elements are pulled one by one by the foreach below.
var distinctStudents = studentList.Distinct(new StudentEqualityComparer());
foreach (var student in distinctStudents)
{
    Console.WriteLine(student.Name);
}
执行流程如下:
/// <summary>
/// Returns the elements of <paramref name="source"/> that are distinct by the key produced
/// by <paramref name="keySelector"/>, forwarding to the comparer-taking overload with a
/// <see langword="null"/> comparer.
/// </summary>
/// <typeparam name="TSource">The element type of the sequence.</typeparam>
/// <typeparam name="TKey">The type of the key used to distinguish elements.</typeparam>
/// <param name="source">The sequence to remove duplicates from.</param>
/// <param name="keySelector">Extracts the key for each element.</param>
/// <returns>Whatever the three-argument overload returns for a null comparer.</returns>
public static IEnumerable<TSource> DistinctBy<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector) => DistinctBy(source, keySelector, null);
/// <summary>
/// Returns a lazily evaluated sequence of the elements of <paramref name="source"/> that are
/// distinct by the key produced by <paramref name="keySelector"/>.
/// </summary>
/// <typeparam name="TSource">The element type of the sequence.</typeparam>
/// <typeparam name="TKey">The type of the key used to distinguish elements.</typeparam>
/// <param name="source">The sequence to remove duplicates from.</param>
/// <param name="keySelector">Extracts the key for each element.</param>
/// <param name="comparer">The comparer used for keys; may be <see langword="null"/>.</param>
/// <returns>The sequence produced by <c>DistinctByIterator</c>.</returns>
public static IEnumerable<TSource> DistinctBy<TSource, TKey>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IEqualityComparer<TKey>? comparer)
{
    // Both arguments are validated here, at call time, because the iterator
    // method below does not start running until its result is enumerated.
    if (source is null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.source);
    }

    if (keySelector is null)
    {
        ThrowHelper.ThrowArgumentNullException(ExceptionArgument.keySelector);
    }

    IEnumerable<TSource> distinct = DistinctByIterator(source, keySelector, comparer);
    return distinct;
}
/// <summary>
/// Iterator method behind <c>DistinctBy</c>: yields each element whose key has not been
/// produced by an earlier element. Keys are tracked in a <see cref="HashSet{T}"/>.
/// </summary>
/// <typeparam name="TSource">The element type of the sequence.</typeparam>
/// <typeparam name="TKey">The type of the key used to distinguish elements.</typeparam>
/// <param name="source">The sequence to enumerate.</param>
/// <param name="keySelector">Extracts the key for each element.</param>
/// <param name="comparer">The comparer passed to the key set; may be <see langword="null"/>.</param>
private static IEnumerable<TSource> DistinctByIterator<TSource, TKey>(IEnumerable<TSource> source, Func<TSource, TKey> keySelector, IEqualityComparer<TKey>? comparer)
{
    using (IEnumerator<TSource> items = source.GetEnumerator())
    {
        // Nothing to yield for an empty source, so the key set is never allocated.
        if (!items.MoveNext())
        {
            yield break;
        }

        HashSet<TKey> seenKeys = new HashSet<TKey>(DefaultInternalSetCapacity, comparer);
        do
        {
            TSource item = items.Current;
            TKey key = keySelector(item);

            // Add returns true only the first time a key is encountered.
            if (seenKeys.Add(key))
            {
                yield return item;
            }
        }
        while (items.MoveNext());
    }
}
Distinct或DistinctBy方法在实现上都是使用HashSet来过滤掉集合中的重复元素,支持延迟加载特性。
using System.Collections;
using System.Collections.Generic;
public static partial class Enumerable
{
    /// <summary>
    /// Base class for iterators that serve as both the enumerable and its enumerator.
    /// </summary>
    /// <typeparam name="TSource">The type of the items yielded by the iterator.</typeparam>
    internal abstract class Iterator<TSource> : IEnumerable<TSource>, IEnumerator<TSource>
    {
        // Managed thread id captured at construction; compared again in GetEnumerator.
        private readonly int _threadId;

        // 0 after construction, 1 once GetEnumerator hands the instance out, -1 after Dispose.
        internal int _state;
        internal TSource _current = default!;

        /// <summary>
        /// Initializes a new instance and records the id of the creating thread.
        /// </summary>
        protected Iterator() => _threadId = Environment.CurrentManagedThreadId;

        /// <summary>
        /// The item currently yielded by this iterator.
        /// </summary>
        public TSource Current
        {
            get { return _current; }
        }

        /// <summary>
        /// Makes a copy of this iterator.
        /// </summary>
        /// <remarks>
        /// <see cref="GetEnumerator"/> calls this whenever it cannot hand out the
        /// current instance itself.
        /// </remarks>
        public abstract Iterator<TSource> Clone();

        /// <summary>
        /// Puts this iterator in a state whereby no further enumeration will take place.
        /// </summary>
        /// <remarks>
        /// Derived classes should override this method if necessary to clean up any
        /// mutable state they hold onto (for example, calling Dispose on other enumerators).
        /// </remarks>
        public virtual void Dispose()
        {
            _current = default!;
            _state = -1;
        }

        /// <summary>
        /// Gets the enumerator used to yield values from this iterator.
        /// </summary>
        /// <remarks>
        /// If the iterator is still in its initial state (0) and the call is made on the
        /// thread that created it, the result is this instance. Otherwise the result is
        /// whatever <see cref="Clone"/> returns. Either way the result is moved to state 1.
        /// </remarks>
        public IEnumerator<TSource> GetEnumerator()
        {
            Iterator<TSource> result;
            if (_state == 0 && _threadId == Environment.CurrentManagedThreadId)
            {
                result = this;
            }
            else
            {
                result = Clone();
            }

            result._state = 1;
            return result;
        }

        /// <summary>
        /// Retrieves the next item in this iterator and yields it via <see cref="Current"/>.
        /// </summary>
        /// <returns><c>true</c> if there was another value to be yielded; otherwise, <c>false</c>.</returns>
        public abstract bool MoveNext();

        /// <summary>
        /// Returns an enumerable that maps each item in this iterator based on a selector.
        /// </summary>
        /// <typeparam name="TResult">The type of the mapped items.</typeparam>
        /// <param name="selector">The selector used to map each item.</param>
        public virtual IEnumerable<TResult> Select<TResult>(Func<TSource, TResult> selector) =>
            new SelectEnumerableIterator<TSource, TResult>(this, selector);

        /// <summary>
        /// Returns an enumerable that filters each item in this iterator based on a predicate.
        /// </summary>
        /// <param name="predicate">The predicate used to filter each item.</param>
        public virtual IEnumerable<TSource> Where(Func<TSource, bool> predicate) =>
            new WhereEnumerableIterator<TSource>(this, predicate);

        /// <summary>Non-generic view of <see cref="Current"/>.</summary>
        object? IEnumerator.Current
        {
            get { return Current; }
        }

        /// <summary>Non-generic entry point; forwards to the generic <see cref="GetEnumerator"/>.</summary>
        IEnumerator IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }

        /// <summary>Resetting is not supported and always throws.</summary>
        void IEnumerator.Reset()
        {
            throw new NotSupportedException();
        }
    }
}
Student类源码
/// <summary>
/// Plain data object describing a student, used as the sample element type
/// for the Distinct examples.
/// </summary>
/// <remarks>
/// The type deliberately implements no collection interfaces: it holds three
/// string properties and nothing else.
/// </remarks>
public class Student
{
    /// <summary>Identifier of the student.</summary>
    public string Id { get; set; }

    /// <summary>Name of the student.</summary>
    public string Name { get; set; }

    /// <summary>Classroom the student belongs to.</summary>
    public string Classroom { get; set; }

    /// <summary>Creates a student from its three property values.</summary>
    /// <param name="id">Value stored in <see cref="Id"/>.</param>
    /// <param name="name">Value stored in <see cref="Name"/>.</param>
    /// <param name="classroom">Value stored in <see cref="Classroom"/>.</param>
    public Student(string id, string name, string classroom)
    {
        this.Id = id;
        this.Name = name;
        this.Classroom = classroom;
    }
}
}