字符串搜索树

字符串搜索树支持使用字符串对对象建立索引,以便于查询。这些字符串通常取自对象的属性,并且可以先按照指定的规则(例如按空格)分解为单词,再逐个单词建立索引。

Insert方法用来在一个单词和指定对象之间建立关联。

Remove方法用来在索引树中删除一个指定对象。

Prune方法用于在删除了大量对象后,优化索引树,删除那些没有作用的节点。

Find方法用来查找对象:默认按前缀匹配(不区分大小写),即返回所有以查找字符串开头的单词所关联的对象;同时支持通配符查找,通配符为*,且只能放在查找字符串的开头,此时返回所有包含该字符串的单词所关联的对象。


// API outline only (method bodies omitted); the full implementation is listed further below.
public class StringIndexTree<T>
  {
    // Associates the word `key` with the object `value` in the index.
    public bool Insert(string key, T value);

    // Removes `value` from the index; returns true when it was found in at least one node.
    public bool Remove(T value);

    // Cleans up nodes that no longer serve a purpose after objects have been removed.
    public void Prune();

    // Looks up objects by word prefix; a leading '*' switches to "contains" matching.
    public IEnumerable<T> Find(string filter);
  }


为了测试, 演示窗口中还显示出了整个搜索树结构,包括每个节点下包含的对象个数。 下面就是demo的代码:

<Window x:Class="MySolution.Controls.Demo.StringIndexingWindow"
        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"        
        xmlns:infra="clr-namespace:Infrastructure;assembly=Infrastructure"        
        Title="StringIndexingWindow" Height="478" Width="862">
  <!-- Demo layout: column 0 holds the filter box, delete button and employee list;
       column 1 holds a tree view that visualises the index trees.
       Note: the "infra" namespace prefix is declared above but not used by any element in this markup. -->
  <Grid>
    <Grid.ColumnDefinitions>
      <ColumnDefinition/>
      <ColumnDefinition/>
    </Grid.ColumnDefinitions>
    <!-- Column 1: index tree visualisation. The code-behind assigns tv.ItemsSource. -->
    <Grid Grid.Column="1">
      <TreeView Name="tv">
        <TreeView.ItemTemplate>
          <!-- Each tree node expands through its Children collection and shows
               its Key next to the number of values stored directly on that node. -->
          <HierarchicalDataTemplate ItemsSource="{Binding Children}">
            <Grid Width="100">
              <Grid.ColumnDefinitions>
                <ColumnDefinition/>
                <ColumnDefinition Width="Auto"/>
              </Grid.ColumnDefinitions>
              <TextBlock Text="{Binding Key}"/>
              <TextBlock Grid.Column="1" Text="{Binding Values.Count}" HorizontalAlignment="Right"/>
            </Grid>
          </HierarchicalDataTemplate>
        </TreeView.ItemTemplate>
      </TreeView>
    </Grid>
    <!-- Column 0 (default): filter input, action button and result list stacked in three rows. -->
    <Grid>
      <Grid.RowDefinitions>
        <RowDefinition Height="30"/>
        <RowDefinition Height="30"/>
        <RowDefinition/>
      </Grid.RowDefinitions>


      <!-- Every text change is routed to filterTextBox_TextChanged in the code-behind. -->
      <TextBox Grid.Row="0" Name="filterTextBox" TextChanged="filterTextBox_TextChanged"/>
      <StackPanel Grid.Row="1">
        <!-- Click is handled by Button_Click in the code-behind. -->
        <Button Content="Delete First Row" Click="Button_Click"/>
      </StackPanel>
      <!-- Employee list; each column binds to one Employee property. -->
      <ListView Name="lv" Grid.Row="2">
        <ListView.View>
          <GridView>
            <GridViewColumn Header="Name" DisplayMemberBinding="{Binding Name}"/>
            <GridViewColumn Header="Birth Date" DisplayMemberBinding="{Binding BirthDate}"/>
            <GridViewColumn Header="Address" DisplayMemberBinding="{Binding Address}"/>
          </GridView>
        </ListView.View>
      </ListView>


    </Grid>
  </Grid>
</Window>

后置代码如下:


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Shapes;
using Infrastructure;
using System.Collections.ObjectModel;


namespace MySolution.Controls.Demo
{
  /// <summary>
  /// Interaction logic for StringIndexingWindow.xaml
  /// </summary>
  public partial class StringIndexingWindow : Window
  {
    public StringIndexingWindow()
    {
      InitializeComponent();


      Loaded += new RoutedEventHandler(StringIndexingWindow_Loaded);
    }


    private ObservableCollection<Employee> emps;


    void StringIndexingWindow_Loaded(object sender, RoutedEventArgs e)
    {
      emps = new ObservableCollection<Employee>();
      Random r = new Random();


      for (int i = 0; i < 10000; i++)
      {
        Employee emp = new Employee();
        emp.Name = "zhang san" + i;
        emp.Address = "Beijing";


        emp.BirthDate = DateTime.Now.Subtract(TimeSpan.FromDays(r.Next(1000, 10000)));


        emps.Add(emp);
      }


      lv.ItemsSource = emps;


      BuildStringIndex();


      tv.ItemsSource = _indexs.Select(i => i.Root);
    }


    private List<StringIndexTree<Employee>> _indexs = new List<StringIndexTree<Employee>>();


    private void BuildStringIndex()
    {
      StringIndexTree<Employee> nameIndex = new StringIndexTree<Employee>();
      StringIndexTree<Employee> dateIndex = new StringIndexTree<Employee>();
      StringIndexTree<Employee> addrIndex = new StringIndexTree<Employee>();


      foreach (var item in emps)
      {       
        foreach (var word in item.Name.Split(new [] { ' '}, StringSplitOptions.RemoveEmptyEntries))
        {
          nameIndex.Insert(word, item);
        }


        foreach (var word in item.BirthDate.ToString().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
        {
          dateIndex.Insert(word, item);
        }


        foreach (var word in item.Address.ToString().Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
        {
          addrIndex.Insert(word, item);
        }
      }


      _indexs.Add(nameIndex);
      _indexs.Add(dateIndex);
      _indexs.Add(addrIndex);
    }


    private void filterTextBox_TextChanged(object sender, TextChangedEventArgs e)
    {
      if (filterTextBox.Text != string.Empty)
      {
        lv.ItemsSource = _indexs.SelectMany(i => i.Find(filterTextBox.Text)).Distinct();
      }
      else
      {
        lv.ItemsSource = emps;
      }
    }


    private void Button_Click(object sender, RoutedEventArgs e)
    {
      Employee emp = emps[0];
      emps.RemoveAt(0);
      //_index.Remove(emp);
      //_index.Prune();
      //tv.ItemsSource = new[] { _index.Root };
    }
  }


  /// <summary>
  /// Sample record displayed in the demo list view and stored as the value type of the string indexes.
  /// </summary>
  public class Employee
  {
    /// <summary>Employee name; the demo indexes each space-separated word of it.</summary>
    public string Name { get; set; }


    /// <summary>Date of birth; the demo indexes the words of its default string representation.</summary>
    public DateTime BirthDate { get; set; }


    /// <summary>Address text; the demo indexes each space-separated word of it.</summary>
    public string Address { get; set; }
  }
}


StringIndexTree的代码:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;


namespace Infrastructure
{
  public class StringIndexTree<T>
  {
    public const char WildcardChar = '*';


    public StringIndexTree()
    {
      Root = new StringIndexTreeNode<T>(string.Empty);
    }


    public StringIndexTreeNode<T> Root { get; private set; }


    public bool Insert(string key, T value)
    {
      return Root.Insert(key, value);
    }    


    public bool Remove(T value)
    {
      return Root.Remove(value);
    }


    public void Prune()
    {
      Root.Prune();
    }


    public IEnumerable<T> Find(string filter)
    {
      if (string.IsNullOrWhiteSpace(filter))
      {
        throw new ArgumentException("key cannot be empty or white space", "key");
      }


      if (filter[0] == WildcardChar)
      {
        filter = filter.Substring(1);


        if (filter == string.Empty)
        {
          return Root.SelfAndDecendentValues;
        }
        else
        {
          var retNodes = Root.FindWithContain(filter);
          var result = new List<T>();


          foreach (var retNode in retNodes)
          {
            result.AddRange(retNode.SelfAndDecendentValues);
          }


          return result;
        }
      }
      else
      {
        var node = Root.FindFromStart(filter);
        return node != null ? node.SelfAndDecendentValues : new List<T>();
      }
    }
  }
}

StringIndexTreeNode的代码:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;


namespace Infrastructure
{
  public class StringIndexTreeNode<T>
  {
    public StringIndexTreeNode(string key)
    {
      if (key == null)
      {
        throw new ArgumentNullException("key");
      }


      Key = key;
      Children = new List<StringIndexTreeNode<T>>();
      Values = new HashSet<T>();
    }


    public StringIndexTreeNode(string key, T value)
      : this(key)
    {
      Values.Add(value);
    }


    public string Key { get; private set; }
    public HashSet<T> Values { get; private set; }
    public List<StringIndexTreeNode<T>> Children { get; private set; }


    public bool Insert(string key, T value)
    {
      if (key == null)
      {
        throw new ArgumentNullException("key");
      }


      //New key cannot be a child of current node
      if (key.Length < Key.Length)
      {
        return false;
      }


      //If key is same with current Key, add value; 
      //else the key is not child of current node
      if (key.Length == Key.Length)
      {
        if (Key.Equals(key, StringComparison.CurrentCultureIgnoreCase))
        {
          Values.Add(value);


          return true;
        }


        return false;
      }


      //Make sure current Key is start string of target key
      if (!key.StartsWith(Key, StringComparison.CurrentCultureIgnoreCase))
      {
        return false;
      }


      if (Children.Count == 0)
      {
        Children.Add(new StringIndexTreeNode<T>(key, value));
        return true;
      }


      //Check if child can accept the key
      foreach (var child in Children)
      {
        if (child.Insert(key, value))
        {
          return true;
        }
      }


      string bestCommonStart = Key;
      StringIndexTreeNode<T> sibling = null;


      foreach (var child in Children)
      {
        var commonStart = GetLongestCommonStartString(child.Key, key);
        if (commonStart.Length > bestCommonStart.Length)
        {
          bestCommonStart = commonStart;
          sibling = child;
        }
      }


      //No common with children, so add to current node
      if (bestCommonStart.Length == Key.Length)
      {
        Children.Add(new StringIndexTreeNode<T>(key, value));
        return true;
      }


      //has common with children            
      Children.Remove(sibling);
      var commonNode = new StringIndexTreeNode<T>(bestCommonStart, value);
      Children.Add(commonNode);


      commonNode.Children.Add(new StringIndexTreeNode<T>(key, value));
      commonNode.Children.Add(sibling);


      return true;
    }


    private string GetLongestCommonStartString(string str1, string str2)
    {
      var str1Lower = str1.ToLower();
      var str2Lower = str2.ToLower();


      for (int i = 0; i < Math.Min(str1.Length, str2.Length); i++)
      {
        if (str1Lower[i] != str2Lower[i])
        {
          return str1.Substring(0, i);
        }
      }


      return string.Empty;
    }


    public bool Remove(T value)
    {
      bool result = Values.Remove(value);


      foreach (var child in Children)
      {
        if (child.Remove(value))
        {
          result = true;
        }
      }


      return result;
    }


    public void Prune()
    {
      for (int i = 0; i < Children.Count; i++)
      {
        if (Children[i].Values.Count == 0)
        {
          if (Children[i].Children.Count == 0)
          {
            Children.RemoveAt(i--);
            continue;
          }
          else if (Children[i].Children.Count == 1)
          {
            var grandchild = Children[i].Children[0];
            Children.RemoveAt(i--);
            Children.Add(grandchild);
            continue;
          }
        }


        Children[i].Prune();
      }
    }


    public StringIndexTreeNode<T> FindFromStart(string filter)
    {
      if (filter == null)
      {
        throw new ArgumentNullException("filter");
      }


      if (filter.Length <= Key.Length)
      {
        return Key.StartsWith(filter, StringComparison.CurrentCultureIgnoreCase) ? this : null;
      }


      foreach (var child in Children)
      {
        var result = child.FindFromStart(filter);


        if (result != null)
        {
          return result;
        }
      }


      return null;
    }


    public IEnumerable<StringIndexTreeNode<T>> FindWithContain(string filter)
    {
      var returnValue = new List<StringIndexTreeNode<T>>();


      if (Key.Contains(filter))
      {
        returnValue.Add(this);
      }


      foreach (var child in Children)
      {
        returnValue.AddRange(child.FindWithContain(filter));
      }


      return returnValue;
    }


    public List<T> SelfAndDecendentValues
    {
      get
      {
        return Values
          .Concat(Children.SelectMany(c => c.SelfAndDecendentValues))
          .ToList();
      }
    }
  }
}


你可能感兴趣的:(.net,String)