FastStringCollection

image6Hi, today I would to share with you some very interesting piece of code that is very advanced one. A long time ago I fail with trying building replacement of StringCollection collection. I do not know how this works but today I prepare entire code in about 4 hours and everything works pretty amazing. So I will show you at the beginning console application for testing benchmarking and so on… later console application output and at the end about 560 lines of code that is FastStringCollection implementation.

So, let’s start with testing application.

using System;
using System.Collections.Specialized;
using System.Diagnostics;
using System.Text;
using System.Collections;

namespace FastStringCollectionSandbox {
  class Program {
    static string CreatePrintFor(IList collection) {
      var builder = new StringBuilder();
      for (var i = 0; i < collection.Count; ++i) {
        var element = collection[i];
        builder.Append(element);
        builder.Append(" ");
      }
      if (builder.Length != 0)
        builder.Remove(builder.Length - 1, 1);
      return builder.ToString();
    }

    static string CreatePrintIdxFor(IList collection) {
      var builder = new StringBuilder();
      for (var i = 0; i < collection.Count; ++i) {
        var element = collection[i];
        builder.Append(collection.IndexOf(element));
        builder.Append(" ");
      }
      if (builder.Length != 0)
        builder.Remove(builder.Length - 1, 1);
      return builder.ToString();
    }

    static string CreatePrint(IList collection) {
      var builder = new StringBuilder();
      foreach (var element in collection) {
        builder.Append(element);
        builder.Append(" ");
      }
      if (builder.Length != 0)
        builder.Remove(builder.Length - 1, 1);
      return builder.ToString();
    }

    static string CreatePrintIdx(IList collection) {
      var builder = new StringBuilder();
      foreach (var element in collection) {
        builder.Append(collection.IndexOf(element));
        builder.Append(" ");
      }
      if (builder.Length != 0)
        builder.Remove(builder.Length - 1, 1);
      return builder.ToString();
    }

    static void WriteColor(Action writeAction, ConsoleColor color) {
      var ocolor = Console.ForegroundColor;
      Console.ForegroundColor = color;
      writeAction.Invoke();
      Console.ForegroundColor = ocolor;
    }

    static StringCollection cSlowDebug;
    static FastStringCollection cFastDebug;

    static StringCollection cSlow;
    static FastStringCollection cFast;

    static void TestInit() {
      WriteColor(() => Console.WriteLine("CLEAN"),
        ConsoleColor.White);

      cSlowDebug = new StringCollection();
      cFastDebug = new FastStringCollection();

      cSlow = new StringCollection();
      cFast = new FastStringCollection();
    }

    static void Debug(bool debug, Action<IList> action) {
      action.Invoke(cSlowDebug);
      if (debug)
        Debugger.Break();
      // step into twice.
      action.Invoke(cFastDebug);
    }

    static void Test(string msg, Action<IList> action) {
      action.Invoke(cSlow);

      Exception exception = null;
      try {
        action.Invoke(cFast);
      }
      catch (Exception e) {
        exception = e;
      }

      var contentSlowFor = CreatePrintFor(cSlow);
      var indexesSlowFor = CreatePrintIdxFor(cSlow);
      var contentFastFor = CreatePrintFor(cFast);
      var indexesFastFor = CreatePrintIdxFor(cFast);
      var contentSlow = CreatePrint(cSlow);
      var indexesSlow = CreatePrintIdx(cSlow);
      var contentFast = CreatePrint(cFast);
      var indexesFast = CreatePrintIdx(cFast);

      var ok =
        contentSlow == contentFast
        && indexesSlow == indexesFast
        && contentSlowFor == contentFastFor
        && indexesSlowFor == indexesFastFor
        ;

      var ex = exception != null;

      if (ex)
        WriteColor(() => Console.Write("EXCEPTION "),
          ConsoleColor.Red);
      else
        WriteColor(() => Console.Write(ok ? "OK " : "FAIL "),
          ok ? ConsoleColor.Green : ConsoleColor.Red);

      Console.WriteLine(msg);

      if (ex)
        Console.WriteLine(exception);

      if (!ok) {
        WriteColor(() => Console.WriteLine("BEFORE"),
          ConsoleColor.Yellow);
        Console.WriteLine("SLOWI: {0}", CreatePrintFor(cSlowDebug));
        Console.WriteLine("SLOWI: {0}", CreatePrintIdxFor(cSlowDebug));
        Console.WriteLine("FASTI: {0}", CreatePrintFor(cFastDebug));
        Console.WriteLine("FASTI: {0}", CreatePrintIdxFor(cFastDebug));
        Console.WriteLine("SLOWL: {0}", CreatePrint(cSlowDebug));
        Console.WriteLine("SLOWL: {0}", CreatePrintIdx(cSlowDebug));
        Console.WriteLine("FASTL: {0}", CreatePrint(cFastDebug));
        Console.WriteLine("FASTL: {0}", CreatePrintIdx(cFastDebug));
        WriteColor(() => Console.WriteLine("AFTER"),
                  ConsoleColor.Yellow);
        Console.WriteLine("SLOWI: {0}", contentSlowFor);
        Console.WriteLine("SLOWI: {0}", indexesSlowFor);
        Console.WriteLine("FASTI: {0}", contentFastFor);
        Console.WriteLine("FASTI: {0}", indexesFastFor);
        Console.WriteLine("SLOWL: {0}", contentSlow);
        Console.WriteLine("SLOWL: {0}", indexesSlow);
        Console.WriteLine("FASTL: {0}", contentFast);
        Console.WriteLine("FASTL: {0}", indexesFast);

        Console.WriteLine("Press any key to debug again this test.");
        Console.ReadKey();
      }

      Debug(!ok, action);
    }

    static void TestCollectionBasic() {
      TestInit();

      Test(@"c.Add(""A"")",
        c => c.Add("A"));
      Test(@"c.Add(""AA"")",
        c => c.Add("AA"));
      Test(@"c.Add(""AAA"")",
        c => c.Add("AAA"));
      Test(@"c.Remove(""AA"")",
        c => c.Remove("AA"));
      Test(@"c.Add(""AA"")",
        c => c.Add("AA"));
    }

    static void TestCollectionsAdvance() {
      TestInit();

      Test(@"c.Add(""AA"")",
        c => c.Add("AA"));
      Test(@"c.Add(""AB"")",
        c => c.Add("AB"));
      Test(@"c.Remove(""AA"")",
        c => c.Remove("AA"));
      Test(@"c.Remove(""AB"")",
        c => c.Remove("AB"));
      Test(@"c.Add(""A"")",
        c => c.Add("A"));
      Test(@"c.Remove(""A"")",
        c => c.Remove("A"));
      Test(@"c.Add(""A"")",
        c => c.Add("A"));
      Test(@"c.RemoveAt(0)",
        c => c.RemoveAt(0));
      Test(@"c.Add(""B"")",
        c => c.Add("B"));
      Test(@"c.Add(""C"")",
        c => c.Add("C"));
      Test(@"c.Remove(""B"")",
        c => c.Remove("B"));
      Test(@"c.Add(""D"")",
        c => c.Add("D"));
      Test(@"c.Add(""E"")",
        c => c.Add("E"));
      Test(@"c.Add(""D"")",
        c => c.Add("D"));
      Test(@"c.Remove(""C"")",
        c => c.Remove("C"));
      Test(@"c.Remove(""D"")",
        c => c.Remove("D"));
      Test(@"c.Remove(""E"")",
        c => c.Remove("E"));
      Test(@"c.Remove(""D"")",
        c => c.Remove("D"));
      Test(@"c.Add(""F"")",
        c => c.Add("F"));
      Test(@"c.RemoveAt(0)",
        c => c.RemoveAt(0));
      Test(@"c.Add(""G"")",
        c => c.Add("G"));
      Test(@"c.RemoveAt(c.IndexOf(""G""))",
        c => c.RemoveAt(c.IndexOf("G")));
      Test(@"c.Add(""F"")",
        c => c.Add("F"));
      Test(@"c.Add(""G"")",
        c => c.Add("G"));
      Test(@"c => c[c.IndexOf(""F"")] = ""K""",
        c => c[c.IndexOf("F")] = "K");
      // not implemented
      //Test(@"c.Insert(1, ""X"")",
      //  c => c.Insert(1, "X"));
      Test(@"c.Add(""Y"")",
        c => c.Add("Y"));
      // not implemented
      //Test(@"c.Insert(1, ""Z"")",
      //  c => c.Insert(1, "Z"));
      Test(@"c.RemoveAt(0)",
        c => c.RemoveAt(0));
      Test(@"For all: Remove(c[0])",
        c =>
        {
          for (int i = 0; i < c.Count; ++i) {
            var elem = c[0].ToString();
            c.Remove(elem);
          }
        }
      );
    }

    static void BenchmarkAction<T>(
      string actionName, Action<IList, T> action,
      IList collection, T[] args, int count) {
      GC.Collect();
      var meter = Stopwatch.StartNew();
      for (var i = 0; i < count; ++i)
        action.Invoke(collection, args[i]);
      meter.Stop();
      Console.Write("{0} {1} elements took {2} ms, ",
        actionName, count, meter.ElapsedMilliseconds);
      meter.Reset();
      var mem = GC.GetTotalMemory(true);
      Console.WriteLine("(MEM: {0}).", mem);
    }

    static void BenchmarkCollection(IList collection, int count) {
      Console.WriteLine("Test of {0} {1} times.", collection.GetType().Name, count);
      var meter = new Stopwatch();

      var argsS = new string[count];
      var argsI = new int[count];
      for (var i = 0; i < count; ++i) {
        argsS[i] = i.ToString();
        argsI[i] = count - i - 1;
      }
      BenchmarkAction<string>("Add", (c, t) => c.Add(t), collection, argsS, count);
      BenchmarkAction<string>("Contains", (c, t) => c.Contains(t), collection, argsS, count);
      BenchmarkAction<string>("Remove", (c, t) => c.Remove(t), collection, argsS, count);
      // TODO
      //BenchmarkAction<string>("Insert", (c, t) => c.Insert(0, t), collection, argsS, count);
      BenchmarkAction<string>("Add", (c, t) => c.Add(t), collection, argsS, count);
      BenchmarkAction<int>("c[t]", (c, t) => { var e = c[t]; }, collection, argsI, count);
      BenchmarkAction<string>("IndexOf", (c, t) => { var i = c.IndexOf(t); }, collection, argsS, count);
      BenchmarkAction<int>("RemoveAt", (c, t) => c.RemoveAt(t), collection, argsI, count);
    }


    static void Main() {
      WriteColor(() => Console.WriteLine("TESTS:"),
        ConsoleColor.Magenta);

      TestCollectionBasic();
      TestCollectionsAdvance();

      WriteColor(() => Console.WriteLine("BENCHMARKS (StringCollection):"),
        ConsoleColor.Magenta);

      BenchmarkCollection(new StringCollection(), 500);
      BenchmarkCollection(new StringCollection(), 1000);
      BenchmarkCollection(new StringCollection(), 50000);
      BenchmarkCollection(new StringCollection(), 100000);
      BenchmarkCollection(new StringCollection(), 500000);

      WriteColor(() => Console.WriteLine("BENCHMARKS (FastStringCollection):"),
        ConsoleColor.Magenta);

      BenchmarkCollection(new FastStringCollection(), 500);
      BenchmarkCollection(new FastStringCollection(), 1000);
      BenchmarkCollection(new FastStringCollection(), 50000);
      BenchmarkCollection(new FastStringCollection(), 100000);
      BenchmarkCollection(new FastStringCollection(), 500000);

      WriteColor(() => Console.WriteLine("Press any key to continue..."),
        ConsoleColor.White);

      Console.ReadKey();
    }
  }
}

As you can see it is not a big deal. I simply check all methods and behaviors of the StringCollection and I try to compare results with FastStringCollection that is my implementation. Results of the testing application is shown below.

TESTS:
CLEAN
OK c.Add("A")
OK c.Add("AA")
OK c.Add("AAA")
OK c.Remove("AA")
OK c.Add("AA")
CLEAN
OK c.Add("AA")
OK c.Add("AB")
OK c.Remove("AA")
OK c.Remove("AB")
OK c.Add("A")
OK c.Remove("A")
OK c.Add("A")
OK c.RemoveAt(0)
OK c.Add("B")
OK c.Add("C")
OK c.Remove("B")
OK c.Add("D")
OK c.Add("E")
OK c.Add("D")
OK c.Remove("C")
OK c.Remove("D")
OK c.Remove("E")
OK c.Remove("D")
OK c.Add("F")
OK c.RemoveAt(0)
OK c.Add("G")
OK c.RemoveAt(c.IndexOf("G"))
OK c.Add("F")
OK c.Add("G")
OK c => c[c.IndexOf("F")] = "K"
OK c.Add("Y")
OK c.RemoveAt(0)
OK For all: Remove(c[0])
BENCHMARKS (StringCollection):
Test of StringCollection 500 times.
Add 500 elements took 0 ms, (MEM: 73116).
Contains 500 elements took 0 ms, (MEM: 73148).
Remove 500 elements took 0 ms, (MEM: 73180).
Add 500 elements took 0 ms, (MEM: 73212).
c[t] 500 elements took 0 ms, (MEM: 73244).
IndexOf 500 elements took 0 ms, (MEM: 73276).
RemoveAt 500 elements took 0 ms, (MEM: 61332).
Test of StringCollection 1000 times.
Add 1000 elements took 0 ms, (MEM: 89356).
Contains 1000 elements took 3 ms, (MEM: 89356).
Remove 1000 elements took 0 ms, (MEM: 89356).
Add 1000 elements took 0 ms, (MEM: 89356).
c[t] 1000 elements took 0 ms, (MEM: 89356).
IndexOf 1000 elements took 3 ms, (MEM: 89356).
RemoveAt 1000 elements took 0 ms, (MEM: 65380).
Test of StringCollection 50000 times.
Add 50000 elements took 0 ms, (MEM: 1915408).
Contains 50000 elements took 9144 ms, (MEM: 1915408).
Remove 50000 elements took 1676 ms, (MEM: 1915408).
Add 50000 elements took 0 ms, (MEM: 1915408).
c[t] 50000 elements took 0 ms, (MEM: 1915408).
IndexOf 50000 elements took 8237 ms, (MEM: 1915408).
RemoveAt 50000 elements took 0 ms, (MEM: 519432).
Test of StringCollection 100000 times.
Add 100000 elements took 1 ms, (MEM: 3777552).
Contains 100000 elements took 37617 ms, (MEM: 3777552).
Remove 100000 elements took 6839 ms, (MEM: 3777552).
Add 100000 elements took 1 ms, (MEM: 3777552).
c[t] 100000 elements took 1 ms, (MEM: 3777552).
IndexOf 100000 elements took 34249 ms, (MEM: 3777552).
RemoveAt 100000 elements took 1 ms, (MEM: 981576).
Test of StringCollection 500000 times.
Add 500000 elements took 9 ms, (MEM: 19750416).
Contains 500000 elements took 1002587 ms, (MEM: 19750416).
Remove 500000 elements took 165937 ms, (MEM: 19750416).
Add 500000 elements took 7 ms, (MEM: 19750416).
c[t] 500000 elements took 5 ms, (MEM: 19750416).
IndexOf 500000 elements took 913897 ms, (MEM: 19750416).
RemoveAt 500000 elements took 5 ms, (MEM: 4154440).
BENCHMARKS (FastStringCollection):
Test of FastStringCollection 500 times.
Add 500 elements took 0 ms, (MEM: 234352).
Contains 500 elements took 0 ms, (MEM: 234352).
Remove 500 elements took 1 ms, (MEM: 76740).
Add 500 elements took 0 ms, (MEM: 236412).
c[t] 500 elements took 0 ms, (MEM: 236412).
IndexOf 500 elements took 0 ms, (MEM: 236412).
RemoveAt 500 elements took 1 ms, (MEM: 64764).
Test of FastStringCollection 1000 times.
Add 1000 elements took 1 ms, (MEM: 410800).
Contains 1000 elements took 0 ms, (MEM: 410800).
Remove 1000 elements took 3 ms, (MEM: 94836).
Add 1000 elements took 1 ms, (MEM: 414908).
c[t] 1000 elements took 0 ms, (MEM: 414908).
IndexOf 1000 elements took 0 ms, (MEM: 414908).
RemoveAt 1000 elements took 3 ms, (MEM: 70860).
Test of FastStringCollection 50000 times.
Add 50000 elements took 188 ms, (MEM: 17956056).
Contains 50000 elements took 20 ms, (MEM: 17956056).
Remove 50000 elements took 710 ms, (MEM: 2178944).
Add 50000 elements took 181 ms, (MEM: 18218216).
c[t] 50000 elements took 24 ms, (MEM: 18218216).
IndexOf 50000 elements took 1117 ms, (MEM: 18218216).
RemoveAt 50000 elements took 1309 ms, (MEM: 782968).
Test of FastStringCollection 100000 times.
Add 100000 elements took 440 ms, (MEM: 35858200).
Contains 100000 elements took 40 ms, (MEM: 35858200).
Remove 100000 elements took 2595 ms, (MEM: 4303232).
Add 100000 elements took 440 ms, (MEM: 36382504).
c[t] 100000 elements took 57 ms, (MEM: 36382504).
IndexOf 100000 elements took 4427 ms, (MEM: 36382504).
RemoveAt 100000 elements took 4852 ms, (MEM: 1507256).
Test of FastStringCollection 500000 times.
Add 500000 elements took 2356 ms, (MEM: 180151064).
Contains 500000 elements took 226 ms, (MEM: 180151064).
Remove 500000 elements took 57909 ms, (MEM: 21848960).
Add 500000 elements took 2407 ms, (MEM: 182248232).
c[t] 500000 elements took 314 ms, (MEM: 182248232).
IndexOf 500000 elements took 110035 ms, (MEM: 182248232).
RemoveAt 500000 elements took 112580 ms, (MEM: 6252984).
Press any key to continue...

Results are great I think. All operations on the FastStringCollection collection takes time that grow up linear. As you can see all behaviors of the collection are also the same. Ok so right now is time to show you my 4 hours production.

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;

namespace FastStringCollectionSandbox {
  public class FastStringCollection : IList, ICollection, IEnumerable {
    private const int DefKey = -1;
    private const string DefValue = null;

    public FastStringCollection() {
      Count = 0;
      IsReadOnly = false;
      IsFixedSize = false;
      IsSynchronized = true;
      SyncRoot = new object();
    }

    public int Count { get; private set; }
    public bool IsReadOnly { get; private set; }

    public bool IsFixedSize { get; private set; }

    public bool IsSynchronized { get; private set; }
    public object SyncRoot { get; private set; }

    private IndexStrings<int> _keys = new IndexStrings<int>(DefKey);
    private IndexIntegers<string> _data = new IndexIntegers<string>(DefValue);

    private Stack<int> _indexes = new Stack<int>();
    private List<int> _keysMap = new List<int>();

    public string this[int index] {
      get {
        var idx = _keysMap[index];
        var val = _data.IndexOf(idx);
        return val;
      }
      set {
        lock (SyncRoot) {
          var idx = _keysMap[index];
          var val = _data.IndexOf(idx);
          _keys.Replace(index, idx, value);
          _data.Replace(val, value, idx);
        }
      }
    }

    object IList.this[int index] {
      get { return this[index]; }
      set { this[index] = value.ToString(); }
    }

    public int Add(string value) {
      lock (SyncRoot) {
        var index = Count;
        var reuse = _indexes.Count != 0;
        if (reuse)
          index = _indexes.Pop();
        var exist = _keys.IndexOf(value);
        if (exist == DefKey)
          _keys.Add(index, value);
        else
          _keys.AddExist(exist, index, value);
        _keysMap.Add(index);
        _data.Add(value, index);
        ++Count;
        return index;
      }
    }

    public int Add(object value) {
      return Add(value.ToString());
    }

    public void AddRange(string[] value) {
      lock (SyncRoot) {
        foreach (string item in value)
          Add(item);
      }
    }

    public void Clear() {
      lock (SyncRoot) {
        Count = 0;
        _keys = new IndexStrings<int>(DefKey);
        _data = new IndexIntegers<string>(DefValue);
        _indexes = new Stack<int>();
        _keysMap = new List<int>();
      }
    }

    public bool Contains(string value) {
      lock (SyncRoot) {
        return _keys.IndexOf(value) != DefKey;
      }
    }

    public bool Contains(object value) {
      return Contains(value.ToString());
    }

    public int IndexOf(string value) {
      lock (SyncRoot) {
        if (value == null) return -1;
        var index = _keys.IndexOf(value);
        index = _keysMap.IndexOf(index);
        return index;
      }
    }

    public int IndexOf(object value) {
      if (value == null) return -1;
      return IndexOf(value.ToString());
    }

    public void Remove(string value) {
      lock (SyncRoot) {
        var index = _keys.IndexOf(value);
        Remove(index, value);
      }
    }

    public void Remove(object value) {
      Remove(value.ToString());
    }

    public void RemoveAt(int index) {
      lock (SyncRoot) {
        index = _keysMap[index];
        var value = _data.IndexOf(index);
        Remove(index, value);
      }
    }

    private void Remove(int index, string value) {
      _keys.Remove(index, value);
      _keysMap.Remove(index);
      _data.Remove(value, index);
      _indexes.Push(index);
      --Count;
    }

    public IEnumerator GetEnumerator() {
      return _data.GetEnumerator().GetEnumerator();
    }

    public void CopyTo(string[] array, int index) {
      lock (SyncRoot) {
        for (var i = 0; i < array.Length; ++i)
          array[i] = this[index + i];
      }
    }

    public void CopyTo(Array array, int index) {
      lock (SyncRoot) {
        for (var i = 0; i < array.Length; ++i)
          array.SetValue(this[index + i], i);
      }
    }

    // TODO
    public void Insert(int index, string value) {
      lock (SyncRoot) {
        throw new NotImplementedException();
      }
    }

    public void Insert(int index, object value) {
      Insert(index, value.ToString());
    }
  }

  internal class IndexStrings<T> {
    private readonly IndexStrings<T> _indexes;
    private IndexStrings<T> _parent;
    private Dictionary<char, IndexStrings<T>> _tree;
    private T[] _nodes;
    private T _value;
    private T _def;
    private char[] _keys;
    private char? _nodeId;

    public IndexStrings(T def, IndexStrings<T> indexes = null) {
      _indexes = indexes ?? new IndexStrings<T>(def, this);
      _tree = new Dictionary<char, IndexStrings<T>>();
      _keys = new char[0];
      _value = def;
      _def = def;
    }

    private void AddTree(IndexStrings<T> node, char idx, IndexStrings<T> child) {
      node._tree.Add(idx, child);
      var keys = node._keys.ToList();
      keys.Add(idx);
      node._keys = keys.ToArray();
    }

    private void RemoveTree(IndexStrings<T> node, char idx) {
      node._tree.Remove(idx);
      var keys = node._keys.ToList();
      keys.Remove(idx);
      node._keys = keys.ToArray();
    }

    private IndexStrings<T> Add(char idx) {
      if (_tree.ContainsKey(idx))
        return _tree[idx];

      var child = new IndexStrings<T>(_def, _indexes) { _parent = this, _nodeId = idx, _nodes = null };

      AddTree(this, idx, child);

      return child;
    }

    private IndexStrings<T> Get(char idx) {
      if (!_tree.ContainsKey(idx))
        return this;

      return _tree[idx];
    }

    private IndexStrings<T> GetVal(char idx, out T val) {
      if (!_tree.ContainsKey(idx)) {
        val = _def;
        return this;
      }

      val = _tree[idx]._value;
      return _tree[idx];
    }

    private IndexStrings<T> GetIndexChar(char idx, out char index) {
      index = idx;
      if (_tree.ContainsKey(idx))
        return this;

      return _tree[idx];
    }

    public void Add(T key, string value) {
      var t = _indexes;
      var idx = char.MinValue;

      for (var i = 0; i < value.Length; i++) {
        idx = value[i];
        t = t.Add(idx);
      }

      if (t._nodes != null) {
        var nodes = t._nodes.ToList();
        nodes.Add(key);
        t._nodes = nodes.ToArray();
      }
      else
        t._nodes = new[] { key };

      t._value = key;
    }

    public void AddExist(T exist, T key, string value) {
      var t = _indexes;
      var idx = char.MinValue;

      for (var i = 0; i < value.Length; i++) {
        idx = value[i];
        t = t.Add(idx);
      }

      if (t._nodes != null) {
        var nodes = t._nodes.ToList();
        nodes.Add(key);
        t._nodes = nodes.ToArray();
      }
      else
        t._nodes = new[] { key };

      t._value = exist;
    }

    public void Replace(T key, T keyNew, string value) {
      var t = _indexes;
      var idx = char.MinValue;

      for (var i = 0; i < value.Length; ++i) {
        idx = value[i];
        t = t.Add(idx);
      }

      if (t._nodes != null) {
        var nodes = t._nodes.ToList();
        nodes[nodes.IndexOf(key)] = keyNew;
        t._nodes = nodes.ToArray();
      }
      else
        t._nodes = new[] { key };

      t._value = keyNew;
    }

    public void Remove(T key, string value) {
      var t = _indexes;
      var idx = char.MinValue;

      for (var i = 0; i < value.Length; ++i) {
        idx = value[i];
        t = t.Get(idx);
      }

      if (t._nodes != null) {
        var nodes = t._nodes.ToList();
        nodes.Remove(key);
        t._nodes = nodes.Count == 0 ? null : nodes.ToArray();
      }
      t._value = t._nodes != null ? t._nodes[0] : _def;

      CleanUp(t);
    }

    private void CleanUp(IndexStrings<T> node) {
      var t = node;
      while (t._nodeId != null && t._nodes == null) {
        var parent = t._parent;
        if (t._tree.Count == 0)
          RemoveTree(parent, t._nodeId.Value);
        t = parent;
      }
    }

    public T[] Get(string value) {
      var nodeIdx = char.MinValue;
      var t = _indexes;
      var idx = char.MinValue;

      for (var i = 0; i < value.Length; i++) {
        idx = value[i];
        t = t.GetIndexChar(idx, out nodeIdx);
        t = t.Get(idx);
      }

      return t._nodes ?? new T[0];
    }

    public T IndexOf(string value) {
      var t = _indexes;
      var idx = char.MinValue;
      var val = _def;

      for (var i = 0; i < value.Length; ++i) {
        idx = value[i];
        t = t.GetVal(idx, out val);
      }

      return val;
    }

    public IEnumerable<T> GetEnumerator() {
      for (var i = 0; i < _indexes._keys.Length; ++i) {
        var key = _indexes._keys[i];
        var nodes = _indexes._tree[key]._nodes;

        for (var j = 0; j < nodes.Length; ++j) {
          yield return nodes[j];
        }
      }
    }

    public override int GetHashCode() {
      return _tree == null ? 0 : _tree.Keys.GetHashCode();
    }
  }

  internal class IndexIntegers<T> {
    private readonly IndexIntegers<T> _indexes;
    private IndexIntegers<T> _parent;
    private Dictionary<char, IndexIntegers<T>> _tree;
    private T[] _nodes;
    private T _value;
    private T _def;
    private char[] _keys;
    private char? _nodeId;

    public IndexIntegers(T def, IndexIntegers<T> indexes = null) {
      _indexes = indexes ?? new IndexIntegers<T>(def, this);
      _tree = new Dictionary<char, IndexIntegers<T>>();
      _keys = new char[0];
      _value = def;
      _def = def;
    }

    private void AddTree(IndexIntegers<T> node, char idx, IndexIntegers<T> child) {
      node._tree.Add(idx, child);
      var keys = node._keys.ToList();
      keys.Add(idx);
      node._keys = keys.ToArray();
    }

    private void RemoveTree(IndexIntegers<T> node, char idx) {
      node._tree.Remove(idx);
      var keys = node._keys.ToList();
      keys.Remove(idx);
      node._keys = keys.ToArray();
    }

    private IndexIntegers<T> Add(char idx) {
      if (_tree.ContainsKey(idx))
        return _tree[idx];

      var child = new IndexIntegers<T>(_def, _indexes) { _parent = this, _nodeId = idx, _nodes = null };

      AddTree(this, idx, child);

      return child;
    }

    private IndexIntegers<T> Get(char idx) {
      if (!_tree.ContainsKey(idx))
        return this;

      return _tree[idx];
    }

    private IndexIntegers<T> GetVal(char idx, out T val) {
      if (!_tree.ContainsKey(idx)) {
        val = _def;
        return this;
      }

      val = _tree[idx]._value;
      return _tree[idx];
    }

    private IndexIntegers<T> GetIndexChar(char idx, out char index) {
      if (!_tree.ContainsKey(idx)) {
        index = idx;
        return this;
      }

      index = idx;
      return _tree[idx];
    }

    public void Add(T key, int value) {
      var t = _indexes;
      var idx = char.MinValue;

      var svalue = value.ToString();
      for (var i = 0; i < svalue.Length; ++i) {
        idx = svalue[i];
        t = t.Add(idx);
      }

      if (t._nodes != null) {
        var nodes = t._nodes.ToList();
        nodes.Add(key);
        t._nodes = nodes.ToArray();
      }
      else
        t._nodes = new[] { key };

      t._value = key;
    }

    public void Replace(T key, T keyNew, int value) {
      var t = _indexes;
      var idx = char.MinValue;

      var svalue = value.ToString();
      for (var i = 0; i < svalue.Length; ++i) {
        idx = svalue[i];
        t = t.Add(idx);
      }

      if (t._nodes != null) {
        var nodes = t._nodes.ToList();
        nodes[nodes.IndexOf(key)] = keyNew;
        t._nodes = nodes.ToArray();
      }
      else
        t._nodes = new[] { key };

      t._value = keyNew;
    }

    public void Remove(T key, int value) {
      var t = _indexes;
      var idx = char.MinValue;

      var svalue = value.ToString();
      for (var i = 0; i < svalue.Length; ++i) {
        idx = svalue[i];
        t = t.Get(idx);
      }

      if (t._nodes != null) {
        var nodes = t._nodes.ToList();
        nodes.Remove(key);
        t._nodes = nodes.Count == 0 ? null : nodes.ToArray();
      }
      t._value = t._nodes != null ? t._nodes[0] : _def;

      CleanUp(t);
    }

    private void CleanUp(IndexIntegers<T> node) {
      var t = node;
      while (t._nodeId != null && t._nodes == null) {
        var parent = t._parent;
        if (t._tree.Count == 0)
          RemoveTree(parent, t._nodeId.Value);
        t = parent;
      }
    }

    public T[] Get(int value) {
      var nodeIdx = char.MinValue;
      var t = _indexes;
      var idx = char.MinValue;

      var svalue = value.ToString();
      for (var i = 0; i < svalue.Length; i++) {
        idx = svalue[i];
        t = t.GetIndexChar(idx, out nodeIdx);
        t = t.Get(idx);
      }

      return t._nodes ?? new T[0];
    }

    public T IndexOf(int value) {
      var t = _indexes;
      var idx = char.MinValue;
      var val = _def;

      var svalue = value.ToString();
      for (var i = 0; i < svalue.Length; ++i) {
        idx = svalue[i];
        t = t.GetVal(idx, out val);
      }

      return val;
    }

    public IEnumerable<T> GetEnumerator() {
      for (var i = 0; i < _indexes._keys.Length; ++i) {
        var key = _indexes._keys[i];
        var nodes = _indexes._tree[key]._nodes;

        for (var j = 0; j < nodes.Length; ++j) {
          yield return nodes[j];
        }
      }
    }

    public override int GetHashCode() {
      return _tree == null ? 0 : _tree.Keys.GetHashCode();
    }
  }
}

This is production of all my experience right now. As I mention before I fail a few time with the same problem. Todays implementation works great, except two thing, Remove and RemoveAt methods work as the same speed as similar methods on List collection. Another thing is that Insert method not work yet. Implementation of it cannot be trivial I think, so it is something for you. StringCollection can store null and in my implementation it not working. I have a few ideas how to implement stroring of null, but I decide to not implement this functionality yet. If you have too much time you can examine StringCollection with 1 million of Contains method invocation on 1 million elements in collections. I gave up. I hope you enjoy this blog entry.

P ;).

Leave a Reply

Your email address will not be published. Required fields are marked *

*