Skip to content

Optimize ImmutableHashSet<T>.IsProperSubsetOf to avoid unnecessary allocations#127368

Draft
aw0lid wants to merge 2 commits intodotnet:mainfrom
aw0lid:fix-immutablehashset-IsProperSubsetOf-allocs
Draft

Optimize ImmutableHashSet<T>.IsProperSubsetOf to avoid unnecessary allocations#127368
aw0lid wants to merge 2 commits intodotnet:mainfrom
aw0lid:fix-immutablehashset-IsProperSubsetOf-allocs

Conversation

@aw0lid
Copy link
Copy Markdown
Contributor

@aw0lid aw0lid commented Apr 24, 2026

Part of #127279

Summary

ImmutableHashSet<T>.IsProperSubsetOf always creates a new intermediate HashSet<T> for the other collection, leading to avoidable allocations and GC pressure, especially for large datasets.

Optimization Logic

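  • O(1) Pre-Scan: Immediately returns false if other is an ICollection with a smaller or equal Count. With this validation done upfront, the set fast paths no longer need tracking variables like matches and extraFound: a set's Count equals its number of distinct elements, so once other is known to be strictly larger, containing every element of the source guarantees a proper subset. (For non-set collections Count may include duplicates, so the check is only an early exit and the fallback re-checks the count after de-duplication.)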

  • Fast-Path Pattern Matching: Detects ImmutableHashSet<T> and HashSet<T> to bypass intermediate allocations.

  • Comparer Guard: Validates EqualityComparer compatibility before triggering fast paths to ensure logical consistency.

  • Short-Circuit Validation: Re-validates Count within specialized paths for an immediate exit before $O(n)$ enumeration.

  • Reverse-Lookup Strategy: An architectural shift where the ImmutableHashSet (the source) is enumerated and each element is looked up in the other collection when that collection is a HashSet<T>. This leverages the O(1) lookup of the HashSet instead of the O(log N) lookup of the immutable tree.

  • Zero-Allocation Execution: Direct iteration over compatible collections, eliminating the costly new HashSet<T>(other) fallback.

  • Deferred fallback: Reserves the expensive allocation solely for general IEnumerable types.

Click to expand Benchmark Source Code
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Order;
using BenchmarkDotNet.Running;
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Linq;

namespace ImmutableHashSetBenchmarks
{
    /// <summary>
    /// BenchmarkDotNet suite that measures <c>ImmutableHashSet&lt;int&gt;.IsProperSubsetOf</c>
    /// against several kinds of <c>other</c> collection (immutable set, <see cref="HashSet{T}"/>,
    /// list, array, a set with a different comparer) and reports time and allocations.
    /// </summary>
    [MemoryDiagnoser]
    [Orderer(SummaryOrderPolicy.FastestToSlowest)]
    [RankColumn]
    public class ImmutableHashSetIsProperSubsetOfBenchmark
    {
        // The set under test: the integers [0, Size).
        private ImmutableHashSet<int> _sourceSet = null!;

        // Proper supersets of the source ([0, Size + 10)) in different container types.
        private ImmutableHashSet<int> _immutableLarger = null!;
        private HashSet<int> _bclHashSetLarger = null!;
        private List<int> _listLarger = null!;
        private int[] _arrayLarger = null!;

        // Sets that cannot be proper supersets because they are not larger than the source.
        private ImmutableHashSet<int> _immutableSmaller = null!;
        private ImmutableHashSet<int> _immutableSameCount = null!;

        // Same elements as _bclHashSetLarger, but built with a non-default comparer instance.
        private HashSet<int> _bclHashSetLargerDiffComparer = null!;

        // Source elements followed by one extra value repeated three times.
        private List<int> _listWithDuplicatesButProper = null!;
        private ImmutableHashSet<int> _emptySource = null!;
        // Every source element listed twice: Count is 2 * Size, distinct elements equal the source.
        private List<int> _listSameElementsWithDuplicates = null!;

        /// <summary>Number of elements in the source set.</summary>
        [Params(100_000)]
        public int Size { get; set; }

        /// <summary>Builds every input collection once per parameter combination.</summary>
        [GlobalSetup]
        public void Setup()
        {
            List<int> elements = Enumerable.Range(0, Size).ToList();
            List<int> largerElements = Enumerable.Range(0, Size + 10).ToList();
            List<int> smallerElements = Enumerable.Range(0, Size - 10).ToList();

            _sourceSet = ImmutableHashSet.CreateRange(elements);
            _emptySource = ImmutableHashSet<int>.Empty;

            _immutableLarger = ImmutableHashSet.CreateRange(largerElements);
            _bclHashSetLarger = new HashSet<int>(largerElements);
            _listLarger = largerElements;
            _arrayLarger = largerElements.ToArray();

            _immutableSmaller = ImmutableHashSet.CreateRange(smallerElements);
            _immutableSameCount = ImmutableHashSet.CreateRange(elements);

            _bclHashSetLargerDiffComparer = new HashSet<int>(largerElements, new ReverseComparer<int>());

            _listWithDuplicatesButProper = elements.Concat(new[] { Size + 1, Size + 1, Size + 1 }).ToList();
            _listSameElementsWithDuplicates = elements.Concat(elements).ToList();
        }

        #region Fast Path: Same Type and Comparer (Optimized)

        [Benchmark(Description = "ImmutableHashSet (Proper Subset - O(N))")]
        public bool Case_ImmutableHashSet_Proper() => _sourceSet.IsProperSubsetOf(_immutableLarger);

        [Benchmark(Description = "BCL HashSet (Proper Subset - O(N))")]
        public bool Case_BclHashSet_Proper() => _sourceSet.IsProperSubsetOf(_bclHashSetLarger);

        #endregion

        #region Early Exit (O(1))

        [Benchmark(Description = "Empty Source (O(1) Check)")]
        public bool Case_EmptySource_Proper() => _emptySource.IsProperSubsetOf(_bclHashSetLarger);

        [Benchmark(Description = "Early Exit (Other is Smaller)")]
        public bool Case_SmallerCount() => _sourceSet.IsProperSubsetOf(_immutableSmaller);

        [Benchmark(Description = "Early Exit (Same Count - Cannot be Proper)")]
        public bool Case_SameCount() => _sourceSet.IsProperSubsetOf(_immutableSameCount);

        #endregion

        #region Fallback Path: Non-Set or Different Comparer

        // The list's Count (2 * Size) is larger than the source's, so no O(1) count exit applies;
        // the result is only known after the input has been de-duplicated.
        [Benchmark(Description = "List (Same Elements with Duplicates - Not Proper)")]
        public bool Case_List_Duplicates_NotProper() => _sourceSet.IsProperSubsetOf(_listSameElementsWithDuplicates);

        [Benchmark(Description = "List (Proper - Fallback to HashSet)")]
        public bool Case_List_Proper() => _sourceSet.IsProperSubsetOf(_listLarger);

        [Benchmark(Description = "Array (Proper - Fallback to HashSet)")]
        public bool Case_Array_Proper() => _sourceSet.IsProperSubsetOf(_arrayLarger);

        [Benchmark(Description = "HashSet (Different Comparer - Force Fallback)")]
        public bool Case_HashSet_DiffComparer() => _sourceSet.IsProperSubsetOf(_bclHashSetLargerDiffComparer);

        [Benchmark(Description = "List with Duplicates (Proper Subset)")]
        public bool Case_List_Duplicates_Proper() => _sourceSet.IsProperSubsetOf(_listWithDuplicatesButProper);

        #endregion
    }

    /// <summary>
    /// Equality comparer that defines equality through <see cref="IComparable{T}.CompareTo"/>.
    /// Despite the name it does not reverse anything; the benchmark uses it only as a
    /// comparer instance that differs from the default one.
    /// </summary>
    /// <typeparam name="T">Element type; must be comparable to itself.</typeparam>
    public class ReverseComparer<T> : IEqualityComparer<T> where T : IComparable<T>
    {
        /// <summary>
        /// Returns <c>true</c> when both values are null, or when <paramref name="x"/> is
        /// non-null and compares equal to <paramref name="y"/>.
        /// </summary>
        public bool Equals(T? x, T? y)
        {
            // Two nulls must compare equal so the comparer stays reflexive and agrees with
            // GetHashCode, which maps null to 0.
            if (x is null)
            {
                return y is null;
            }

            return x.CompareTo(y) == 0;
        }

        /// <summary>Returns the value's own hash code, or 0 for null.</summary>
        public int GetHashCode(T? obj) => obj?.GetHashCode() ?? 0;
    }

    /// <summary>Console entry point for the benchmark project.</summary>
    public class Program
    {
        /// <summary>Runs every benchmark declared on <see cref="ImmutableHashSetIsProperSubsetOfBenchmark"/>.</summary>
        /// <param name="args">Command-line arguments (not used).</param>
        public static void Main(string[] args) =>
            BenchmarkRunner.Run<ImmutableHashSetIsProperSubsetOfBenchmark>();
    }
}
Click to expand Benchmark Results

Benchmark Results (Before Optimization)

Method Size Mean Error StdDev Rank Gen0 Gen1 Gen2 Allocated
'Empty Source (O(1) Check)' 100000 3.020 ns 0.0522 ns 0.0463 ns 1 - - - -
'List (Same Elements with Duplicates - Not Proper)' 100000 2,253,302.064 ns 43,996.1710 ns 83,707.2944 ns 2 85.9375 85.9375 85.9375 3605725 B
'BCL HashSet (Proper Subset - O(N))' 100000 7,143,590.897 ns 55,965.9984 ns 52,350.6297 ns 3 62.5000 62.5000 62.5000 1738869 B
'Array (Proper - Fallback to HashSet)' 100000 7,191,053.721 ns 60,956.8792 ns 54,036.6857 ns 3 70.3125 70.3125 70.3125 1738731 B
'Early Exit (Other is Smaller)' 100000 7,252,623.997 ns 127,108.7726 ns 112,678.6161 ns 3 70.3125 70.3125 70.3125 1738868 B
'Early Exit (Same Count - Cannot be Proper)' 100000 7,254,561.478 ns 73,899.0961 ns 57,695.5534 ns 3 78.1250 78.1250 78.1250 1738874 B
'List (Proper - Fallback to HashSet)' 100000 8,628,967.714 ns 103,114.2276 ns 96,453.1125 ns 4 78.1250 78.1250 78.1250 1738861 B
'List with Duplicates (Proper Subset)' 100000 8,979,029.530 ns 124,859.7911 ns 104,263.5805 ns 4 78.1250 78.1250 78.1250 1738861 B
'HashSet (Different Comparer - Force Fallback)' 100000 9,236,192.453 ns 94,804.2142 ns 88,679.9208 ns 4 78.1250 78.1250 78.1250 1738861 B
'ImmutableHashSet (Proper Subset - O(N))' 100000 14,980,671.116 ns 155,763.4001 ns 145,701.1812 ns 5 78.1250 78.1250 78.1250 1738897 B

Benchmark Results (After Optimization)

Method Size Mean Error StdDev Rank Gen0 Gen1 Gen2 Allocated
'Empty Source (O(1) Check)' 100000 1.769 ns 0.0372 ns 0.0348 ns 1 - - - -
'Early Exit (Same Count - Cannot be Proper)' 100000 2.261 ns 0.0409 ns 0.0363 ns 2 - - - -
'Early Exit (Other is Smaller)' 100000 2.401 ns 0.0800 ns 0.0748 ns 2 - - - -
'List (Same Elements with Duplicates - Not Proper)' 100000 2,226,632.494 ns 44,488.2292 ns 94,808.0507 ns 3 82.0313 82.0313 82.0313 3605636 B
'Array (Proper - Fallback to HashSet)' 100000 4,128,214.156 ns 40,633.6128 ns 36,020.6393 ns 4 62.5000 62.5000 62.5000 1738710 B
'HashSet (Different Comparer - Force Fallback)' 100000 4,310,472.716 ns 39,782.7924 ns 33,220.4335 ns 5 70.3125 70.3125 70.3125 1738810 B
'BCL HashSet (Proper Subset - O(N))' 100000 5,622,467.743 ns 43,428.7884 ns 36,265.0052 ns 6 - - - -
'List (Proper - Fallback to HashSet)' 100000 6,933,851.438 ns 51,688.7203 ns 43,162.4224 ns 7 62.5000 62.5000 62.5000 1738734 B
'List with Duplicates (Proper Subset)' 100000 7,455,423.480 ns 92,394.8283 ns 81,905.6087 ns 8 70.3125 70.3125 70.3125 1738817 B
'ImmutableHashSet (Proper Subset - O(N))' 100000 13,207,980.537 ns 130,270.0225 ns 115,480.9819 ns 9 - - - -

Performance Analysis Summary (100,000 Elements)

Case / Method Before (ns) After (ns) Speedup Ratio Memory Improvement
Early Exit (Other is Smaller) 7,252,623 2.401 ~3,020,667x -100% (Zero Alloc)
Early Exit (Same Count) 7,254,561 2.261 ~3,208,563x -100% (Zero Alloc)
Empty Source 3.020 1.769 1.71x Zero Alloc
BCL HashSet (Proper Subset) 7,143,590 5,622,467 1.27x -100% (Zero Alloc)
List (Duplicates - Not Proper) 2,253,302 2,226,632 1.01x Stable (3.6 MB)
Array (Fallback to HashSet) 7,191,053 4,128,214 1.74x Stable (1.7 MB)
List (Proper - Fallback) 8,628,967 6,933,851 1.24x Stable (1.7 MB)
List with Duplicates (Proper) 8,979,029 7,455,423 1.20x Stable (1.7 MB)
HashSet (Diff Comparer) 9,236,192 4,310,472 2.14x Stable (1.7 MB)
ImmutableHashSet (Proper) 14,980,671 13,207,980 1.13x -100% (Zero Alloc)

@dotnet-policy-service dotnet-policy-service Bot added the community-contribution Indicates that the PR has been added by a community member label Apr 24, 2026
@dotnet-policy-service
Copy link
Copy Markdown
Contributor

Tagging subscribers to this area: @dotnet/area-system-collections
See info in area-owners.md if you want to be subscribed.

Comment on lines +893 to +902
using (var e = new ImmutableHashSet<T>.Enumerator(origin.Root))
{
while (e.MoveNext())
{
if (!otherAsImmutableHashSet.Contains(e.Current))
{
return false;
}
}
}
Copy link
Copy Markdown
Member

@MihaZupan MihaZupan Apr 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
using (var e = new ImmutableHashSet<T>.Enumerator(origin.Root))
{
while (e.MoveNext())
{
if (!otherAsImmutableHashSet.Contains(e.Current))
{
return false;
}
}
}
foreach (T value in origin.Root)
{
if (!otherAsImmutableHashSet.Contains(value))
{
return false;
}
}

Any reason why all of the loops are manually deconstructed instead of using foreach?

Copy link
Copy Markdown
Contributor Author

@aw0lid aw0lid Apr 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the existing implementation, the code iterates over other and queries the ImmutableHashSet, which costs $O(\log N)$ per lookup. By manually instantiating the enumerator from the root, I’ve reversed this logic to iterate over the ImmutableHashSet and query the HashSet<T> instead. This shifts the lookup cost to $O(1)$, reducing the overall complexity from $O(N \log N)$ to $O(N)$.

Also, origin (the MutationInput) does not implement IEnumerable<T>, so a direct foreach is not possible. I had to deconstruct it this way to access the underlying nodes while maintaining the performance gains.

That said, I am waiting for my other PR (#126309) to be merged so I can leverage its logic here and further refine this implementation along the following lines:

/// <summary>
/// Determines whether the set described by <paramref name="origin"/> is a proper subset of
/// <paramref name="other"/>, allocating a temporary <see cref="HashSet{T}"/> only when no
/// cheaper path applies.
/// </summary>
/// <param name="other">The sequence to compare against; validated as non-null on entry.</param>
/// <param name="origin">The source set's root, count and equality comparer.</param>
/// <returns>
/// <c>false</c> as soon as a count check rules a proper superset out; otherwise the result of
/// the helper that compares <paramref name="origin"/> with the (possibly de-duplicated) other set.
/// </returns>
// NOTE(review): SetEqualsWithImmutableHashset / SetEqualsWithHashset come from PR #126309 and
// are not visible here. Presumably they verify that every element of origin is contained in
// the given set; confirm they do not also require equal counts, since every call below is
// only reached when the other set is strictly larger.
private static bool IsProperSubsetOf(IEnumerable<T> other, MutationInput origin)
{
    Requires.NotNull(other, nameof(other));

    // An empty source is a proper subset of anything that has at least one element.
    if (origin.Root.IsEmpty)
    {
        return other.Any();
    }

    if (other is ICollection<T> otherAsICollectionGeneric)
    {
        // Count is an upper bound on the number of distinct elements in other (it may contain duplicates),
        // so we can only bail out on <=: a larger Count does not yet prove that other is a larger set.
        if (otherAsICollectionGeneric.Count <= origin.Count)
        {
            return false;
        }

        // Fast paths: when other is already a set using the same comparer, use it directly
        // instead of copying it into a temporary HashSet<T>.
        switch (other)
        {
            case ImmutableHashSet<T> otherAsImmutableHashSet:
                if (origin.EqualityComparer.Equals(otherAsImmutableHashSet.KeyComparer))
                {
                    return SetEqualsWithImmutableHashset(otherAsImmutableHashSet, origin);
                }
                break;

            case HashSet<T> otherAsHashset:
                if (origin.EqualityComparer.Equals(otherAsHashset.Comparer))
                {
                    return SetEqualsWithHashset(otherAsHashset, origin);
                }
                break;
        }
    }
    // Non-generic ICollection still exposes Count, which allows the same early exit.
    else if (other is ICollection otherAsICollection && otherAsICollection.Count <= origin.Count)
    {
        return false;
    }

    // Fallback: de-duplicate other under origin's comparer, then re-check the count,
    // which is now the number of distinct elements.
    var otherSet = new HashSet<T>(other, origin.EqualityComparer);
    if (otherSet.Count <= origin.Count)
    {
        return false;
    }

    return SetEqualsWithHashset(otherSet, origin);
}

return IsProperSubsetOfFastPath(otherAsICollectionGeneric, this.Origin);
}

else if (other is ICollection otherAsICollection && otherAsICollection.Count <= this.Origin.Count)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it common for types to implement ICollection but not ICollection<T> if they're already implementing IEnumerable<T>?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, it’s not very common, but these types still exist and are in use. Since we can easily capture the Count property from them to trigger an early exit, I thought: why not include them? It broadens the optimization's reach with very little overhead

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

area-System.Collections community-contribution Indicates that the PR has been added by a community member

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants