From 0e97426f5d666bbd00b01cf02e2430ec57c1e810 Mon Sep 17 00:00:00 2001 From: Steve Temple Date: Tue, 16 Nov 2021 14:02:20 +0000 Subject: [PATCH 1/6] WIP v48 --- MultiFacetLucene/MultiFacetLucene.csproj | 3 +- MultiFacetLucene/app.config | 8 +- .../FacetSearcherConfiguration.cs | 20 ++ .../MemoryOptimizer/DefaultMemoryOptimizer.cs | 37 +++ .../MemoryOptimizer/IMemoryOptimizer.cs | 9 + .../MemoryOptimizer/NoMemoryOptimizer.cs | 13 + .../FacetBitSetCalculatorProvider.cs | 21 ++ MultiFacetLucene4/FacetFieldInfo.cs | 38 +++ MultiFacetLucene4/FacetMatch.cs | 9 + MultiFacetLucene4/FacetSearchResult.cs | 11 + MultiFacetLucene4/FacetSearcher.cs | 262 +++++++++++++++++ MultiFacetLucene4/Filter/ChainedFilter.cs | 273 ++++++++++++++++++ MultiFacetLucene4/IFacetBitSetCalculator.cs | 17 ++ .../IFacetBitSetCalculatorProvider.cs | 13 + MultiFacetLucene4/MultiFacetLucene4.csproj | 12 + .../RangeFacetBitSetCalculator.cs | 58 ++++ MultiFacetLucene4/ResultCollection.cs | 82 ++++++ .../TermFacetBitSetCalulcator.cs | 63 ++++ .../MultiFacetLuceneNet.Tests.csproj | 3 +- MultiFacetLuceneNet.Tests/app.config | 8 +- MultiFacetLuceneNet.sln | 13 +- PerformanceTest/App.config | 10 +- PerformanceTest/PerformanceTest.csproj | 3 +- 23 files changed, 968 insertions(+), 18 deletions(-) create mode 100644 MultiFacetLucene4/Configuration/FacetSearcherConfiguration.cs create mode 100644 MultiFacetLucene4/Configuration/MemoryOptimizer/DefaultMemoryOptimizer.cs create mode 100644 MultiFacetLucene4/Configuration/MemoryOptimizer/IMemoryOptimizer.cs create mode 100644 MultiFacetLucene4/Configuration/MemoryOptimizer/NoMemoryOptimizer.cs create mode 100644 MultiFacetLucene4/FacetBitSetCalculatorProvider.cs create mode 100644 MultiFacetLucene4/FacetFieldInfo.cs create mode 100644 MultiFacetLucene4/FacetMatch.cs create mode 100644 MultiFacetLucene4/FacetSearchResult.cs create mode 100644 MultiFacetLucene4/FacetSearcher.cs create mode 100644 MultiFacetLucene4/Filter/ChainedFilter.cs create mode 100644 
MultiFacetLucene4/IFacetBitSetCalculator.cs create mode 100644 MultiFacetLucene4/IFacetBitSetCalculatorProvider.cs create mode 100644 MultiFacetLucene4/MultiFacetLucene4.csproj create mode 100644 MultiFacetLucene4/RangeFacetBitSetCalculator.cs create mode 100644 MultiFacetLucene4/ResultCollection.cs create mode 100644 MultiFacetLucene4/TermFacetBitSetCalulcator.cs diff --git a/MultiFacetLucene/MultiFacetLucene.csproj b/MultiFacetLucene/MultiFacetLucene.csproj index 8eeda7d..bbeca5a 100644 --- a/MultiFacetLucene/MultiFacetLucene.csproj +++ b/MultiFacetLucene/MultiFacetLucene.csproj @@ -9,7 +9,7 @@ Properties MultiFacetLucene MultiFacetLucene - v4.5.1 + v4.8 512 @@ -21,6 +21,7 @@ ..\..\GibeCommerce\GibeCommerce\ true + true diff --git a/MultiFacetLucene/app.config b/MultiFacetLucene/app.config index fe339ee..bbc3602 100644 --- a/MultiFacetLucene/app.config +++ b/MultiFacetLucene/app.config @@ -1,11 +1,11 @@ - + - - + + - \ No newline at end of file +
diff --git a/MultiFacetLucene4/Configuration/FacetSearcherConfiguration.cs b/MultiFacetLucene4/Configuration/FacetSearcherConfiguration.cs
new file mode 100644
index 0000000..e105616
--- /dev/null
+++ b/MultiFacetLucene4/Configuration/FacetSearcherConfiguration.cs
@@ -0,0 +1,38 @@
+using MultiFacetLucene.Configuration.MemoryOptimizer;
+
+namespace MultiFacetLucene.Configuration
+{
+    /// <summary>
+    /// Settings used when facet value bitsets are calculated and cached.
+    /// </summary>
+    public class FacetSearcherConfiguration
+    {
+        /// <summary>
+        /// Creates a configuration with a minimum facet count of 1 and no memory optimizer.
+        /// </summary>
+        public FacetSearcherConfiguration()
+        {
+            MinimumCountInTotalDatasetForFacet = 1;
+            MemoryOptimizer = null;
+        }
+
+        /// <summary>
+        /// Returns a new configuration carrying the default values.
+        /// </summary>
+        public static FacetSearcherConfiguration Default()
+        {
+            // The parameterless constructor already applies the defaults.
+            return new FacetSearcherConfiguration();
+        }
+
+        /// <summary>
+        /// Facet values matching fewer documents than this in the whole index are not reported by the bitset calculators.
+        /// </summary>
+        public int MinimumCountInTotalDatasetForFacet { get; set; }
+
+        /// <summary>
+        /// Optional strategy choosing which cached bitsets are released; <c>null</c> keeps every bitset cached.
+        /// </summary>
+        public IMemoryOptimizer? MemoryOptimizer { get; set; }
+    }
+}
\ No newline at end of file
diff --git a/MultiFacetLucene4/Configuration/MemoryOptimizer/DefaultMemoryOptimizer.cs b/MultiFacetLucene4/Configuration/MemoryOptimizer/DefaultMemoryOptimizer.cs
new
file mode 100644
index 0000000..00b2f4f
--- /dev/null
+++ b/MultiFacetLucene4/Configuration/MemoryOptimizer/DefaultMemoryOptimizer.cs
@@ -0,0 +1,46 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace MultiFacetLucene.Configuration.MemoryOptimizer
+{
+    /// <summary>
+    /// Flags the tail of every facet's value list for lazy loading once the total
+    /// number of facet values reaches a threshold.
+    /// </summary>
+    public class DefaultMemoryOptimizer : IMemoryOptimizer
+    {
+        private readonly int _keepPercent;
+        private readonly int _optimizeIfTotalCountIsGreaterThan;
+
+        /// <param name="keepPercent">Percentage of the total value count used as the number of leading values kept per facet.</param>
+        /// <param name="optimizeIfTotalCountIsGreaterThan">Nothing is flagged while the total value count is below this number.</param>
+        public DefaultMemoryOptimizer(int keepPercent, int optimizeIfTotalCountIsGreaterThan)
+        {
+            _keepPercent = keepPercent;
+            _optimizeIfTotalCountIsGreaterThan = optimizeIfTotalCountIsGreaterThan;
+        }
+
+        /// <summary>
+        /// Returns the facet values whose bitsets should be recalculated on demand instead of cached.
+        /// </summary>
+        public IEnumerable<FacetSearcher.FacetValues.FacetValueBitSet> SetAsLazyLoad(List<FacetSearcher.FacetValues> facetValuesList)
+        {
+            var totalCount = facetValuesList.Sum(a => a.FacetValueBitSetList.Count);
+            if (totalCount < _optimizeIfTotalCountIsGreaterThan) yield break;
+
+            // Computed once: it depends on the total only, not on the facet being visited.
+            // NOTE(review): the limit is derived from the total over all facets but applied to each
+            // facet's own list, and one value more than the limit is kept - confirm both are intended.
+            var percent = Convert.ToInt32(totalCount * _keepPercent / 100.0);
+            foreach (var facetValues in facetValuesList)
+            {
+                // Keep the first (percent + 1) values of this facet, flag the rest.
+                foreach (var value in facetValues.FacetValueBitSetList.Skip(percent + 1))
+                {
+                    yield return value;
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/MultiFacetLucene4/Configuration/MemoryOptimizer/IMemoryOptimizer.cs b/MultiFacetLucene4/Configuration/MemoryOptimizer/IMemoryOptimizer.cs
new file mode 100644
index 0000000..3eaaa8a
--- /dev/null
+++ b/MultiFacetLucene4/Configuration/MemoryOptimizer/IMemoryOptimizer.cs
@@ -0,0 +1,13 @@
+using System.Collections.Generic;
+
+namespace MultiFacetLucene.Configuration.MemoryOptimizer
+{
+    /// <summary>
+    /// Decides which cached facet value bitsets are released and recalculated on demand.
+    /// </summary>
+    public interface IMemoryOptimizer
+    {
+        /// <summary>Returns the facet values to flag for lazy loading.</summary>
+        IEnumerable<FacetSearcher.FacetValues.FacetValueBitSet> SetAsLazyLoad(List<FacetSearcher.FacetValues> facetValuesList);
+    }
+}
\ No newline at end of file
diff --git a/MultiFacetLucene4/Configuration/MemoryOptimizer/NoMemoryOptimizer.cs b/MultiFacetLucene4/Configuration/MemoryOptimizer/NoMemoryOptimizer.cs
new file mode 100644
index 0000000..2a16dc4
---
/dev/null
+++ b/MultiFacetLucene4/Configuration/MemoryOptimizer/NoMemoryOptimizer.cs
@@ -0,0 +1,18 @@
+using System.Collections.Generic;
+
+namespace MultiFacetLucene.Configuration.MemoryOptimizer
+{
+    /// <summary>
+    /// Memory optimizer that never flags any facet value for lazy loading.
+    /// </summary>
+    public class NoMemoryOptimizer : IMemoryOptimizer
+    {
+        /// <summary>
+        /// Always returns an empty sequence.
+        /// </summary>
+        public IEnumerable<FacetSearcher.FacetValues.FacetValueBitSet> SetAsLazyLoad(List<FacetSearcher.FacetValues> facetValuesList)
+        {
+            yield break;
+        }
+    }
+}
\ No newline at end of file
diff --git a/MultiFacetLucene4/FacetBitSetCalculatorProvider.cs b/MultiFacetLucene4/FacetBitSetCalculatorProvider.cs
new file mode 100644
index 0000000..03eb244
--- /dev/null
+++ b/MultiFacetLucene4/FacetBitSetCalculatorProvider.cs
@@ -0,0 +1,39 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using MultiFacetLucene.Configuration;
+
+namespace MultiFacetLucene
+{
+    /// <summary>
+    /// Creates the bitset calculator matching a facet field (range or term).
+    /// </summary>
+    public class FacetBitSetCalculatorProvider : IFacetBitSetCalculatorProvider
+    {
+        private readonly FacetSearcherConfiguration _configuration;
+
+        /// <summary>
+        /// Creates a provider whose calculators share one configuration.
+        /// </summary>
+        /// <param name="configuration">Configuration handed to every calculator; a default one is created when omitted.</param>
+        public FacetBitSetCalculatorProvider(FacetSearcherConfiguration? configuration = null)
+        {
+            _configuration = configuration ?? new FacetSearcherConfiguration();
+        }
+
+        /// <summary>
+        /// Returns a range calculator when <c>info.IsRange</c> is set, otherwise a term calculator.
+        /// </summary>
+        public IFacetBitSetCalculator GetFacetBitSetCalculator(FacetFieldInfo info)
+        {
+            if (info.IsRange)
+            {
+                return new RangeFacetBitSetCalculator(_configuration);
+            }
+
+            return new TermFacetBitSetCalulcator(_configuration);
+        }
+    }
+}
diff --git a/MultiFacetLucene4/FacetFieldInfo.cs b/MultiFacetLucene4/FacetFieldInfo.cs
new file mode 100644
index 0000000..3ce4ec2
--- /dev/null
+++ b/MultiFacetLucene4/FacetFieldInfo.cs
@@ -0,0 +1,41 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace MultiFacetLucene
+{
+    /// <summary>
+    /// Describes one facet field: its name, the selected values and, for range facets, the ranges.
+    /// </summary>
+    public class FacetFieldInfo
+    {
+        public FacetFieldInfo()
+        {
+            Selections = new List<string>();
+            Ranges = new List<Range>();
+            MaxToFetchExcludingSelections = 20;
+        }
+
+        public string FieldName { get; set; }
+        public List<string> Selections { get; set; }
+        public int MaxToFetchExcludingSelections { get; set; }
+        public bool IsRange { get; set; }
+        public List<Range> Ranges { get; set; }
+
+        /// <summary>True when at least one value is selected.</summary>
+        public virtual bool HasSelections
+        {
+            get { return Selections.Any(); }
+        }
+    }
+
+    /// <summary>
+    /// A named range of a range facet; <c>Id</c> is the value used in selections and facet results.
+    /// </summary>
+    public class Range
+    {
+        public string Id { get; set; }
+        public string From { get; set; }
+        public string To { get; set; }
+    }
+}
\ No newline at end of file
diff --git a/MultiFacetLucene4/FacetMatch.cs b/MultiFacetLucene4/FacetMatch.cs
new file mode 100644
index 0000000..76249fb
--- /dev/null
+++ b/MultiFacetLucene4/FacetMatch.cs
@@ -0,0 +1,12 @@
+namespace MultiFacetLucene
+{
+    /// <summary>
+    /// Number of matching documents for one value of one facet field.
+    /// </summary>
+    public class FacetMatch
+    {
+        public string FacetFieldName { get; set; }
+        public string Value { get; set; }
+        public long Count { get; set; }
+    }
+}
\ No newline at end of file
diff --git a/MultiFacetLucene4/FacetSearchResult.cs b/MultiFacetLucene4/FacetSearchResult.cs
new file mode 100644
index 0000000..8a520d9
--- /dev/null
+++ b/MultiFacetLucene4/FacetSearchResult.cs
@@ -0,0 +1,14 @@
+using System.Collections.Generic;
+using Lucene.Net.Search;
+
+namespace MultiFacetLucene
+{
+    /// <summary>
+    /// Result of a faceted search: the facet counts plus the top document hits.
+    /// </summary>
+    public class FacetSearchResult
+    {
+        public List<FacetMatch> Facets { get; set; }
+        public TopDocs Hits { get; set; }
+    }
+}
\ No newline at end of file
diff --git a/MultiFacetLucene4/FacetSearcher.cs b/MultiFacetLucene4/FacetSearcher.cs
new file mode 100644
index 0000000..52a2603
--- /dev/null
+++ b/MultiFacetLucene4/FacetSearcher.cs
@@ -0,0 +1,319 @@
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading.Tasks;
+using Lucene.Net.Analysis;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using MultiFacetLucene.Configuration;
+
+namespace MultiFacetLucene
+{
+    /// <summary>
+    /// Index searcher that also computes facet counts, caching the value bitsets per facet field.
+    /// </summary>
+    public class FacetSearcher : IndexSearcher
+    {
+        // Cached facet value bitsets, keyed by facet field name.
+        private readonly ConcurrentDictionary<string, FacetValues> _facetBitSetDictionary = new ConcurrentDictionary<string, FacetValues>();
+
+        public FacetSearcher(IndexReader reader, FacetSearcherConfiguration? facetSearcherConfiguration = null)
+            : base(reader)
+        {
+            Initialize(facetSearcherConfiguration);
+        }
+
+        public FacetSearcher(IndexReader reader, TaskScheduler scheduler, FacetSearcherConfiguration? facetSearcherConfiguration = null)
+            : base(reader, scheduler)
+        {
+            Initialize(facetSearcherConfiguration);
+        }
+
+        public FacetSearcher(IndexReaderContext context, FacetSearcherConfiguration? facetSearcherConfiguration = null)
+            : base(context)
+        {
+            Initialize(facetSearcherConfiguration);
+        }
+
+        public FacetSearcher(IndexReaderContext context, TaskScheduler scheduler, FacetSearcherConfiguration? facetSearcherConfiguration = null)
+            : base(context, scheduler)
+        {
+            Initialize(facetSearcherConfiguration);
+        }
+
+        public FacetSearcherConfiguration FacetSearcherConfiguration { get; protected set; }
+
+        private void Initialize(FacetSearcherConfiguration? facetSearcherConfiguration)
+        {
+            FacetSearcherConfiguration = facetSearcherConfiguration ?? FacetSearcherConfiguration.Default();
+        }
+
+        /// <summary>
+        /// Searches with every facet selection applied and counts the values of each facet.
+        /// </summary>
+        /// <param name="baseQueryWithoutFacetDrilldown">Query without any facet selection applied.</param>
+        /// <param name="topResults">Maximum number of hits to return.</param>
+        /// <param name="facetFieldInfos">Facets to count, including their current selections.</param>
+        /// <param name="includeEmptyFacets">When false, facet values with a zero count are dropped.</param>
+        /// <param name="filter">Optional filter applied when counting facets.</param>
+        /// <param name="docIdMappingTable">Optional map from a document id to the document id its hits are counted under.</param>
+        public virtual FacetSearchResult SearchWithFacets(Query baseQueryWithoutFacetDrilldown, int topResults, IList<FacetFieldInfo> facetFieldInfos, bool includeEmptyFacets = false, Filter? filter = null, Dictionary<int, int>? docIdMappingTable = null)
+        {
+            // NOTE(review): the hits are searched without the filter, only the facet counts use it - confirm this is intended.
+            var hits = Search(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, facetFieldInfos, null), topResults);
+
+            var facets = GetAllFacetsValues(baseQueryWithoutFacetDrilldown, facetFieldInfos, filter, docIdMappingTable);
+            if (!includeEmptyFacets)
+            {
+                facets = facets.Where(x => x.Count > 0);
+            }
+
+            return new FacetSearchResult
+            {
+                Facets = facets.ToList(),
+                Hits = hits
+            };
+        }
+
+        /// <summary>
+        /// Returns the cached bitsets of a facet field, reading them from the index on first use.
+        /// </summary>
+        private FacetValues GetOrCreateFacetBitSet(FacetFieldInfo fieldInfo)
+        {
+            // The factory overload reads the index only when the key is missing; passing the
+            // value directly would recompute every bitset on each call, even on a cache hit.
+            // NOTE(review): the cache key is the field name only, so a range facet queried with different ranges reuses the first entry - confirm.
+            return _facetBitSetDictionary.GetOrAdd(fieldInfo.FieldName, _ => ReadBitSetsForValues(fieldInfo));
+        }
+
+        private IFacetBitSetCalculator GetFacetBitSetCalculator(FacetFieldInfo fieldInfo)
+        {
+            // Hand over this searcher's configuration so its minimum facet count is honoured.
+            return new FacetBitSetCalculatorProvider(FacetSearcherConfiguration).GetFacetBitSetCalculator(fieldInfo);
+        }
+
+        /// <summary>
+        /// Reads every value bitset of a facet field, ordered by descending document count.
+        /// </summary>
+        private FacetValues ReadBitSetsForValues(FacetFieldInfo fieldInfo)
+        {
+            var facetValues = new FacetValues { Term = fieldInfo.FieldName };
+
+            facetValues.FacetValueBitSetList.AddRange(GetFacetBitSetCalculator(fieldInfo).GetFacetValueBitSets(IndexReader, fieldInfo).OrderByDescending(x => x.Count));
+
+            if (FacetSearcherConfiguration.MemoryOptimizer == null) return facetValues;
+
+            // Release the bitsets the optimizer flags; FindMatchesInQuery recalculates a null bitset.
+            foreach (var facetValue in FacetSearcherConfiguration.MemoryOptimizer.SetAsLazyLoad(_facetBitSetDictionary.Values.ToList()))
+                facetValue.Bitset = null;
+
+            return facetValues;
+        }
+
+        private IEnumerable<FacetMatch> GetAllFacetsValues(Query baseQueryWithoutFacetDrilldown, IList<FacetFieldInfo> facetFieldInfos, Filter? filter, Dictionary<int, int>? docIdMappingTable)
+        {
+            return
+                facetFieldInfos.SelectMany(
+                    facetFieldInfo =>
+                        FindMatchesInQuery(baseQueryWithoutFacetDrilldown, facetFieldInfos, facetFieldInfo, filter, docIdMappingTable));
+        }
+
+        /// <summary>
+        /// Counts, for one facet field, the documents matching each of its values.
+        /// </summary>
+        private IEnumerable<FacetMatch> FindMatchesInQuery(Query baseQueryWithoutFacetDrilldown, IList<FacetFieldInfo> allFacetFieldInfos, FacetFieldInfo facetFieldInfoToCalculateFor, Filter? filter, Dictionary<int, int>? docIdMappingTable)
+        {
+            // The selections of the facet being counted are left out of the query.
+            // NOTE(review): GetDocIdSet(IndexReader), Bits and the NumWords setter look like pre-4.8 Lucene.Net members - confirm they exist in the referenced version.
+            var queryFilter = new CachingWrapperFilter(CombineQueryWithFilter(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, allFacetFieldInfos, facetFieldInfoToCalculateFor.FieldName), filter));
+            var bitsQueryWithoutFacetDrilldown = new OpenBitSetDISI(queryFilter.GetDocIdSet(IndexReader).GetIterator(), IndexReader.MaxDoc);
+            var baseQueryWithoutFacetDrilldownCopy = new OpenBitSetDISI(bitsQueryWithoutFacetDrilldown.Bits.Length);
+            baseQueryWithoutFacetDrilldownCopy.Bits = new long[bitsQueryWithoutFacetDrilldown.Bits.Length];
+
+            var docIdMappingArray = GetDocIdMappingArray(docIdMappingTable);
+
+            var calculatedFacetCounts = new ResultCollection(facetFieldInfoToCalculateFor);
+            foreach (var facetValueBitSet in GetOrCreateFacetBitSet(facetFieldInfoToCalculateFor).FacetValueBitSetList)
+            {
+                var isSelected = calculatedFacetCounts.IsSelected(facetValueBitSet.Value);
+
+                // Impossible to get a better result: the value's total count is already below the lowest kept count.
+                if (!isSelected && facetValueBitSet.Count < calculatedFacetCounts.MinCountForNonSelected && calculatedFacetCounts.HaveEnoughResults)
+                {
+                    break;
+                }
+
+                // Reset the scratch bitset to the query result before intersecting.
+                bitsQueryWithoutFacetDrilldown.Bits.CopyTo(baseQueryWithoutFacetDrilldownCopy.Bits, 0);
+                baseQueryWithoutFacetDrilldownCopy.NumWords = bitsQueryWithoutFacetDrilldown.NumWords;
+
+                // A null bitset was released by the memory optimizer and is recalculated here.
+                var bitset = facetValueBitSet.Bitset ?? GetFacetBitSetCalculator(facetFieldInfoToCalculateFor).GetFacetBitSet(IndexReader, facetFieldInfoToCalculateFor, facetValueBitSet.Value);
+                baseQueryWithoutFacetDrilldownCopy.And(bitset);
+
+                if (docIdMappingArray != null)
+                {
+                    CascadeVariantValuesToParent(baseQueryWithoutFacetDrilldownCopy, docIdMappingArray);
+                }
+
+                var match = new FacetMatch
+                {
+                    Count = baseQueryWithoutFacetDrilldownCopy.Cardinality(),
+                    Value = facetValueBitSet.Value,
+                    FacetFieldName = facetFieldInfoToCalculateFor.FieldName
+                };
+
+                if (isSelected)
+                    calculatedFacetCounts.AddToSelected(match);
+                else
+                    calculatedFacetCounts.AddToNonSelected(match);
+            }
+
+            return calculatedFacetCounts.GetList();
+        }
+
+        /// <summary>
+        /// Turns the mapping table into an array indexed by document id.
+        /// </summary>
+        /// <returns>The mapping array, or null when no mapping was supplied.</returns>
+        private int[]? GetDocIdMappingArray(Dictionary<int, int>? docIdMappingTable)
+        {
+            if (docIdMappingTable == null || docIdMappingTable.Count == 0)
+            {
+                return null;
+            }
+
+            var mappingArray = new int[docIdMappingTable.Keys.Max() + 1];
+
+            // Documents missing from the table map to themselves; leaving the default 0
+            // would make the cascade move their hits to document 0.
+            for (var i = 0; i < mappingArray.Length; i++)
+            {
+                mappingArray[i] = i;
+            }
+
+            foreach (var kvp in docIdMappingTable)
+            {
+                mappingArray[kvp.Key] = kvp.Value;
+            }
+
+            return mappingArray;
+        }
+
+        /// <summary>
+        /// Moves the bit of every document that maps to another document onto that document.
+        /// </summary>
+        private void CascadeVariantValuesToParent(OpenBitSetDISI bitset, int[] docIdMappingTable)
+        {
+            var capacity = Math.Min(bitset.Capacity, docIdMappingTable.Length);
+            if (bitset.IsEmpty)
+                return;
+            for (int i = 0; i < capacity; i++)
+            {
+                if (docIdMappingTable[i] != i)
+                {
+                    if (bitset.FastGet(i))
+                    {
+                        bitset.FastSet(docIdMappingTable[i]);
+                    }
+                    bitset.FastClear(i);
+                }
+            }
+        }
+
+        // Wraps the query as a filter and, when a filter is given, ANDs both together.
+        private Filter CombineQueryWithFilter(Query query, Filter? filter)
+        {
+            if (filter == null)
+            {
+                return new QueryWrapperFilter(query);
+            }
+
+            return new ChainedFilter(new []
+                {
+                    new QueryWrapperFilter(query),
+                    filter
+                },
+                ChainedFilter.Logic.AND
+            );
+        }
+
+        /// <summary>
+        /// Adds the selections of every facet, except the named one, to the base query.
+        /// </summary>
+        /// <param name="facetAttributeFieldName">Facet field whose selections are left out, or null to apply all selections.</param>
+        protected Query CreateFacetedQuery(Query baseQueryWithoutFacetDrilldown, IList<FacetFieldInfo> facetFieldInfos, string? facetAttributeFieldName)
+        {
+            var facetsToAdd = facetFieldInfos.Where(x => x.FieldName != facetAttributeFieldName && x.HasSelections).ToList();
+            if (!facetsToAdd.Any()) return baseQueryWithoutFacetDrilldown;
+            var booleanQuery = new BooleanQuery();
+            booleanQuery.Add(baseQueryWithoutFacetDrilldown, Occur.MUST);
+            foreach (var facetFieldInfo in facetsToAdd)
+            {
+                if (facetFieldInfo.IsRange)
+                {
+                    var selectedRanges = facetFieldInfo.Ranges.Where(r => facetFieldInfo.Selections.Contains(r.Id)).ToList();
+                    if (selectedRanges.Count == 1)
+                    {
+                        booleanQuery.Add(
+                            new TermRangeQuery(facetFieldInfo.FieldName, new BytesRef(selectedRanges[0].From), new BytesRef(selectedRanges[0].To), true, true), Occur.MUST);
+                    }
+                    else
+                    {
+                        var valuesQuery = new BooleanQuery();
+                        foreach (var range in selectedRanges)
+                        {
+                            valuesQuery.Add(new TermRangeQuery(facetFieldInfo.FieldName, new BytesRef(range.From), new BytesRef(range.To), true, true),
+                                Occur.SHOULD);
+                        }
+                        booleanQuery.Add(valuesQuery, Occur.MUST);
+                    }
+                }
+                else
+                {
+                    if (facetFieldInfo.Selections.Count == 1)
+                    {
+                        booleanQuery.Add(new TermQuery(new Term(facetFieldInfo.FieldName, facetFieldInfo.Selections[0])), Occur.MUST);
+                    }
+                    else
+                    {
+                        var valuesQuery = new BooleanQuery();
+                        foreach (var value in facetFieldInfo.Selections)
+                        {
+                            valuesQuery.Add(new TermQuery(new Term(facetFieldInfo.FieldName, value)), Occur.SHOULD);
+                        }
+                        booleanQuery.Add(valuesQuery, Occur.MUST);
+                    }
+                }
+            }
+            return booleanQuery;
+        }
+
+        /// <summary>
+        /// The cached values of one facet field.
+        /// </summary>
+        public class FacetValues
+        {
+            public FacetValues()
+            {
+                FacetValueBitSetList = new List<FacetValueBitSet>();
+            }
+
+            public string Term { get; set; }
+
+            public List<FacetValueBitSet> FacetValueBitSetList { get; set; }
+
+            /// <summary>
+            /// One facet value with its total document count and its bitset (null once released for lazy loading).
+            /// </summary>
+            public class FacetValueBitSet
+            {
+                public string Value { get; set; }
+                public OpenBitSetDISI Bitset { get; set; }
+                public long Count { get; set; }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/MultiFacetLucene4/Filter/ChainedFilter.cs
b/MultiFacetLucene4/Filter/ChainedFilter.cs
new file mode 100644
index 0000000..b26706d
--- /dev/null
+++ b/MultiFacetLucene4/Filter/ChainedFilter.cs
@@ -0,0 +1,243 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Text;
+
+using Lucene.Net.Search;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis
+{
+    /// <summary>
+    /// Allows multiple <see cref="Filter"/>s to be chained.
+    /// Logical operations such as NOT and XOR
+    /// are applied between filters. One operation can be used
+    /// for all filters, or a specific operation can be declared
+    /// for each filter.
+    /// <para>
+    /// Order in which filters are called depends on
+    /// the position of the filter in the chain. It's probably
+    /// more efficient to place the most restrictive filters
+    /// /least computationally-intensive filters first.
+    /// </para>
+    /// </summary>
+    public class ChainedFilter : Filter
+    {
+        public enum Logic
+        {
+            NONE = -1,
+            OR = 0,
+            AND = 1,
+            ANDNOT = 2,
+            XOR = 3
+        };
+
+        /// <summary>Logical operation when none is declared. Defaults to OR.</summary>
+        public const Logic DEFAULT = Logic.OR;
+
+        /// <summary>The filter chain.</summary>
+        private Filter[] chain = null;
+
+        private Logic[] logicArray;
+
+        private Logic logic = Logic.NONE;
+
+        /// <summary>Chains the filters using the <see cref="DEFAULT"/> operation.</summary>
+        /// <param name="chain">The chain of filters</param>
+        public ChainedFilter(Filter[] chain)
+        {
+            this.chain = chain;
+        }
+
+        /// <summary>Chains the filters using one operation per filter.</summary>
+        /// <param name="chain">The chain of filters</param>
+        /// <param name="logicArray">Logical operations to apply between filters</param>
+        public ChainedFilter(Filter[] chain, Logic[] logicArray)
+        {
+            this.chain = chain;
+            this.logicArray = logicArray;
+        }
+
+        /// <summary>Chains the filters using the same operation for all of them.</summary>
+        /// <param name="chain">The chain of filters</param>
+        /// <param name="logic">Logical operation to apply to ALL filters</param>
+        public ChainedFilter(Filter[] chain, Logic logic)
+        {
+            this.chain = chain;
+            this.logic = logic;
+        }
+
+        /// <summary>
+        /// Evaluates the chain for one index segment.
+        /// </summary>
+        /// <param name="reader">Context of the segment to evaluate.</param>
+        /// <param name="acceptDocs">Documents allowed in the result, or null to allow all.</param>
+        public override DocIdSet GetDocIdSet(AtomicReaderContext reader, IBits acceptDocs)
+        {
+            int[] index = new int[1]; // use array as reference to modifiable int;
+            index[0] = 0; // an object attribute would not be thread safe.
+            DocIdSet chained;
+            if (logic != Logic.NONE)
+                chained = GetDocIdSet(reader, logic, index);
+            else if (logicArray != null)
+                chained = GetDocIdSet(reader, logicArray, index);
+            else
+                chained = GetDocIdSet(reader, DEFAULT, index);
+
+            // The chained filters are evaluated without acceptDocs; apply it once to the result.
+            return BitsFilteredDocIdSet.Wrap(chained, acceptDocs);
+        }
+
+        // Returns the iterator of a filter for the segment; never null.
+        private DocIdSetIterator GetDISI(Filter filter, AtomicReaderContext context)
+        {
+            DocIdSet docIdSet = filter.GetDocIdSet(context, null);
+            if (docIdSet == null)
+            {
+                return DocIdSetIterator.GetEmpty();
+            }
+
+            DocIdSetIterator iter = docIdSet.GetIterator();
+            return iter ?? DocIdSetIterator.GetEmpty();
+        }
+
+        // Builds the bitset the chain starts from and advances index past a consumed filter.
+        private OpenBitSetDISI InitialResult(AtomicReaderContext context, Logic logic, int[] index)
+        {
+            int maxDoc = context.AtomicReader.MaxDoc;
+            OpenBitSetDISI result;
+            /**
+             * First AND operation takes place against a completely false
+             * bitset and will always return zero results.
+             */
+            if (logic == Logic.AND)
+            {
+                result = new OpenBitSetDISI(GetDISI(chain[index[0]], context), maxDoc);
+                ++index[0];
+            }
+            else if (logic == Logic.ANDNOT)
+            {
+                result = new OpenBitSetDISI(GetDISI(chain[index[0]], context), maxDoc);
+                result.Flip(0, maxDoc); // NOTE: may set bits for deleted docs.
+                ++index[0];
+            }
+            else
+            {
+                result = new OpenBitSetDISI(maxDoc);
+            }
+            return result;
+        }
+
+        /// <summary>
+        /// Returns the computed bitset unchanged.
+        /// </summary>
+        protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs)
+        {
+            return result;
+        }
+
+        // Applies one logical operation to every remaining filter in the chain.
+        private DocIdSet GetDocIdSet(AtomicReaderContext context, Logic logic, int[] index)
+        {
+            OpenBitSetDISI result = InitialResult(context, logic, index);
+            for (; index[0] < chain.Length; index[0]++)
+            {
+                DoChain(result, logic, chain[index[0]].GetDocIdSet(context, null));
+            }
+            return FinalResult(result, context.AtomicReader.MaxDoc);
+        }
+
+        // Applies to each filter the logical operation declared at the same position.
+        private DocIdSet GetDocIdSet(AtomicReaderContext context, Logic[] logic, int[] index)
+        {
+            if (logic.Length != chain.Length)
+                throw new ArgumentException("Invalid number of elements in logic array");
+
+            OpenBitSetDISI result = InitialResult(context, logic[0], index);
+            for (; index[0] < chain.Length; index[0]++)
+            {
+                DoChain(result, logic[index[0]], chain[index[0]].GetDocIdSet(context, null));
+            }
+            return FinalResult(result, context.AtomicReader.MaxDoc);
+        }
+
+        public override String ToString()
+        {
+            StringBuilder sb = new StringBuilder();
+            sb.Append("ChainedFilter: [");
+            for (int i = 0; i < chain.Length; i++)
+            {
+                sb.Append(chain[i]);
+                sb.Append(' ');
+            }
+            sb.Append(']');
+            return sb.ToString();
+        }
+
+        // Combines one filter's documents into result using the given operation.
+        private void DoChain(OpenBitSetDISI result, Logic logic, DocIdSet dis)
+        {
+            if (dis is OpenBitSet)
+            {
+                // optimized case for OpenBitSets
+                switch (logic)
+                {
+                    case Logic.OR:
+                        result.Or((OpenBitSet)dis);
+                        break;
+                    case Logic.AND:
+                        result.And((OpenBitSet)dis);
+                        break;
+                    case Logic.ANDNOT:
+                        result.AndNot((OpenBitSet)dis);
+                        break;
+                    case Logic.XOR:
+                        result.Xor((OpenBitSet)dis);
+                        break;
+                    default:
+                        DoChain(result, DEFAULT, dis);
+                        break;
+                }
+            }
+            else
+            {
+                DocIdSetIterator disi = dis == null ? DocIdSetIterator.GetEmpty() : (dis.GetIterator() ?? DocIdSetIterator.GetEmpty());
+
+                switch (logic)
+                {
+                    case Logic.OR:
+                        result.InPlaceOr(disi);
+                        break;
+                    case Logic.AND:
+                        result.InPlaceAnd(disi);
+                        break;
+                    case Logic.ANDNOT:
+                        result.InPlaceNot(disi);
+                        break;
+                    case Logic.XOR:
+                        result.InPlaceXor(disi);
+                        break;
+                    default:
+                        DoChain(result, DEFAULT, dis);
+                        break;
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/MultiFacetLucene4/IFacetBitSetCalculator.cs b/MultiFacetLucene4/IFacetBitSetCalculator.cs
new file mode 100644
index 0000000..2c41259
--- /dev/null
+++
b/MultiFacetLucene4/IFacetBitSetCalculator.cs
@@ -0,0 +1,23 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Lucene.Net.Index;
+using Lucene.Net.Util;
+using MultiFacetLucene.Configuration;
+
+namespace MultiFacetLucene
+{
+    /// <summary>
+    /// Computes the document bitsets of a facet field's values.
+    /// </summary>
+    public interface IFacetBitSetCalculator
+    {
+        /// <summary>Returns the values of the facet field together with their bitsets and counts.</summary>
+        IEnumerable<FacetSearcher.FacetValues.FacetValueBitSet> GetFacetValueBitSets(IndexReader indexReader, FacetFieldInfo info);
+
+        /// <summary>Returns the bitset of a single value of the facet field.</summary>
+        OpenBitSetDISI GetFacetBitSet(IndexReader indexReader, FacetFieldInfo info, string value);
+    }
+}
diff --git a/MultiFacetLucene4/IFacetBitSetCalculatorProvider.cs b/MultiFacetLucene4/IFacetBitSetCalculatorProvider.cs
new file mode 100644
index 0000000..baffff8
--- /dev/null
+++ b/MultiFacetLucene4/IFacetBitSetCalculatorProvider.cs
@@ -0,0 +1,16 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace MultiFacetLucene
+{
+    /// <summary>
+    /// Selects the bitset calculator to use for a facet field.
+    /// </summary>
+    public interface IFacetBitSetCalculatorProvider
+    {
+        IFacetBitSetCalculator GetFacetBitSetCalculator(FacetFieldInfo info);
+    }
+}
diff --git a/MultiFacetLucene4/MultiFacetLucene4.csproj b/MultiFacetLucene4/MultiFacetLucene4.csproj new file mode 100644 index 0000000..312d19f --- /dev/null +++ b/MultiFacetLucene4/MultiFacetLucene4.csproj @@ -0,0 +1,12 @@ + + + + netstandard2.1 + enable + + + + + + +
diff --git a/MultiFacetLucene4/RangeFacetBitSetCalculator.cs b/MultiFacetLucene4/RangeFacetBitSetCalculator.cs
new file mode 100644
index 0000000..b484d32
--- /dev/null
+++ b/MultiFacetLucene4/RangeFacetBitSetCalculator.cs
@@ -0,0 +1,84 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using MultiFacetLucene.Configuration;
+
+namespace MultiFacetLucene
+{
+    /// <summary>
+    /// Calculates facet bitsets for range facets, one bitset per configured range.
+    /// </summary>
+    public class RangeFacetBitSetCalculator : IFacetBitSetCalculator
+    {
+        private readonly FacetSearcherConfiguration _facetSearcherConfiguration;
+
+        public RangeFacetBitSetCalculator(FacetSearcherConfiguration configuration)
+        {
+            _facetSearcherConfiguration = configuration;
+        }
+
+        /// <summary>
+        /// Yields every range of the facet whose document count reaches the configured minimum.
+        /// </summary>
+        public IEnumerable<FacetSearcher.FacetValues.FacetValueBitSet> GetFacetValueBitSets(IndexReader indexReader, FacetFieldInfo info)
+        {
+            foreach (var range in info.Ranges)
+            {
+                var bitset = CalculateOpenBitSetDisi(indexReader, info.FieldName, range.From, range.To);
+                var cnt = bitset.Cardinality();
+                if (cnt >= _facetSearcherConfiguration.MinimumCountInTotalDatasetForFacet)
+                {
+                    yield return
+                        new FacetSearcher.FacetValues.FacetValueBitSet {Value = range.Id, Bitset = bitset, Count = cnt};
+                }
+            }
+        }
+
+        /// <summary>
+        /// Calculates the bitset of the range whose id equals <paramref name="value"/>.
+        /// </summary>
+        /// <exception cref="ArgumentException">No range of the facet has that id.</exception>
+        public OpenBitSetDISI GetFacetBitSet(IndexReader indexReader, FacetFieldInfo info, string value)
+        {
+            var range = info.Ranges.FirstOrDefault(r => r.Id == value);
+            if (range == null)
+            {
+                throw new ArgumentException("No range with id '" + value + "' is defined for facet field '" + info.FieldName + "'.", nameof(value));
+            }
+
+            return CalculateOpenBitSetDisi(indexReader, info.FieldName, range.From, range.To);
+        }
+
+        /// <summary>
+        /// Collects, over all index segments, the documents matched by an inclusive term range query on the field.
+        /// </summary>
+        protected OpenBitSetDISI CalculateOpenBitSetDisi(IndexReader indexReader, string facetAttributeFieldName, string from, string to)
+        {
+            var facetQuery = new TermRangeQuery(facetAttributeFieldName, new BytesRef(from), new BytesRef(to), true, true);
+            var facetQueryFilter = new QueryWrapperFilter(facetQuery);
+            var disi = new OpenBitSetDISI(indexReader.MaxDoc);
+            foreach (var leaf in indexReader.Leaves)
+            {
+                var iterator = facetQueryFilter.GetDocIdSet(leaf.AtomicReader.AtomicContext, leaf.AtomicReader.LiveDocs)?.GetIterator();
+                if (iterator == null)
+                {
+                    continue; // nothing matches in this segment
+                }
+
+                // The iterator yields segment-local ids; add the segment's doc base to
+                // address the document in the bitset that spans the whole reader.
+                int doc;
+                while ((doc = iterator.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                {
+                    disi.FastSet(leaf.DocBase + doc);
+                }
+            }
+            return disi;
+        }
+    }
+}
diff --git a/MultiFacetLucene4/ResultCollection.cs b/MultiFacetLucene4/ResultCollection.cs
new file mode 100644
index 0000000..c9aaeee
--- /dev/null
+++ b/MultiFacetLucene4/ResultCollection.cs
@@ -0,0 +1,91 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace MultiFacetLucene
+{
+    /// <summary>
+    /// Collects the facet matches of one facet field: the selected values plus the
+    /// non-selected values, limited by the facet's MaxToFetchExcludingSelections.
+    /// </summary>
+    internal class ResultCollection
+    {
+        private readonly FacetFieldInfo _facetFieldInfoToCalculateFor;
+        private int _uncalculatedSelectedCount;
+        public long MinCountForNonSelected { get; protected set; }
+
+        public ResultCollection(FacetFieldInfo facetFieldInfoToCalculateFor)
+        {
+            MinCountForNonSelected = 0;
+            _facetFieldInfoToCalculateFor = facetFieldInfoToCalculateFor;
+            _uncalculatedSelectedCount = facetFieldInfoToCalculateFor.Selections.Count;
+            NonSelectedMatches = new List<FacetMatch>();
+            SelectedMatches = new List<FacetMatch>();
+        }
+
+        /// <summary>True when every selection was counted and the non-selected list is full.</summary>
+        public bool HaveEnoughResults
+        {
+            get { return _uncalculatedSelectedCount == 0 && NonSelectedMatches.Count >= _facetFieldInfoToCalculateFor.MaxToFetchExcludingSelections; }
+        }
+
+        /// <summary>True when the value is a selection and selections remain to be counted.</summary>
+        public bool IsSelected(string facetValue)
+        {
+            return _uncalculatedSelectedCount > 0 && _facetFieldInfoToCalculateFor.Selections.Contains(facetValue);
+        }
+
+        /// <summary>
+        /// Adds a non-selected match. Once the list is full, matches below the current minimum are
+        /// ignored and the minimum-count entries are removed while the list stays full without them.
+        /// </summary>
+        public void AddToNonSelected(FacetMatch match)
+        {
+            if (NonSelectedMatches.Count >= _facetFieldInfoToCalculateFor.MaxToFetchExcludingSelections)
+            {
+                if (match.Count < MinCountForNonSelected)
+                    return;
+                if (match.Count > MinCountForNonSelected)
+                {
+                    //Remove tail if possible
+                    while (true)
+                    {
+                        var allWithMinCount = NonSelectedMatches.Where(x => x.Count == MinCountForNonSelected).ToList();
+                        if (allWithMinCount.Count == 0)
+                            break;
+                        var countWhenAddingThisAndRemovingMin = NonSelectedMatches.Count - allWithMinCount.Count + 1;
+                        if (countWhenAddingThisAndRemovingMin >= _facetFieldInfoToCalculateFor.MaxToFetchExcludingSelections)
+                        {
+                            allWithMinCount.ForEach(x => NonSelectedMatches.Remove(x));
+                            // Min() throws on an empty list; 0 is treated as "no minimum yet" below.
+                            MinCountForNonSelected = NonSelectedMatches.Count == 0 ? 0 : NonSelectedMatches.Min(x => x.Count);
+                        }
+                        else
+                        {
+                            break;
+                        }
+                    }
+                }
+            }
+
+            MinCountForNonSelected = MinCountForNonSelected == 0 ? match.Count : Math.Min(MinCountForNonSelected, match.Count);
+
+            NonSelectedMatches.Add(match);
+        }
+
+        public void AddToSelected(FacetMatch match)
+        {
+            SelectedMatches.Add(match);
+            _uncalculatedSelectedCount--;
+        }
+
+        protected List<FacetMatch> NonSelectedMatches { get; set; }
+        private List<FacetMatch> SelectedMatches { get; set; }
+
+        /// <summary>Returns all collected matches ordered by descending count.</summary>
+        public IEnumerable<FacetMatch> GetList()
+        {
+            return SelectedMatches.Union(NonSelectedMatches).OrderByDescending(x => x.Count);
+        }
+    }
+}
\ No newline at end of file
diff --git a/MultiFacetLucene4/TermFacetBitSetCalulcator.cs b/MultiFacetLucene4/TermFacetBitSetCalulcator.cs
new file mode 100644
index 0000000..4739461
--- /dev/null
+++ b/MultiFacetLucene4/TermFacetBitSetCalulcator.cs
@@ -0,0 +1,94 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Lucene.Net.Index;
+using Lucene.Net.Search;
+using Lucene.Net.Util;
+using MultiFacetLucene.Configuration;
+
+namespace MultiFacetLucene
+{
+    /// <summary>
+    /// Calculates facet bitsets for term facets, one bitset per distinct term of the field.
+    /// </summary>
+    public class TermFacetBitSetCalulcator : IFacetBitSetCalculator
+    {
+        private readonly FacetSearcherConfiguration _facetSearcherConfiguration;
+
+        public TermFacetBitSetCalulcator(FacetSearcherConfiguration configuration)
+        {
+            _facetSearcherConfiguration = configuration;
+        }
+
+        /// <summary>
+        /// Yields every distinct term of the field whose document count reaches the configured minimum.
+        /// </summary>
+        public IEnumerable<FacetSearcher.FacetValues.FacetValueBitSet> GetFacetValueBitSets(IndexReader indexReader, FacetFieldInfo info)
+        {
+            // A term can occur in several segments but its bitset spans the whole reader,
+            // so each term is processed only once.
+            var seenTerms = new HashSet<string>();
+            foreach (var leaf in indexReader.Leaves)
+            {
+                var terms = leaf.AtomicReader.GetTerms(info.FieldName);
+                if (terms == null)
+                {
+                    continue; // the field has no terms in this segment
+                }
+
+                // The enumerator must be advanced before its first term can be read.
+                var termReader = terms.GetEnumerator();
+                while (termReader.MoveNext())
+                {
+                    var value = termReader.Term.Utf8ToString();
+                    if (!seenTerms.Add(value))
+                    {
+                        continue;
+                    }
+
+                    var bitset = CalculateOpenBitSetDisi(indexReader, info.FieldName, value);
+                    var cnt = bitset.Cardinality();
+                    if (cnt >= _facetSearcherConfiguration.MinimumCountInTotalDatasetForFacet)
+                    {
+                        yield return new FacetSearcher.FacetValues.FacetValueBitSet {Value = value, Bitset = bitset, Count = cnt};
+                    }
+                }
+            }
+        }
+
+        /// <summary>Calculates the bitset of a single term of the facet field.</summary>
+        public OpenBitSetDISI GetFacetBitSet(IndexReader indexReader, FacetFieldInfo info, string value)
+        {
+            return CalculateOpenBitSetDisi(indexReader, info.FieldName, value);
+        }
+
+        /// <summary>
+        /// Collects, over all index segments, the documents matched by a term query on the field.
+        /// </summary>
+        protected OpenBitSetDISI CalculateOpenBitSetDisi(IndexReader indexReader, string facetAttributeFieldName, string value)
+        {
+            var facetQuery = new TermQuery(new Term(facetAttributeFieldName, value));
+            var facetQueryFilter = new QueryWrapperFilter(facetQuery);
+            var disi = new OpenBitSetDISI(indexReader.MaxDoc);
+            foreach (var leaf in indexReader.Leaves)
+            {
+                var iterator = facetQueryFilter.GetDocIdSet(leaf.AtomicReader.AtomicContext, leaf.AtomicReader.LiveDocs)?.GetIterator();
+                if (iterator == null)
+                {
+                    continue; // nothing matches in this segment
+                }
+
+                // The iterator yields segment-local ids; add the segment's doc base to
+                // address the document in the bitset that spans the whole reader.
+                int doc;
+                while ((doc = iterator.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
+                {
+                    disi.FastSet(leaf.DocBase + doc);
+                }
+            }
+            return disi;
+        }
+    }
+}
diff --git a/MultiFacetLuceneNet.Tests/MultiFacetLuceneNet.Tests.csproj b/MultiFacetLuceneNet.Tests/MultiFacetLuceneNet.Tests.csproj index eb581ec..8dfc243 100644 --- a/MultiFacetLuceneNet.Tests/MultiFacetLuceneNet.Tests.csproj +++ b/MultiFacetLuceneNet.Tests/MultiFacetLuceneNet.Tests.csproj @@ -8,7 +8,7 @@ Properties MultiFacetLuceneNet.Tests MultiFacetLuceneNet.Tests - v4.5.1 + v4.8 512 {3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} 10.0 @@ -26,6 +26,7 @@ ..\ true +
true diff --git a/MultiFacetLuceneNet.Tests/app.config b/MultiFacetLuceneNet.Tests/app.config index 3ccce54..5d92d73 100644 --- a/MultiFacetLuceneNet.Tests/app.config +++ b/MultiFacetLuceneNet.Tests/app.config @@ -1,12 +1,12 @@ - + - - + + - \ No newline at end of file + diff --git a/MultiFacetLuceneNet.sln b/MultiFacetLuceneNet.sln index f177bfb..07bc0c0 100644 --- a/MultiFacetLuceneNet.sln +++ b/MultiFacetLuceneNet.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 2013 -VisualStudioVersion = 12.0.31101.0 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MultiFacetLucene", "MultiFacetLucene\MultiFacetLucene.csproj", "{086093FD-D444-48BE-B897-7FAC14133C23}" EndProject @@ -16,6 +16,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".nuget", ".nuget", "{B13A81 .nuget\NuGet.targets = .nuget\NuGet.targets EndProjectSection EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MultiFacetLucene4", "MultiFacetLucene4\MultiFacetLucene4.csproj", "{87059512-913A-4B3D-BD07-C13419852B81}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -34,8 +36,15 @@ Global {0065E899-596D-4B16-9CF5-E917DD957DAB}.Debug|Any CPU.Build.0 = Debug|Any CPU {0065E899-596D-4B16-9CF5-E917DD957DAB}.Release|Any CPU.ActiveCfg = Release|Any CPU {0065E899-596D-4B16-9CF5-E917DD957DAB}.Release|Any CPU.Build.0 = Release|Any CPU + {87059512-913A-4B3D-BD07-C13419852B81}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {87059512-913A-4B3D-BD07-C13419852B81}.Debug|Any CPU.Build.0 = Debug|Any CPU + {87059512-913A-4B3D-BD07-C13419852B81}.Release|Any CPU.ActiveCfg = Release|Any CPU + {87059512-913A-4B3D-BD07-C13419852B81}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + 
GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {D0C27F1D-550D-403C-9243-107D171EE003} + EndGlobalSection EndGlobal diff --git a/PerformanceTest/App.config b/PerformanceTest/App.config index 6fad07f..416fb85 100644 --- a/PerformanceTest/App.config +++ b/PerformanceTest/App.config @@ -1,14 +1,14 @@ - + - + - - + + - \ No newline at end of file + diff --git a/PerformanceTest/PerformanceTest.csproj b/PerformanceTest/PerformanceTest.csproj index 829b1fc..af650ee 100644 --- a/PerformanceTest/PerformanceTest.csproj +++ b/PerformanceTest/PerformanceTest.csproj @@ -9,11 +9,12 @@ Properties PerformanceTest PerformanceTest - v4.5.1 + v4.8 512 true ..\ true + AnyCPU From d3ceb16363a71c93d0c84417cd289ef50f720d8c Mon Sep 17 00:00:00 2001 From: Steve Temple Date: Tue, 16 Nov 2021 15:42:17 +0000 Subject: [PATCH 2/6] Tests running, and named to match old --- MultiFacetLucene.Tests/DocumentExtensions.cs | 17 + MultiFacetLucene.Tests/FacetSearcherTest.cs | 347 ++++++++++++++++++ .../MultiFacetLucene.Tests.csproj | 21 ++ MultiFacetLucene.Tests/PerformanceTest.cs | 110 ++++++ MultiFacetLucene.sln | 44 +++ .../FacetBitSetCalculatorProvider.cs | 7 +- MultiFacetLucene/FacetSearcher.cs | 63 ++-- MultiFacetLucene/Filter/ChainedFilter.cs | 273 -------------- MultiFacetLucene/MultiFacetLucene.csproj | 130 +------ MultiFacetLucene/Properties/AssemblyInfo.cs | 36 -- .../RangeFacetBitSetCalculator.cs | 11 +- MultiFacetLucene/TermFacetBitSetCalulcator.cs | 53 ++- MultiFacetLucene/app.config | 11 - MultiFacetLucene/packages.config | 6 - PerformanceTest/PerformanceTest.csproj | 6 - 15 files changed, 631 insertions(+), 504 deletions(-) create mode 100644 MultiFacetLucene.Tests/DocumentExtensions.cs create mode 100644 MultiFacetLucene.Tests/FacetSearcherTest.cs create mode 100644 MultiFacetLucene.Tests/MultiFacetLucene.Tests.csproj create mode 100644 MultiFacetLucene.Tests/PerformanceTest.cs create mode 100644 MultiFacetLucene.sln delete mode 100644 
MultiFacetLucene/Filter/ChainedFilter.cs delete mode 100644 MultiFacetLucene/Properties/AssemblyInfo.cs delete mode 100644 MultiFacetLucene/app.config delete mode 100644 MultiFacetLucene/packages.config diff --git a/MultiFacetLucene.Tests/DocumentExtensions.cs b/MultiFacetLucene.Tests/DocumentExtensions.cs new file mode 100644 index 0000000..1d195fc --- /dev/null +++ b/MultiFacetLucene.Tests/DocumentExtensions.cs @@ -0,0 +1,17 @@ +using System; +using Lucene.Net.Documents; + +namespace MultiFacetLucene.Tests +{ + public static class DocumentExtensions + { + public static Document AddField(this Document document, string name, string value, Field.Store store, Field.Index index) + { + if (String.IsNullOrEmpty(value)) + return document; + + document.Add(new Field(name, value, store, index)); + return document; + } + } +} \ No newline at end of file diff --git a/MultiFacetLucene.Tests/FacetSearcherTest.cs b/MultiFacetLucene.Tests/FacetSearcherTest.cs new file mode 100644 index 0000000..e475284 --- /dev/null +++ b/MultiFacetLucene.Tests/FacetSearcherTest.cs @@ -0,0 +1,347 @@ +using System.Collections.Generic; +using System.Linq; +using Lucene.Net.Documents; +using Lucene.Net.Index; +using Lucene.Net.Search; +using Lucene.Net.Store; +using Lucene.Net.Util; +using NUnit.Framework; +using Lucene.Net.Analysis.Standard; +using Lucene.Net.Analysis.Core; +using Lucene.Net.QueryParsers.Classic; + +namespace MultiFacetLucene.Tests +{ + [TestFixture] + public class FacetSearcherTest + { + private FacetSearcher _target; + + [SetUp] + public void TestInitialize() + { + _target = new FacetSearcher(SetupIndex()); + } + + [Test] + public void MatchAllQueryShouldReturnCorrectFacetsAndDocuments() + { + var facetFieldInfos = new List + { + new FacetFieldInfo{ FieldName = "color"}, + new FacetFieldInfo{ FieldName = "type"}, + }; + + + var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); + + var colorFacets = actual.Facets.Where(x => x.FacetFieldName == 
"color").ToList(); + var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); + + Assert.AreEqual(5, actual.Hits.TotalHits); + + Assert.AreEqual(3, colorFacets.Count); + Assert.AreEqual(4, typeFacets.Count); + + Assert.AreEqual(3, colorFacets.Single(x => x.Value == "yellow").Count); + Assert.AreEqual(1, colorFacets.Single(x => x.Value == "white").Count); + Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); + + Assert.AreEqual(2, typeFacets.Single(x => x.Value == "drink").Count); + Assert.AreEqual(1, typeFacets.Single(x => x.Value == "meat").Count); + Assert.AreEqual(3, typeFacets.Single(x => x.Value == "food").Count); + Assert.AreEqual(2, typeFacets.Single(x => x.Value == "fruit").Count); + } + + [Test] + public void DrilldownSingleFacetSingleValueShouldReturnCorrectFacetsAndDocuments() + { + var facetFieldInfos = new List + { + new FacetFieldInfo{ FieldName = "color", Selections = new List{"yellow"}}, + new FacetFieldInfo{ FieldName = "type"}, + }; + + var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); + + var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); + var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); + + Assert.AreEqual(3, actual.Hits.TotalHits); + + Assert.AreEqual(3, colorFacets.Count); + Assert.AreEqual(3, typeFacets.Count); + + Assert.AreEqual(3, colorFacets.Single(x => x.Value == "yellow").Count); + Assert.AreEqual(1, colorFacets.Single(x => x.Value == "white").Count); + Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); + + Assert.AreEqual(1, typeFacets.Single(x => x.Value == "meat").Count); + Assert.AreEqual(3, typeFacets.Single(x => x.Value == "food").Count); + Assert.AreEqual(2, typeFacets.Single(x => x.Value == "fruit").Count); + } + + [Test] + public void DrilldownSingleFacetMultiValueShouldReturnCorrectFacetsAndDocuments() + { + var facetFieldInfos = new List + { + new FacetFieldInfo{ 
FieldName = "color", Selections = new List{"yellow", "none"}}, + new FacetFieldInfo{ FieldName = "type"}, + }; + + var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); + + var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); + var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); + + Assert.AreEqual(4, actual.Hits.TotalHits); + + Assert.AreEqual(3, colorFacets.Count); + Assert.AreEqual(4, typeFacets.Count); + + Assert.AreEqual(3, colorFacets.Single(x => x.Value == "yellow").Count); + Assert.AreEqual(1, colorFacets.Single(x => x.Value == "white").Count); + Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); + + Assert.AreEqual(1, typeFacets.Single(x => x.Value == "meat").Count); + Assert.AreEqual(3, typeFacets.Single(x => x.Value == "food").Count); + Assert.AreEqual(2, typeFacets.Single(x => x.Value == "fruit").Count); + Assert.AreEqual(1, typeFacets.Single(x => x.Value == "drink").Count); + } + + [Test] + public void MaxFacetRestrictionShouldReturnCorrectFacetsAndDocuments() + { + var facetFieldInfos = new List + { + new FacetFieldInfo{ FieldName = "color", MaxToFetchExcludingSelections = 1}, + }; + + var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); + + var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); + + Assert.AreEqual(5, actual.Hits.TotalHits); + Assert.AreEqual(1, colorFacets.Count); + Assert.AreEqual(3, colorFacets.Single(x => x.Value == "yellow").Count); + } + + [Test] + public void MaxFacetRestrictionShouldStillReturnSelectedFacet() + { + var facetFieldInfos = new List + { + new FacetFieldInfo{ FieldName = "color", Selections = new List{"none"}, MaxToFetchExcludingSelections = 1}, + }; + + var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); + + var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); + + Assert.AreEqual(2, 
colorFacets.Count); + Assert.AreEqual(3, colorFacets.Single(x => x.Value == "yellow").Count); + Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); + } + + [Test] + public void MaxFacetRestrictionShouldReturnSelectedFacetAsWell() + { + var facetFieldInfos = new List + { + new FacetFieldInfo{ FieldName = "color", Selections = new List{"yellow", "none"}}, + new FacetFieldInfo{ FieldName = "type", MaxToFetchExcludingSelections = 2}, + }; + + var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); + + var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); + var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); + + Assert.AreEqual(4, actual.Hits.TotalHits); + + Assert.AreEqual(3, colorFacets.Count); + Assert.AreEqual(2, typeFacets.Count); + + Assert.AreEqual(3, colorFacets.Single(x => x.Value == "yellow").Count); + Assert.AreEqual(1, colorFacets.Single(x => x.Value == "white").Count); + Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); + + Assert.AreEqual(3, typeFacets.Single(x => x.Value == "food").Count); + Assert.AreEqual(2, typeFacets.Single(x => x.Value == "fruit").Count); + } + + + [Test] + public void MatchSpecifiedQueryShouldReturnCorrectFacetsAndDocuments() + { + var query = new QueryParser(LuceneVersion.LUCENE_48, string.Empty, new KeywordAnalyzer()).Parse("keywords:apa"); + + var facetFieldInfos = new List + { + new FacetFieldInfo{ FieldName = "color"}, + new FacetFieldInfo{ FieldName = "type"}, + }; + var actual = _target.SearchWithFacets(query, 100, facetFieldInfos); + + var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); + var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); + + Assert.AreEqual(2, actual.Hits.TotalHits); + Assert.AreEqual("Banana", _target.Doc(actual.Hits.ScoreDocs[0].Doc).GetField("title").GetStringValue()); + Assert.AreEqual("Water", 
_target.Doc(actual.Hits.ScoreDocs[1].Doc).GetField("title").GetStringValue()); + + Assert.AreEqual(2, colorFacets.Count); + Assert.AreEqual(3, typeFacets.Count); + + Assert.AreEqual(1, colorFacets.Single(x => x.Value == "yellow").Count); + Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); + + Assert.AreEqual(1, typeFacets.Single(x => x.Value == "drink").Count); + Assert.AreEqual(1, typeFacets.Single(x => x.Value == "food").Count); + Assert.AreEqual(1, typeFacets.Single(x => x.Value == "fruit").Count); + } + + [Test] + public void DrilldownMultiFacetsShouldReturnCorrectFacetsAndDocuments() + { + var facetFieldInfos = new List + { + new FacetFieldInfo{ FieldName = "color", Selections = new List{"yellow"}}, + new FacetFieldInfo{ FieldName = "type", Selections = new List{"fruit"}}, + }; + + var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); + + var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); + var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); + + Assert.AreEqual(2, actual.Hits.TotalHits); + + Assert.AreEqual(1, colorFacets.Count); + Assert.AreEqual(3, typeFacets.Count); + + Assert.AreEqual(2, colorFacets.Single(x => x.Value == "yellow").Count); // only fruits + + Assert.AreEqual(1, typeFacets.Single(x => x.Value == "meat").Count); //only yellow + Assert.AreEqual(2, typeFacets.Single(x => x.Value == "fruit").Count);//only yellow + Assert.AreEqual(3, typeFacets.Single(x => x.Value == "food").Count);//only yellow + } + + [Test] + public void IncludeEmptyFacetsShouldIncludeEmptyFacets() + { + var facetFieldInfos = new List + { + new FacetFieldInfo{ FieldName = "type", Selections = new List{"drink"} }, + new FacetFieldInfo{ FieldName = "color"} + }; + + var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos, true); + + var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); + + Assert.AreEqual(1, 
colorFacets.Count(x => x.Count == 0)); + } + + [Test] + public void DoNotIncludeEmptyFacetsShouldNotIncludeEmptyFacets() + { + var facetFieldInfos = new List + { + new FacetFieldInfo{ FieldName = "type", Selections = new List{"drink"} }, + new FacetFieldInfo{ FieldName = "color"} + }; + + var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos, false); + + var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); + + Assert.AreEqual(0, colorFacets.Count(x => x.Count == 0)); + } + + [Test] + public void RangeFacetsShouldReturnCorrectFacetsAndDocument() + { + var facetFieldInfos = new List + { + new FacetFieldInfo{ FieldName = "price", IsRange = true, Ranges = new List + { + new MultiFacetLucene.Range { Id = "A", From = "0", To = "10"}, + new MultiFacetLucene.Range { Id = "B", From = "0", To = "20"}, + new MultiFacetLucene.Range { Id = "C", From = "0", To = "30"} + }, + Selections = new List + { + "B" + }}, + new FacetFieldInfo{ FieldName = "color", Selections = new List()}, + new FacetFieldInfo{ FieldName = "type", Selections = new List()}, + }; + + var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); + + var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); + var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); + var priceFacets = actual.Facets.Where(x => x.FacetFieldName == "price").ToList(); + + Assert.AreEqual(3, actual.Hits.TotalHits); + + Assert.AreEqual(2, colorFacets.Count); + Assert.AreEqual(3, typeFacets.Count); + Assert.AreEqual(3, priceFacets.Count); + + Assert.AreEqual(2, priceFacets.Single(x => x.Value == "A").Count); + Assert.AreEqual(3, priceFacets.Single(x => x.Value == "B").Count); + Assert.AreEqual(4, priceFacets.Single(x => x.Value == "C").Count); + + Assert.AreEqual(2, colorFacets.Single(x => x.Value == "yellow").Count); + Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); + + 
Assert.AreEqual(2, typeFacets.Single(x => x.Value == "food").Count); + Assert.AreEqual(2, typeFacets.Single(x => x.Value == "fruit").Count); + Assert.AreEqual(1, typeFacets.Single(x => x.Value == "drink").Count); + } + + + protected static IndexReader SetupIndex() + { + var directory = new RAMDirectory(); + var writer = new IndexWriter(directory, new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48))); + writer.AddDocument(new Document() + .AddField("title", "Banana", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("color", "yellow", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("type", "food", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("type", "fruit", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("keywords", "apa hello whatever", Field.Store.YES, Field.Index.ANALYZED) + .AddField("price", "10", Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.AddDocument(new Document() + .AddField("title", "Apple", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("color", "yellow", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("type", "food", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("type", "fruit", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("price", "20", Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.AddDocument(new Document() + .AddField("title", "Burger", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("color", "yellow", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("type", "food", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("type", "meat", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("price", "30", Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.AddDocument(new Document() + .AddField("title", "Milk", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("color", "white", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("type", "drink", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("price", 
"40", Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.AddDocument(new Document() + .AddField("title", "Water", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("color", "none", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("type", "drink", Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("keywords", "apa hello cars", Field.Store.YES, Field.Index.ANALYZED) + .AddField("price", "0", Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.Flush(true, true); + writer.Commit(); + return DirectoryReader.Open(directory); + + } + } +} diff --git a/MultiFacetLucene.Tests/MultiFacetLucene.Tests.csproj b/MultiFacetLucene.Tests/MultiFacetLucene.Tests.csproj new file mode 100644 index 0000000..f542322 --- /dev/null +++ b/MultiFacetLucene.Tests/MultiFacetLucene.Tests.csproj @@ -0,0 +1,21 @@ + + + + net5.0 + enable + + + + + + + + + + + + + + + + diff --git a/MultiFacetLucene.Tests/PerformanceTest.cs b/MultiFacetLucene.Tests/PerformanceTest.cs new file mode 100644 index 0000000..54d5fbc --- /dev/null +++ b/MultiFacetLucene.Tests/PerformanceTest.cs @@ -0,0 +1,110 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using Lucene.Net.Analysis.Standard; +using Lucene.Net.Documents; +using Lucene.Net.Index; +using Lucene.Net.Search; +using Lucene.Net.Store; +using NUnit.Framework; + +namespace MultiFacetLucene.Tests +{ + //[Ignore] + [TestFixture] + public class PerformanceTest + { + private static FacetSearcher _target; + private static readonly Random _rnd = new Random(Guid.NewGuid().GetHashCode()); + + public void Warmup() + { + //Warmup to prefetch facet bitset + var facetFieldInfos = new List + { + new FacetFieldInfo{ FieldName = "color"}, + new FacetFieldInfo{ FieldName = "type"}, + }; + _target.SearchWithFacets(new TermQuery(new Term("Price", "5")), 100, facetFieldInfos); + } + + [SetUp] + public void TestInitialize() + { + _target = new FacetSearcher(SetupIndex()); + Warmup(); + } + + [Test] + public void 
MatchAllDocsAndCalculateFacetsPerformanceTest() + { + Warmup(); + var stopwatch = new Stopwatch(); + stopwatch.Start(); + + var facetFieldInfos = new List + { + new() { FieldName = "color"}, + new() { FieldName = "type"}, + }; + var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); + + + stopwatch.Stop(); + var vs = stopwatch.ElapsedMilliseconds; + Trace.WriteLine("Took " + vs + " ms"); + } + + [Test] + public void MatchAllDocsPerformanceTest() + { + var stopwatch = new Stopwatch(); + stopwatch.Start(); + + var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, new List()); + + + stopwatch.Stop(); + var vs = stopwatch.ElapsedMilliseconds; + Trace.WriteLine("Took " + vs + " ms"); + } + + + protected static IndexReader SetupIndex() + { + var directory = new RAMDirectory(); + var writer = new IndexWriter(directory, new IndexWriterConfig(Lucene.Net.Util.LuceneVersion.LUCENE_48, new StandardAnalyzer(Lucene.Net.Util.LuceneVersion.LUCENE_48))); + for (var i = 0; i < 50000; i++) + writer.AddDocument(new Document() + .AddField("title", Guid.NewGuid().ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("color", GenerateColor(), Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("type", GenerateFood(), Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("type", GenerateFruit(), Field.Store.YES, Field.Index.NOT_ANALYZED) + .AddField("price", "10", Field.Store.YES, Field.Index.NOT_ANALYZED)); + writer.Flush(true, true); + writer.Commit(); + return DirectoryReader.Open(directory); + } + + + private static string GenerateFruit() + { + return "fruit" + GetRandom(1000, 2000); // 1000 different values + } + + private static string GenerateFood() + { + return "food" + GetRandom(1000, 1100); // 100 different values + } + + private static string GenerateColor() + { + return "color" + GetRandom(1000, 1100); // 30 different values + } + + private static string GetRandom(int i, int i1) + { + return _rnd.Next(i, 
i1).ToString("00000"); + } + } +} \ No newline at end of file diff --git a/MultiFacetLucene.sln b/MultiFacetLucene.sln new file mode 100644 index 0000000..9a5982d --- /dev/null +++ b/MultiFacetLucene.sln @@ -0,0 +1,44 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PerformanceTest", "PerformanceTest\PerformanceTest.csproj", "{0065E899-596D-4B16-9CF5-E917DD957DAB}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".nuget", ".nuget", "{B13A814A-4ECA-4A5F-A340-FF4C6DCE5145}" + ProjectSection(SolutionItems) = preProject + .nuget\NuGet.Config = .nuget\NuGet.Config + .nuget\NuGet.exe = .nuget\NuGet.exe + .nuget\NuGet.targets = .nuget\NuGet.targets + EndProjectSection +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MultiFacetLucene", "MultiFacetLucene\MultiFacetLucene.csproj", "{87059512-913A-4B3D-BD07-C13419852B81}" +EndProject +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "MultiFacetLucene.Tests", "MultiFacetLucene.Tests\MultiFacetLucene.Tests.csproj", "{B2573CD0-8A9E-43EA-BFB3-6056FB41C2D2}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {0065E899-596D-4B16-9CF5-E917DD957DAB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {0065E899-596D-4B16-9CF5-E917DD957DAB}.Debug|Any CPU.Build.0 = Debug|Any CPU + {0065E899-596D-4B16-9CF5-E917DD957DAB}.Release|Any CPU.ActiveCfg = Release|Any CPU + {0065E899-596D-4B16-9CF5-E917DD957DAB}.Release|Any CPU.Build.0 = Release|Any CPU + {87059512-913A-4B3D-BD07-C13419852B81}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {87059512-913A-4B3D-BD07-C13419852B81}.Debug|Any CPU.Build.0 = Debug|Any CPU + {87059512-913A-4B3D-BD07-C13419852B81}.Release|Any 
CPU.ActiveCfg = Release|Any CPU + {87059512-913A-4B3D-BD07-C13419852B81}.Release|Any CPU.Build.0 = Release|Any CPU + {B2573CD0-8A9E-43EA-BFB3-6056FB41C2D2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B2573CD0-8A9E-43EA-BFB3-6056FB41C2D2}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B2573CD0-8A9E-43EA-BFB3-6056FB41C2D2}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B2573CD0-8A9E-43EA-BFB3-6056FB41C2D2}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {D0C27F1D-550D-403C-9243-107D171EE003} + EndGlobalSection +EndGlobal diff --git a/MultiFacetLucene/FacetBitSetCalculatorProvider.cs b/MultiFacetLucene/FacetBitSetCalculatorProvider.cs index 03eb244..d33100a 100644 --- a/MultiFacetLucene/FacetBitSetCalculatorProvider.cs +++ b/MultiFacetLucene/FacetBitSetCalculatorProvider.cs @@ -1,9 +1,4 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using MultiFacetLucene.Configuration; +using MultiFacetLucene.Configuration; namespace MultiFacetLucene { diff --git a/MultiFacetLucene/FacetSearcher.cs b/MultiFacetLucene/FacetSearcher.cs index 6cc202f..e4c0f64 100644 --- a/MultiFacetLucene/FacetSearcher.cs +++ b/MultiFacetLucene/FacetSearcher.cs @@ -2,10 +2,10 @@ using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; -using Lucene.Net.Analysis; +using System.Threading.Tasks; using Lucene.Net.Index; +using Lucene.Net.Queries; using Lucene.Net.Search; -using Lucene.Net.Store; using Lucene.Net.Util; using MultiFacetLucene.Configuration; @@ -15,27 +15,33 @@ public class FacetSearcher : IndexSearcher { private readonly ConcurrentDictionary _facetBitSetDictionary = new ConcurrentDictionary(); - public FacetSearcher(Directory path, FacetSearcherConfiguration facetSearcherConfiguration = null) - : base(path) + 
public FacetSearcher(IndexReader reader, FacetSearcherConfiguration? facetSearcherConfiguration = null) + : base(reader) { Initialize(facetSearcherConfiguration); } - public FacetSearcher(Directory path, bool readOnly, FacetSearcherConfiguration facetSearcherConfiguration = null) - : base(path, readOnly) + public FacetSearcher(IndexReader reader, TaskScheduler scheduler, FacetSearcherConfiguration? facetSearcherConfiguration = null) + : base(reader, scheduler) { Initialize(facetSearcherConfiguration); } - public FacetSearcher(IndexReader r, FacetSearcherConfiguration facetSearcherConfiguration = null) - : base(r) + public FacetSearcher(IndexReaderContext context, FacetSearcherConfiguration? facetSearcherConfiguration = null) + : base(context) { Initialize(facetSearcherConfiguration); } - + + public FacetSearcher(IndexReaderContext context, TaskScheduler scheduler, FacetSearcherConfiguration? facetSearcherConfiguration = null) + : base(context, scheduler) + { + Initialize(facetSearcherConfiguration); + } + public FacetSearcherConfiguration FacetSearcherConfiguration { get; protected set; } - private void Initialize(FacetSearcherConfiguration facetSearcherConfiguration) + private void Initialize(FacetSearcherConfiguration? facetSearcherConfiguration) { FacetSearcherConfiguration = facetSearcherConfiguration ?? 
FacetSearcherConfiguration.Default(); } @@ -92,10 +98,24 @@ private IEnumerable FindMatchesInQuery(Query baseQueryWithoutFacetDr { var calculations = 0; var queryFilter = new CachingWrapperFilter(CombineQueryWithFilter(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, allFacetFieldInfos, facetFieldInfoToCalculateFor.FieldName), filter)); - var bitsQueryWithoutFacetDrilldown = new OpenBitSetDISI(queryFilter.GetDocIdSet(IndexReader).Iterator(), IndexReader.MaxDoc); + + var bitsQueryWithoutFacetDrilldown = new OpenBitSetDISI(IndexReader.MaxDoc); + foreach (var leaf in IndexReader.Leaves) + { + var docSet = queryFilter.GetDocIdSet(leaf.AtomicReader.AtomicContext, leaf.AtomicReader.LiveDocs); + if (docSet == null) + { + continue; + } + var iterator = docSet.GetIterator(); + if (iterator == null) + { + continue; + } + bitsQueryWithoutFacetDrilldown.InPlaceOr(iterator); + } var baseQueryWithoutFacetDrilldownCopy = new OpenBitSetDISI(bitsQueryWithoutFacetDrilldown.Bits.Length); - baseQueryWithoutFacetDrilldownCopy.Bits = new long[bitsQueryWithoutFacetDrilldown.Bits.Length]; - + var docIdMappingArray = GetDocIdMappingArray(docIdMappingTable); var calculatedFacetCounts = new ResultCollection(facetFieldInfoToCalculateFor); @@ -109,9 +129,8 @@ private IEnumerable FindMatchesInQuery(Query baseQueryWithoutFacetDr break; } - bitsQueryWithoutFacetDrilldown.Bits.CopyTo(baseQueryWithoutFacetDrilldownCopy.Bits, 0); - baseQueryWithoutFacetDrilldownCopy.NumWords = bitsQueryWithoutFacetDrilldown.NumWords; - + baseQueryWithoutFacetDrilldownCopy.Union(bitsQueryWithoutFacetDrilldown); + var bitset = facetValueBitSet.Bitset ?? 
GetFacetBitSetCalculator(facetFieldInfoToCalculateFor).GetFacetBitSet(IndexReader, facetFieldInfoToCalculateFor, facetValueBitSet.Value); baseQueryWithoutFacetDrilldownCopy.And(bitset); @@ -119,7 +138,7 @@ private IEnumerable FindMatchesInQuery(Query baseQueryWithoutFacetDr { CascadeVariantValuesToParent(baseQueryWithoutFacetDrilldownCopy, docIdMappingArray); } - var count = baseQueryWithoutFacetDrilldownCopy.Cardinality(); + var count = baseQueryWithoutFacetDrilldownCopy.Cardinality; var match = new FacetMatch { @@ -155,8 +174,8 @@ private int[] GetDocIdMappingArray(Dictionary docIdMappingTable) private void CascadeVariantValuesToParent(OpenBitSetDISI bitset, int[] docIdMappingTable) { - var capacity = Math.Min(bitset.Capacity(), docIdMappingTable.Length); - if (bitset.IsEmpty()) + var capacity = Math.Min(bitset.Capacity, docIdMappingTable.Length); + if (bitset.IsEmpty) return; for (int i = 0; i < capacity; i++) { @@ -182,7 +201,7 @@ private Filter CombineQueryWithFilter(Query query, Filter filter) new QueryWrapperFilter(query), filter }, - ChainedFilter.Logic.AND + ChainedFilter.AND ); } @@ -201,14 +220,14 @@ protected Query CreateFacetedQuery(Query baseQueryWithoutFacetDrilldown, IList - ///*

- /// * Allows multiple {@link Filter}s to be chained. - /// * Logical operations such as NOT and XOR - /// * are applied between filters. One operation can be used - /// * for all filters, or a specific operation can be declared - /// * for each filter. - /// *

- /// *

- /// * Order in which filters are called depends on - /// * the position of the filter in the chain. It's probably - /// * more efficient to place the most restrictive filters - /// * /least computationally-intensive filters first. - /// *

- /// - public class ChainedFilter : Filter - { - public enum Logic - { - NONE = -1, - OR = 0, - AND = 1, - ANDNOT = 2, - XOR = 3 - }; - - ///Logical operation when none is declared. Defaults to OR - public const Logic DEFAULT = Logic.OR; - - /** The filter chain */ - private Filter[] chain = null; - - private Logic[] logicArray; - - private Logic logic = Logic.NONE; - - ///CtorThe chain of filters - public ChainedFilter(Filter[] chain) - { - this.chain = chain; - } - - ///ctor - ///The chain of filters - ///Logical operations to apply between filters - public ChainedFilter(Filter[] chain, Logic[] logicArray) - { - this.chain = chain; - this.logicArray = logicArray; - } - - ///ctor - ///The chain of filters - ///Logical operation to apply to ALL filters - public ChainedFilter(Filter[] chain, Logic logic) - { - this.chain = chain; - this.logic = logic; - } - - /// - public override DocIdSet GetDocIdSet(IndexReader reader) - { - int[] index = new int[1]; // use array as reference to modifiable int; - index[0] = 0; // an object attribute would not be thread safe. - if (logic != Logic.NONE) - return GetDocIdSet(reader, logic, index); - else if (logicArray != null) - return GetDocIdSet(reader, logicArray, index); - else - return GetDocIdSet(reader, DEFAULT, index); - } - - private DocIdSetIterator GetDISI(Filter filter, IndexReader reader) - { - DocIdSet docIdSet = filter.GetDocIdSet(reader); - if (docIdSet == null) - { - return DocIdSet.EMPTY_DOCIDSET.Iterator(); - } - else - { - DocIdSetIterator iter = docIdSet.Iterator(); - if (iter == null) - { - return DocIdSet.EMPTY_DOCIDSET.Iterator(); - } - else - { - return iter; - } - } - } - - private OpenBitSetDISI InitialResult(IndexReader reader, Logic logic, int[] index) - { - OpenBitSetDISI result; - /** - * First AND operation takes place against a completely false - * bitset and will always return zero results. 
- */ - if (logic == Logic.AND) - { - result = new OpenBitSetDISI(GetDISI(chain[index[0]], reader), reader.MaxDoc); - ++index[0]; - } - else if (logic == Logic.ANDNOT) - { - result = new OpenBitSetDISI(GetDISI(chain[index[0]], reader), reader.MaxDoc); - result.Flip(0, reader.MaxDoc); // NOTE: may set bits for deleted docs. - ++index[0]; - } - else - { - result = new OpenBitSetDISI(reader.MaxDoc); - } - return result; - } - - - /// - /// * Provide a SortedVIntList when it is definitely - /// * smaller than an OpenBitSet - /// * @deprecated Either use CachingWrapperFilter, or - /// * switch to a different DocIdSet implementation yourself. - /// * This method will be removed in Lucene 4.0 - /// - protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs) - { - return result; - } - - - /** - * Delegates to each filter in the chain. - * @param reader IndexReader - * @param logic Logical operation - * @return DocIdSet - */ - private DocIdSet GetDocIdSet(IndexReader reader, Logic logic, int[] index) - { - OpenBitSetDISI result = InitialResult(reader, logic, index); - for (; index[0] < chain.Length; index[0]++) - { - DoChain(result, logic, chain[index[0]].GetDocIdSet(reader)); - } - return FinalResult(result, reader.MaxDoc); - } - - /** - * Delegates to each filter in the chain. 
- * @param reader IndexReader - * @param logic Logical operation - * @return DocIdSet - */ - private DocIdSet GetDocIdSet(IndexReader reader, Logic[] logic, int[] index) - { - if (logic.Length != chain.Length) - throw new ArgumentException("Invalid number of elements in logic array"); - - OpenBitSetDISI result = InitialResult(reader, logic[0], index); - for (; index[0] < chain.Length; index[0]++) - { - DoChain(result, logic[index[0]], chain[index[0]].GetDocIdSet(reader)); - } - return FinalResult(result, reader.MaxDoc); - } - - public override String ToString() - { - StringBuilder sb = new StringBuilder(); - sb.Append("ChainedFilter: ["); - for (int i = 0; i < chain.Length; i++) - { - sb.Append(chain[i]); - sb.Append(' '); - } - sb.Append(']'); - return sb.ToString(); - } - - private void DoChain(OpenBitSetDISI result, Logic logic, DocIdSet dis) - { - - if (dis is OpenBitSet) - { - // optimized case for OpenBitSets - switch (logic) - { - case Logic.OR: - result.Or((OpenBitSet)dis); - break; - case Logic.AND: - result.And((OpenBitSet)dis); - break; - case Logic.ANDNOT: - result.AndNot((OpenBitSet)dis); - break; - case Logic.XOR: - result.Xor((OpenBitSet)dis); - break; - default: - DoChain(result, DEFAULT, dis); - break; - } - } - else - { - DocIdSetIterator disi; - if (dis == null) - { - disi = DocIdSet.EMPTY_DOCIDSET.Iterator(); - } - else - { - disi = dis.Iterator(); - if (disi == null) - { - disi = DocIdSet.EMPTY_DOCIDSET.Iterator(); - } - } - - switch (logic) - { - case Logic.OR: - result.InPlaceOr(disi); - break; - case Logic.AND: - result.InPlaceAnd(disi); - break; - case Logic.ANDNOT: - result.InPlaceNot(disi); - break; - case Logic.XOR: - result.InPlaceXor(disi); - break; - default: - DoChain(result, DEFAULT, dis); - break; - } - } - } - - } - -} \ No newline at end of file diff --git a/MultiFacetLucene/MultiFacetLucene.csproj b/MultiFacetLucene/MultiFacetLucene.csproj index bbeca5a..1c5a951 100644 --- a/MultiFacetLucene/MultiFacetLucene.csproj +++ 
b/MultiFacetLucene/MultiFacetLucene.csproj @@ -1,125 +1,13 @@ - - - + + - Debug - AnyCPU - {086093FD-D444-48BE-B897-7FAC14133C23} - Library - Properties - MultiFacetLucene - MultiFacetLucene - v4.8 - 512 - - - - - - - - - ..\..\GibeCommerce\GibeCommerce\ - true - + netstandard2.1 + enable - - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - - - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - - - - ..\packages\SharpZipLib.0.86.0\lib\20\ICSharpCode.SharpZipLib.dll - - - ..\packages\Lucene.Net.3.0.3\lib\NET40\Lucene.Net.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Analyzers.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Core.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.FastVectorHighlighter.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Highlighter.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Memory.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Queries.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Regex.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.SimpleFacetedSearch.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Snowball.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.SpellChecker.dll - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - + + - - - - - This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
- - - - - \ No newline at end of file + + diff --git a/MultiFacetLucene/Properties/AssemblyInfo.cs b/MultiFacetLucene/Properties/AssemblyInfo.cs deleted file mode 100644 index de7255d..0000000 --- a/MultiFacetLucene/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,36 +0,0 @@ -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. -[assembly: AssemblyTitle("MultiFacetLucene")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("Systementor AB")] -[assembly: AssemblyProduct("MultiFacetLucene")] -[assembly: AssemblyCopyright("Copyright © Stefan Holmberg 2014")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. 
-[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("f17322c5-71e1-446e-94c8-31f3cf50e8c4")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("3.0.*")] -[assembly: AssemblyFileVersion("3.0.0.0")] diff --git a/MultiFacetLucene/RangeFacetBitSetCalculator.cs b/MultiFacetLucene/RangeFacetBitSetCalculator.cs index 43d5fc6..b484d32 100644 --- a/MultiFacetLucene/RangeFacetBitSetCalculator.cs +++ b/MultiFacetLucene/RangeFacetBitSetCalculator.cs @@ -24,7 +24,7 @@ public RangeFacetBitSetCalculator(FacetSearcherConfiguration configuration) foreach (var range in info.Ranges) { var bitset = CalculateOpenBitSetDisi(indexReader, info.FieldName, range.From, range.To); - var cnt = bitset.Cardinality(); + var cnt = bitset.Cardinality; if (cnt >= _facetSearcherConfiguration.MinimumCountInTotalDatasetForFacet) { yield return @@ -45,9 +45,14 @@ public OpenBitSetDISI GetFacetBitSet(IndexReader indexReader, FacetFieldInfo inf protected OpenBitSetDISI CalculateOpenBitSetDisi(IndexReader indexReader, string facetAttributeFieldName, string from, string to) { - var facetQuery = new TermRangeQuery(facetAttributeFieldName, from, to, true, true); + var facetQuery = new TermRangeQuery(facetAttributeFieldName, new BytesRef(from), new BytesRef(to), true, true); var facetQueryFilter = new QueryWrapperFilter(facetQuery); - return new OpenBitSetDISI(facetQueryFilter.GetDocIdSet(indexReader).Iterator(), indexReader.MaxDoc); + var disi = new OpenBitSetDISI(indexReader.MaxDoc); + foreach (var leaf in indexReader.Leaves) + { + disi.InPlaceOr(facetQueryFilter.GetDocIdSet(leaf.AtomicReader.AtomicContext, 
leaf.AtomicReader.LiveDocs).GetIterator()); + } + return disi; } } } diff --git a/MultiFacetLucene/TermFacetBitSetCalulcator.cs b/MultiFacetLucene/TermFacetBitSetCalulcator.cs index b560df3..f6b3388 100644 --- a/MultiFacetLucene/TermFacetBitSetCalulcator.cs +++ b/MultiFacetLucene/TermFacetBitSetCalulcator.cs @@ -20,24 +20,27 @@ public TermFacetBitSetCalulcator(FacetSearcherConfiguration configuration) } public IEnumerable GetFacetValueBitSets(IndexReader indexReader, FacetFieldInfo info) - { - var termReader = indexReader.Terms(new Term(info.FieldName, String.Empty)); - do - { - if (termReader.Term.Field != info.FieldName) - yield break; - - var bitset = CalculateOpenBitSetDisi(indexReader, info.FieldName, termReader.Term.Text); - var cnt = bitset.Cardinality(); - if (cnt >= _facetSearcherConfiguration.MinimumCountInTotalDatasetForFacet) - yield return new FacetSearcher.FacetValues.FacetValueBitSet { Value = termReader.Term.Text, Bitset = bitset, Count = cnt }; - else - { - bitset = null; - } - } while (termReader.Next()); - termReader.Close(); - } + { + + foreach(var leaf in indexReader.Leaves) + { + + var termReader = leaf.AtomicReader.GetTerms(info.FieldName).GetEnumerator(); + do + { + var bitset = CalculateOpenBitSetDisi(indexReader, info.FieldName, termReader.Term.Utf8ToString()); + var cnt = bitset.Cardinality; + if (cnt >= _facetSearcherConfiguration.MinimumCountInTotalDatasetForFacet) + yield return new FacetSearcher.FacetValues.FacetValueBitSet + {Value = termReader.Term.Utf8ToString(), Bitset = bitset, Count = cnt}; + else + { + bitset = null; + } + } while (termReader.MoveNext()); + + } + } public OpenBitSetDISI GetFacetBitSet(IndexReader indexReader, FacetFieldInfo info, string value) { @@ -49,7 +52,17 @@ protected OpenBitSetDISI CalculateOpenBitSetDisi(IndexReader indexReader, string { var facetQuery = new TermQuery(new Term(facetAttributeFieldName, value)); var facetQueryFilter = new QueryWrapperFilter(facetQuery); - return new 
OpenBitSetDISI(facetQueryFilter.GetDocIdSet(indexReader).Iterator(), indexReader.MaxDoc); - } + var disi = new OpenBitSetDISI(indexReader.MaxDoc); + foreach (var leaf in indexReader.Leaves) + { + var docSet = facetQueryFilter.GetDocIdSet(leaf.AtomicReader.AtomicContext, leaf.AtomicReader.LiveDocs); + var iterator = docSet.GetIterator(); + if (iterator != null) + { + disi.InPlaceOr(iterator); + } + } + return disi; + } } } diff --git a/MultiFacetLucene/app.config b/MultiFacetLucene/app.config deleted file mode 100644 index bbc3602..0000000 --- a/MultiFacetLucene/app.config +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - diff --git a/MultiFacetLucene/packages.config b/MultiFacetLucene/packages.config deleted file mode 100644 index 591d3c9..0000000 --- a/MultiFacetLucene/packages.config +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/PerformanceTest/PerformanceTest.csproj b/PerformanceTest/PerformanceTest.csproj index af650ee..ebf123e 100644 --- a/PerformanceTest/PerformanceTest.csproj +++ b/PerformanceTest/PerformanceTest.csproj @@ -89,12 +89,6 @@ - - - {086093FD-D444-48BE-B897-7FAC14133C23} - MultiFacetLucene - - From 3bcf0e93cedb06aaaa359c4e021f0df72766605d Mon Sep 17 00:00:00 2001 From: Steve Temple Date: Tue, 16 Nov 2021 15:45:57 +0000 Subject: [PATCH 3/6] Renamed and moved back to old folder structure --- .../FacetSearcherConfiguration.cs | 20 - .../MemoryOptimizer/DefaultMemoryOptimizer.cs | 37 -- .../MemoryOptimizer/IMemoryOptimizer.cs | 9 - .../MemoryOptimizer/NoMemoryOptimizer.cs | 13 - .../FacetBitSetCalculatorProvider.cs | 21 -- MultiFacetLucene4/FacetFieldInfo.cs | 38 -- MultiFacetLucene4/FacetMatch.cs | 9 - MultiFacetLucene4/FacetSearchResult.cs | 11 - MultiFacetLucene4/FacetSearcher.cs | 262 ------------- MultiFacetLucene4/Filter/ChainedFilter.cs | 273 -------------- MultiFacetLucene4/IFacetBitSetCalculator.cs | 17 - .../IFacetBitSetCalculatorProvider.cs | 13 - MultiFacetLucene4/MultiFacetLucene4.csproj | 12 - 
.../RangeFacetBitSetCalculator.cs | 58 --- MultiFacetLucene4/ResultCollection.cs | 82 ---- .../TermFacetBitSetCalulcator.cs | 63 ---- .../DocumentExtensions.cs | 17 - .../FacetSearcherTest.cs | 351 ------------------ .../MultiFacetLuceneNet.Tests.csproj | 149 -------- MultiFacetLuceneNet.Tests/PerformanceTest.cs | 115 ------ .../Properties/AssemblyInfo.cs | 36 -- MultiFacetLuceneNet.Tests/app.config | 12 - MultiFacetLuceneNet.Tests/packages.config | 6 - MultiFacetLuceneNet.sln | 50 --- 24 files changed, 1674 deletions(-) delete mode 100644 MultiFacetLucene4/Configuration/FacetSearcherConfiguration.cs delete mode 100644 MultiFacetLucene4/Configuration/MemoryOptimizer/DefaultMemoryOptimizer.cs delete mode 100644 MultiFacetLucene4/Configuration/MemoryOptimizer/IMemoryOptimizer.cs delete mode 100644 MultiFacetLucene4/Configuration/MemoryOptimizer/NoMemoryOptimizer.cs delete mode 100644 MultiFacetLucene4/FacetBitSetCalculatorProvider.cs delete mode 100644 MultiFacetLucene4/FacetFieldInfo.cs delete mode 100644 MultiFacetLucene4/FacetMatch.cs delete mode 100644 MultiFacetLucene4/FacetSearchResult.cs delete mode 100644 MultiFacetLucene4/FacetSearcher.cs delete mode 100644 MultiFacetLucene4/Filter/ChainedFilter.cs delete mode 100644 MultiFacetLucene4/IFacetBitSetCalculator.cs delete mode 100644 MultiFacetLucene4/IFacetBitSetCalculatorProvider.cs delete mode 100644 MultiFacetLucene4/MultiFacetLucene4.csproj delete mode 100644 MultiFacetLucene4/RangeFacetBitSetCalculator.cs delete mode 100644 MultiFacetLucene4/ResultCollection.cs delete mode 100644 MultiFacetLucene4/TermFacetBitSetCalulcator.cs delete mode 100644 MultiFacetLuceneNet.Tests/DocumentExtensions.cs delete mode 100644 MultiFacetLuceneNet.Tests/FacetSearcherTest.cs delete mode 100644 MultiFacetLuceneNet.Tests/MultiFacetLuceneNet.Tests.csproj delete mode 100644 MultiFacetLuceneNet.Tests/PerformanceTest.cs delete mode 100644 MultiFacetLuceneNet.Tests/Properties/AssemblyInfo.cs delete mode 100644 
MultiFacetLuceneNet.Tests/app.config delete mode 100644 MultiFacetLuceneNet.Tests/packages.config delete mode 100644 MultiFacetLuceneNet.sln diff --git a/MultiFacetLucene4/Configuration/FacetSearcherConfiguration.cs b/MultiFacetLucene4/Configuration/FacetSearcherConfiguration.cs deleted file mode 100644 index e105616..0000000 --- a/MultiFacetLucene4/Configuration/FacetSearcherConfiguration.cs +++ /dev/null @@ -1,20 +0,0 @@ -using MultiFacetLucene.Configuration.MemoryOptimizer; - -namespace MultiFacetLucene.Configuration -{ - public class FacetSearcherConfiguration - { - public FacetSearcherConfiguration() - { - MinimumCountInTotalDatasetForFacet = 1; - MemoryOptimizer = null; - } - public static FacetSearcherConfiguration Default() - { - return new FacetSearcherConfiguration { MinimumCountInTotalDatasetForFacet = 1, MemoryOptimizer = null}; - } - public int MinimumCountInTotalDatasetForFacet { get; set; } - - public IMemoryOptimizer MemoryOptimizer { get; set; } - } -} \ No newline at end of file diff --git a/MultiFacetLucene4/Configuration/MemoryOptimizer/DefaultMemoryOptimizer.cs b/MultiFacetLucene4/Configuration/MemoryOptimizer/DefaultMemoryOptimizer.cs deleted file mode 100644 index 00b2f4f..0000000 --- a/MultiFacetLucene4/Configuration/MemoryOptimizer/DefaultMemoryOptimizer.cs +++ /dev/null @@ -1,37 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace MultiFacetLucene.Configuration.MemoryOptimizer -{ - public class DefaultMemoryOptimizer : IMemoryOptimizer - { - private readonly int _keepPercent; - private readonly int _optimizeIfTotalCountIsGreaterThan; - - public DefaultMemoryOptimizer(int keepPercent, int optimizeIfTotalCountIsGreaterThan) - { - _keepPercent = keepPercent; - _optimizeIfTotalCountIsGreaterThan = optimizeIfTotalCountIsGreaterThan; - } - - //Flag certain bitsets as lazyload (recalculate) - //If total number of facet values is larger than... 
- //have X percent removed - public IEnumerable SetAsLazyLoad(List facetValuesList) - { - var totalCount = facetValuesList.Sum(a => a.FacetValueBitSetList.Count); - if (totalCount < _optimizeIfTotalCountIsGreaterThan) yield break; - foreach (var facetValues in facetValuesList) - { - var index = 0; - var percent = Convert.ToInt32(totalCount * _keepPercent / 100.0); - foreach (var value in facetValues.FacetValueBitSetList) - { - if (index++ > percent) - yield return value; - } - } - } - } -} \ No newline at end of file diff --git a/MultiFacetLucene4/Configuration/MemoryOptimizer/IMemoryOptimizer.cs b/MultiFacetLucene4/Configuration/MemoryOptimizer/IMemoryOptimizer.cs deleted file mode 100644 index 3eaaa8a..0000000 --- a/MultiFacetLucene4/Configuration/MemoryOptimizer/IMemoryOptimizer.cs +++ /dev/null @@ -1,9 +0,0 @@ -using System.Collections.Generic; - -namespace MultiFacetLucene.Configuration.MemoryOptimizer -{ - public interface IMemoryOptimizer - { - IEnumerable SetAsLazyLoad(List facetValuesList); - } -} \ No newline at end of file diff --git a/MultiFacetLucene4/Configuration/MemoryOptimizer/NoMemoryOptimizer.cs b/MultiFacetLucene4/Configuration/MemoryOptimizer/NoMemoryOptimizer.cs deleted file mode 100644 index 2a16dc4..0000000 --- a/MultiFacetLucene4/Configuration/MemoryOptimizer/NoMemoryOptimizer.cs +++ /dev/null @@ -1,13 +0,0 @@ -using System.Collections.Generic; - -namespace MultiFacetLucene.Configuration.MemoryOptimizer -{ - public class NoMemoryOptimizer : IMemoryOptimizer - { - //Never flag any facetvalues as lazyload (recalculate) - public IEnumerable SetAsLazyLoad(List facetValuesList) - { - yield break; - } - } -} \ No newline at end of file diff --git a/MultiFacetLucene4/FacetBitSetCalculatorProvider.cs b/MultiFacetLucene4/FacetBitSetCalculatorProvider.cs deleted file mode 100644 index 03eb244..0000000 --- a/MultiFacetLucene4/FacetBitSetCalculatorProvider.cs +++ /dev/null @@ -1,21 +0,0 @@ -using System; -using System.Collections.Generic; -using 
System.Linq; -using System.Text; -using System.Threading.Tasks; -using MultiFacetLucene.Configuration; - -namespace MultiFacetLucene -{ - public class FacetBitSetCalculatorProvider : IFacetBitSetCalculatorProvider - { - public IFacetBitSetCalculator GetFacetBitSetCalculator(FacetFieldInfo info) - { - if (info.IsRange) - { - return new RangeFacetBitSetCalculator(new FacetSearcherConfiguration()); - } - return new TermFacetBitSetCalulcator(new FacetSearcherConfiguration()); - } - } -} diff --git a/MultiFacetLucene4/FacetFieldInfo.cs b/MultiFacetLucene4/FacetFieldInfo.cs deleted file mode 100644 index 3ce4ec2..0000000 --- a/MultiFacetLucene4/FacetFieldInfo.cs +++ /dev/null @@ -1,38 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace MultiFacetLucene -{ - public class FacetFieldInfo - { - public FacetFieldInfo() - { - Selections = new List(); - Ranges = new List(); - MaxToFetchExcludingSelections = 20; - } - public string FieldName { get; set; } - public List Selections { get; set; } - public int MaxToFetchExcludingSelections { get; set; } - public bool IsRange { get; set; } - public List Ranges { get; set; } - - public virtual bool HasSelections - { - get - { - return Selections.Any(); - } - } - } - - public class Range - { - public string Id { get; set; } - public string From { get; set; } - public string To { get; set; } - } - - -} \ No newline at end of file diff --git a/MultiFacetLucene4/FacetMatch.cs b/MultiFacetLucene4/FacetMatch.cs deleted file mode 100644 index 76249fb..0000000 --- a/MultiFacetLucene4/FacetMatch.cs +++ /dev/null @@ -1,9 +0,0 @@ -namespace MultiFacetLucene -{ - public class FacetMatch - { - public string FacetFieldName { get; set; } - public string Value { get; set; } - public long Count { get; set; } - } -} \ No newline at end of file diff --git a/MultiFacetLucene4/FacetSearchResult.cs b/MultiFacetLucene4/FacetSearchResult.cs deleted file mode 100644 index 8a520d9..0000000 --- 
a/MultiFacetLucene4/FacetSearchResult.cs +++ /dev/null @@ -1,11 +0,0 @@ -using System.Collections.Generic; -using Lucene.Net.Search; - -namespace MultiFacetLucene -{ - public class FacetSearchResult - { - public List Facets { get; set; } - public TopDocs Hits { get; set; } - } -} \ No newline at end of file diff --git a/MultiFacetLucene4/FacetSearcher.cs b/MultiFacetLucene4/FacetSearcher.cs deleted file mode 100644 index 52a2603..0000000 --- a/MultiFacetLucene4/FacetSearcher.cs +++ /dev/null @@ -1,262 +0,0 @@ -using System; -using System.Collections.Concurrent; -using System.Collections.Generic; -using System.Linq; -using System.Threading.Tasks; -using Lucene.Net.Analysis; -using Lucene.Net.Index; -using Lucene.Net.Search; -using Lucene.Net.Util; -using MultiFacetLucene.Configuration; - -namespace MultiFacetLucene -{ - public class FacetSearcher : IndexSearcher - { - private readonly ConcurrentDictionary _facetBitSetDictionary = new ConcurrentDictionary(); - - public FacetSearcher(IndexReader reader, FacetSearcherConfiguration? facetSearcherConfiguration = null) - : base(reader) - { - Initialize(facetSearcherConfiguration); - } - - public FacetSearcher(IndexReader reader, TaskScheduler scheduler, FacetSearcherConfiguration? facetSearcherConfiguration = null) - : base(reader, scheduler) - { - Initialize(facetSearcherConfiguration); - } - - public FacetSearcher(IndexReaderContext context, FacetSearcherConfiguration? facetSearcherConfiguration = null) - : base(context) - { - Initialize(facetSearcherConfiguration); - } - - public FacetSearcher(IndexReaderContext context, TaskScheduler scheduler, FacetSearcherConfiguration? facetSearcherConfiguration = null) - : base(context, scheduler) - { - Initialize(facetSearcherConfiguration); - } - - public FacetSearcherConfiguration FacetSearcherConfiguration { get; protected set; } - - private void Initialize(FacetSearcherConfiguration? facetSearcherConfiguration) - { - FacetSearcherConfiguration = facetSearcherConfiguration ?? 
FacetSearcherConfiguration.Default(); - } - - public virtual FacetSearchResult SearchWithFacets(Query baseQueryWithoutFacetDrilldown, int topResults, IList facetFieldInfos, bool includeEmptyFacets = false, Filter filter = null, Dictionary docIdMappingTable = null) - { - var hits = Search(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, facetFieldInfos, null), topResults); - - var facets = GetAllFacetsValues(baseQueryWithoutFacetDrilldown, facetFieldInfos, filter, docIdMappingTable); - if (!includeEmptyFacets) - { - facets = facets.Where(x => x.Count > 0); - } - - return new FacetSearchResult - { - Facets = facets.ToList(), - Hits = hits - }; - } - - private FacetValues GetOrCreateFacetBitSet(FacetFieldInfo fieldInfo) - { - return _facetBitSetDictionary.GetOrAdd(fieldInfo.FieldName, ReadBitSetsForValues(fieldInfo)); - } - - private IFacetBitSetCalculator GetFacetBitSetCalculator(FacetFieldInfo fieldInfo) - { - return new FacetBitSetCalculatorProvider().GetFacetBitSetCalculator(fieldInfo); - } - - private FacetValues ReadBitSetsForValues(FacetFieldInfo fieldInfo) - { - var facetValues = new FacetValues { Term = fieldInfo.FieldName }; - - facetValues.FacetValueBitSetList.AddRange(GetFacetBitSetCalculator(fieldInfo).GetFacetValueBitSets(IndexReader, fieldInfo).OrderByDescending(x => x.Count)); - - if (FacetSearcherConfiguration.MemoryOptimizer == null) return facetValues; - foreach (var facetValue in FacetSearcherConfiguration.MemoryOptimizer.SetAsLazyLoad(_facetBitSetDictionary.Values.ToList())) - facetValue.Bitset = null; - - return facetValues; - } - - private IEnumerable GetAllFacetsValues(Query baseQueryWithoutFacetDrilldown, IList facetFieldInfos, Filter filter, Dictionary docIdMappingTable) - { - return - facetFieldInfos.SelectMany( - facetFieldInfo => - FindMatchesInQuery(baseQueryWithoutFacetDrilldown, facetFieldInfos, facetFieldInfo, filter, docIdMappingTable)); - } - - private IEnumerable FindMatchesInQuery(Query baseQueryWithoutFacetDrilldown, IList 
allFacetFieldInfos, FacetFieldInfo facetFieldInfoToCalculateFor, Filter filter, Dictionary docIdMappingTable) - { - var calculations = 0; - var queryFilter = new CachingWrapperFilter(CombineQueryWithFilter(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, allFacetFieldInfos, facetFieldInfoToCalculateFor.FieldName), filter)); - var bitsQueryWithoutFacetDrilldown = new OpenBitSetDISI(queryFilter.GetDocIdSet(IndexReader).GetIterator(), IndexReader.MaxDoc); - var baseQueryWithoutFacetDrilldownCopy = new OpenBitSetDISI(bitsQueryWithoutFacetDrilldown.Bits.Length); - baseQueryWithoutFacetDrilldownCopy.Bits = new long[bitsQueryWithoutFacetDrilldown.Bits.Length]; - - var docIdMappingArray = GetDocIdMappingArray(docIdMappingTable); - - var calculatedFacetCounts = new ResultCollection(facetFieldInfoToCalculateFor); - foreach (var facetValueBitSet in GetOrCreateFacetBitSet(facetFieldInfoToCalculateFor).FacetValueBitSetList) - { - var isSelected = calculatedFacetCounts.IsSelected(facetValueBitSet.Value); - - if (!isSelected && facetValueBitSet.Count < calculatedFacetCounts.MinCountForNonSelected) //Impossible to get a better result - { - if (calculatedFacetCounts.HaveEnoughResults) - break; - } - - bitsQueryWithoutFacetDrilldown.Bits.CopyTo(baseQueryWithoutFacetDrilldownCopy.Bits, 0); - baseQueryWithoutFacetDrilldownCopy.NumWords = bitsQueryWithoutFacetDrilldown.NumWords; - - var bitset = facetValueBitSet.Bitset ?? 
GetFacetBitSetCalculator(facetFieldInfoToCalculateFor).GetFacetBitSet(IndexReader, facetFieldInfoToCalculateFor, facetValueBitSet.Value); - baseQueryWithoutFacetDrilldownCopy.And(bitset); - - if (docIdMappingArray != null) - { - CascadeVariantValuesToParent(baseQueryWithoutFacetDrilldownCopy, docIdMappingArray); - } - var count = baseQueryWithoutFacetDrilldownCopy.Cardinality(); - - var match = new FacetMatch - { - Count = count, - Value = facetValueBitSet.Value, - FacetFieldName = facetFieldInfoToCalculateFor.FieldName - }; - - calculations++; - if (isSelected) - calculatedFacetCounts.AddToSelected(match); - else - calculatedFacetCounts.AddToNonSelected(match); - } - - return calculatedFacetCounts.GetList(); - } - - private int[] GetDocIdMappingArray(Dictionary docIdMappingTable) - { - if (docIdMappingTable == null) - { - return null; - } - var mappingArray = new int[docIdMappingTable.Keys.Max() + 1]; - foreach (var kvp in docIdMappingTable) - { - mappingArray[kvp.Key] = kvp.Value; - } - - return mappingArray; - } - - private void CascadeVariantValuesToParent(OpenBitSetDISI bitset, int[] docIdMappingTable) - { - var capacity = Math.Min(bitset.Capacity, docIdMappingTable.Length); - if (bitset.IsEmpty) - return; - for (int i = 0; i < capacity; i++) - { - if (docIdMappingTable[i] != i) - { - if (bitset.FastGet(i)) - { - bitset.FastSet(docIdMappingTable[i]); - } - bitset.FastClear(i); - } - } - } - private Filter CombineQueryWithFilter(Query query, Filter filter) - { - if (filter == null) - { - return new QueryWrapperFilter(query); - } - - return new ChainedFilter(new [] - { - new QueryWrapperFilter(query), - filter - }, - ChainedFilter.Logic.AND - ); - - } - - protected Query CreateFacetedQuery(Query baseQueryWithoutFacetDrilldown, IList facetFieldInfos, string facetAttributeFieldName) - { - var facetsToAdd = facetFieldInfos.Where(x => x.FieldName != facetAttributeFieldName && x.HasSelections).ToList(); - if (!facetsToAdd.Any()) return baseQueryWithoutFacetDrilldown; 
- var booleanQuery = new BooleanQuery(); - booleanQuery.Add(baseQueryWithoutFacetDrilldown, Occur.MUST); - foreach (var facetFieldInfo in facetsToAdd) - { - if (facetFieldInfo.IsRange) - { - var selectedRanges = facetFieldInfo.Ranges.Where(r => facetFieldInfo.Selections.Contains(r.Id)).ToList(); - if (selectedRanges.Count == 1) - { - booleanQuery.Add( - new TermRangeQuery(facetFieldInfo.FieldName, new BytesRef(selectedRanges[0].From), new BytesRef(selectedRanges[0].To), true, true), Occur.MUST); - } - else - { - var valuesQuery = new BooleanQuery(); - foreach (var range in selectedRanges) - { - valuesQuery.Add(new TermRangeQuery(facetFieldInfo.FieldName, new BytesRef(range.From), new BytesRef(range.To), true, true), - Occur.SHOULD); - } - booleanQuery.Add(valuesQuery, Occur.MUST); - } - } - else - { - if (facetFieldInfo.Selections.Count == 1) - { - booleanQuery.Add(new TermQuery(new Term(facetFieldInfo.FieldName, facetFieldInfo.Selections[0])), Occur.MUST); - } - else - { - var valuesQuery = new BooleanQuery(); - foreach (var value in facetFieldInfo.Selections) - { - valuesQuery.Add(new TermQuery(new Term(facetFieldInfo.FieldName, value)), Occur.SHOULD); - } - booleanQuery.Add(valuesQuery, Occur.MUST); - } - } - } - return booleanQuery; - } - - public class FacetValues - { - public FacetValues() - { - FacetValueBitSetList = new List(); - } - - public string Term { get; set; } - - public List FacetValueBitSetList { get; set; } - - public class FacetValueBitSet - { - public string Value { get; set; } - public OpenBitSetDISI Bitset { get; set; } - public long Count { get; set; } - } - } - } -} \ No newline at end of file diff --git a/MultiFacetLucene4/Filter/ChainedFilter.cs b/MultiFacetLucene4/Filter/ChainedFilter.cs deleted file mode 100644 index b26706d..0000000 --- a/MultiFacetLucene4/Filter/ChainedFilter.cs +++ /dev/null @@ -1,273 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -using System; -using System.Text; - -using Lucene.Net.Search; -using Lucene.Net.Index; -using Lucene.Net.Util; - -namespace Lucene.Net.Analysis -{ - - /// - ///*

- /// * Allows multiple {@link Filter}s to be chained. - /// * Logical operations such as NOT and XOR - /// * are applied between filters. One operation can be used - /// * for all filters, or a specific operation can be declared - /// * for each filter. - /// *

- /// *

- /// * Order in which filters are called depends on - /// * the position of the filter in the chain. It's probably - /// * more efficient to place the most restrictive filters - /// * /least computationally-intensive filters first. - /// *

- ///
- public class ChainedFilter : Filter - { - public enum Logic - { - NONE = -1, - OR = 0, - AND = 1, - ANDNOT = 2, - XOR = 3 - }; - - ///Logical operation when none is declared. Defaults to OR - public const Logic DEFAULT = Logic.OR; - - /** The filter chain */ - private Filter[] chain = null; - - private Logic[] logicArray; - - private Logic logic = Logic.NONE; - - ///CtorThe chain of filters - public ChainedFilter(Filter[] chain) - { - this.chain = chain; - } - - ///ctor - ///The chain of filters - ///Logical operations to apply between filters - public ChainedFilter(Filter[] chain, Logic[] logicArray) - { - this.chain = chain; - this.logicArray = logicArray; - } - - ///ctor - ///The chain of filters - ///Logical operation to apply to ALL filters - public ChainedFilter(Filter[] chain, Logic logic) - { - this.chain = chain; - this.logic = logic; - } - - /// - public override DocIdSet GetDocIdSet(AtomicReaderContext reader, IBits acceptDocs) - { - int[] index = new int[1]; // use array as reference to modifiable int; - index[0] = 0; // an object attribute would not be thread safe. - if (logic != Logic.NONE) - return GetDocIdSet(reader, logic, index); - else if (logicArray != null) - return GetDocIdSet(reader, logicArray, index); - else - return GetDocIdSet(reader, DEFAULT, index); - } - - private DocIdSetIterator GetDISI(Filter filter, IndexReader reader) - { - DocIdSet docIdSet = filter.GetDocIdSet(reader); - if (docIdSet == null) - { - return DocIdSet.EMPTY_DOCIDSET.Iterator(); - } - else - { - DocIdSetIterator iter = docIdSet.GetIterator(); - if (iter == null) - { - return DocIdSet.EMPTY_DOCIDSET.Iterator(); - } - else - { - return iter; - } - } - } - - private OpenBitSetDISI InitialResult(IndexReader reader, Logic logic, int[] index) - { - OpenBitSetDISI result; - /** - * First AND operation takes place against a completely false - * bitset and will always return zero results. 
- */ - if (logic == Logic.AND) - { - result = new OpenBitSetDISI(GetDISI(chain[index[0]], reader), reader.MaxDoc); - ++index[0]; - } - else if (logic == Logic.ANDNOT) - { - result = new OpenBitSetDISI(GetDISI(chain[index[0]], reader), reader.MaxDoc); - result.Flip(0, reader.MaxDoc); // NOTE: may set bits for deleted docs. - ++index[0]; - } - else - { - result = new OpenBitSetDISI(reader.MaxDoc); - } - return result; - } - - - /// - /// * Provide a SortedVIntList when it is definitely - /// * smaller than an OpenBitSet - /// * @deprecated Either use CachingWrapperFilter, or - /// * switch to a different DocIdSet implementation yourself. - /// * This method will be removed in Lucene 4.0 - /// - protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs) - { - return result; - } - - - /** - * Delegates to each filter in the chain. - * @param reader IndexReader - * @param logic Logical operation - * @return DocIdSet - */ - private DocIdSet GetDocIdSet(IndexReader reader, Logic logic, int[] index) - { - OpenBitSetDISI result = InitialResult(reader, logic, index); - for (; index[0] < chain.Length; index[0]++) - { - DoChain(result, logic, chain[index[0]].GetDocIdSet(reader)); - } - return FinalResult(result, reader.MaxDoc); - } - - /** - * Delegates to each filter in the chain. 
- * @param reader IndexReader - * @param logic Logical operation - * @return DocIdSet - */ - private DocIdSet GetDocIdSet(IndexReader reader, Logic[] logic, int[] index) - { - if (logic.Length != chain.Length) - throw new ArgumentException("Invalid number of elements in logic array"); - - OpenBitSetDISI result = InitialResult(reader, logic[0], index); - for (; index[0] < chain.Length; index[0]++) - { - DoChain(result, logic[index[0]], chain[index[0]].GetDocIdSet(reader)); - } - return FinalResult(result, reader.MaxDoc); - } - - public override String ToString() - { - StringBuilder sb = new StringBuilder(); - sb.Append("ChainedFilter: ["); - for (int i = 0; i < chain.Length; i++) - { - sb.Append(chain[i]); - sb.Append(' '); - } - sb.Append(']'); - return sb.ToString(); - } - - private void DoChain(OpenBitSetDISI result, Logic logic, DocIdSet dis) - { - - if (dis is OpenBitSet) - { - // optimized case for OpenBitSets - switch (logic) - { - case Logic.OR: - result.Or((OpenBitSet)dis); - break; - case Logic.AND: - result.And((OpenBitSet)dis); - break; - case Logic.ANDNOT: - result.AndNot((OpenBitSet)dis); - break; - case Logic.XOR: - result.Xor((OpenBitSet)dis); - break; - default: - DoChain(result, DEFAULT, dis); - break; - } - } - else - { - DocIdSetIterator disi; - if (dis == null) - { - disi = DocIdSet.EMPTY_DOCIDSET.Iterator(); - } - else - { - disi = dis.Iterator(); - if (disi == null) - { - disi = DocIdSet.EMPTY_DOCIDSET.Iterator(); - } - } - - switch (logic) - { - case Logic.OR: - result.InPlaceOr(disi); - break; - case Logic.AND: - result.InPlaceAnd(disi); - break; - case Logic.ANDNOT: - result.InPlaceNot(disi); - break; - case Logic.XOR: - result.InPlaceXor(disi); - break; - default: - DoChain(result, DEFAULT, dis); - break; - } - } - } - - } - -} \ No newline at end of file diff --git a/MultiFacetLucene4/IFacetBitSetCalculator.cs b/MultiFacetLucene4/IFacetBitSetCalculator.cs deleted file mode 100644 index 2c41259..0000000 --- 
a/MultiFacetLucene4/IFacetBitSetCalculator.cs +++ /dev/null @@ -1,17 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using Lucene.Net.Index; -using Lucene.Net.Util; -using MultiFacetLucene.Configuration; - -namespace MultiFacetLucene -{ - public interface IFacetBitSetCalculator - { - IEnumerable GetFacetValueBitSets(IndexReader indexReader, FacetFieldInfo info); - OpenBitSetDISI GetFacetBitSet(IndexReader indexReader, FacetFieldInfo info, string value); - } -} diff --git a/MultiFacetLucene4/IFacetBitSetCalculatorProvider.cs b/MultiFacetLucene4/IFacetBitSetCalculatorProvider.cs deleted file mode 100644 index baffff8..0000000 --- a/MultiFacetLucene4/IFacetBitSetCalculatorProvider.cs +++ /dev/null @@ -1,13 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace MultiFacetLucene -{ - public interface IFacetBitSetCalculatorProvider - { - IFacetBitSetCalculator GetFacetBitSetCalculator(FacetFieldInfo info); - } -} diff --git a/MultiFacetLucene4/MultiFacetLucene4.csproj b/MultiFacetLucene4/MultiFacetLucene4.csproj deleted file mode 100644 index 312d19f..0000000 --- a/MultiFacetLucene4/MultiFacetLucene4.csproj +++ /dev/null @@ -1,12 +0,0 @@ - - - - netstandard2.1 - enable - - - - - - - diff --git a/MultiFacetLucene4/RangeFacetBitSetCalculator.cs b/MultiFacetLucene4/RangeFacetBitSetCalculator.cs deleted file mode 100644 index b484d32..0000000 --- a/MultiFacetLucene4/RangeFacetBitSetCalculator.cs +++ /dev/null @@ -1,58 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using Lucene.Net.Index; -using Lucene.Net.Search; -using Lucene.Net.Util; -using MultiFacetLucene.Configuration; - -namespace MultiFacetLucene -{ - public class RangeFacetBitSetCalculator : IFacetBitSetCalculator - { - private FacetSearcherConfiguration 
_facetSearcherConfiguration; - - public RangeFacetBitSetCalculator(FacetSearcherConfiguration configuration) - { - _facetSearcherConfiguration = configuration; - } - - public IEnumerable GetFacetValueBitSets(IndexReader indexReader, FacetFieldInfo info) - { - foreach (var range in info.Ranges) - { - var bitset = CalculateOpenBitSetDisi(indexReader, info.FieldName, range.From, range.To); - var cnt = bitset.Cardinality; - if (cnt >= _facetSearcherConfiguration.MinimumCountInTotalDatasetForFacet) - { - yield return - new FacetSearcher.FacetValues.FacetValueBitSet {Value = range.Id, Bitset = bitset, Count = cnt}; - } - else - { - bitset = null; - } - } - } - - public OpenBitSetDISI GetFacetBitSet(IndexReader indexReader, FacetFieldInfo info, string value) - { - var range = info.Ranges.FirstOrDefault(r => r.Id == value); - return CalculateOpenBitSetDisi(indexReader, info.FieldName, range.From, range.To); - } - - protected OpenBitSetDISI CalculateOpenBitSetDisi(IndexReader indexReader, string facetAttributeFieldName, string from, string to) - { - var facetQuery = new TermRangeQuery(facetAttributeFieldName, new BytesRef(from), new BytesRef(to), true, true); - var facetQueryFilter = new QueryWrapperFilter(facetQuery); - var disi = new OpenBitSetDISI(indexReader.MaxDoc); - foreach (var leaf in indexReader.Leaves) - { - disi.InPlaceOr(facetQueryFilter.GetDocIdSet(leaf.AtomicReader.AtomicContext, leaf.AtomicReader.LiveDocs).GetIterator()); - } - return disi; - } - } -} diff --git a/MultiFacetLucene4/ResultCollection.cs b/MultiFacetLucene4/ResultCollection.cs deleted file mode 100644 index c9aaeee..0000000 --- a/MultiFacetLucene4/ResultCollection.cs +++ /dev/null @@ -1,82 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; - -namespace MultiFacetLucene -{ - internal class ResultCollection - { - private readonly FacetFieldInfo _facetFieldInfoToCalculateFor; - private int _uncalculatedSelectedCount; - public long MinCountForNonSelected { get; protected 
set; } - - public ResultCollection(FacetFieldInfo facetFieldInfoToCalculateFor) - { - MinCountForNonSelected = 0; - _facetFieldInfoToCalculateFor = facetFieldInfoToCalculateFor; - _uncalculatedSelectedCount = facetFieldInfoToCalculateFor.Selections.Count; - NonSelectedMatches = new List(); - SelectedMatches = new List(); - } - - public bool HaveEnoughResults - { - get { return _uncalculatedSelectedCount == 0 && NonSelectedMatches.Count >= _facetFieldInfoToCalculateFor.MaxToFetchExcludingSelections; } - } - - public bool IsSelected(string facetValue) - { - return _uncalculatedSelectedCount > 0 && _facetFieldInfoToCalculateFor.Selections.Contains(facetValue); - } - - - - public void AddToNonSelected(FacetMatch match) - { - if (NonSelectedMatches.Count >= _facetFieldInfoToCalculateFor.MaxToFetchExcludingSelections) - { - if (match.Count < MinCountForNonSelected) - return; - if (match.Count > MinCountForNonSelected) - { - //Remove tail if possible - while (true) - { - var allWithMinCount = NonSelectedMatches.Where(x => x.Count == MinCountForNonSelected).ToList(); - if (allWithMinCount.Count == 0) - break; - var countWhenAddingThisAndRemovingMin = NonSelectedMatches.Count - allWithMinCount.Count + 1; - if (countWhenAddingThisAndRemovingMin >= _facetFieldInfoToCalculateFor.MaxToFetchExcludingSelections) - { - allWithMinCount.ForEach(x => NonSelectedMatches.Remove(x)); - MinCountForNonSelected = NonSelectedMatches.Min(x => x.Count); - } - else - { - break; - } - } - - } - } - - MinCountForNonSelected = MinCountForNonSelected == 0 ? 
match.Count : Math.Min(MinCountForNonSelected, match.Count); - - NonSelectedMatches.Add(match); - } - - public void AddToSelected(FacetMatch match) - { - SelectedMatches.Add(match); - _uncalculatedSelectedCount--; - } - - protected List NonSelectedMatches { get; set; } - private List SelectedMatches { get; set; } - - public IEnumerable GetList() - { - return SelectedMatches.Union(NonSelectedMatches).OrderByDescending(x => x.Count); - } - } -} \ No newline at end of file diff --git a/MultiFacetLucene4/TermFacetBitSetCalulcator.cs b/MultiFacetLucene4/TermFacetBitSetCalulcator.cs deleted file mode 100644 index 4739461..0000000 --- a/MultiFacetLucene4/TermFacetBitSetCalulcator.cs +++ /dev/null @@ -1,63 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using Lucene.Net.Index; -using Lucene.Net.Search; -using Lucene.Net.Util; -using MultiFacetLucene.Configuration; - -namespace MultiFacetLucene -{ - public class TermFacetBitSetCalulcator : IFacetBitSetCalculator - { - private readonly FacetSearcherConfiguration _facetSearcherConfiguration; - - public TermFacetBitSetCalulcator(FacetSearcherConfiguration configuration) - { - _facetSearcherConfiguration = configuration; - } - - public IEnumerable GetFacetValueBitSets(IndexReader indexReader, FacetFieldInfo info) - { - - foreach(var leaf in indexReader.Leaves) - { - - var termReader = leaf.AtomicReader.GetTerms(info.FieldName).GetEnumerator(); - do - { - var bitset = CalculateOpenBitSetDisi(indexReader, info.FieldName, termReader.Term.Utf8ToString()); - var cnt = bitset.Cardinality; - if (cnt >= _facetSearcherConfiguration.MinimumCountInTotalDatasetForFacet) - yield return new FacetSearcher.FacetValues.FacetValueBitSet - {Value = termReader.Term.Utf8ToString(), Bitset = bitset, Count = cnt}; - else - { - bitset = null; - } - } while (termReader.MoveNext()); - - } - } - - public OpenBitSetDISI GetFacetBitSet(IndexReader indexReader, FacetFieldInfo 
info, string value) - { - return CalculateOpenBitSetDisi(indexReader, info.FieldName, value); - } - - - protected OpenBitSetDISI CalculateOpenBitSetDisi(IndexReader indexReader, string facetAttributeFieldName, string value) - { - var facetQuery = new TermQuery(new Term(facetAttributeFieldName, value)); - var facetQueryFilter = new QueryWrapperFilter(facetQuery); - var disi = new OpenBitSetDISI(indexReader.MaxDoc); - foreach (var leaf in indexReader.Leaves) - { - disi.InPlaceOr(facetQueryFilter.GetDocIdSet(leaf.AtomicReader.AtomicContext, leaf.AtomicReader.LiveDocs).GetIterator()); - } - return disi; - } - } -} diff --git a/MultiFacetLuceneNet.Tests/DocumentExtensions.cs b/MultiFacetLuceneNet.Tests/DocumentExtensions.cs deleted file mode 100644 index 3c82ccf..0000000 --- a/MultiFacetLuceneNet.Tests/DocumentExtensions.cs +++ /dev/null @@ -1,17 +0,0 @@ -using System; -using Lucene.Net.Documents; - -namespace MultiFacetLuceneNet.Tests -{ - public static class DocumentExtensions - { - public static Document AddField(this Document document, string name, string value, Field.Store store, Field.Index index) - { - if (String.IsNullOrEmpty(value)) - return document; - - document.Add(new Field(name, value, store, index)); - return document; - } - } -} \ No newline at end of file diff --git a/MultiFacetLuceneNet.Tests/FacetSearcherTest.cs b/MultiFacetLuceneNet.Tests/FacetSearcherTest.cs deleted file mode 100644 index 4ac46fe..0000000 --- a/MultiFacetLuceneNet.Tests/FacetSearcherTest.cs +++ /dev/null @@ -1,351 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Linq; -using Lucene.Net.Analysis; -using Lucene.Net.Analysis.Standard; -using Lucene.Net.Documents; -using Lucene.Net.Index; -using Lucene.Net.QueryParsers; -using Lucene.Net.Search; -using Lucene.Net.Store; -using Microsoft.VisualStudio.TestTools.UnitTesting; -using MultiFacetLucene; -using Lucene.Net.Util; - -namespace MultiFacetLuceneNet.Tests -{ - [TestClass] - public class FacetSearcherTest - { - 
private FacetSearcher _target; - - [TestInitialize] - public void TestInitialize() - { - _target = new FacetSearcher(SetupIndex()); - } - - [TestMethod] - public void MatchAllQueryShouldReturnCorrectFacetsAndDocuments() - { - var facetFieldInfos = new List - { - new FacetFieldInfo{ FieldName = "color"}, - new FacetFieldInfo{ FieldName = "type"}, - }; - - - var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); - - var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); - var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); - - Assert.AreEqual(5, actual.Hits.TotalHits); - - Assert.AreEqual(3, colorFacets.Count); - Assert.AreEqual(4, typeFacets.Count); - - Assert.AreEqual(3, colorFacets.Single(x => x.Value == "yellow").Count); - Assert.AreEqual(1, colorFacets.Single(x => x.Value == "white").Count); - Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); - - Assert.AreEqual(2, typeFacets.Single(x => x.Value == "drink").Count); - Assert.AreEqual(1, typeFacets.Single(x => x.Value == "meat").Count); - Assert.AreEqual(3, typeFacets.Single(x => x.Value == "food").Count); - Assert.AreEqual(2, typeFacets.Single(x => x.Value == "fruit").Count); - } - - [TestMethod] - public void DrilldownSingleFacetSingleValueShouldReturnCorrectFacetsAndDocuments() - { - var facetFieldInfos = new List - { - new FacetFieldInfo{ FieldName = "color", Selections = new List{"yellow"}}, - new FacetFieldInfo{ FieldName = "type"}, - }; - - var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); - - var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); - var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); - - Assert.AreEqual(3, actual.Hits.TotalHits); - - Assert.AreEqual(3, colorFacets.Count); - Assert.AreEqual(3, typeFacets.Count); - - Assert.AreEqual(3, colorFacets.Single(x => x.Value == "yellow").Count); - 
Assert.AreEqual(1, colorFacets.Single(x => x.Value == "white").Count); - Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); - - Assert.AreEqual(1, typeFacets.Single(x => x.Value == "meat").Count); - Assert.AreEqual(3, typeFacets.Single(x => x.Value == "food").Count); - Assert.AreEqual(2, typeFacets.Single(x => x.Value == "fruit").Count); - } - - [TestMethod] - public void DrilldownSingleFacetMultiValueShouldReturnCorrectFacetsAndDocuments() - { - var facetFieldInfos = new List - { - new FacetFieldInfo{ FieldName = "color", Selections = new List{"yellow", "none"}}, - new FacetFieldInfo{ FieldName = "type"}, - }; - - var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); - - var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); - var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); - - Assert.AreEqual(4, actual.Hits.TotalHits); - - Assert.AreEqual(3, colorFacets.Count); - Assert.AreEqual(4, typeFacets.Count); - - Assert.AreEqual(3, colorFacets.Single(x => x.Value == "yellow").Count); - Assert.AreEqual(1, colorFacets.Single(x => x.Value == "white").Count); - Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); - - Assert.AreEqual(1, typeFacets.Single(x => x.Value == "meat").Count); - Assert.AreEqual(3, typeFacets.Single(x => x.Value == "food").Count); - Assert.AreEqual(2, typeFacets.Single(x => x.Value == "fruit").Count); - Assert.AreEqual(1, typeFacets.Single(x => x.Value == "drink").Count); - } - - [TestMethod] - public void MaxFacetRestrictionShouldReturnCorrectFacetsAndDocuments() - { - var facetFieldInfos = new List - { - new FacetFieldInfo{ FieldName = "color", MaxToFetchExcludingSelections = 1}, - }; - - var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); - - var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); - - Assert.AreEqual(5, actual.Hits.TotalHits); - Assert.AreEqual(1, 
colorFacets.Count); - Assert.AreEqual(3, colorFacets.Single(x => x.Value == "yellow").Count); - } - - [TestMethod] - public void MaxFacetRestrictionShouldStillReturnSelectedFacet() - { - var facetFieldInfos = new List - { - new FacetFieldInfo{ FieldName = "color", Selections = new List{"none"}, MaxToFetchExcludingSelections = 1}, - }; - - var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); - - var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); - - Assert.AreEqual(2, colorFacets.Count); - Assert.AreEqual(3, colorFacets.Single(x => x.Value == "yellow").Count); - Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); - } - - - public void MaxFacetRestrictionShouldReturnSelectedFacetAsWell() - { - var facetFieldInfos = new List - { - new FacetFieldInfo{ FieldName = "color", Selections = new List{"yellow", "none"}}, - new FacetFieldInfo{ FieldName = "type", MaxToFetchExcludingSelections = 2}, - }; - - var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); - - var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); - var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); - - Assert.AreEqual(4, actual.Hits.TotalHits); - - Assert.AreEqual(3, colorFacets.Count); - Assert.AreEqual(2, typeFacets.Count); - - Assert.AreEqual(3, colorFacets.Single(x => x.Value == "yellow").Count); - Assert.AreEqual(1, colorFacets.Single(x => x.Value == "white").Count); - Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); - - Assert.AreEqual(3, typeFacets.Single(x => x.Value == "food").Count); - Assert.AreEqual(2, typeFacets.Single(x => x.Value == "fruit").Count); - } - - - [TestMethod] - public void MatchSpecifiedQueryShouldReturnCorrectFacetsAndDocuments() - { - var query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, string.Empty, new KeywordAnalyzer()).Parse("keywords:apa"); - - var facetFieldInfos = new List 
- { - new FacetFieldInfo{ FieldName = "color"}, - new FacetFieldInfo{ FieldName = "type"}, - }; - var actual = _target.SearchWithFacets(query, 100, facetFieldInfos); - - var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); - var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); - - Assert.AreEqual(2, actual.Hits.TotalHits); - Assert.AreEqual("Banana", _target.Doc(actual.Hits.ScoreDocs[0].Doc).GetField("title").StringValue); - Assert.AreEqual("Water", _target.Doc(actual.Hits.ScoreDocs[1].Doc).GetField("title").StringValue); - - Assert.AreEqual(2, colorFacets.Count); - Assert.AreEqual(3, typeFacets.Count); - - Assert.AreEqual(1, colorFacets.Single(x => x.Value == "yellow").Count); - Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); - - Assert.AreEqual(1, typeFacets.Single(x => x.Value == "drink").Count); - Assert.AreEqual(1, typeFacets.Single(x => x.Value == "food").Count); - Assert.AreEqual(1, typeFacets.Single(x => x.Value == "fruit").Count); - } - - [TestMethod] - public void DrilldownMultiFacetsShouldReturnCorrectFacetsAndDocuments() - { - var facetFieldInfos = new List - { - new FacetFieldInfo{ FieldName = "color", Selections = new List{"yellow"}}, - new FacetFieldInfo{ FieldName = "type", Selections = new List{"fruit"}}, - }; - - var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); - - var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); - var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); - - Assert.AreEqual(2, actual.Hits.TotalHits); - - Assert.AreEqual(1, colorFacets.Count); - Assert.AreEqual(3, typeFacets.Count); - - Assert.AreEqual(2, colorFacets.Single(x => x.Value == "yellow").Count); // only fruits - - Assert.AreEqual(1, typeFacets.Single(x => x.Value == "meat").Count); //only yellow - Assert.AreEqual(2, typeFacets.Single(x => x.Value == "fruit").Count);//only yellow - Assert.AreEqual(3, 
typeFacets.Single(x => x.Value == "food").Count);//only yellow - } - - [TestMethod] - public void IncludeEmptyFacetsShouldIncludeEmptyFacets() - { - var facetFieldInfos = new List - { - new FacetFieldInfo{ FieldName = "type", Selections = new List{"drink"} }, - new FacetFieldInfo{ FieldName = "color"} - }; - - var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos, true); - - var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); - - Assert.AreEqual(1, colorFacets.Count(x => x.Count == 0)); - } - - [TestMethod] - public void DoNotIncludeEmptyFacetsShouldNotIncludeEmptyFacets() - { - var facetFieldInfos = new List - { - new FacetFieldInfo{ FieldName = "type", Selections = new List{"drink"} }, - new FacetFieldInfo{ FieldName = "color"} - }; - - var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos, false); - - var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); - - Assert.AreEqual(0, colorFacets.Count(x => x.Count == 0)); - } - - [TestMethod] - public void RangeFacetsShouldReturnCorrectFacetsAndDocument() - { - var facetFieldInfos = new List - { - new FacetFieldInfo{ FieldName = "price", IsRange = true, Ranges = new List - { - new Range { Id = "A", From = "0", To = "10"}, - new Range { Id = "B", From = "0", To = "20"}, - new Range { Id = "C", From = "0", To = "30"} - }, - Selections = new List - { - "B" - }}, - new FacetFieldInfo{ FieldName = "color", Selections = new List()}, - new FacetFieldInfo{ FieldName = "type", Selections = new List()}, - }; - - var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); - - var colorFacets = actual.Facets.Where(x => x.FacetFieldName == "color").ToList(); - var typeFacets = actual.Facets.Where(x => x.FacetFieldName == "type").ToList(); - var priceFacets = actual.Facets.Where(x => x.FacetFieldName == "price").ToList(); - - Assert.AreEqual(3, actual.Hits.TotalHits); - - Assert.AreEqual(2, 
colorFacets.Count); - Assert.AreEqual(3, typeFacets.Count); - Assert.AreEqual(3, priceFacets.Count); - - Assert.AreEqual(2, priceFacets.Single(x => x.Value == "A").Count); - Assert.AreEqual(3, priceFacets.Single(x => x.Value == "B").Count); - Assert.AreEqual(4, priceFacets.Single(x => x.Value == "C").Count); - - Assert.AreEqual(2, colorFacets.Single(x => x.Value == "yellow").Count); - Assert.AreEqual(1, colorFacets.Single(x => x.Value == "none").Count); - - Assert.AreEqual(2, typeFacets.Single(x => x.Value == "food").Count); - Assert.AreEqual(2, typeFacets.Single(x => x.Value == "fruit").Count); - Assert.AreEqual(1, typeFacets.Single(x => x.Value == "drink").Count); - } - - - protected static IndexReader SetupIndex() - { - var directory = new RAMDirectory(); - var writer = new IndexWriter(directory, new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29), true, - IndexWriter.MaxFieldLength.LIMITED); - writer.AddDocument(new Document() - .AddField("title", "Banana", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("color", "yellow", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("type", "food", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("type", "fruit", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("keywords", "apa hello whatever", Field.Store.YES, Field.Index.ANALYZED) - .AddField("price", "10", Field.Store.YES, Field.Index.NOT_ANALYZED)); - writer.AddDocument(new Document() - .AddField("title", "Apple", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("color", "yellow", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("type", "food", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("type", "fruit", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("price", "20", Field.Store.YES, Field.Index.NOT_ANALYZED)); - writer.AddDocument(new Document() - .AddField("title", "Burger", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("color", "yellow", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("type", 
"food", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("type", "meat", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("price", "30", Field.Store.YES, Field.Index.NOT_ANALYZED)); - writer.AddDocument(new Document() - .AddField("title", "Milk", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("color", "white", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("type", "drink", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("price", "40", Field.Store.YES, Field.Index.NOT_ANALYZED)); - writer.AddDocument(new Document() - .AddField("title", "Water", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("color", "none", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("type", "drink", Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("keywords", "apa hello cars", Field.Store.YES, Field.Index.ANALYZED) - .AddField("price", "0", Field.Store.YES, Field.Index.NOT_ANALYZED)); - writer.Flush(true, true, true); - writer.Optimize(); - writer.Commit(); - return IndexReader.Open(directory, true); - - } - } -} diff --git a/MultiFacetLuceneNet.Tests/MultiFacetLuceneNet.Tests.csproj b/MultiFacetLuceneNet.Tests/MultiFacetLuceneNet.Tests.csproj deleted file mode 100644 index 8dfc243..0000000 --- a/MultiFacetLuceneNet.Tests/MultiFacetLuceneNet.Tests.csproj +++ /dev/null @@ -1,149 +0,0 @@ - - - - Debug - AnyCPU - {F29DAEA8-7267-4230-B2F6-70062D2E890B} - Library - Properties - MultiFacetLuceneNet.Tests - MultiFacetLuceneNet.Tests - v4.8 - 512 - {3AC096D0-A1C2-E12C-1390-A8335801FDAB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} - 10.0 - $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion) - $(ProgramFiles)\Common Files\microsoft shared\VSTT\$(VisualStudioVersion)\UITestExtensionPackages - False - UnitTest - - - - - - - - - ..\ - true - - - - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - - - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - - - - ..\packages\SharpZipLib.0.86.0\lib\20\ICSharpCode.SharpZipLib.dll 
- - - ..\packages\Lucene.Net.3.0.3\lib\NET40\Lucene.Net.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Analyzers.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Core.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.FastVectorHighlighter.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Highlighter.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Memory.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Queries.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Regex.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.SimpleFacetedSearch.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Snowball.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.SpellChecker.dll - - - - - - - - - - - - - - - - - - - - - - - - - - - - {086093FD-D444-48BE-B897-7FAC14133C23} - MultiFacetLucene - - - - - - - False - - - False - - - False - - - False - - - - - - - - - - This project references NuGet package(s) that are missing on this computer. Enable NuGet Package Restore to download them. For more information, see http://go.microsoft.com/fwlink/?LinkID=322105. The missing file is {0}. 
- - - - - \ No newline at end of file diff --git a/MultiFacetLuceneNet.Tests/PerformanceTest.cs b/MultiFacetLuceneNet.Tests/PerformanceTest.cs deleted file mode 100644 index 75878f3..0000000 --- a/MultiFacetLuceneNet.Tests/PerformanceTest.cs +++ /dev/null @@ -1,115 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Diagnostics; -using System.Linq; -using Lucene.Net.Analysis.Standard; -using Lucene.Net.Documents; -using Lucene.Net.Index; -using Lucene.Net.Search; -using Lucene.Net.Store; -using Microsoft.VisualStudio.TestTools.UnitTesting; -using MultiFacetLucene; -using Version = Lucene.Net.Util.Version; - -namespace MultiFacetLuceneNet.Tests -{ - [Ignore] - [TestClass] - public class PerformanceTest - { - private static FacetSearcher _target; - private static readonly Random _rnd = new Random(Guid.NewGuid().GetHashCode()); - - public void Warmup() - { - //Warmup to prefetch facet bitset - var facetFieldInfos = new List - { - new FacetFieldInfo{ FieldName = "color"}, - new FacetFieldInfo{ FieldName = "type"}, - }; - _target.SearchWithFacets(new TermQuery(new Term("Price", "5")), 100, facetFieldInfos); - } - - [TestInitialize] - public void TestInitialize() - { - _target = new FacetSearcher(SetupIndex()); - Warmup(); - } - - [TestMethod] - public void MatchAllDocsAndCalculateFacetsPerformanceTest() - { - Warmup(); - var stopwatch = new Stopwatch(); - stopwatch.Start(); - - var facetFieldInfos = new List - { - new FacetFieldInfo{ FieldName = "color"}, - new FacetFieldInfo{ FieldName = "type"}, - }; - var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, facetFieldInfos); - - - stopwatch.Stop(); - var vs = stopwatch.ElapsedMilliseconds; - Trace.WriteLine("Took " + vs + " ms"); - } - - [TestMethod] - public void MatchAllDocsPerformanceTest() - { - var stopwatch = new Stopwatch(); - stopwatch.Start(); - - var actual = _target.SearchWithFacets(new MatchAllDocsQuery(), 100, new List()); - - - stopwatch.Stop(); - var vs = 
stopwatch.ElapsedMilliseconds; - Trace.WriteLine("Took " + vs + " ms"); - } - - - protected static IndexReader SetupIndex() - { - var directory = new RAMDirectory(); - var writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_29), true, - IndexWriter.MaxFieldLength.LIMITED); - for (var i = 0; i < 50000; i++) - writer.AddDocument(new Document() - .AddField("title", Guid.NewGuid().ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("color", GenerateColor(), Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("type", GenerateFood(), Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("type", GenerateFruit(), Field.Store.YES, Field.Index.NOT_ANALYZED) - .AddField("price", "10", Field.Store.YES, Field.Index.NOT_ANALYZED)); - writer.Flush(true, true, true); - writer.Optimize(); - writer.Commit(); - return IndexReader.Open(directory, true); - } - - - private static string GenerateFruit() - { - return "fruit" + GetRandom(1000, 2000); // 1000 different values - } - - private static string GenerateFood() - { - return "food" + GetRandom(1000, 1100); // 100 different values - } - - private static string GenerateColor() - { - return "color" + GetRandom(1000, 1100); // 30 different values - } - - private static string GetRandom(int i, int i1) - { - return _rnd.Next(i, i1).ToString("00000"); - } - } -} \ No newline at end of file diff --git a/MultiFacetLuceneNet.Tests/Properties/AssemblyInfo.cs b/MultiFacetLuceneNet.Tests/Properties/AssemblyInfo.cs deleted file mode 100644 index f8108e5..0000000 --- a/MultiFacetLuceneNet.Tests/Properties/AssemblyInfo.cs +++ /dev/null @@ -1,36 +0,0 @@ -using System.Reflection; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; - -// General Information about an assembly is controlled through the following -// set of attributes. Change these attribute values to modify the information -// associated with an assembly. 
-[assembly: AssemblyTitle("MultiFacetLuceneNet.Tests")] -[assembly: AssemblyDescription("")] -[assembly: AssemblyConfiguration("")] -[assembly: AssemblyCompany("Microsoft")] -[assembly: AssemblyProduct("MultiFacetLuceneNet.Tests")] -[assembly: AssemblyCopyright("Copyright © Microsoft 2014")] -[assembly: AssemblyTrademark("")] -[assembly: AssemblyCulture("")] - -// Setting ComVisible to false makes the types in this assembly not visible -// to COM components. If you need to access a type in this assembly from -// COM, set the ComVisible attribute to true on that type. -[assembly: ComVisible(false)] - -// The following GUID is for the ID of the typelib if this project is exposed to COM -[assembly: Guid("7e5ee159-a373-48a1-8062-1f2dc7ee9e8f")] - -// Version information for an assembly consists of the following four values: -// -// Major Version -// Minor Version -// Build Number -// Revision -// -// You can specify all the values or you can default the Build and Revision Numbers -// by using the '*' as shown below: -// [assembly: AssemblyVersion("1.0.*")] -[assembly: AssemblyVersion("1.0.0.0")] -[assembly: AssemblyFileVersion("1.0.0.0")] diff --git a/MultiFacetLuceneNet.Tests/app.config b/MultiFacetLuceneNet.Tests/app.config deleted file mode 100644 index 5d92d73..0000000 --- a/MultiFacetLuceneNet.Tests/app.config +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - diff --git a/MultiFacetLuceneNet.Tests/packages.config b/MultiFacetLuceneNet.Tests/packages.config deleted file mode 100644 index 591d3c9..0000000 --- a/MultiFacetLuceneNet.Tests/packages.config +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/MultiFacetLuceneNet.sln b/MultiFacetLuceneNet.sln deleted file mode 100644 index 07bc0c0..0000000 --- a/MultiFacetLuceneNet.sln +++ /dev/null @@ -1,50 +0,0 @@ - -Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 17 -VisualStudioVersion = 17.0.31903.59 -MinimumVisualStudioVersion = 10.0.40219.1 
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MultiFacetLucene", "MultiFacetLucene\MultiFacetLucene.csproj", "{086093FD-D444-48BE-B897-7FAC14133C23}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MultiFacetLuceneNet.Tests", "MultiFacetLuceneNet.Tests\MultiFacetLuceneNet.Tests.csproj", "{F29DAEA8-7267-4230-B2F6-70062D2E890B}" -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PerformanceTest", "PerformanceTest\PerformanceTest.csproj", "{0065E899-596D-4B16-9CF5-E917DD957DAB}" -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".nuget", ".nuget", "{B13A814A-4ECA-4A5F-A340-FF4C6DCE5145}" - ProjectSection(SolutionItems) = preProject - .nuget\NuGet.Config = .nuget\NuGet.Config - .nuget\NuGet.exe = .nuget\NuGet.exe - .nuget\NuGet.targets = .nuget\NuGet.targets - EndProjectSection -EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MultiFacetLucene4", "MultiFacetLucene4\MultiFacetLucene4.csproj", "{87059512-913A-4B3D-BD07-C13419852B81}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|Any CPU = Debug|Any CPU - Release|Any CPU = Release|Any CPU - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {086093FD-D444-48BE-B897-7FAC14133C23}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {086093FD-D444-48BE-B897-7FAC14133C23}.Debug|Any CPU.Build.0 = Debug|Any CPU - {086093FD-D444-48BE-B897-7FAC14133C23}.Release|Any CPU.ActiveCfg = Release|Any CPU - {086093FD-D444-48BE-B897-7FAC14133C23}.Release|Any CPU.Build.0 = Release|Any CPU - {F29DAEA8-7267-4230-B2F6-70062D2E890B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {F29DAEA8-7267-4230-B2F6-70062D2E890B}.Debug|Any CPU.Build.0 = Debug|Any CPU - {F29DAEA8-7267-4230-B2F6-70062D2E890B}.Release|Any CPU.ActiveCfg = Release|Any CPU - {F29DAEA8-7267-4230-B2F6-70062D2E890B}.Release|Any CPU.Build.0 = Release|Any CPU - {0065E899-596D-4B16-9CF5-E917DD957DAB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - 
{0065E899-596D-4B16-9CF5-E917DD957DAB}.Debug|Any CPU.Build.0 = Debug|Any CPU - {0065E899-596D-4B16-9CF5-E917DD957DAB}.Release|Any CPU.ActiveCfg = Release|Any CPU - {0065E899-596D-4B16-9CF5-E917DD957DAB}.Release|Any CPU.Build.0 = Release|Any CPU - {87059512-913A-4B3D-BD07-C13419852B81}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {87059512-913A-4B3D-BD07-C13419852B81}.Debug|Any CPU.Build.0 = Debug|Any CPU - {87059512-913A-4B3D-BD07-C13419852B81}.Release|Any CPU.ActiveCfg = Release|Any CPU - {87059512-913A-4B3D-BD07-C13419852B81}.Release|Any CPU.Build.0 = Release|Any CPU - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(ExtensibilityGlobals) = postSolution - SolutionGuid = {D0C27F1D-550D-403C-9243-107D171EE003} - EndGlobalSection -EndGlobal From cf1b419dfbf87809636d925f6d152c140ead91dd Mon Sep 17 00:00:00 2001 From: Steve Temple Date: Mon, 22 Nov 2021 19:52:35 +0000 Subject: [PATCH 4/6] 4.0.0 --- VERSION.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION.txt b/VERSION.txt index 56fea8a..0c89fc9 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -3.0.0 \ No newline at end of file +4.0.0 \ No newline at end of file From d3ffd6aed02c153dc91fb8a44835e135afc3dba7 Mon Sep 17 00:00:00 2001 From: Steve Temple Date: Tue, 23 Nov 2021 09:55:02 +0000 Subject: [PATCH 5/6] Updated to .net 4.8 and get everything running --- .gitignore | 4 +- .../MultiFacetLucene.Tests.csproj | 2 +- MultiFacetLucene/FacetSearcher.cs | 10 +- MultiFacetLucene/MultiFacetLucene.csproj | 3 +- PerformanceTest/PerformanceTest.csproj | 183 +++++++++++++++--- PerformanceTest/Program.cs | 20 +- PerformanceTest/packages.config | 58 +++++- 7 files changed, 230 insertions(+), 50 deletions(-) diff --git a/.gitignore b/.gitignore index 1bc915c..7e61028 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ *.suo *.user *.sln.docstates +.vs # Build results @@ -96,8 +97,7 @@ publish/ *.Publish.xml # NuGet 
Packages Directory -## TODO: If you have NuGet Package Restore enabled, uncomment the next line -#packages/ +packages/ # Windows Azure Build Output csx diff --git a/MultiFacetLucene.Tests/MultiFacetLucene.Tests.csproj b/MultiFacetLucene.Tests/MultiFacetLucene.Tests.csproj index f542322..2fffdbf 100644 --- a/MultiFacetLucene.Tests/MultiFacetLucene.Tests.csproj +++ b/MultiFacetLucene.Tests/MultiFacetLucene.Tests.csproj @@ -15,7 +15,7 @@ - + diff --git a/MultiFacetLucene/FacetSearcher.cs b/MultiFacetLucene/FacetSearcher.cs index e4c0f64..4b0bbd6 100644 --- a/MultiFacetLucene/FacetSearcher.cs +++ b/MultiFacetLucene/FacetSearcher.cs @@ -15,25 +15,25 @@ public class FacetSearcher : IndexSearcher { private readonly ConcurrentDictionary _facetBitSetDictionary = new ConcurrentDictionary(); - public FacetSearcher(IndexReader reader, FacetSearcherConfiguration? facetSearcherConfiguration = null) + public FacetSearcher(IndexReader reader, FacetSearcherConfiguration facetSearcherConfiguration = null) : base(reader) { Initialize(facetSearcherConfiguration); } - public FacetSearcher(IndexReader reader, TaskScheduler scheduler, FacetSearcherConfiguration? facetSearcherConfiguration = null) + public FacetSearcher(IndexReader reader, TaskScheduler scheduler, FacetSearcherConfiguration facetSearcherConfiguration = null) : base(reader, scheduler) { Initialize(facetSearcherConfiguration); } - public FacetSearcher(IndexReaderContext context, FacetSearcherConfiguration? facetSearcherConfiguration = null) + public FacetSearcher(IndexReaderContext context, FacetSearcherConfiguration facetSearcherConfiguration = null) : base(context) { Initialize(facetSearcherConfiguration); } - public FacetSearcher(IndexReaderContext context, TaskScheduler scheduler, FacetSearcherConfiguration? 
facetSearcherConfiguration = null) + public FacetSearcher(IndexReaderContext context, TaskScheduler scheduler, FacetSearcherConfiguration facetSearcherConfiguration = null) : base(context, scheduler) { Initialize(facetSearcherConfiguration); @@ -41,7 +41,7 @@ public FacetSearcher(IndexReaderContext context, TaskScheduler scheduler, FacetS public FacetSearcherConfiguration FacetSearcherConfiguration { get; protected set; } - private void Initialize(FacetSearcherConfiguration? facetSearcherConfiguration) + private void Initialize(FacetSearcherConfiguration facetSearcherConfiguration) { FacetSearcherConfiguration = facetSearcherConfiguration ?? FacetSearcherConfiguration.Default(); } diff --git a/MultiFacetLucene/MultiFacetLucene.csproj b/MultiFacetLucene/MultiFacetLucene.csproj index 1c5a951..570d3a2 100644 --- a/MultiFacetLucene/MultiFacetLucene.csproj +++ b/MultiFacetLucene/MultiFacetLucene.csproj @@ -1,8 +1,7 @@ - netstandard2.1 - enable + net48 diff --git a/PerformanceTest/PerformanceTest.csproj b/PerformanceTest/PerformanceTest.csproj index ebf123e..a15de8e 100644 --- a/PerformanceTest/PerformanceTest.csproj +++ b/PerformanceTest/PerformanceTest.csproj @@ -36,49 +36,171 @@ 4
- - ..\packages\SharpZipLib.0.86.0\lib\20\ICSharpCode.SharpZipLib.dll + + ..\packages\J2N.2.0.0-beta-0017\lib\net45\J2N.dll - - ..\packages\Lucene.Net.3.0.3\lib\NET40\Lucene.Net.dll + + ..\packages\Lucene.Net.4.8.0-beta00015\lib\net45\Lucene.Net.dll - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Analyzers.dll + + ..\packages\Lucene.Net.Analysis.Common.4.8.0-beta00015\lib\net45\Lucene.Net.Analysis.Common.dll - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Core.dll + + ..\packages\Lucene.Net.Queries.4.8.0-beta00015\lib\net45\Lucene.Net.Queries.dll - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.FastVectorHighlighter.dll + + ..\packages\Lucene.Net.QueryParser.4.8.0-beta00015\lib\net45\Lucene.Net.QueryParser.dll - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Highlighter.dll + + ..\packages\Lucene.Net.Sandbox.4.8.0-beta00015\lib\net45\Lucene.Net.Sandbox.dll - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Memory.dll + + ..\packages\Microsoft.Extensions.Configuration.Abstractions.1.1.2\lib\netstandard1.0\Microsoft.Extensions.Configuration.Abstractions.dll - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Queries.dll + + ..\packages\Microsoft.Extensions.Primitives.1.1.1\lib\netstandard1.0\Microsoft.Extensions.Primitives.dll - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Regex.dll + + ..\packages\Microsoft.Win32.Primitives.4.3.0\lib\net46\Microsoft.Win32.Primitives.dll + True + True - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.SimpleFacetedSearch.dll - - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.Snowball.dll + + + ..\packages\System.AppContext.4.3.0\lib\net463\System.AppContext.dll + True + True - - ..\packages\Lucene.Net.Contrib.3.0.3\lib\net40\Lucene.Net.Contrib.SpellChecker.dll + + + ..\packages\System.Console.4.3.0\lib\net46\System.Console.dll + True + True - + + 
..\packages\System.Diagnostics.DiagnosticSource.4.3.0\lib\net46\System.Diagnostics.DiagnosticSource.dll + + + ..\packages\System.Diagnostics.Tracing.4.3.0\lib\net462\System.Diagnostics.Tracing.dll + True + True + + + ..\packages\System.Globalization.Calendars.4.3.0\lib\net46\System.Globalization.Calendars.dll + True + True + + + ..\packages\System.IO.4.3.0\lib\net462\System.IO.dll + True + True + + + ..\packages\System.IO.Compression.4.3.0\lib\net46\System.IO.Compression.dll + True + True + + + + ..\packages\System.IO.Compression.ZipFile.4.3.0\lib\net46\System.IO.Compression.ZipFile.dll + True + True + + + ..\packages\System.IO.FileSystem.4.3.0\lib\net46\System.IO.FileSystem.dll + True + True + + + ..\packages\System.IO.FileSystem.Primitives.4.3.0\lib\net46\System.IO.FileSystem.Primitives.dll + True + True + + + ..\packages\System.Linq.4.3.0\lib\net463\System.Linq.dll + True + True + + + ..\packages\System.Linq.Expressions.4.3.0\lib\net463\System.Linq.Expressions.dll + True + True + + + ..\packages\System.Net.Http.4.3.0\lib\net46\System.Net.Http.dll + True + True + + + ..\packages\System.Net.Sockets.4.3.0\lib\net46\System.Net.Sockets.dll + True + True + + + + ..\packages\System.Reflection.4.3.0\lib\net462\System.Reflection.dll + True + True + + + ..\packages\System.Runtime.4.3.0\lib\net462\System.Runtime.dll + True + True + + + ..\packages\System.Runtime.CompilerServices.Unsafe.4.3.0\lib\netstandard1.0\System.Runtime.CompilerServices.Unsafe.dll + + + ..\packages\System.Runtime.Extensions.4.3.0\lib\net462\System.Runtime.Extensions.dll + True + True + + + ..\packages\System.Runtime.InteropServices.4.3.0\lib\net463\System.Runtime.InteropServices.dll + True + True + + + ..\packages\System.Runtime.InteropServices.RuntimeInformation.4.3.0\lib\net45\System.Runtime.InteropServices.RuntimeInformation.dll + True + True + + + ..\packages\System.Security.Cryptography.Algorithms.4.3.0\lib\net463\System.Security.Cryptography.Algorithms.dll + True + True + + + 
..\packages\System.Security.Cryptography.Encoding.4.3.0\lib\net46\System.Security.Cryptography.Encoding.dll + True + True + + + ..\packages\System.Security.Cryptography.Primitives.4.3.0\lib\net46\System.Security.Cryptography.Primitives.dll + True + True + + + ..\packages\System.Security.Cryptography.X509Certificates.4.3.0\lib\net461\System.Security.Cryptography.X509Certificates.dll + True + True + + + ..\packages\System.Text.RegularExpressions.4.3.0\lib\net463\System.Text.RegularExpressions.dll + True + True + + + ..\packages\System.Xml.ReaderWriter.4.3.0\lib\net46\System.Xml.ReaderWriter.dll + True + True + @@ -89,6 +211,15 @@ + + + {87059512-913a-4b3d-bd07-c13419852b81} + MultiFacetLucene + + + + + diff --git a/PerformanceTest/Program.cs b/PerformanceTest/Program.cs index e8d294e..af68854 100644 --- a/PerformanceTest/Program.cs +++ b/PerformanceTest/Program.cs @@ -6,13 +6,13 @@ using Lucene.Net.Analysis.Standard; using Lucene.Net.Documents; using Lucene.Net.Index; -using Lucene.Net.QueryParsers; +using Lucene.Net.QueryParsers.Classic; using Lucene.Net.Search; using Lucene.Net.Store; +using Lucene.Net.Util; using MultiFacetLucene; using MultiFacetLucene.Configuration; using MultiFacetLucene.Configuration.MemoryOptimizer; -using Version = Lucene.Net.Util.Version; namespace PerformanceTest { @@ -64,8 +64,7 @@ public static void Warmup() protected static IndexReader SetupIndex() { var directory = new RAMDirectory(); - var writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_29), true, - IndexWriter.MaxFieldLength.LIMITED); + var writer = new IndexWriter(directory, new IndexWriterConfig(LuceneVersion.LUCENE_48, new StandardAnalyzer(LuceneVersion.LUCENE_48))); for (var i = 0; i < 50000; i++) writer.AddDocument(new Document() .AddField("title", Guid.NewGuid().ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED) @@ -73,17 +72,16 @@ protected static IndexReader SetupIndex() .AddField("type", GenerateFood(), Field.Store.YES, Field.Index.NOT_ANALYZED) 
.AddField("type", GenerateFruit(), Field.Store.YES, Field.Index.NOT_ANALYZED) .AddField("price", "10", Field.Store.YES, Field.Index.NOT_ANALYZED)); - writer.Flush(true, true, true); - writer.Optimize(); + writer.Flush(true, true); writer.Commit(); - return IndexReader.Open(directory, true); + return DirectoryReader.Open(directory); } protected static IndexReader SetupIndexPhysicalTest() { - var directory = new DirectoryInfo(@"D:\Code\sites\SearchSite\SearchSiteWeb\App_Data\Index\Wordpress"); + var directory = new DirectoryInfo(@"c:\temp\lucene\multifacetlucene"); var index = FSDirectory.Open(directory); - return IndexReader.Open(index, true); + return DirectoryReader.Open(index); } private static void Main(string[] args) @@ -94,9 +92,9 @@ private static void Main(string[] args) var stopwatchAll = new Stopwatch(); stopwatchAll.Start(); - var queryParser = new MultiFieldQueryParser(Version.LUCENE_29, + var queryParser = new MultiFieldQueryParser(LuceneVersion.LUCENE_48, new[] {"title", "bodies"}, - new StandardAnalyzer(Version.LUCENE_29) + new StandardAnalyzer(LuceneVersion.LUCENE_48) ); var facetFieldInfos = new List diff --git a/PerformanceTest/packages.config b/PerformanceTest/packages.config index 591d3c9..a9789e6 100644 --- a/PerformanceTest/packages.config +++ b/PerformanceTest/packages.config @@ -1,6 +1,58 @@  - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file From 503c49b417abf1e3ea5e9da2e0cd73c7593dfdca Mon Sep 17 00:00:00 2001 From: Steve Temple Date: Tue, 23 Nov 2021 10:04:38 +0000 Subject: [PATCH 6/6] Fix some spacing --- .../RangeFacetBitSetCalculator.cs | 70 +++++++++---------- MultiFacetLucene/TermFacetBitSetCalulcator.cs | 42 +++++------ 2 files changed, 56 insertions(+), 56 deletions(-) diff --git a/MultiFacetLucene/RangeFacetBitSetCalculator.cs b/MultiFacetLucene/RangeFacetBitSetCalculator.cs index b484d32..1151089 100644 --- 
a/MultiFacetLucene/RangeFacetBitSetCalculator.cs +++ b/MultiFacetLucene/RangeFacetBitSetCalculator.cs @@ -10,49 +10,49 @@ namespace MultiFacetLucene { - public class RangeFacetBitSetCalculator : IFacetBitSetCalculator - { - private FacetSearcherConfiguration _facetSearcherConfiguration; + public class RangeFacetBitSetCalculator : IFacetBitSetCalculator + { + private FacetSearcherConfiguration _facetSearcherConfiguration; - public RangeFacetBitSetCalculator(FacetSearcherConfiguration configuration) - { - _facetSearcherConfiguration = configuration; - } + public RangeFacetBitSetCalculator(FacetSearcherConfiguration configuration) + { + _facetSearcherConfiguration = configuration; + } - public IEnumerable GetFacetValueBitSets(IndexReader indexReader, FacetFieldInfo info) - { - foreach (var range in info.Ranges) - { - var bitset = CalculateOpenBitSetDisi(indexReader, info.FieldName, range.From, range.To); - var cnt = bitset.Cardinality; - if (cnt >= _facetSearcherConfiguration.MinimumCountInTotalDatasetForFacet) - { - yield return - new FacetSearcher.FacetValues.FacetValueBitSet {Value = range.Id, Bitset = bitset, Count = cnt}; - } - else - { - bitset = null; - } - } - } + public IEnumerable GetFacetValueBitSets(IndexReader indexReader, FacetFieldInfo info) + { + foreach (var range in info.Ranges) + { + var bitset = CalculateOpenBitSetDisi(indexReader, info.FieldName, range.From, range.To); + var cnt = bitset.Cardinality; + if (cnt >= _facetSearcherConfiguration.MinimumCountInTotalDatasetForFacet) + { + yield return + new FacetSearcher.FacetValues.FacetValueBitSet { Value = range.Id, Bitset = bitset, Count = cnt }; + } + else + { + bitset = null; + } + } + } - public OpenBitSetDISI GetFacetBitSet(IndexReader indexReader, FacetFieldInfo info, string value) - { - var range = info.Ranges.FirstOrDefault(r => r.Id == value); - return CalculateOpenBitSetDisi(indexReader, info.FieldName, range.From, range.To); - } + public OpenBitSetDISI GetFacetBitSet(IndexReader 
indexReader, FacetFieldInfo info, string value) + { + var range = info.Ranges.FirstOrDefault(r => r.Id == value); + return CalculateOpenBitSetDisi(indexReader, info.FieldName, range.From, range.To); + } - protected OpenBitSetDISI CalculateOpenBitSetDisi(IndexReader indexReader, string facetAttributeFieldName, string from, string to) - { - var facetQuery = new TermRangeQuery(facetAttributeFieldName, new BytesRef(from), new BytesRef(to), true, true); - var facetQueryFilter = new QueryWrapperFilter(facetQuery); + protected OpenBitSetDISI CalculateOpenBitSetDisi(IndexReader indexReader, string facetAttributeFieldName, string from, string to) + { + var facetQuery = new TermRangeQuery(facetAttributeFieldName, new BytesRef(from), new BytesRef(to), true, true); + var facetQueryFilter = new QueryWrapperFilter(facetQuery); var disi = new OpenBitSetDISI(indexReader.MaxDoc); foreach (var leaf in indexReader.Leaves) { disi.InPlaceOr(facetQueryFilter.GetDocIdSet(leaf.AtomicReader.AtomicContext, leaf.AtomicReader.LiveDocs).GetIterator()); } return disi; - } - } + } + } } diff --git a/MultiFacetLucene/TermFacetBitSetCalulcator.cs b/MultiFacetLucene/TermFacetBitSetCalulcator.cs index f6b3388..cfcd18f 100644 --- a/MultiFacetLucene/TermFacetBitSetCalulcator.cs +++ b/MultiFacetLucene/TermFacetBitSetCalulcator.cs @@ -10,29 +10,29 @@ namespace MultiFacetLucene { - public class TermFacetBitSetCalulcator : IFacetBitSetCalculator - { - private readonly FacetSearcherConfiguration _facetSearcherConfiguration; + public class TermFacetBitSetCalulcator : IFacetBitSetCalculator + { + private readonly FacetSearcherConfiguration _facetSearcherConfiguration; - public TermFacetBitSetCalulcator(FacetSearcherConfiguration configuration) - { - _facetSearcherConfiguration = configuration; - } - - public IEnumerable GetFacetValueBitSets(IndexReader indexReader, FacetFieldInfo info) + public TermFacetBitSetCalulcator(FacetSearcherConfiguration configuration) + { + _facetSearcherConfiguration = 
configuration; + } + + public IEnumerable GetFacetValueBitSets(IndexReader indexReader, FacetFieldInfo info) { - foreach(var leaf in indexReader.Leaves) + foreach (var leaf in indexReader.Leaves) { var termReader = leaf.AtomicReader.GetTerms(info.FieldName).GetEnumerator(); do - { + { var bitset = CalculateOpenBitSetDisi(indexReader, info.FieldName, termReader.Term.Utf8ToString()); var cnt = bitset.Cardinality; if (cnt >= _facetSearcherConfiguration.MinimumCountInTotalDatasetForFacet) yield return new FacetSearcher.FacetValues.FacetValueBitSet - {Value = termReader.Term.Utf8ToString(), Bitset = bitset, Count = cnt}; + { Value = termReader.Term.Utf8ToString(), Bitset = bitset, Count = cnt }; else { bitset = null; @@ -42,16 +42,16 @@ public TermFacetBitSetCalulcator(FacetSearcherConfiguration configuration) } } - public OpenBitSetDISI GetFacetBitSet(IndexReader indexReader, FacetFieldInfo info, string value) - { - return CalculateOpenBitSetDisi(indexReader, info.FieldName, value); - } + public OpenBitSetDISI GetFacetBitSet(IndexReader indexReader, FacetFieldInfo info, string value) + { + return CalculateOpenBitSetDisi(indexReader, info.FieldName, value); + } - protected OpenBitSetDISI CalculateOpenBitSetDisi(IndexReader indexReader, string facetAttributeFieldName, string value) - { - var facetQuery = new TermQuery(new Term(facetAttributeFieldName, value)); - var facetQueryFilter = new QueryWrapperFilter(facetQuery); + protected OpenBitSetDISI CalculateOpenBitSetDisi(IndexReader indexReader, string facetAttributeFieldName, string value) + { + var facetQuery = new TermQuery(new Term(facetAttributeFieldName, value)); + var facetQueryFilter = new QueryWrapperFilter(facetQuery); var disi = new OpenBitSetDISI(indexReader.MaxDoc); foreach (var leaf in indexReader.Leaves) { @@ -64,5 +64,5 @@ protected OpenBitSetDISI CalculateOpenBitSetDisi(IndexReader indexReader, string } return disi; } - } + } }