More Related Content. Similar to Activate 2019: Tweaking the Base Score: Lucene/Solr Similarities Explained (20). Activate 2019: Tweaking the Base Score: Lucene/Solr Similarities Explained. 2. Tweaking the Base Score:
Lucene/Solr Similarities Explained
Demo: github.com/sematext/activate/tree/master/2019
More info: sematext.com/blog/search-relevance-solr-elasticsearch-similarity
Radu
Gheorghe
Rafał
Kuć
www.sematext.com
3. Agenda
BM25 - Best Match: the default
DFR - Divergence From Randomness framework
DFI - Divergence From Independence
IB - Information-Based models
LM - Language Models
Custom similarity
Putting it all together
5. BM25 - the TF part
freq / (freq + k1 * (1 - b + b * dl / avgdl))
Best for Most :)
11. tf * c * avgFieldLength / docFieldLength
Divergence From Randomness - H1
13. tf * log2(1 + c * (avgFieldLength / docFieldLength))
Divergence From Randomness - H2
17. (tf + mu * ((totalTermFreq + 1) / (#fieldTokens + 1)))
/ (docFieldLength + mu) * mu
Divergence From Randomness - H3
24. DFI demo
Oh, but don’t remove
stopwords*!
1) arbitrarily chops field length
2) stopwords aren’t always
stopwords ;)
30. Information Based - Retrieval Function
the average of the document information brought
by each query term
31. Information Based - Retrieval Function - DF
number of matching documents
(docFrequency + 1) / (numberOfDocuments + 1)
32. Information Based - Retrieval Function - TTF
total number of term occurrences
(totalTermFrequency + 1) / (numberOfDocuments + 1)
41. Custom Similarity - Activate Similarity Factory
/**
 * Factory that supplies an {@code ActivateSimilarity}.
 *
 * <p>The similarity is created on the first call to {@link #getSimilarity()} and
 * cached in a volatile field for later calls. No locking is used around the
 * null check, so concurrent first calls may each construct an instance.
 */
public class ActivateSimilarityFactory extends SimilarityFactory {
  /** Cached similarity; remains {@code null} until {@link #getSimilarity()} is first called. */
  private volatile Similarity similarity;

  /**
   * Hands the given parameters to the parent factory; nothing else is configured here.
   *
   * @param params the parameters forwarded to {@code super.init}
   */
  public void init(SolrParams params) {
    super.init(params);
  }

  /**
   * Returns the cached similarity, creating it first when none exists yet.
   *
   * @return the value of the cached similarity field
   */
  public Similarity getSimilarity() {
    if (similarity != null) {
      return similarity;
    }
    similarity = new ActivateSimilarity();
    return similarity;
  }
}
42. Custom Similarity - Activate Similarity Factory
/**
 * Factory that supplies an {@code ActivateSimilarity}.
 *
 * <p>The similarity is created on the first call to {@link #getSimilarity()} and
 * cached in a volatile field for later calls. No locking is used around the
 * null check, so concurrent first calls may each construct an instance.
 */
public class ActivateSimilarityFactory extends SimilarityFactory {
  /** Cached similarity; remains {@code null} until {@link #getSimilarity()} is first called. */
  private volatile Similarity similarity;

  /**
   * Hands the given parameters to the parent factory; nothing else is configured here.
   *
   * @param params the parameters forwarded to {@code super.init}
   */
  public void init(SolrParams params) {
    super.init(params);
  }

  /**
   * Returns the cached similarity, creating it first when none exists yet.
   *
   * @return the value of the cached similarity field
   */
  public Similarity getSimilarity() {
    if (similarity != null) {
      return similarity;
    }
    similarity = new ActivateSimilarity();
    return similarity;
  }
}
43. Custom Similarity - Activate Similarity Factory
/**
 * Factory that supplies an {@code ActivateSimilarity}.
 *
 * <p>The similarity is created on the first call to {@link #getSimilarity()} and
 * cached in a volatile field for later calls. No locking is used around the
 * null check, so concurrent first calls may each construct an instance.
 */
public class ActivateSimilarityFactory extends SimilarityFactory {
  /** Cached similarity; remains {@code null} until {@link #getSimilarity()} is first called. */
  private volatile Similarity similarity;

  /**
   * Hands the given parameters to the parent factory; nothing else is configured here.
   *
   * @param params the parameters forwarded to {@code super.init}
   */
  public void init(SolrParams params) {
    super.init(params);
  }

  /**
   * Returns the cached similarity, creating it first when none exists yet.
   *
   * @return the value of the cached similarity field
   */
  public Similarity getSimilarity() {
    if (similarity != null) {
      return similarity;
    }
    similarity = new ActivateSimilarity();
    return similarity;
  }
}
44. Custom Similarity - Similarity
/**
 * Similarity that stores a constant norm for every field and delegates scoring
 * to {@code ActivateSimScorer}.
 */
public class ActivateSimilarity extends Similarity {
  /** Creates the similarity; there is nothing to configure. */
  public ActivateSimilarity() {
  }

  /**
   * Returns the same norm for every field; the inversion state is not consulted.
   *
   * @param state the field inversion state (unused)
   * @return always {@code 1}
   */
  public long computeNorm(FieldInvertState state) {
    return 1L;
  }

  /**
   * Builds the scorer used for matching documents. The boost and the
   * collection/term statistics are not used by this implementation.
   *
   * @param boost the query-time boost (unused)
   * @param collectionStats the collection-level statistics (unused)
   * @param termStats the term-level statistics (unused)
   * @return a new {@code ActivateSimScorer}
   */
  public Similarity.SimScorer scorer(
      float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    return new ActivateSimScorer();
  }
}
45. Custom Similarity - Similarity
/**
 * Similarity that stores a constant norm for every field and delegates scoring
 * to {@code ActivateSimScorer}.
 */
public class ActivateSimilarity extends Similarity {
  /** Creates the similarity; there is nothing to configure. */
  public ActivateSimilarity() {
  }

  /**
   * Returns the same norm for every field; the inversion state is not consulted.
   *
   * @param state the field inversion state (unused)
   * @return always {@code 1}
   */
  public long computeNorm(FieldInvertState state) {
    return 1L;
  }

  /**
   * Builds the scorer used for matching documents. The boost and the
   * collection/term statistics are not used by this implementation.
   *
   * @param boost the query-time boost (unused)
   * @param collectionStats the collection-level statistics (unused)
   * @param termStats the term-level statistics (unused)
   * @return a new {@code ActivateSimScorer}
   */
  public Similarity.SimScorer scorer(
      float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    return new ActivateSimScorer();
  }
}
46. Custom Similarity - Similarity
/**
 * Similarity that stores a constant norm for every field and delegates scoring
 * to {@code ActivateSimScorer}.
 */
public class ActivateSimilarity extends Similarity {
  /** Creates the similarity; there is nothing to configure. */
  public ActivateSimilarity() {
  }

  /**
   * Returns the same norm for every field; the inversion state is not consulted.
   *
   * @param state the field inversion state (unused)
   * @return always {@code 1}
   */
  public long computeNorm(FieldInvertState state) {
    return 1L;
  }

  /**
   * Builds the scorer used for matching documents. The boost and the
   * collection/term statistics are not used by this implementation.
   *
   * @param boost the query-time boost (unused)
   * @param collectionStats the collection-level statistics (unused)
   * @param termStats the term-level statistics (unused)
   * @return a new {@code ActivateSimScorer}
   */
  public Similarity.SimScorer scorer(
      float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    return new ActivateSimScorer();
  }
}
47. Custom Similarity - SimScorer
/** Scorer whose score for a document is the frequency value it is given. */
public class ActivateSimScorer extends Similarity.SimScorer {
  /**
   * Scores a match by returning the frequency unchanged.
   *
   * @param freq the frequency passed in by the caller
   * @param norm the norm passed in by the caller (unused)
   * @return {@code freq}, unmodified
   */
  public float score(float freq, long norm) {
    final float unchangedFrequency = freq;
    return unchangedFrequency;
  }
}
48. Custom Similarity - SimScorer
/** Scorer whose score for a document is the frequency value it is given. */
public class ActivateSimScorer extends Similarity.SimScorer {
  /**
   * Scores a match by returning the frequency unchanged.
   *
   * @param freq the frequency passed in by the caller
   * @param norm the norm passed in by the caller (unused)
   * @return {@code freq}, unmodified
   */
  public float score(float freq, long norm) {
    final float unchangedFrequency = freq;
    return unchangedFrequency;
  }
}