说明
Python TopScoreDocSuperCollector 示例是从最受好评的开源项目中提取的实现代码,你可以参考下面示例的使用方式。
编程语言: Python
命名空间/包名称: org.meresco.lucene.search
示例 #1
文件: dedupfiltercollectortest.py
项目: jerryba/meresco-lucene
def testCollectorFiltersTwoSimilar(self):
    """Two documents with the same dedup value collapse into one hit.

    Adds "urn:1" and "urn:2" with the same second argument (2), searches
    with a DeDupFilterSuperCollector wrapping a TopScoreDocSuperCollector,
    and checks that exactly one hit is reported, that the surviving
    document is "urn:2", and that its key carries a count of 2.
    """
    # NOTE(review): presumably the 2nd and 3rd arguments of _addDocument are
    # the "__isformatof__" and "__sort__" field values -- confirm against the
    # helper, which is not visible here.
    self._addDocument("urn:1", 2, 1)
    self._addDocument("urn:2", 2, 2)
    tc = TopScoreDocSuperCollector(100, True)
    c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
    self.lucene.search(query=MatchAllDocsQuery(), collector=c)
    topDocsResult = tc.topDocs(0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(1, topDocsResult.totalHits)
    self.assertEqual(1, len(topDocsResult.scoreDocs))
    docId = topDocsResult.scoreDocs[0].doc
    # The dedup collector maps the collected doc id to a key object; the
    # document is then loaded through the doc id stored on that key.
    key = c.keyForDocId(docId)
    identifier = self.lucene._index.getDocument(key.getDocId()).get(IDFIELD)
    self.assertEqual('urn:2', identifier)
    self.assertEqual(2, key.count)
示例 #2
文件: dedupfiltercollectortest.py
项目: jerryba/meresco-lucene
def testShouldAddResultsWithoutIsFormatOf(self):
    """Documents added with None as second argument are all kept.

    Eleven documents are added: "urn:1" and "urn:3" share the value 2,
    the other nine are added with None. The search is expected to report
    10 total hits.
    """
    # NOTE(review): presumably the 2nd argument of _addDocument is the
    # "__isformatof__" value and None means the field is absent -- confirm
    # against the helper, which is not visible here.
    self._addDocument("urn:1", 2)
    self._addDocument("urn:2", None)
    self._addDocument("urn:3", 2)
    self._addDocument("urn:4", None)
    self._addDocument("urn:5", None)
    self._addDocument("urn:6", None)
    self._addDocument("urn:7", None)
    self._addDocument("urn:8", None)
    self._addDocument("urn:9", None)
    self._addDocument("urn:A", None)
    self._addDocument("urn:B", None)  # trigger a merge
    tc = TopScoreDocSuperCollector(100, True)
    c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
    self.lucene.search(query=MatchAllDocsQuery(), collector=c)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(10, tc.topDocs(0).totalHits)
示例 #3
文件: dedupfiltercollectortest.py
项目: jerryba/meresco-lucene
def testCollectorFiltersTwoTimesTwoSimilarOneNot(self):
    """Two duplicate pairs and one unique document yield three hits.

    Five documents are added: two with second argument 1, two with 3 and
    one with 50. The search is expected to report three hits whose
    identifiers are "urn:2", "urn:3" and "urn:5", with key counts of
    1, 2 and 2.
    """
    # NOTE(review): presumably the 2nd and 3rd arguments of _addDocument are
    # the "__isformatof__" and "__sort__" field values -- confirm against the
    # helper, which is not visible here.
    self._addDocument("urn:1", 1, 2001)
    self._addDocument("urn:2", 3, 2009)  # result 2x
    self._addDocument("urn:3", 50, 2010)  # result 1x
    self._addDocument("urn:4", 3, 2001)
    self._addDocument("urn:5", 1, 2009)  # result 2x
    # expected: "urn:2", "urn:3" and "urn:5" in no particular order
    tc = TopScoreDocSuperCollector(100, True)
    c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
    self.lucene.search(query=MatchAllDocsQuery(), collector=c)
    topDocsResult = tc.topDocs(0)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(3, topDocsResult.totalHits)
    self.assertEqual(3, len(topDocsResult.scoreDocs))
    rawDocIds = [scoreDoc.doc for scoreDoc in topDocsResult.scoreDocs]
    # Translate each collected doc id into the doc id stored on its key.
    netDocIds = [c.keyForDocId(rawDocId).docId for rawDocId in rawDocIds]
    identifiers = set(self.lucene._index.getDocument(doc).get(IDFIELD) for doc in netDocIds)
    self.assertEqual(set(["urn:2", "urn:3", "urn:5"]), identifiers)
    # NOTE(review): counts are looked up via netDocIds, not rawDocIds --
    # confirm this is intended. sorted() already returns a list, so the
    # extra list() wrapper was dropped.
    self.assertEqual([1, 2, 2], sorted(c.keyForDocId(d).count for d in netDocIds))
示例 #4
文件: dedupfiltercollectortest.py
项目: jerryba/meresco-lucene
def testSilentyYieldsWrongResultWhenFieldNameDoesNotMatch(self):
    """A collector built with a non-matching field name still returns the hit.

    One document is added and the DeDupFilterSuperCollector is constructed
    with "__wrong_field__" as its first argument; the search is expected to
    complete and report one total hit.
    """
    self._addDocument("urn:1", 2)
    tc = TopScoreDocSuperCollector(100, True)
    c = DeDupFilterSuperCollector("__wrong_field__", "__sort__", tc)
    self.lucene.search(query=MatchAllDocsQuery(), collector=c)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(1, tc.topDocs(0).totalHits)
示例 #5
文件: dedupfiltercollectortest.py
项目: jerryba/meresco-lucene
def testCollectorTransparentlyDelegatesToNextCollector(self):
    """A single document passes through the dedup collector to the wrapped one.

    One document is added, the search runs through a
    DeDupFilterSuperCollector wrapping a TopScoreDocSuperCollector, and the
    wrapped collector is expected to report one total hit.
    """
    self._addDocument("urn:1", 2)
    tc = TopScoreDocSuperCollector(100, True)
    c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
    self.lucene.search(query=MatchAllDocsQuery(), collector=c)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(1, tc.topDocs(0).totalHits)