Python TopScoreDocSuperCollector Example

说明

Python TopScoreDocSuperCollector 示例是从最受好评的开源项目中提取的实现代码，你可以参考下面示例的使用方式。

编程语言: Python

命名空间/包名称: org.meresco.lucene.search

示例#1
文件: dedupfiltercollectortest.py　项目: jerryba/meresco-lucene

    def testCollectorFiltersTwoSimilar(self):
        """Two documents sharing the same dedup value collapse into one hit.

        Both documents are added with 2 as their second argument. The
        wrapped top-docs collector must then report exactly one hit, and the
        dedup key for that hit must resolve to "urn:2" with a count of 2.
        """
        # NOTE(review): the 2nd and 3rd _addDocument arguments are presumably
        # the "__isformatof__" and "__sort__" field values -- confirm against
        # the helper's definition.
        self._addDocument("urn:1", 2, 1)
        self._addDocument("urn:2", 2, 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        topDocsResult = tc.topDocs(0)
        self.assertEqual(1, topDocsResult.totalHits)
        self.assertEqual(1, len(topDocsResult.scoreDocs))

        # Map the reported hit back to the document the collector kept as the
        # representative of its duplicate group.
        docId = topDocsResult.scoreDocs[0].doc
        key = c.keyForDocId(docId)
        identifier = self.lucene._index.getDocument(key.getDocId()).get(IDFIELD)
        self.assertEqual('urn:2', identifier)
        self.assertEqual(2, key.count)

示例#2
文件: dedupfiltercollectortest.py　项目: jerryba/meresco-lucene

 def testShouldAddResultsWithoutIsFormatOf(self):
     """Documents added with None as dedup value must all be reported.

     Eleven documents are indexed: "urn:1" and "urn:3" share the value 2,
     the other nine are added with None. The expected hit count is 10,
     i.e. only the two documents sharing a value are collapsed.
     """
     # NOTE(review): None presumably means the document carries no
     # "__isformatof__" field at all -- confirm in _addDocument.
     self._addDocument("urn:1", 2)
     self._addDocument("urn:2", None)
     self._addDocument("urn:3", 2)
     self._addDocument("urn:4", None)
     self._addDocument("urn:5", None)
     self._addDocument("urn:6", None)
     self._addDocument("urn:7", None)
     self._addDocument("urn:8", None)
     self._addDocument("urn:9", None)
     self._addDocument("urn:A", None)
     self._addDocument("urn:B", None) # trigger a merge
     tc = TopScoreDocSuperCollector(100, True)
     c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
     self.lucene.search(query=MatchAllDocsQuery(), collector=c)
     self.assertEqual(10, tc.topDocs(0).totalHits)

示例#3
文件: dedupfiltercollectortest.py　项目: jerryba/meresco-lucene

 def testCollectorFiltersTwoTimesTwoSimilarOneNot(self):
     """Two duplicate pairs plus one unique document yield three hits.

     "urn:1"/"urn:5" share the value 1 and "urn:2"/"urn:4" share the value
     3, while "urn:3" stands alone with 50. The search must report three
     hits whose keys resolve to "urn:2", "urn:3" and "urn:5", with group
     counts of 1, 2 and 2.
     """
     # NOTE(review): the 2nd and 3rd _addDocument arguments are presumably
     # the "__isformatof__" and "__sort__" field values -- confirm against
     # the helper's definition.
     self._addDocument("urn:1",  1, 2001)
     self._addDocument("urn:2",  3, 2009) # result 2x
     self._addDocument("urn:3", 50, 2010) # result 1x
     self._addDocument("urn:4",  3, 2001)
     self._addDocument("urn:5",  1, 2009) # result 2x
     # expected: "urn:2", "urn:3" and "urn:5" in no particular order
     tc = TopScoreDocSuperCollector(100, True)
     c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
     self.lucene.search(query=MatchAllDocsQuery(), collector=c)
     topDocsResult = tc.topDocs(0)
     self.assertEqual(3, topDocsResult.totalHits)
     self.assertEqual(3, len(topDocsResult.scoreDocs))
     # rawDocIds are the ids reported by the top-docs collector; netDocIds are
     # the ids stored on the dedup key of each of those hits.
     rawDocIds = [scoreDoc.doc for scoreDoc in topDocsResult.scoreDocs]
     netDocIds = [c.keyForDocId(rawDocId).docId for rawDocId in rawDocIds]
     identifiers = set(self.lucene._index.getDocument(doc).get(IDFIELD) for doc in netDocIds)
     self.assertEqual(set(["urn:2", "urn:3", "urn:5"]), identifiers)
     # Counts are looked up through the net doc ids; sorted() already returns
     # a list, so the result order of the search does not matter here.
     self.assertEqual([1,2,2], sorted(c.keyForDocId(d).count for d in netDocIds))

示例#4
文件: dedupfiltercollectortest.py　项目: jerryba/meresco-lucene

 def testSilentyYieldsWrongResultWhenFieldNameDoesNotMatch(self):
     """A collector keyed on "__wrong_field__" still completes the search.

     The dedup collector is configured with "__wrong_field__" instead of
     the "__isformatof__" name used by the other tests; the search must
     not fail and the single indexed document is reported as one hit.
     """
     self._addDocument("urn:1", 2)
     tc = TopScoreDocSuperCollector(100, True)
     c = DeDupFilterSuperCollector("__wrong_field__", "__sort__", tc)
     self.lucene.search(query=MatchAllDocsQuery(), collector=c)
     self.assertEqual(1, tc.topDocs(0).totalHits)

示例#5
文件: dedupfiltercollectortest.py　项目: jerryba/meresco-lucene

 def testCollectorTransparentlyDelegatesToNextCollector(self):
     """A single document passes through the dedup collector unchanged.

     With one document indexed, the wrapped top-docs collector must see
     exactly one hit after searching through the dedup collector.
     """
     self._addDocument("urn:1", 2)
     tc = TopScoreDocSuperCollector(100, True)
     c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
     self.lucene.search(query=MatchAllDocsQuery(), collector=c)
     self.assertEqual(1, tc.topDocs(0).totalHits)

展开阅读全文