#!/usr/bin/env python
#
# Simple command-line search script.
#
# Copyright (C) 2003 James Aylett
# Copyright (C) 2004,2007,2009,2013 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
# USA

import sys
import xapian

# A database path plus at least one query word must be given.
if len(sys.argv) < 3:
    print("Usage: %s PATH_TO_DATABASE QUERY" % sys.argv[0], file=sys.stderr)
    sys.exit(1)

try:
    # Open the database read-only and begin an enquire session on it.
    db = xapian.Database(sys.argv[1])
    session = xapian.Enquire(db)

    # Everything after the database path is the query; joining the words
    # with spaces means simple queries need no quoting at the shell level.
    query_text = ' '.join(sys.argv[2:])

    # Build a query parser with English stemming and turn the text into a
    # Xapian::Query object.
    parser = xapian.QueryParser()
    english = xapian.Stem("english")
    parser.set_stemmer(english)
    parser.set_database(db)
    parser.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    parsed = parser.parse_query(query_text)
    print("Parsed query is: %s" % str(parsed))

    # Fetch the ten best matches for the query.
    session.set_query(parsed)
    mset = session.get_mset(0, 10)

    # Report the estimated total, then one line per retrieved match.
    print("%i results found." % mset.get_matches_estimated())
    print("Results 1-%i:" % mset.size())

    for hit in mset:
        print("%i: %i%% docid=%i [%s]" % (
            hit.rank + 1,
            hit.percent,
            hit.docid,
            hit.document.get_data().decode('utf-8'),
        ))

except Exception as err:
    # Top-level boundary: report any failure and exit with an error status.
    print("Exception: %s" % str(err), file=sys.stderr)
    sys.exit(1)
#!/usr/bin/env python
#
# Index each paragraph of a text file as a Xapian document.
#
# Copyright (C) 2003 James Aylett
# Copyright (C) 2004,2007,2013,2014 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
# USA

import sys
import xapian
import string


def _paragraphs(lines):
    """Yield each paragraph found in *lines* as a single string.

    Lines are stripped of surrounding whitespace; a line that is empty after
    stripping ends the current paragraph.  The lines of a paragraph are
    joined with single spaces.  A paragraph still pending when the input is
    exhausted is yielded too, so the last paragraph is not lost when the
    input does not end with a blank line.
    """
    pieces = []
    for line in lines:
        line = line.strip()
        if line:
            pieces.append(line)
        elif pieces:
            # Blank line after some text: the paragraph is complete.
            yield ' '.join(pieces)
            pieces = []
    if pieces:
        # End of input reached in the middle of a paragraph.
        yield ' '.join(pieces)


if len(sys.argv) != 2:
    print("Usage: %s PATH_TO_DATABASE" % sys.argv[0], file=sys.stderr)
    sys.exit(1)

try:
    # Open the database for update, creating a new database if necessary.
    database = xapian.WritableDatabase(sys.argv[1], xapian.DB_CREATE_OR_OPEN)

    indexer = xapian.TermGenerator()
    stemmer = xapian.Stem("english")
    indexer.set_stemmer(stemmer)

    # Each paragraph read from standard input becomes one document whose
    # data is the paragraph text and whose terms come from that same text.
    for para in _paragraphs(sys.stdin):
        doc = xapian.Document()
        doc.set_data(para)

        indexer.set_document(doc)
        indexer.index_text(para)

        # Add the document to the database.
        database.add_document(doc)

except Exception as e:
    # Top-level boundary: report any failure and exit with an error status.
    print("Exception: %s" % str(e), file=sys.stderr)
    sys.exit(1)
#!/usr/bin/env python
#
# Simple example script demonstrating query expansion.
#
# Copyright (C) 2003 James Aylett
# Copyright (C) 2004,2006,2007,2012,2013,2014 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
# USA

import sys
import xapian


def _to_text(value):
    """Return *value* as str, decoding it as UTF-8 when it is bytes.

    Values handed back by the bindings may be bytes under Python 3;
    formatting those with %s would print the b'...' repr instead of the
    text.  Values that are already str are returned unchanged.
    """
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value


# We require at least two command line arguments.
if len(sys.argv) < 3:
    print("Usage: %s PATH_TO_DATABASE QUERY [-- [DOCID...]]" % sys.argv[0],
          file=sys.stderr)
    sys.exit(1)

try:
    # Open the database for searching.
    database = xapian.Database(sys.argv[1])

    # Start an enquire session.
    enquire = xapian.Enquire(database)

    # The first argument after the database path is always part of the
    # query.  Of the remaining arguments, those before the first "--" are
    # further query words and those after it are relevant docids.
    rest = sys.argv[3:]
    if '--' in rest:
        marker = rest.index('--')
        query_words = rest[:marker]
        docid_args = rest[marker + 1:]
    else:
        query_words = rest
        docid_args = []

    # Combine the query words with spaces between them, so that simple
    # queries don't have to be quoted at the shell level.
    query_string = ' '.join([sys.argv[2]] + query_words)

    # Create an RSet with the listed docids in.
    reldocs = xapian.RSet()
    for docid_arg in docid_args:
        reldocs.add_document(int(docid_arg))

    # Parse the query string to produce a Xapian::Query object.
    qp = xapian.QueryParser()
    stemmer = xapian.Stem("english")
    qp.set_stemmer(stemmer)
    qp.set_database(database)
    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    query = qp.parse_query(query_string)

    if not query.empty():
        print("Parsed query is: %s" % str(query))

        # Find the top 10 results for the query.
        enquire.set_query(query)
        matches = enquire.get_mset(0, 10, reldocs)

        # Display the results.
        print("%i results found." % matches.get_matches_estimated())
        print("Results 1-%i:" % matches.size())

        for m in matches:
            print("%i: %i%% docid=%i [%s]" % (
                m.rank + 1,
                m.percent,
                m.docid,
                _to_text(m.document.get_data()),
            ))

        # Put the top 5 (at most) docs into the rset if rset is empty
        if reldocs.empty():
            for count, m in enumerate(matches, 1):
                reldocs.add_document(m.docid)
                if count == 5:
                    break

    # Get the suggested expand terms
    eterms = enquire.get_eset(10, reldocs)
    print("%i suggested additional terms" % eterms.size())
    for k in eterms:
        print("%s: %f" % (_to_text(k.term), k.weight))

except Exception as e:
    # Top-level boundary: report any failure and exit with an error status.
    print("Exception: %s" % str(e), file=sys.stderr)
    sys.exit(1)
#!/usr/bin/env python
#
# Simple command-line match decider example
#
# Copyright (C) 2003 James Aylett
# Copyright (C) 2004,2007,2009,2013 Olly Betts
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
# USA

import sys
import xapian

# This example runs a query like simplesearch does, but uses a MatchDecider
# (mymatchdecider) to discard any document for which value 0 is equal to
# the string passed as the second command line argument.

if len(sys.argv) < 4:
    print("Usage: %s PATH_TO_DATABASE AVOID_VALUE QUERY" % sys.argv[0],
          file=sys.stderr)
    sys.exit(1)


def _as_bytes(value):
    """Return *value* as bytes, encoding it as UTF-8 when it is a str."""
    if isinstance(value, str):
        return value.encode('utf-8')
    return value


def _to_text(value):
    """Return *value* as str, decoding it as UTF-8 when it is bytes."""
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return value


class mymatchdecider(xapian.MatchDecider):
    """Match decider rejecting documents whose value slot 0 equals a string.

    avoidvalue is the value to reject, as given by the caller (str or
    bytes); it is kept unchanged on the instance.
    """

    def __init__(self, avoidvalue):
        xapian.MatchDecider.__init__(self)
        self.avoidvalue = avoidvalue
        # Comparison is done on bytes: a str taken from the command line
        # never compares equal to a bytes value, which would make the
        # decider accept every document.
        self._avoid_bytes = _as_bytes(avoidvalue)

    def __call__(self, doc):
        """Return True to keep *doc*, False when its value 0 is avoided."""
        return _as_bytes(doc.get_value(0)) != self._avoid_bytes


try:
    # Open the database for searching.
    database = xapian.Database(sys.argv[1])

    # Start an enquire session.
    enquire = xapian.Enquire(database)

    # The second argument is the value to avoid.  Combine the rest of the
    # command line arguments with spaces between them, so that simple
    # queries don't have to be quoted at the shell level.
    avoid_value = sys.argv[2]
    query_string = ' '.join(sys.argv[3:])

    # Parse the query string to produce a Xapian::Query object.
    qp = xapian.QueryParser()
    stemmer = xapian.Stem("english")
    qp.set_stemmer(stemmer)
    qp.set_database(database)
    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    query = qp.parse_query(query_string)
    print("Parsed query is: %s" % str(query))

    # Find the top 10 results for the query, letting the decider veto
    # individual documents.
    enquire.set_query(query)
    mdecider = mymatchdecider(avoid_value)
    matches = enquire.get_mset(0, 10, None, mdecider)

    # Display the results.
    print("%i results found." % matches.get_matches_estimated())
    print("Results 1-%i:" % matches.size())

    for m in matches:
        print("%i: %i%% docid=%i [%s]" % (
            m.rank + 1,
            m.percent,
            m.docid,
            _to_text(m.document.get_data()),
        ))

except Exception as e:
    # Top-level boundary: report any failure and exit with an error status.
    print("Exception: %s" % str(e), file=sys.stderr)
    sys.exit(1)