00001 #ifndef EPT_TEXTSEARCH_TEXTSEARCH_H 00002 #define EPT_TEXTSEARCH_TEXTSEARCH_H 00003 00009 /* 00010 * Copyright (C) 2007 Enrico Zini <enrico@debian.org> 00011 * 00012 * This program is free software; you can redistribute it and/or modify 00013 * it under the terms of the GNU General Public License as published by 00014 * the Free Software Foundation; either version 2 of the License, or 00015 * (at your option) any later version. 00016 * 00017 * This program is distributed in the hope that it will be useful, 00018 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00019 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00020 * GNU General Public License for more details. 00021 * 00022 * You should have received a copy of the GNU General Public License 00023 * along with this program; if not, write to the Free Software 00024 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00025 */ 00026 00027 #include <xapian.h> 00028 #include <vector> 00029 #include <string> 00030 00031 namespace ept { 00032 namespace apt { 00033 class Apt; 00034 } 00035 namespace debtags { 00036 class Debtags; 00037 } 00038 namespace textsearch { 00039 00040 /* 00041 Fallback on apt scan searches when index is not present 00042 00043 Explicitly decide at instantiation (or at any other time) if a rebuild should 00044 be performed. Just adding a 'rebuildIfNeeded' method would be enough. 00045 00046 17:14 #xapian < enrico> Hello. I'm finally in a position of writing a library to maintain 00047 a xapian index with Debian package descriptions in a Debian system 00048 17:14 #xapian < enrico> I have a question, though 00049 17:14 #xapian < enrico> The descriptions change regularly as people run 'apt-get update' 00050 17:15 #xapian < enrico> I'd need to have a way to update the description index after 00051 apt-get update, without rebuilding it from scratch 00052 17:15 #xapian < enrico> Is there some documentation on how to do that? I can't exactly 00053 tell Xapian "the new description for package foo is this" because 00054 I'd need the xapian id 00055 19:11 #xapian < omega> you can add a unique term with a boolean prefix? 00056 19:11 #xapian < omega> like Qpackage-name 00057 19:11 #xapian < omega> then you search for it and replace_document 00058 19:24 #xapian < richardb> Or indeed, you use the "replace_document()" form which takes a 00059 unique_id term. 00060 19:25 #xapian < richardb> Xapian::docid replace_document(const std::string & 00061 unique_term, 00062 19:25 #xapian < richardb> const Xapian::Document & 00063 document); 00064 19:43 #xapian < enrico> unique term 00065 19:43 #xapian < enrico> nice! 00066 19:44 #xapian < enrico> can I use a non-alpha prefix, like :package-name ? 00067 19:45 #xapian < enrico> or pkg:package-name 00068 19:45 #xapian < enrico> I suppose I can 00069 */ 00070 00085 class TextSearch 00086 { 00087 protected: 00088 time_t m_timestamp; 00089 Xapian::Database m_db; 00090 Xapian::Stem m_stem; 00091 00093 static std::string toLower(const std::string& str); 00094 00101 void normalize_and_add(Xapian::Document& doc, const std::string& term, int& pos) const; 00102 00103 public: 00104 TextSearch(); 00105 00107 Xapian::Database& db() { return m_db; } 00108 00110 const Xapian::Database& db() const { return m_db; } 00111 00113 time_t timestamp() const { return m_timestamp; } 00114 00116 bool hasData() const { return m_timestamp > 0; } 00117 00119 bool needsRebuild(apt::Apt& apt); 00120 00126 bool rebuildIfNeeded(apt::Apt& apt); 00127 00135 bool rebuildIfNeeded(apt::Apt& apt, const debtags::Debtags& debtags); 00136 00140 Xapian::docid docidByName(const std::string& pkgname) const; 00141 00145 Xapian::Query makeORQuery(const std::string& keywords) const; 00146 00153 Xapian::Query makePartialORQuery(const std::string& keywords) const; 00154 00158 template<typename ITER> 00159 Xapian::Query makeORQuery(const ITER& begin, const ITER& end) const 00160 { 00161 std::vector<std::string> terms; 00162 // Insert both the lowercased and the stemmed lowercased query terms 00163 for (ITER i = begin; i != end; ++i) 00164 { 00165 std::string t = toLower(*i); 00166 std::string s = m_stem(t); 00167 terms.push_back(t); 00168 if (s != t) 00169 terms.push_back(s); 00170 } 00171 return Xapian::Query(Xapian::Query::OP_OR, terms.begin(), terms.end()); 00172 } 00173 00175 std::vector<std::string> expand(Xapian::Enquire& enq) const; 00176 00177 // std::vector<std::string> similar(const std::string& pkg); 00178 00182 Xapian::Query makeRelatedQuery(const std::string& pkgname) const; 00183 }; 00184 00185 } 00186 } 00187 00188 // vim:set ts=4 sw=4: 00189 #endif