vocabulary.h

Go to the documentation of this file.
00001 #ifndef EPT_DEBTAGS_VOCABULARY_H
00002 #define EPT_DEBTAGS_VOCABULARY_H
00003 
00009 /*
00010  * Copyright (C) 2003,2004,2005,2006,2007  Enrico Zini <enrico@debian.org>
00011  *
00012  * This program is free software; you can redistribute it and/or modify
00013  * it under the terms of the GNU General Public License as published by
00014  * the Free Software Foundation; either version 2 of the License, or
00015  * (at your option) any later version.
00016  *
00017  * This program is distributed in the hope that it will be useful,
00018  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00019  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020  * GNU General Public License for more details.
00021  *
00022  * You should have received a copy of the GNU General Public License
00023  * along with this program; if not, write to the Free Software
00024  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00025  */
00026 
#include <ept/debtags/tag.h>
#include <tagcoll/diskindex/mmap.h>

#include <cstddef>
#include <ctime>
#include <map>
#include <set>
#include <string>
#include <vector>
00033 
00034 namespace ept {
00035 namespace debtags {
00036 
00037 class Vocabulary
00038 {
00039 public:
00040     class FacetIndex : public tagcoll::diskindex::MMap
00041     {
00042     protected:
00043         // Layout of the data in the index
00044         struct Item {
00045             int offset;
00046             int size;
00047             int firsttag;
00048             int lasttag;
00049             const char name[];
00050         };
00051         inline Item* item(int id) const
00052         {
00053             if (id >= 0 && (unsigned)id < size())
00054                 return (Item*)(m_buf + ((int*)m_buf)[id]);
00055             return NULL;
00056         }
00057 
00058     public:
00059         FacetIndex() : tagcoll::diskindex::MMap() {}
00060         FacetIndex(const tagcoll::diskindex::MasterMMap& master, size_t idx)
00061             : tagcoll::diskindex::MMap(master, idx) {}
00062 
00064         size_t size() const { return m_size == 0 ? 0 :  *(int*)m_buf / sizeof(int); }
00066         size_t offset(int id) const { Item* i = item(id); return i == NULL ? 0 : i->offset; }
00068         size_t size(int id) const { Item* i = item(id); return i == NULL ? 0 : i->size; }
00070         int firsttag(int id) const { Item* i = item(id); return i == NULL ? -1 : i->firsttag; }
00072         int lasttag(int id) const { Item* i = item(id); return i == NULL ? -1 : i->lasttag; }
00074         const char* name(int id) const { Item* i = item(id); return i == NULL ? "" : i->name; }
00076         int id(const char* name) const;
00077         int id(const std::string& name) const { return id(name.c_str()); }
00078     };
00079     
00080     class TagIndex : public tagcoll::diskindex::MMap
00081     {
00082     protected:
00083         // Layout of the data in the index
00084         struct Item {
00085             int offset;
00086             int size;
00087             int facet;
00088             const char name[];
00089         };
00090         inline Item* item(int id) const
00091         {
00092             if (id >= 0 && (unsigned)id < size())
00093                 return (Item*)(m_buf + ((int*)m_buf)[id]);
00094             return NULL;
00095         }
00096 
00097     public:
00098         TagIndex() : tagcoll::diskindex::MMap() {}
00099         TagIndex(const tagcoll::diskindex::MasterMMap& master, size_t idx)
00100             : tagcoll::diskindex::MMap(master, idx) {}
00101 
00103         size_t size() const { return m_size == 0 ? 0 : *(int*)m_buf / sizeof(int); }
00105         size_t offset(int id) const { Item* i = item(id); return i == NULL ? 0 : i->offset; }
00107         size_t size(int id) const { Item* i = item(id); return i == NULL ? 0 : i->size; }
00109         int facet(int id) const { Item* i = item(id); return i == NULL ? -1 : i->facet; }
00111         const char* name(int id) const { Item* i = item(id); return i == NULL ? "" : i->name; }
00113         int id(const char* name) const;
00114         int id(const std::string& name) const { return id(name.c_str()); }
00115     };
00116 
00117 protected:
00118     // Master MMap index container
00119     tagcoll::diskindex::MasterMMap mastermmap;
00120 
00121     time_t m_timestamp;
00122 
00123     // Mmapped vocabulary file
00124     std::string voc_fname;
00125     int voc_fd;
00126     size_t voc_size;
00127     const char* voc_buf;
00128     
00129     // Facet and tag indexes
00130     FacetIndex findex;
00131     TagIndex tindex;
00132     
00133     // Cached parsed facet and tag records
00134     mutable std::vector< std::map<std::string, std::string> > m_facetData;
00135     mutable std::vector< std::map<std::string, std::string> > m_tagData;
00136     // Empty parsed data to return when data is asked for IDs == -1
00137     std::map<std::string, std::string> emptyData;
00138 
00139     void parseVocBuf(std::map<std::string, std::string>& res, size_t ofs, size_t len) const;
00140 
00141 public:
00142     Vocabulary();
00143     ~Vocabulary();
00144 
00146     time_t timestamp() const { return m_timestamp; }
00147 
00149     bool hasData() const { return m_timestamp != 0; }
00150 
00151     const FacetIndex& facetIndex() const { return findex; }
00152     const TagIndex& tagIndex() const { return tindex; }
00153 
00157     bool hasFacet(const std::string& name) const
00158     {
00159         return findex.id(name.c_str()) != -1;
00160     }
00161 
00165     bool hasTag(const std::string& fullname) const
00166     {
00167         return tindex.id(fullname.c_str()) != -1;
00168     }
00169 
00173     Facet facetByID(int id) const;
00174 
00178     Tag tagByID(int id) const;
00179 
00180     template<typename IDS>
00181     std::set<Tag> tagsByID(const IDS& ids) const
00182     {
00183         std::set<Tag> res;
00184         for (typename IDS::const_iterator i = ids.begin();
00185                 i != ids.end(); ++i)
00186             res.insert(tagByID(*i));
00187         return res;
00188     }
00189 
00193     Facet facetByTag(int id) const { return facetByID(tindex.facet(id)); }
00194 
00198     Facet facetByName(const std::string& name) const { return facetByID(findex.id(name)); }
00199 
00203     Tag tagByName(const std::string& fullname) const { return tagByID(tindex.id(fullname)); }
00204 
00208     std::set< Facet > facets() const
00209     {
00210         std::set< Facet > res;
00211         for (size_t i = 0; i < findex.size(); i++)
00212             res.insert(facetByID(i));
00213         return res;
00214     }
00215 
00219     std::set< Tag > tags() const
00220     {
00221         std::set< Tag > res;
00222         for (size_t i = 0; i < tindex.size(); i++)
00223             res.insert(tagByID(i));
00224         return res;
00225     }
00226 
00230     std::set< Tag > tags(int facet) const
00231     {
00232         std::set< Tag > res;
00233         for (int i = findex.firsttag(facet); i != -1 && i <= findex.lasttag(facet); i++)
00234             res.insert(tagByID(i));
00235         return res;
00236     }
00237 
00238     std::set< Tag > tags(const std::string& facetName) const
00239     {
00240         return tags(findex.id(facetName));
00241     }
00242 
00243     std::set< Tag > tags(const Facet& facet) const
00244     {
00245         return tags(facet.id());
00246     }
00247 
00248 #if 0
00250     const DerivedTagList& getEquations() const throw () { return equations; }
00251     
00253     FacetSet facets(const FacetMatcher& filter) const throw () { return getFiltered(filter); }
00254 #endif
00255 
00256 #if 0
00257     // These functions are here just to be used by Facet and Tag.  I'm not
00258     // making them private because I don't want Facet and Tag to access other
00259     // Vocabulary member, and other classes can't use these anyway as Facet::Data and
00260     // Tag::Data are protected
00261     const Facet::Data& facetData(int idx) { return m_facets[idx]; }
00262     const Tag::Data& tagData(int idx) { return m_tags[idx]; }
00263 #endif
00264 
00266     std::string facetName(int id) const { return findex.name(id); }
00267 
00269     std::string tagName(int id) const { return tindex.name(id); }
00270 
00272     std::string tagShortName(int id) const;
00273 
00274     const std::map<std::string, std::string>& facetData(int id) const;
00275     const std::map<std::string, std::string>& tagData(int id) const;
00276 };
00277 
00278 }
00279 }
00280 
00281 // vim:set ts=4 sw=4:
00282 #endif

Generated on Fri Sep 14 23:09:13 2007 for libept by  doxygen 1.5.3