TemplatedVocabulary.h

/**
 * This is a modified version of TemplatedVocabulary.h from DBoW2 (see below).
 * Added functions: Save and Load from text files without using cv::FileStorage.
 * Date: August 2015
 * Raúl Mur-Artal
 */

/**
 * File: TemplatedVocabulary.h
 * Date: February 2011
 * Author: Dorian Galvez-Lopez
 * Description: templated vocabulary
 * License: see the LICENSE.txt file
 *
 */

#ifndef __D_T_TEMPLATED_VOCABULARY__
#define __D_T_TEMPLATED_VOCABULARY__

#include <cassert>

#include <vector>
#include <numeric>
#include <fstream>
#include <sstream> // stringstream is used in loadFromTextFile
#include <string>
#include <algorithm>
#include <opencv2/core/core.hpp>
#include <limits>

#include "FeatureVector.h"
#include "BowVector.h"
#include "ScoringObject.h"

#include "../DUtils/Random.h"

using namespace std;

namespace DBoW2 {
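
// Illustrative usage (a sketch; TDescriptor and F come from a concrete
// instantiation, e.g. an ORB/FORB pair supplied by the surrounding project):
//
//   TemplatedVocabulary<TDescriptor, F> voc(10, 5, TF_IDF, L1_NORM);
//   voc.create(training_features);   // hierarchical k-means over all features
//   BowVector v1, v2;
//   voc.transform(features1, v1);    // image features -> weighted words
//   voc.transform(features2, v2);
//   double s = voc.score(v1, v2);    // similarity under the chosen scoring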
/// @param TDescriptor class of descriptor
/// @param F class of descriptor functions
template<class TDescriptor, class F>
/// Generic Vocabulary
class TemplatedVocabulary
{
public:

  /**
   * Initiates an empty vocabulary
   * @param k branching factor
   * @param L depth levels
   * @param weighting weighting type
   * @param scoring scoring type
   */
  TemplatedVocabulary(int k = 10, int L = 5,
    WeightingType weighting = TF_IDF, ScoringType scoring = L1_NORM);

  /**
   * Creates the vocabulary by loading a file
   * @param filename
   */
  TemplatedVocabulary(const std::string &filename);

  /**
   * Creates the vocabulary by loading a file
   * @param filename
   */
  TemplatedVocabulary(const char *filename);

  /**
   * Copy constructor
   * @param voc
   */
  TemplatedVocabulary(const TemplatedVocabulary<TDescriptor, F> &voc);

  /**
   * Destructor
   */
  virtual ~TemplatedVocabulary();

  /**
   * Assigns the given vocabulary to this by copying its data and removing
   * all the data contained by this vocabulary before
   * @param voc
   * @return reference to this vocabulary
   */
  TemplatedVocabulary<TDescriptor, F>& operator=(
    const TemplatedVocabulary<TDescriptor, F> &voc);

  /**
   * Creates a vocabulary from the training features with the already
   * defined parameters
   * @param training_features
   */
  virtual void create
    (const std::vector<std::vector<TDescriptor> > &training_features);

  /**
   * Creates a vocabulary from the training features, setting the branching
   * factor and the depth levels of the tree
   * @param training_features
   * @param k branching factor
   * @param L depth levels
   */
  virtual void create
    (const std::vector<std::vector<TDescriptor> > &training_features,
    int k, int L);
  /**
   * Creates a vocabulary from the training features, setting the branching
   * factor and the depth levels of the tree, and the weighting and scoring
   * schemes
   */
  virtual void create
    (const std::vector<std::vector<TDescriptor> > &training_features,
    int k, int L, WeightingType weighting, ScoringType scoring);
  /**
   * Returns the number of words in the vocabulary
   * @return number of words
   */
  virtual inline unsigned int size() const;

  /**
   * Returns whether the vocabulary is empty (i.e. it has not been trained)
   * @return true iff the vocabulary is empty
   */
  virtual inline bool empty() const;
  /**
   * Transforms a set of descriptors into a bow vector
   * @param features
   * @param v (out) bow vector of weighted words
   */
  virtual void transform(const std::vector<TDescriptor>& features, BowVector &v)
    const;
  /**
   * Transform a set of descriptors into a bow vector and a feature vector
   * @param features
   * @param v (out) bow vector
   * @param fv (out) feature vector of nodes and feature indexes
   * @param levelsup levels to go up the vocabulary tree to get the node index
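   *   (e.g. with L = 5 and levelsup = 2, node indices are taken from level 3
   *   of the tree, counting the root as level 0)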
   */
  virtual void transform(const std::vector<TDescriptor>& features,
    BowVector &v, FeatureVector &fv, int levelsup) const;

  /**
   * Transforms a single feature into a word (without weight)
   * @param feature
   * @return word id
   */
  virtual WordId transform(const TDescriptor& feature) const;

  /**
   * Returns the score of two vectors
   * @param a vector
   * @param b vector
   * @return score between vectors
   * @note the vectors must be already sorted and normalized if necessary
   */
  inline double score(const BowVector &a, const BowVector &b) const;

  /**
   * Returns the id of the node that is "levelsup" levels from the word given
   * @param wid word id
   * @param levelsup 0..L
   * @return node id. if levelsup is 0, returns the node id associated to the
   *   word id
   */
  virtual NodeId getParentNode(WordId wid, int levelsup) const;
  /**
   * Returns the ids of all the words that are under the given node id,
   * by traversing any of the branches that go down from the node
   * @param nid starting node id
   * @param words ids of words
   */
  void getWordsFromNode(NodeId nid, std::vector<WordId> &words) const;
  /**
   * Returns the branching factor of the tree (k)
   * @return k
   */
  inline int getBranchingFactor() const { return m_k; }

  /**
   * Returns the depth levels of the tree (L)
   * @return L
   */
  inline int getDepthLevels() const { return m_L; }
  /**
   * Returns the effective depth of the tree, i.e. the average depth of its leaves
   * @return average of depth levels of leaves
   */
  float getEffectiveLevels() const;
  /**
   * Returns the descriptor of a word
   * @param wid word id
   * @return descriptor
   */
  virtual inline TDescriptor getWord(WordId wid) const;

  /**
   * Returns the weight of a word
   * @param wid word id
   * @return weight
   */
  virtual inline WordValue getWordWeight(WordId wid) const;

  /**
   * Returns the weighting method
   * @return weighting method
   */
  inline WeightingType getWeightingType() const { return m_weighting; }

  /**
   * Returns the scoring method
   * @return scoring method
   */
  inline ScoringType getScoringType() const { return m_scoring; }

  /**
   * Changes the weighting method
   * @param type new weighting type
   */
  inline void setWeightingType(WeightingType type);

  /**
   * Changes the scoring method
   * @param type new scoring type
   */
  void setScoringType(ScoringType type);

  /**
   * Loads the vocabulary from a text file
   * @param filename
   */
  bool loadFromTextFile(const std::string &filename);

  /**
   * Saves the vocabulary into a text file
   * @param filename
   */
  void saveToTextFile(const std::string &filename) const;

  /**
   * Saves the vocabulary into a file
   * @param filename
   */
  void save(const std::string &filename) const;

  /**
   * Loads the vocabulary from a file
   * @param filename
   */
  void load(const std::string &filename);
  /**
   * Saves the vocabulary to a file storage structure
   * @param fs file storage to write to
   * @param name name of the node where the vocabulary is stored
   */
  virtual void save(cv::FileStorage &fs,
    const std::string &name = "vocabulary") const;

  /**
   * Loads the vocabulary from a file storage node
   * @param fs file storage to read from
   * @param name name of the child node of fs where the tree is stored.
   *   If not given, "vocabulary" is used
   */
  virtual void load(const cv::FileStorage &fs,
    const std::string &name = "vocabulary");
  /**
   * Stops those words whose weight is below minWeight.
   * Words are stopped by setting their weight to 0. They are not returned
   * later when transforming image features into vectors.
   * Note that when using IDF or TF_IDF, the weight is the idf part, which
   * is equivalent to -log(f), where f is the frequency of the word
   * (f = Ni/N, Ni: number of training images where the word is present,
   * N: number of training images).
   * Note that the old weight is forgotten, and subsequent calls to this
   * function with a lower minWeight have no effect.
   * @return number of words stopped now
   */
  virtual int stopWords(double minWeight);
protected:

  /// Pointer to descriptor
  typedef const TDescriptor *pDescriptor;

  /// Tree node
  struct Node
  {
    /// Node id
    NodeId id;
    /// Weight if the node is a word
    WordValue weight;
    /// Children
    vector<NodeId> children;
    /// Parent node (undefined in case of root)
    NodeId parent;
    /// Node descriptor
    TDescriptor descriptor;

    /// Word id if the node is a word
    WordId word_id;

    /**
     * Empty constructor
     */
    Node(): id(0), weight(0), parent(0), word_id(0){}

    /**
     * Constructor
     * @param _id node id
     */
    Node(NodeId _id): id(_id), weight(0), parent(0), word_id(0){}

    /**
     * Returns whether the node is a leaf node
     * @return true iff the node is a leaf
     */
    inline bool isLeaf() const { return children.empty(); }
  };
protected:

  /**
   * Creates an instance of the scoring object according to m_scoring
   */
  void createScoringObject();

  /**
   * Returns a set of pointers to descriptors
   * @param training_features all the features
   * @param features (out) pointers to the training features
   */
  void getFeatures(
    const vector<vector<TDescriptor> > &training_features,
    vector<pDescriptor> &features) const;
  /**
   * Returns the word id associated to a feature
   * @param feature
   * @param id (out) word id
   * @param weight (out) word weight
   * @param nid (out) if given, id of the node "levelsup" levels up
   * @param levelsup
   */
  virtual void transform(const TDescriptor &feature,
    WordId &id, WordValue &weight, NodeId* nid = NULL, int levelsup = 0) const;

  /**
   * Returns the word id associated to a feature
   * @param feature
   * @param id (out) word id
   */
  virtual void transform(const TDescriptor &feature, WordId &id) const;

  /**
   * Creates a level in the tree, under the parent, by running kmeans with
   * a descriptor set, and recursively creates the subsequent levels too
   * @param parent_id id of parent node
   * @param descriptors descriptors to run the kmeans on
   * @param current_level current level in the tree
   */
  void HKmeansStep(NodeId parent_id, const vector<pDescriptor> &descriptors,
    int current_level);
  /**
   * Creates k clusters from the given descriptors with some seeding algorithm.
   * @note In this class, kmeans++ is used, but this function should be
   *   overridden by inherited classes.
   */
  virtual void initiateClusters(const vector<pDescriptor> &descriptors,
    vector<TDescriptor> &clusters) const;
  /**
   * Creates k clusters from the given descriptor sets by running the
   * initial step of kmeans++
   * @param descriptors
   * @param clusters resulting clusters
   */
  void initiateClustersKMpp(const vector<pDescriptor> &descriptors,
    vector<TDescriptor> &clusters) const;

  /**
   * Create the words of the vocabulary once the tree has been built
   */
  void createWords();
  /**
   * Sets the weights of the nodes of the tree according to the given features.
   * Before calling this function, the nodes and the words must be already
   * created (by calling HKmeansStep and createWords)
   * @param features
   */
  void setNodeWeights(const vector<vector<TDescriptor> > &features);
protected:

  /// Branching factor
  int m_k;

  /// Depth levels
  int m_L;

  /// Weighting method
  WeightingType m_weighting;

  /// Scoring method
  ScoringType m_scoring;

  /// Object for computing scores
  GeneralScoring* m_scoring_object;

  /// Tree nodes
  std::vector<Node> m_nodes;

  /// Words of the vocabulary (tree leaves)
  /// this condition holds: m_words[wid]->word_id == wid
  std::vector<Node*> m_words;

};
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
TemplatedVocabulary<TDescriptor,F>::TemplatedVocabulary
  (int k, int L, WeightingType weighting, ScoringType scoring)
  : m_k(k), m_L(L), m_weighting(weighting), m_scoring(scoring),
  m_scoring_object(NULL)
{
  createScoringObject();
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
TemplatedVocabulary<TDescriptor,F>::TemplatedVocabulary
  (const std::string &filename): m_scoring_object(NULL)
{
  load(filename);
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
TemplatedVocabulary<TDescriptor,F>::TemplatedVocabulary
  (const char *filename): m_scoring_object(NULL)
{
  load(filename);
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::createScoringObject()
{
  delete m_scoring_object;
  m_scoring_object = NULL;

  switch(m_scoring)
  {
    case L1_NORM:
      m_scoring_object = new L1Scoring;
      break;

    case L2_NORM:
      m_scoring_object = new L2Scoring;
      break;

    case CHI_SQUARE:
      m_scoring_object = new ChiSquareScoring;
      break;

    case KL:
      m_scoring_object = new KLScoring;
      break;

    case BHATTACHARYYA:
      m_scoring_object = new BhattacharyyaScoring;
      break;

    case DOT_PRODUCT:
      m_scoring_object = new DotProductScoring;
      break;
  }
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::setScoringType(ScoringType type)
{
  m_scoring = type;
  createScoringObject();
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::setWeightingType(WeightingType type)
{
  this->m_weighting = type;
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
TemplatedVocabulary<TDescriptor,F>::TemplatedVocabulary(
  const TemplatedVocabulary<TDescriptor, F> &voc)
  : m_scoring_object(NULL)
{
  *this = voc;
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
TemplatedVocabulary<TDescriptor,F>::~TemplatedVocabulary()
{
  delete m_scoring_object;
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
TemplatedVocabulary<TDescriptor, F>&
TemplatedVocabulary<TDescriptor,F>::operator=
  (const TemplatedVocabulary<TDescriptor, F> &voc)
{
  this->m_k = voc.m_k;
  this->m_L = voc.m_L;
  this->m_scoring = voc.m_scoring;
  this->m_weighting = voc.m_weighting;

  this->createScoringObject();

  this->m_nodes.clear();
  this->m_words.clear();

  this->m_nodes = voc.m_nodes;
  this->createWords();

  return *this;
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::create(
  const std::vector<std::vector<TDescriptor> > &training_features)
{
  m_nodes.clear();
  m_words.clear();

  // expected_nodes = Sum_{i=0..L} ( k^i )
  int expected_nodes =
    (int)((pow((double)m_k, (double)m_L + 1) - 1)/(m_k - 1));
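  // e.g. the default k = 10, L = 5 gives (10^6 - 1)/9 = 111111 expected nodes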
  m_nodes.reserve(expected_nodes); // avoid allocations when creating the tree

  vector<pDescriptor> features;
  getFeatures(training_features, features);

  // create root
  m_nodes.push_back(Node(0)); // root

  // create the tree
  HKmeansStep(0, features, 1);

  // create the words
  createWords();

  // and set the weight of each node of the tree
  setNodeWeights(training_features);
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::create(
  const std::vector<std::vector<TDescriptor> > &training_features,
  int k, int L)
{
  m_k = k;
  m_L = L;

  create(training_features);
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::create(
  const std::vector<std::vector<TDescriptor> > &training_features,
  int k, int L, WeightingType weighting, ScoringType scoring)
{
  m_k = k;
  m_L = L;
  m_weighting = weighting;
  m_scoring = scoring;
  createScoringObject();

  create(training_features);
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::getFeatures(
  const vector<vector<TDescriptor> > &training_features,
  vector<pDescriptor> &features) const
{
  features.resize(0);

  typename vector<vector<TDescriptor> >::const_iterator vvit;
  typename vector<TDescriptor>::const_iterator vit;
  for(vvit = training_features.begin(); vvit != training_features.end(); ++vvit)
  {
    features.reserve(features.size() + vvit->size());
    for(vit = vvit->begin(); vit != vvit->end(); ++vit)
    {
      features.push_back(&(*vit));
    }
  }
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::HKmeansStep(NodeId parent_id,
  const vector<pDescriptor> &descriptors, int current_level)
{
  if(descriptors.empty()) return;

  // features associated to each cluster
  vector<TDescriptor> clusters;
  vector<vector<unsigned int> > groups; // groups[i] = [j1, j2, ...]
  // j1, j2, ... indices of descriptors associated to cluster i

  clusters.reserve(m_k);
  groups.reserve(m_k);

  //const int msizes[] = { m_k, descriptors.size() };
  //cv::SparseMat assoc(2, msizes, CV_8U);
  //cv::SparseMat last_assoc(2, msizes, CV_8U);
  //// assoc.row(cluster_idx).col(descriptor_idx) = 1 iff associated
  if((int)descriptors.size() <= m_k)
  {
    // trivial case: one cluster per feature
    groups.resize(descriptors.size());

    for(unsigned int i = 0; i < descriptors.size(); i++)
    {
      groups[i].push_back(i);
      clusters.push_back(*descriptors[i]);
    }
  }
  else
  {
    // select clusters and groups with kmeans
    bool first_time = true;
    bool goon = true;

    // to check if clusters move after iterations
    vector<int> last_association, current_association;

    while(goon)
    {
      // 1. Calculate clusters

      if(first_time)
      {
        // random sample
        initiateClusters(descriptors, clusters);
      }
      else
      {
        // calculate cluster centres

        for(unsigned int c = 0; c < clusters.size(); ++c)
        {
          vector<pDescriptor> cluster_descriptors;
          cluster_descriptors.reserve(groups[c].size());

          /*
          for(unsigned int d = 0; d < descriptors.size(); ++d)
          {
            if( assoc.find<unsigned char>(c, d) )
            {
              cluster_descriptors.push_back(descriptors[d]);
            }
          }
          */

          vector<unsigned int>::const_iterator vit;
          for(vit = groups[c].begin(); vit != groups[c].end(); ++vit)
          {
            cluster_descriptors.push_back(descriptors[*vit]);
          }

          F::meanValue(cluster_descriptors, clusters[c]);
        }

      } // if(!first_time)

      // 2. Associate features with clusters

      // calculate distances to cluster centers
      groups.clear();
      groups.resize(clusters.size(), vector<unsigned int>());
      current_association.resize(descriptors.size());

      //assoc.clear();

      typename vector<pDescriptor>::const_iterator fit;
      //unsigned int d = 0;
      for(fit = descriptors.begin(); fit != descriptors.end(); ++fit)//, ++d)
      {
        double best_dist = F::distance(*(*fit), clusters[0]);
        unsigned int icluster = 0;

        for(unsigned int c = 1; c < clusters.size(); ++c)
        {
          double dist = F::distance(*(*fit), clusters[c]);
          if(dist < best_dist)
          {
            best_dist = dist;
            icluster = c;
          }
        }

        //assoc.ref<unsigned char>(icluster, d) = 1;

        groups[icluster].push_back(fit - descriptors.begin());
        current_association[ fit - descriptors.begin() ] = icluster;
      }
      // kmeans++ ensures that all clusters have at least one feature associated with them
      // 3. check convergence
      if(first_time)
      {
        first_time = false;
      }
      else
      {
        //goon = !eqUChar(last_assoc, assoc);

        goon = false;
        for(unsigned int i = 0; i < current_association.size(); i++)
        {
          if(current_association[i] != last_association[i]){
            goon = true;
            break;
          }
        }
      }

      if(goon)
      {
        // copy last feature-cluster association
        last_association = current_association;
        //last_assoc = assoc.clone();
      }

    } // while(goon)

  } // if must run kmeans

  // create nodes
  for(unsigned int i = 0; i < clusters.size(); ++i)
  {
    NodeId id = m_nodes.size();
    m_nodes.push_back(Node(id));
    m_nodes.back().descriptor = clusters[i];
    m_nodes.back().parent = parent_id;
    m_nodes[parent_id].children.push_back(id);
  }

  // go on with the next level
  if(current_level < m_L)
  {
    // iterate again with the resulting clusters
    const vector<NodeId> &children_ids = m_nodes[parent_id].children;
    for(unsigned int i = 0; i < clusters.size(); ++i)
    {
      NodeId id = children_ids[i];

      vector<pDescriptor> child_features;
      child_features.reserve(groups[i].size());

      vector<unsigned int>::const_iterator vit;
      for(vit = groups[i].begin(); vit != groups[i].end(); ++vit)
      {
        child_features.push_back(descriptors[*vit]);
      }

      if(child_features.size() > 1)
      {
        HKmeansStep(id, child_features, current_level + 1);
      }
    }
  }
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor, F>::initiateClusters
  (const vector<pDescriptor> &descriptors, vector<TDescriptor> &clusters) const
{
  initiateClustersKMpp(descriptors, clusters);
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::initiateClustersKMpp(
  const vector<pDescriptor> &pfeatures, vector<TDescriptor> &clusters) const
{
  // Implements kmeans++ seeding algorithm
  // Algorithm:
  // 1. Choose one center uniformly at random from among the data points.
  // 2. For each data point x, compute D(x), the distance between x and the nearest
  //    center that has already been chosen.
  // 3. Add one new data point as a center. Each point x is chosen with probability
  //    proportional to D(x)^2.
  // 4. Repeat Steps 2 and 3 until k centers have been chosen.
  // 5. Now that the initial centers have been chosen, proceed using standard k-means
  //    clustering.
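  //
  // Note: as implemented below, step 3 actually samples each point with
  // probability proportional to D(x): the accumulated distances are not squared.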

  DUtils::Random::SeedRandOnce();

  clusters.resize(0);
  clusters.reserve(m_k);
  vector<double> min_dists(pfeatures.size(), std::numeric_limits<double>::max());

  // 1.
  int ifeature = DUtils::Random::RandomInt(0, pfeatures.size()-1);

  // create first cluster
  clusters.push_back(*pfeatures[ifeature]);

  // compute the initial distances
  typename vector<pDescriptor>::const_iterator fit;
  vector<double>::iterator dit;
  dit = min_dists.begin();
  for(fit = pfeatures.begin(); fit != pfeatures.end(); ++fit, ++dit)
  {
    *dit = F::distance(*(*fit), clusters.back());
  }

  while((int)clusters.size() < m_k)
  {
    // 2.
    dit = min_dists.begin();
    for(fit = pfeatures.begin(); fit != pfeatures.end(); ++fit, ++dit)
    {
      if(*dit > 0)
      {
        double dist = F::distance(*(*fit), clusters.back());
        if(dist < *dit) *dit = dist;
      }
    }

    // 3.
    double dist_sum = std::accumulate(min_dists.begin(), min_dists.end(), 0.0);

    if(dist_sum > 0)
    {
      double cut_d;
      do
      {
        cut_d = DUtils::Random::RandomValue<double>(0, dist_sum);
      } while(cut_d == 0.0);

      double d_up_now = 0;
      for(dit = min_dists.begin(); dit != min_dists.end(); ++dit)
      {
        d_up_now += *dit;
        if(d_up_now >= cut_d) break;
      }

      if(dit == min_dists.end())
        ifeature = pfeatures.size()-1;
      else
        ifeature = dit - min_dists.begin();

      clusters.push_back(*pfeatures[ifeature]);
    } // if dist_sum > 0
    else
      break;

  } // while(clusters.size() < m_k)
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::createWords()
{
  m_words.resize(0);

  if(!m_nodes.empty())
  {
    m_words.reserve( (int)pow((double)m_k, (double)m_L) );

    typename vector<Node>::iterator nit;

    nit = m_nodes.begin(); // ignore root
    for(++nit; nit != m_nodes.end(); ++nit)
    {
      if(nit->isLeaf())
      {
        nit->word_id = m_words.size();
        m_words.push_back( &(*nit) );
      }
    }
  }
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::setNodeWeights
  (const vector<vector<TDescriptor> > &training_features)
{
  const unsigned int NWords = m_words.size();
  const unsigned int NDocs = training_features.size();

  if(m_weighting == TF || m_weighting == BINARY)
  {
    // idf part must be 1 always
    for(unsigned int i = 0; i < NWords; i++)
      m_words[i]->weight = 1;
  }
  else if(m_weighting == IDF || m_weighting == TF_IDF)
  {
    // IDF and TF-IDF: we calculate the idf part now
    // Note: this actually calculates the idf part of the tf-idf score.
    // The complete tf-idf score is calculated in ::transform
    vector<unsigned int> Ni(NWords, 0);
    vector<bool> counted(NWords, false);

    typename vector<vector<TDescriptor> >::const_iterator mit;
    typename vector<TDescriptor>::const_iterator fit;

    for(mit = training_features.begin(); mit != training_features.end(); ++mit)
    {
      fill(counted.begin(), counted.end(), false);

      for(fit = mit->begin(); fit < mit->end(); ++fit)
      {
        WordId word_id;
        transform(*fit, word_id);

        if(!counted[word_id])
        {
          Ni[word_id]++;
          counted[word_id] = true;
        }
      }
    }

    // set ln(N/Ni)
    for(unsigned int i = 0; i < NWords; i++)
    {
      if(Ni[i] > 0)
      {
        m_words[i]->weight = log((double)NDocs / (double)Ni[i]);
      }// else // This cannot occur if using kmeans++
    }
  }
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
inline unsigned int TemplatedVocabulary<TDescriptor,F>::size() const
{
  return m_words.size();
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
inline bool TemplatedVocabulary<TDescriptor,F>::empty() const
{
  return m_words.empty();
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
float TemplatedVocabulary<TDescriptor,F>::getEffectiveLevels() const
{
  long sum = 0;
  typename std::vector<Node*>::const_iterator wit;
  for(wit = m_words.begin(); wit != m_words.end(); ++wit)
  {
    const Node *p = *wit;

    for(; p->id != 0; sum++) p = &m_nodes[p->parent];
  }

  return (float)((double)sum / (double)m_words.size());
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
TDescriptor TemplatedVocabulary<TDescriptor,F>::getWord(WordId wid) const
{
  return m_words[wid]->descriptor;
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
WordValue TemplatedVocabulary<TDescriptor, F>::getWordWeight(WordId wid) const
{
  return m_words[wid]->weight;
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
WordId TemplatedVocabulary<TDescriptor, F>::transform
  (const TDescriptor& feature) const
{
  if(empty())
  {
    return 0;
  }

  WordId wid;
  transform(feature, wid);
  return wid;
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::transform(
  const std::vector<TDescriptor>& features, BowVector &v) const
{
  v.clear();

  if(empty())
  {
    return;
  }

  // normalize
  LNorm norm;
  bool must = m_scoring_object->mustNormalize(norm);

  typename vector<TDescriptor>::const_iterator fit;

  if(m_weighting == TF || m_weighting == TF_IDF)
  {
    for(fit = features.begin(); fit < features.end(); ++fit)
    {
      WordId id;
      WordValue w;
      // w is the idf value if TF_IDF, 1 if TF
      transform(*fit, id, w);

      // not stopped
      if(w > 0) v.addWeight(id, w);
    }

    if(!v.empty() && !must)
    {
      // unnecessary when normalizing
      const double nd = v.size();
      for(BowVector::iterator vit = v.begin(); vit != v.end(); vit++)
        vit->second /= nd;
    }
  }
  else // IDF || BINARY
  {
    for(fit = features.begin(); fit < features.end(); ++fit)
    {
      WordId id;
      WordValue w;
      // w is idf if IDF, or 1 if BINARY
      transform(*fit, id, w);

      // not stopped
      if(w > 0) v.addIfNotExist(id, w);

    } // if add_features
  } // if m_weighting == ...

  if(must) v.normalize(norm);
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::transform(
  const std::vector<TDescriptor>& features,
  BowVector &v, FeatureVector &fv, int levelsup) const
{
  v.clear();
  fv.clear();

  if(empty()) // safe for subclasses
  {
    return;
  }

  // normalize
  LNorm norm;
  bool must = m_scoring_object->mustNormalize(norm);

  typename vector<TDescriptor>::const_iterator fit;

  if(m_weighting == TF || m_weighting == TF_IDF)
  {
    unsigned int i_feature = 0;
    for(fit = features.begin(); fit < features.end(); ++fit, ++i_feature)
    {
      WordId id;
      NodeId nid;
      WordValue w;
      // w is the idf value if TF_IDF, 1 if TF
      transform(*fit, id, w, &nid, levelsup);

      if(w > 0) // not stopped
      {
        v.addWeight(id, w);
        fv.addFeature(nid, i_feature);
      }
    }

    if(!v.empty() && !must)
    {
      // unnecessary when normalizing
      const double nd = v.size();
      for(BowVector::iterator vit = v.begin(); vit != v.end(); vit++)
        vit->second /= nd;
    }
  }
  else // IDF || BINARY
  {
    unsigned int i_feature = 0;
    for(fit = features.begin(); fit < features.end(); ++fit, ++i_feature)
    {
      WordId id;
      NodeId nid;
      WordValue w;
      // w is idf if IDF, or 1 if BINARY
      transform(*fit, id, w, &nid, levelsup);

      if(w > 0) // not stopped
      {
        v.addIfNotExist(id, w);
        fv.addFeature(nid, i_feature);
      }
    }
  } // if m_weighting == ...

  if(must) v.normalize(norm);
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
inline double TemplatedVocabulary<TDescriptor,F>::score
  (const BowVector &v1, const BowVector &v2) const
{
  return m_scoring_object->score(v1, v2);
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::transform
  (const TDescriptor &feature, WordId &id) const
{
  WordValue weight;
  transform(feature, id, weight);
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::transform(const TDescriptor &feature,
  WordId &word_id, WordValue &weight, NodeId *nid, int levelsup) const
{
  // propagate the feature down the tree
  vector<NodeId> nodes;
  typename vector<NodeId>::const_iterator nit;

  // level at which the node must be stored in nid, if given
  const int nid_level = m_L - levelsup;
  if(nid_level <= 0 && nid != NULL) *nid = 0; // root

  NodeId final_id = 0; // root
  int current_level = 0;

  do
  {
    ++current_level;
    nodes = m_nodes[final_id].children;
    final_id = nodes[0];

    double best_d = F::distance(feature, m_nodes[final_id].descriptor);

    for(nit = nodes.begin() + 1; nit != nodes.end(); ++nit)
    {
      NodeId id = *nit;
      double d = F::distance(feature, m_nodes[id].descriptor);
      if(d < best_d)
      {
        best_d = d;
        final_id = id;
      }
    }

    if(nid != NULL && current_level == nid_level)
      *nid = final_id;

  } while( !m_nodes[final_id].isLeaf() );

  // turn node id into word id
  word_id = m_nodes[final_id].word_id;
  weight = m_nodes[final_id].weight;
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
NodeId TemplatedVocabulary<TDescriptor,F>::getParentNode
  (WordId wid, int levelsup) const
{
  NodeId ret = m_words[wid]->id; // node id
  while(levelsup > 0 && ret != 0) // ret == 0 --> root
  {
    --levelsup;
    ret = m_nodes[ret].parent;
  }
  return ret;
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::getWordsFromNode
  (NodeId nid, std::vector<WordId> &words) const
{
  words.clear();

  if(m_nodes[nid].isLeaf())
  {
    words.push_back(m_nodes[nid].word_id);
  }
  else
  {
    words.reserve(m_k); // k^1, k^2, ... words may hang from this node

    vector<NodeId> parents;
    parents.push_back(nid);

    while(!parents.empty())
    {
      NodeId parentid = parents.back();
      parents.pop_back();

      const vector<NodeId> &child_ids = m_nodes[parentid].children;
      vector<NodeId>::const_iterator cit;

      for(cit = child_ids.begin(); cit != child_ids.end(); ++cit)
      {
        const Node &child_node = m_nodes[*cit];

        if(child_node.isLeaf())
          words.push_back(child_node.word_id);
        else
          parents.push_back(*cit);

      } // for each child
    } // while !parents.empty
  }
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
int TemplatedVocabulary<TDescriptor,F>::stopWords(double minWeight)
{
  int c = 0;
  typename vector<Node*>::iterator wit;
  for(wit = m_words.begin(); wit != m_words.end(); ++wit)
  {
    if((*wit)->weight < minWeight)
    {
      ++c;
      (*wit)->weight = 0;
    }
  }
  return c;
}

// --------------------------------------------------------------------------
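
// The text format consumed below matches what saveToTextFile writes:
//   line 1: "k L scoringType weightingType"
//   one line per non-root node: "parentId isLeaf <F::L descriptor values> weight"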
template<class TDescriptor, class F>
bool TemplatedVocabulary<TDescriptor,F>::loadFromTextFile(const std::string &filename)
{
  ifstream f;
  f.open(filename.c_str());

  if(!f.is_open()) // eof() on a fresh stream does not detect a failed open
    return false;

  m_words.clear();
  m_nodes.clear();

  string s;
  getline(f,s);
  stringstream ss;
  ss << s;
  ss >> m_k;
  ss >> m_L;
  int n1, n2;
  ss >> n1;
  ss >> n2;

  if(m_k<0 || m_k>20 || m_L<1 || m_L>10 || n1<0 || n1>5 || n2<0 || n2>3)
  {
    std::cerr << "Vocabulary loading failure: This is not a correct text file!" << endl;
    return false;
  }

  m_scoring = (ScoringType)n1;
  m_weighting = (WeightingType)n2;
  createScoringObject();

  // nodes
  int expected_nodes =
    (int)((pow((double)m_k, (double)m_L + 1) - 1)/(m_k - 1));
  m_nodes.reserve(expected_nodes);

  m_words.reserve(pow((double)m_k, (double)m_L + 1));

  m_nodes.resize(1);
  m_nodes[0].id = 0;

  while(!f.eof())
  {
    string snode;
    getline(f,snode);
    if(snode.empty()) // skip a possible trailing blank line
      continue;

    stringstream ssnode;
    ssnode << snode;

    int nid = m_nodes.size();
    m_nodes.resize(m_nodes.size()+1);
    m_nodes[nid].id = nid;

    int pid;
    ssnode >> pid;
    m_nodes[nid].parent = pid;
    m_nodes[pid].children.push_back(nid);

    int nIsLeaf;
    ssnode >> nIsLeaf;

    stringstream ssd;
    for(int iD=0;iD<F::L;iD++)
    {
      string sElement;
      ssnode >> sElement;
      ssd << sElement << " ";
    }
    F::fromString(m_nodes[nid].descriptor, ssd.str());

    ssnode >> m_nodes[nid].weight;

    if(nIsLeaf>0)
    {
      int wid = m_words.size();
      m_words.resize(wid+1);

      m_nodes[nid].word_id = wid;
      m_words[wid] = &m_nodes[nid];
    }
    else
    {
      m_nodes[nid].children.reserve(m_k);
    }
  }

  return true;
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::saveToTextFile(const std::string &filename) const
{
  fstream f;
  f.open(filename.c_str(), ios_base::out);

  f << m_k << " " << m_L << " " << m_scoring << " " << m_weighting << endl;

  for(size_t i=1; i<m_nodes.size(); i++)
  {
    const Node& node = m_nodes[i];

    f << node.parent << " ";
    if(node.isLeaf())
      f << 1 << " ";
    else
      f << 0 << " ";

    f << F::toString(node.descriptor) << " " << (double)node.weight << endl;
  }

  f.close();
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::save(const std::string &filename) const
{
  cv::FileStorage fs(filename.c_str(), cv::FileStorage::WRITE);
  if(!fs.isOpened()) throw string("Could not open file ") + filename;

  save(fs);
}

// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::load(const std::string &filename)
{
  cv::FileStorage fs(filename.c_str(), cv::FileStorage::READ);
  if(!fs.isOpened()) throw string("Could not open file ") + filename;

  this->load(fs);
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::save(cv::FileStorage &f,
  const std::string &name) const
{
  // Format YAML:
  // vocabulary
  // {
  //   k:
  //   L:
  //   scoringType:
  //   weightingType:
  //   nodes
  //   [
  //     {
  //       nodeId:
  //       parentId:
  //       weight:
  //       descriptor:
  //     }
  //   ]
  //   words
  //   [
  //     {
  //       wordId:
  //       nodeId:
  //     }
  //   ]
  // }
  //
  // The root node (index 0) is not included in the node vector
  //

  f << name << "{";

  f << "k" << m_k;
  f << "L" << m_L;
  f << "scoringType" << m_scoring;
  f << "weightingType" << m_weighting;

  // tree
  f << "nodes" << "[";
  vector<NodeId> parents, children;
  vector<NodeId>::const_iterator pit;

  parents.push_back(0); // root

  while(!parents.empty())
  {
    NodeId pid = parents.back();
    parents.pop_back();

    const Node& parent = m_nodes[pid];
    children = parent.children;

    for(pit = children.begin(); pit != children.end(); pit++)
    {
      const Node& child = m_nodes[*pit];

      // save node data
      f << "{:";
      f << "nodeId" << (int)child.id;
      f << "parentId" << (int)pid;
      f << "weight" << (double)child.weight;
      f << "descriptor" << F::toString(child.descriptor);
      f << "}";

      // add to parent list
      if(!child.isLeaf())
      {
        parents.push_back(*pit);
      }
    }
  }

  f << "]"; // nodes

  // words
  f << "words" << "[";

  typename vector<Node*>::const_iterator wit;
  for(wit = m_words.begin(); wit != m_words.end(); wit++)
  {
    WordId id = wit - m_words.begin();
    f << "{:";
    f << "wordId" << (int)id;
    f << "nodeId" << (int)(*wit)->id;
    f << "}";
  }

  f << "]"; // words

  f << "}";
}
// --------------------------------------------------------------------------

template<class TDescriptor, class F>
void TemplatedVocabulary<TDescriptor,F>::load(const cv::FileStorage &fs,
  const std::string &name)
{
  m_words.clear();
  m_nodes.clear();

  cv::FileNode fvoc = fs[name];

  m_k = (int)fvoc["k"];
  m_L = (int)fvoc["L"];
  m_scoring = (ScoringType)((int)fvoc["scoringType"]);
  m_weighting = (WeightingType)((int)fvoc["weightingType"]);

  createScoringObject();

  // nodes
  cv::FileNode fn = fvoc["nodes"];

  m_nodes.resize(fn.size() + 1); // +1 to include root
  m_nodes[0].id = 0;

  for(unsigned int i = 0; i < fn.size(); ++i)
  {
    NodeId nid = (int)fn[i]["nodeId"];
    NodeId pid = (int)fn[i]["parentId"];
    WordValue weight = (WordValue)fn[i]["weight"];
    string d = (string)fn[i]["descriptor"];

    m_nodes[nid].id = nid;
    m_nodes[nid].parent = pid;
    m_nodes[nid].weight = weight;
    m_nodes[pid].children.push_back(nid);

    F::fromString(m_nodes[nid].descriptor, d);
  }

  // words
  fn = fvoc["words"];

  m_words.resize(fn.size());

  for(unsigned int i = 0; i < fn.size(); ++i)
  {
    NodeId wid = (int)fn[i]["wordId"];
    NodeId nid = (int)fn[i]["nodeId"];

    m_nodes[nid].word_id = wid;
    m_words[wid] = &m_nodes[nid];
  }
}
// --------------------------------------------------------------------------

/**
 * Writes printable information of the vocabulary
 * @param os stream to write to
 * @param voc
 */
template<class TDescriptor, class F>
std::ostream& operator<<(std::ostream &os,
  const TemplatedVocabulary<TDescriptor,F> &voc)
{
  os << "Vocabulary: k = " << voc.getBranchingFactor()
    << ", L = " << voc.getDepthLevels()
    << ", Weighting = ";

  switch(voc.getWeightingType())
  {
    case TF_IDF: os << "tf-idf"; break;
    case TF: os << "tf"; break;
    case IDF: os << "idf"; break;
    case BINARY: os << "binary"; break;
  }

  os << ", Scoring = ";
  switch(voc.getScoringType())
  {
    case L1_NORM: os << "L1-norm"; break;
    case L2_NORM: os << "L2-norm"; break;
    case CHI_SQUARE: os << "Chi square distance"; break;
    case KL: os << "KL-divergence"; break;
    case BHATTACHARYYA: os << "Bhattacharyya coefficient"; break;
    case DOT_PRODUCT: os << "Dot product"; break;
  }

  os << ", Number of words = " << voc.size();

  return os;
}

} // namespace DBoW2

#endif