New subject: compilation time [was: Re: multi_index : performance issue]

20 Dec 2004

      Hi,

I've got a performance problem during a test I've done with multi_index 
on MSVC 6.0 SP 6.
I'm using BOOST 1.32.0 .
The search performance is comparable with several maps, but
insertion is dramatically slow : 5mn 30s for 10 000 employees.

Here is my test code :

===
/* Boost.MultiIndex basic example.
  *
  * Copyright 2003-2004 Joaquín M López Muñoz.
  * Distributed under the Boost Software License, Version 1.0.
  * (See accompanying file LICENSE_1_0.txt or copy at
  * http://www.boost.org/LICENSE_1_0.txt)
  *
  * See http://www.boost.org/libs/multi_index for library home page.
  */
#include "stdafx.h"

#if !defined(NDEBUG)
#define BOOST_MULTI_INDEX_ENABLE_INVARIANT_CHECKING
#define BOOST_MULTI_INDEX_ENABLE_SAFE_MODE
#endif

#include <boost/multi_index_container.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <string>

#include <cstdlib>
#include <map>
#include <vector>
#include <ctime>
#include <sstream>
#include <iomanip>

using boost::multi_index_container;
using namespace boost::multi_index;

/* an employee record holds its ID, name and age */

struct employee
{
   int         m_id;
   std::string m_name;
   int         m_age;

   employee(int id_,const std::string& name_,int 
age_):m_id(id_),m_name(name_),m_age(age_){}

   void set(int id_,const std::string& name_,int age_) { m_id=id_; 
m_name=name_; m_age=age_; }
   const employee& operator =(const employee& e) { set(e.m_id, e.m_name, 
e.m_age); return *this; }
   employee(const employee& e) { set(e.m_id, e.m_name, e.m_age); }

   int id() const { return m_id; } // impossible d'appeler la fonction 
et l'index "id"
   const std::string& name() const { return m_name; }
   int age() const { return m_age; }

   friend std::ostream& operator<<(std::ostream& os,const employee& e)
   {
     os<<e.m_id<<" "<<e.m_name<<" "<<e.m_age<<std::endl;
     return os;
   }
};

/* tags for accessing the corresponding indices of employee_set */

struct id{};
struct name{};
struct age{};

/* see Advanced topics: Use of member_offset for info on
  * BOOST_MULTI_INDEX_MEMBER
  */

/* Define a multi_index_container of employees with following indices:
  *   - a unique index sorted by employee::int,
  *   - a non-unique index sorted by employee::name,
  *   - a non-unique index sorted by employee::age.
  */

typedef multi_index_container<
   employee,
   indexed_by<
     ordered_unique<
       tag<id>,  BOOST_MULTI_INDEX_MEMBER(employee,int,m_id)>,
     ordered_non_unique<
       tag<name>,BOOST_MULTI_INDEX_MEMBER(employee,std::string,m_name)>,
     ordered_non_unique<
       tag<age>, BOOST_MULTI_INDEX_MEMBER(employee,int,m_age)> >
...
employee_set;
template<typename Tag,typename MultiIndexContainer>
void print_out_by(
  const MultiIndexContainer& s,
  Tag* =0 /* fixes a MSVC++ 6.0 bug with implicit template function parms */
)
{
   /* obtain a reference to the index tagged by Tag */

   const typename 
boost::multi_index::index<MultiIndexContainer,Tag>::type& i=
     get<Tag>(s);

   typedef typename MultiIndexContainer::value_type value_type;

   /* dump the elements of the index to cout */

std::copy(i.begin(),i.end(),std::ostream_iterator<value_type>(std::cout));
}

// Un conteneur qui permet de contenir des employee
// triés sur 3 index.
class EmployeeMapper
{
   std::map<int, employee *> m_byId;
   std::map<std::string, employee *> m_byName;
   std::map<int, employee *> m_byAge;
   typedef std::map<int, employee *>::iterator EMIdIt;
   typedef std::map<std::string, employee *>::iterator EMNameIt;
   typedef std::map<int, employee *>::iterator EMAgeIt;

public:
   EmployeeMapper() {}
   ~EmployeeMapper()
   {
     // Deux lignes pour éviter que ces objets ne contiennent
     // des pointeurs invalides pendant la destruction va un
     // autre conteneur.
     m_byName.clear();
     m_byAge.clear();

     std::map<int, employee *>::iterator it;
     while ((it=m_byId.begin()) != m_byId.end())
     {
       delete it->second;
       m_byId.erase(it);
     }
   }

   void add(const employee& e)
   {
     employee * e2 = NULL;
     try
     {
       e2 = new employee(e);
     }
     catch(...)
     {
       delete e2;
       throw;
     }
     m_byId[e2->id()] = e2;
     m_byName[e2->name()] = e2;
     m_byAge[e2->age()] = e2;
   }

   employee* byId(int id)
   {
     EMIdIt it = m_byId.find(id);
     if (it == m_byId.end())
       return NULL;
     else
       return it->second;
   }

   employee* byName(const std::string& name)
   {
     EMNameIt it = m_byName.find(name);
     if (it == m_byName.end())
       return NULL;
     else
       return it->second;
   }

   employee* byAge(int age)
   {
     EMAgeIt it = m_byAge.find(age);
     if (it == m_byAge.end())
       return NULL;
     else
       return it->second;
   }

private:
   // fonctions inhibées
   EmployeeMapper(const EmployeeMapper&);
   const EmployeeMapper& operator=(const EmployeeMapper&);
};

// Retourne un entier tq : 0 <= entier < maximum
int alea(int maximum)
{
   return int(std::min(maximum, RAND_MAX) * rand() / (RAND_MAX + 1.0));
}

int main()
{
   // Init aléatoire
   srand( (unsigned)time( NULL ) );

   employee_set es;
   EmployeeMapper em;
   const int tailleEnsemble = 10000;
   const int tailleEchantillon = 10;
   const int moduloEchantillon = tailleEnsemble / tailleEchantillon;
   const int nbRecherches = 10000;

   std::vector<std::string> noms;
   noms.push_back("Alphonse");
   noms.push_back("Éléonore");
   noms.push_back("Gaston");
   noms.push_back("Séverine");
   noms.push_back("Zorglub");

   std::vector<employee> jeuDeDepart;
   std::vector<employee> jeuARechercher;

   int i=0;
   for (i=0; i < tailleEnsemble; ++i)
   {
     std::ostringstream os;
     // Construction d'un nom unique avec un caractère aléatoire
     // Cela reflète le type de données gérées en réalité.
     // L'ajout de i implique l'unicité.
     os << noms[alea(5)] << std::setw(5) << std::setfill('0') << alea(30000)
                         << std::setw(5) << std::setfill('0') << i;
     employee e(i,           // index numérique unique
                os.str(),    // nom, chaîne unique
                alea(100)); // valeur potentiellement dupliquée

     jeuDeDepart.push_back(e);

     // On conserve un échantillon à rechercher plus tard
     if (0 == i % moduloEchantillon)
     {
       //employee_set::iterator it=get<id>(es).find(i);
       //if (it != es.end())
         jeuARechercher.push_back(jeuDeDepart[i]);

     }
   }
   clock_t tes1, tes2;
   tes1 = clock();

   for (i=0; i < tailleEnsemble; ++i)
   {
     es.insert(jeuDeDepart[i]);
   }
   tes2 = clock();

   clock_t tem1, tem2;
   tem1 = clock();
   for (i=0; i < tailleEnsemble; ++i)
   {
     em.add(jeuDeDepart[i]);
   }
   tem2 = clock();

   clock_t t1_1, t2_1;
   t1_1 = clock();
   // Accède à un certain nombre d'éléments précis selon
   // plusieurs index
   for (i=0; i < nbRecherches; ++i)
   {
     for (std::vector<employee>::iterator it = jeuARechercher.begin();
          it != jeuARechercher.end();
          ++it)
     {
       // Les fonctions membres template qui permette de trouver les
       // index par tag ne fonctionnent pas avec MSVC 6
       employee_set::iterator eit = get<id>(es).find(it->id());
       nth_index_iterator<employee_set,1>::type eit2 = 
get<1>(es).find(it->name());
       nth_index_iterator<employee_set,2>::type eit3 = 
get<2>(es).find(it->age());
     }
   }
   t2_1 = clock();

   clock_t t1_2, t2_2;
   t1_2 = clock();
   // Deuxième méthode : avec des maps et des news.
   for (i=0; i < nbRecherches; ++i)
   {
     for (std::vector<employee>::iterator it = jeuARechercher.begin();
          it != jeuARechercher.end();
          ++it)
     {
       em.byId(it->id());
       em.byName(it->name());
       em.byAge(it->age());
     }
   }
   t2_2 = clock();

   double d1, d2;
   d1 = t1_2 - t1_1;
   d2 = t2_2 - t2_1;

   std::cout << "Insertion" << std::endl;
   std::cout << "multi = " << (tes2-tes1) << std::endl
             << "STL   = " << (tem2-tem1) << std::endl;
   std::cout << "Recherche" << std::endl;
   std::cout << "multi = " << d1 << std::endl
             << "STL   = " << d2 << std::endl;
   return 0;
}
===

Here are the results displayed :

===
Insertion
multi = 329125
STL   = 156
Recherche
multi = 735
STL   = 625
===

Am I doing something wrong ?
Note that I do not instanciate anything explicitely during insertion
loop.

Regards
-- 
Stanislas RENAN

multi_index : performance issue

Stanislas RENAN

Joaquin M Lopez Munoz

Joaquin M Lopez Munoz

Stanislas RENAN

Joaquin M Lopez Munoz

Stanislas RENAN

Joaquin M Lopez Munoz

tags

participants (2)