LAMA
/home/brandes/workspace/LAMA/src/lama/distribution/Redistributor.hpp
Go to the documentation of this file.
00001 
00034 #ifndef LAMA_REDISTRIBUTOR_HPP_
00035 #define LAMA_REDISTRIBUTOR_HPP_
00036 
00037 // for dll_import
00038 #include <lama/config.hpp>
00039 
00040 // base classes
00041 #include <lama/Printable.hpp>
00042 
00043 // others
00044 #include <lama/distribution/Distribution.hpp>
00045 #include <lama/distribution/Halo.hpp>
00046 
00047 #include <lama/LAMAArray.hpp>
00048 #include <lama/HostReadAccess.hpp>
00049 #include <lama/HostWriteAccess.hpp>
00050 
00051 namespace lama
00052 {
00053 
00061 class LAMA_DLL_IMPORTEXPORT Redistributor : public Printable
00062 {
00063 public:
00064 
00073     Redistributor( DistributionPtr targetDistribution, DistributionPtr sourceDistribution );
00074 
00077     DistributionPtr getTargetDistributionPtr() const;
00078 
00079     DistributionPtr getSourceDistributionPtr() const;
00080 
00081     IndexType getSourceLocalSize() const { return mSourceSize; }
00082 
00083     IndexType getTargetLocalSize() const { return mTargetSize; }
00084 
00098     template<typename ValueType>
00099     void redistribute ( LAMAArray<ValueType>& targetArray, const LAMAArray<ValueType>& sourceArray ) const;
00100 
00114     template<typename ValueType>
00115     void redistributeN ( LAMAArray<ValueType>& targetArray, 
00116                          const LAMAArray<ValueType>& sourceArray,
00117                          IndexType n ) const;
00118 
00121     template<typename ValueType>
00122     void redistributeV ( LAMAArray<ValueType>& targetArray,
00123                          const LAMAArray<IndexType>& targetOffsets,
00124                          const LAMAArray<ValueType>& sourceArray,
00125                          const LAMAArray<IndexType>& sourceOffsets ) const;
00126 
00127     template<typename ValueType>
00128     static void gather ( LAMAArray<ValueType>& targetArray,
00129                   const LAMAArray<ValueType>& sourceArray, const LAMAArray<IndexType>& sourceIndexes )
00130     {
00131         HostWriteAccess<ValueType> target( targetArray );
00132         HostReadAccess<ValueType> source( sourceArray );
00133         HostReadAccess<IndexType> indexes( sourceIndexes );
00134  
00135         for ( IndexType i = 0; i < indexes.size(); i++)
00136         {
00137              LAMA_LOG_DEBUG( logger, "target[" << i << "] = source[" << indexes[i] 
00138                         << "] = " << source[indexes[i]] );
00139 
00140              target[i] = source[ indexes[i] ];
00141         }
00142     }
00143 
00144     template<typename ValueType>
00145     static void gatherN ( LAMAArray<ValueType>& targetArray,
00146                           const LAMAArray<ValueType>& sourceArray, 
00147                           const LAMAArray<IndexType>& sourceIndexes,
00148                           const IndexType n )
00149     {
00150         HostWriteAccess<ValueType> target( targetArray );
00151         HostReadAccess<ValueType> source( sourceArray );
00152         HostReadAccess<IndexType> indexes( sourceIndexes );
00153  
00154         #pragma omp parallel for
00155         for ( IndexType i = 0; i < indexes.size(); i++)
00156         {
00157             LAMA_LOG_DEBUG( logger, "targetN[" << i << "] = sourceN[" << indexes[i] 
00158                         << "] = " << source[indexes[i] * n] << " ..." );
00159 
00160             for ( IndexType j = 0; j < n; j++)
00161             {
00162                 target[i * n + j] = source[ indexes[i] * n + j];
00163             }
00164         }
00165     }
00166 
00167     template<typename ValueType>
00168     static void gatherV ( LAMAArray<ValueType>& targetArray,
00169                           const LAMAArray<ValueType>& sourceArray, 
00170                           const LAMAArray<IndexType>& sourceOffsets,
00171                           const LAMAArray<IndexType>& sourceIndexes );
00172  
00173     template<typename ValueType>
00174     static void scatter ( LAMAArray<ValueType>& targetArray,
00175                    const LAMAArray<IndexType>& targetIndexes, const LAMAArray<ValueType>& sourceArray )
00176     {
00177         HostWriteAccess<ValueType> target( targetArray );
00178         HostReadAccess<IndexType> indexes( targetIndexes );
00179         HostReadAccess<ValueType> source( sourceArray );
00180  
00181         for ( IndexType i = 0; i < indexes.size(); i++)
00182         {
00183              LAMA_LOG_DEBUG( logger, "target[" << indexes[i] << "] = source[" << i
00184                         << "] = " << source[i] );
00185 
00186              target[ indexes[i] ] = source[ i ];
00187         }
00188     }
00189 
00190     template<typename ValueType>
00191     static void scatterN ( LAMAArray<ValueType>& targetArray,
00192                            const LAMAArray<IndexType>& targetIndexes, 
00193                            const LAMAArray<ValueType>& sourceArray,
00194                            const IndexType n )
00195     {
00196         HostWriteAccess<ValueType> target( targetArray );
00197         HostReadAccess<IndexType> indexes( targetIndexes );
00198         HostReadAccess<ValueType> source( sourceArray );
00199  
00200         #pragma omp parallel for
00201         for ( IndexType i = 0; i < indexes.size(); i++)
00202         {
00203             LAMA_LOG_DEBUG( logger, "targetN[" << indexes[i] << "] = sourceN[" << i
00204                        << "] = " << source[i * n] << " ..." );
00205 
00206             for ( IndexType j = 0; j < n; j++)
00207             {
00208                 target[ indexes[i] * n + j ] = source[ i * n + j];
00209             }
00210         }
00211     }
00212 
00213     template<typename ValueType>
00214     static void scatterV ( LAMAArray<ValueType>& targetArray,
00215                            const LAMAArray<IndexType>& targetOffsets,
00216                            const LAMAArray<IndexType>& targetIndexes,
00217                            const LAMAArray<ValueType>& sourceArray );
00218  
00219     template<typename ValueType>
00220     static void copy ( LAMAArray<ValueType>& targetArray, const LAMAArray<IndexType>& targetIndexes, 
00221                        const LAMAArray<ValueType>& sourceArray, const LAMAArray<IndexType>& sourceIndexes )
00222     {
00223         HostWriteAccess<ValueType> target( targetArray );
00224         HostReadAccess<ValueType> source( sourceArray );
00225         HostReadAccess<IndexType> tindexes( targetIndexes );
00226         HostReadAccess<IndexType> sindexes( sourceIndexes );
00227 
00228         LAMA_ASSERT_ERROR( tindexes.size() == sindexes.size(), "index size mismatch" );
00229 
00230         for ( IndexType i = 0; i < tindexes.size(); i++)
00231         {
00232              LAMA_LOG_DEBUG( logger, "target[" << tindexes[i] << "] = source[" << sindexes[i] 
00233                         << "] = " << source[ sindexes[i] ] );
00234 
00235              target[ tindexes[i] ] = source[ sindexes[i] ];
00236         }
00237     }
00238 
00239     template<typename ValueType>
00240     static void copyN ( LAMAArray<ValueType>& targetArray, 
00241                         const LAMAArray<IndexType>& targetIndexes, 
00242                         const LAMAArray<ValueType>& sourceArray, 
00243                         const LAMAArray<IndexType>& sourceIndexes,
00244                         IndexType n )
00245     {
00246         HostWriteAccess<ValueType> target( targetArray );
00247         HostReadAccess<ValueType> source( sourceArray );
00248         HostReadAccess<IndexType> tindexes( targetIndexes );
00249         HostReadAccess<IndexType> sindexes( sourceIndexes );
00250 
00251         LAMA_ASSERT_ERROR( tindexes.size() == sindexes.size(), "index size mismatch" );
00252 
00253         #pragma omp parallel for
00254         for ( IndexType i = 0; i < tindexes.size(); i++)
00255         {
00256              LAMA_LOG_DEBUG( logger, "targetN[" << tindexes[i] << "] = sourceN[" << sindexes[i] 
00257                         << "] = " << source[ sindexes[i] * n ] << " ..." );
00258 
00259             for ( IndexType j = 0; j < n; j++ )
00260             {
00261                 target[ tindexes[i] * n + j ] = source[ sindexes[i] * n + j ];
00262             }
00263         }
00264     }
00265 
00266     template<typename ValueType>
00267     static void copyV ( LAMAArray<ValueType>& targetArray,
00268                         const LAMAArray<IndexType>& targetOffsets,
00269                         const LAMAArray<IndexType>& targetIndexes,
00270                         const LAMAArray<ValueType>& sourceArray, 
00271                         const LAMAArray<IndexType>& sourceOffsets,
00272                         const LAMAArray<IndexType>& sourceIndexes );
00273 
00274     IndexType getHaloSourceSize() const { return mHaloSourceIndexes.size(); }
00275     IndexType getHaloTargetSize() const { return mHaloTargetIndexes.size(); }
00276 
00277     template<typename ValueType>
00278     void exchangeHalo ( LAMAArray<ValueType>& targetHalo, const LAMAArray<ValueType>& sourceHalo ) const;
00279 
00280     template<typename ValueType>
00281     void exchangeHaloN ( LAMAArray<ValueType>& targetHalo, 
00282                          const LAMAArray<ValueType>& sourceHalo,
00283                          const IndexType n) const;
00284 
00285     void buildVPlans( const IndexType haloSourceSizes[], const IndexType haloTargetSizes[] ) const;
00286 
00291     void buildRowPlans( const LAMAArray<IndexType>& targetSizes, const LAMAArray<IndexType>& sourceSizes ) const;
00292 
00293     IndexType getVHaloSourceSize() const { return mProvidesPlan->totalQuantity(); }
00294     IndexType getVHaloTargetSize() const { return mRequiredPlan->totalQuantity(); }
00295 
00296     template<typename ValueType>
00297     void exchangeVHalo ( LAMAArray<ValueType>& targetHalo, const LAMAArray<ValueType>& sourceHalo ) const;
00298 
00299     const LAMAArray<IndexType>& getLocalSourceIndexes()const  { return mLocalSourceIndexes; };
00300     const LAMAArray<IndexType>& getLocalTargetIndexes()const  { return mLocalTargetIndexes; };
00301     const LAMAArray<IndexType>& getHaloSourceIndexes() const { return mHaloSourceIndexes; };
00302     const LAMAArray<IndexType>& getHaloTargetIndexes() const { return mHaloTargetIndexes; };
00303 
00304 private:
00305 
00308     virtual void writeAt( std::ostream& stream ) const;
00309 
00310     DistributionPtr mSourceDistribution;
00311     DistributionPtr mTargetDistribution;
00312 
00313     IndexType mSourceSize; // = mSourceDistribution->getLocalSize() 
00314     IndexType mTargetSize; // = mTargetDistribution->getLocalSize() 
00315 
00316     LAMAArray<IndexType> mLocalSourceIndexes;
00317     LAMAArray<IndexType> mLocalTargetIndexes;
00318 
00319     LAMAArray<IndexType> mHaloSourceIndexes;
00320     LAMAArray<IndexType> mHaloTargetIndexes;
00321 
00322     IndexType mNumLocalValues; // common number of local values
00323 
00324     Halo mHalo;  // Halo structure for exchanging non-local values
00325 
00326     mutable std::auto_ptr<CommunicationPlan> mProvidesPlan;
00327     mutable std::auto_ptr<CommunicationPlan> mRequiredPlan;
00328 
00329     LAMA_LOG_DECL_STATIC_LOGGER(logger);
00330 };
00331 
00332 /* ------------------------------------------------------------------------------- */
00333 
00334 template<typename ValueType>
00335 void Redistributor::redistribute ( LAMAArray<ValueType>& targetArray, 
00336                                    const LAMAArray<ValueType>& sourceArray ) const
00337 {
00338     {
00339         // make sure that target array has sufficient memory
00340 
00341         HostWriteOnlyAccess<ValueType> target( targetArray, mTargetSize );
00342     }
00343 
00344     // allocate memory for source (provides) and target (required) halo
00345 
00346     LAMAArray<ValueType> sourceHalo( getHaloSourceSize() );
00347     LAMAArray<ValueType> targetHalo( getHaloTargetSize() );
00348 
00349     LAMA_LOG_DEBUG( logger, "gather: sourceHalo " << mHaloSourceIndexes.size() << " values" );
00350 
00351     gather( sourceHalo, sourceArray, mHaloSourceIndexes );
00352 
00353     LAMA_LOG_DEBUG( logger, "copy: source -> target " << mLocalTargetIndexes.size() << " values" );
00354 
00355     copy( targetArray, mLocalTargetIndexes, sourceArray, mLocalSourceIndexes );
00356 
00357     exchangeHalo( targetHalo, sourceHalo );
00358 
00359     LAMA_LOG_DEBUG( logger, "scatter: targetHalo " << mHaloTargetIndexes.size() << " values" );
00360 
00361     scatter( targetArray, mHaloTargetIndexes, targetHalo );
00362 }
00363 
00364 /* ------------------------------------------------------------------------------- */
00365 
00366 template<typename ValueType>
00367 void Redistributor::redistributeN ( LAMAArray<ValueType>& targetArray,
00368                      const LAMAArray<ValueType>& sourceArray,
00369                      IndexType n ) const
00370 {
00371     {
00372         // make sure that target array has sufficient memory
00373 
00374         HostWriteOnlyAccess<ValueType> target( targetArray, mTargetSize * n );
00375     }
00376 
00377     // allocate memory for source (provides) and target (required) halo
00378 
00379     LAMAArray<ValueType> sourceHalo( n * getHaloSourceSize() );
00380     LAMAArray<ValueType> targetHalo( n * getHaloTargetSize() );
00381 
00382     LAMA_LOG_DEBUG( logger, "gather: sourceHalo " << mHaloSourceIndexes.size()
00383                        << " * " << n << " values" );
00384 
00385     gatherN( sourceHalo, sourceArray, mHaloSourceIndexes, n );
00386 
00387     LAMA_LOG_DEBUG( logger, "copy: source -> target " << mLocalTargetIndexes.size()
00388                        << " * " << n << " values" );
00389 
00390     copyN( targetArray, mLocalTargetIndexes, sourceArray, mLocalSourceIndexes, n );
00391 
00392     exchangeHaloN( targetHalo, sourceHalo, n );
00393 
00394     LAMA_LOG_DEBUG( logger, "scatter: targetHalo " << mHaloTargetIndexes.size()
00395                        << " * " << n << " values" );
00396 
00397     scatterN( targetArray, mHaloTargetIndexes, targetHalo, n );
00398 
00399 }
00400 
00401 /* ------------------------------------------------------------------------------- */
00402 
00403 template<typename ValueType>
00404 void Redistributor::redistributeV ( LAMAArray<ValueType>& targetArray,
00405                                     const LAMAArray<IndexType>& targetOffsets,
00406                                     const LAMAArray<ValueType>& sourceArray,
00407                                     const LAMAArray<IndexType>& sourceOffsets ) const
00408 {
00409     // allocate memory for source (provides) and target (required) halo
00410 
00411     LAMAArray<ValueType> sourceHalo( getVHaloSourceSize() );
00412     LAMAArray<ValueType> targetHalo( getVHaloTargetSize() );
00413 
00414     gatherV( sourceHalo, sourceArray, sourceOffsets, getHaloSourceIndexes() );
00415 
00416     copyV( targetArray, targetOffsets, mLocalTargetIndexes,
00417            sourceArray, sourceOffsets, mLocalSourceIndexes );
00418 
00419     exchangeVHalo( targetHalo, sourceHalo );
00420 
00421     scatterV( targetArray, targetOffsets, mHaloTargetIndexes, targetHalo );
00422 }
00423 
00424 /* ------------------------------------------------------------------------------- */
00425 
00426 template<typename ValueType>
00427 void Redistributor::gatherV ( LAMAArray<ValueType>& targetArray,
00428                               const LAMAArray<ValueType>& sourceArray, 
00429                               const LAMAArray<IndexType>& sourceOffsets,
00430                               const LAMAArray<IndexType>& sourceIndexes )
00431 {
00432     const IndexType n = sourceIndexes.size();
00433 
00434     HostWriteAccess<ValueType> wTargetArray( targetArray );
00435     HostReadAccess<ValueType> rSourceArray( sourceArray );
00436     HostReadAccess<IndexType> rSourceOffsets( sourceOffsets );
00437     HostReadAccess<IndexType> rSourceIndexes( sourceIndexes );
00438  
00439     // Note: we have no target offsets array
00440 
00441     IndexType targetOffset = 0;
00442 
00443     for ( IndexType ii = 0; ii < n; ii++)
00444     {
00445         IndexType i = rSourceIndexes[ii];
00446 
00447         for ( IndexType j = rSourceOffsets[i]; j < rSourceOffsets[i + 1]; ++j )
00448         {
00449             wTargetArray[targetOffset++] = rSourceArray[j];
00450         }
00451     }
00452 }
00453 
00454 /* ------------------------------------------------------------------------------- */
00455 
00456 template<typename ValueType>
00457 void Redistributor::scatterV ( LAMAArray<ValueType>& targetArray,
00458                               const LAMAArray<IndexType>& targetOffsets,
00459                               const LAMAArray<IndexType>& targetIndexes,
00460                               const LAMAArray<ValueType>& sourceArray )
00461 {
00462     const IndexType n = targetIndexes.size();
00463 
00464     HostWriteAccess<ValueType> wTargetArray( targetArray );
00465     HostReadAccess<IndexType> rTargetOffsets( targetOffsets );
00466     HostReadAccess<IndexType> rTargetIndexes( targetIndexes );
00467     HostReadAccess<ValueType> rSourceArray( sourceArray );
00468 
00469     // Note: we have no source offsets array, no parallelization possible
00470 
00471     IndexType sourceOffset = 0;
00472 
00473     for ( IndexType ii = 0; ii < n; ii++)
00474     {
00475         IndexType i = rTargetIndexes[ii];
00476 
00477         for ( IndexType j = rTargetOffsets[i]; j < rTargetOffsets[i + 1]; ++j )
00478         {
00479             wTargetArray[j] = rSourceArray[sourceOffset++];
00480         }
00481     }
00482 }
00483 
00484 /* ------------------------------------------------------------------------------- */
00485 
00486 template<typename ValueType>
00487 void Redistributor::copyV ( LAMAArray<ValueType>& targetArray,
00488                             const LAMAArray<IndexType>& targetOffsets,
00489                             const LAMAArray<IndexType>& targetIndexes,
00490                             const LAMAArray<ValueType>& sourceArray, 
00491                             const LAMAArray<IndexType>& sourceOffsets,
00492                             const LAMAArray<IndexType>& sourceIndexes )
00493 {
00494     LAMA_ASSERT_EQUAL_ERROR( targetIndexes.size(), sourceIndexes.size() );
00495 
00496     const IndexType n = targetIndexes.size();
00497 
00498     HostWriteAccess<ValueType> wTargetArray( targetArray );
00499     HostReadAccess<IndexType> rTargetOffsets( targetOffsets );
00500     HostReadAccess<IndexType> rTargetIndexes( targetIndexes );
00501     HostReadAccess<ValueType> rSourceArray( sourceArray );
00502     HostReadAccess<IndexType> rSourceOffsets( sourceOffsets );
00503     HostReadAccess<IndexType> rSourceIndexes( sourceIndexes );
00504 
00505     for ( IndexType ii = 0; ii < n; ii++)
00506     {   
00507         IndexType sourceI = rSourceIndexes[ii];
00508         IndexType targetI = rTargetIndexes[ii];
00509 
00510         IndexType k = rTargetOffsets[ targetI ];
00511 
00512         for ( IndexType j = rSourceOffsets[sourceI]; j < rSourceOffsets[ sourceI + 1 ]; ++j )
00513         {
00514             wTargetArray[k] = rSourceArray[j];
00515             ++k;
00516         }
00517 
00518         LAMA_ASSERT_EQUAL_DEBUG( k, rTargetOffsets[ targetI + 1 ] );
00519     }   
00520 }
00521 
00522 /* ------------------------------------------------------------------------------- */
00523 
00524 template<typename ValueType>
00525 void Redistributor::exchangeHalo ( LAMAArray<ValueType>& targetHalo, 
00526                                    const LAMAArray<ValueType>& sourceHalo ) const
00527 {
00528     const Communicator& comm = mSourceDistribution->getCommunicator();
00529 
00530     // use asynchronous communication to avoid deadlocks
00531 
00532     comm.exchangeByPlanAsync( targetHalo, mHalo.getRequiredPlan(),
00533                               sourceHalo, mHalo.getProvidesPlan() );
00534  
00535     // synchronization is done implicitly
00536 }
00537 
00538 /* ------------------------------------------------------------------------------- */
00539 
00540 template<typename ValueType>
00541 void Redistributor::exchangeHaloN ( LAMAArray<ValueType>& targetHalo, 
00542                                     const LAMAArray<ValueType>& sourceHalo,
00543                                     const IndexType n ) const
00544 {
00545     const Communicator& comm = mSourceDistribution->getCommunicator();
00546 
00547     // Communication plans are built by multiplication with n
00548 
00549     CommunicationPlan requiredN ( mHalo.getRequiredPlan(), n );
00550     CommunicationPlan providesN ( mHalo.getProvidesPlan(), n );
00551 
00552     LAMA_LOG_DEBUG( logger, "requiredN ( n = " << n << "): " << requiredN );
00553     LAMA_LOG_DEBUG( logger, "providesN ( n = " << n << "): " << providesN );
00554 
00555     // use asynchronous communication to avoid deadlocks
00556 
00557     comm.exchangeByPlan( targetHalo, requiredN,
00558                          sourceHalo, providesN );
00559  
00560     // synchronization is done implicitly at the end of this scope
00561 }
00562 
00563 /* ------------------------------------------------------------------------------- */
00564 
00565 template<typename ValueType>
00566 void Redistributor::exchangeVHalo ( LAMAArray<ValueType>& targetHalo, 
00567                                     const LAMAArray<ValueType>& sourceHalo ) const
00568 {
00569     const Communicator& comm = mSourceDistribution->getCommunicator();
00570 
00571     LAMA_ASSERT_ERROR( mRequiredPlan.get(), "There was no previous call of buildVPlan" );
00572 
00573     comm.exchangeByPlanAsync( targetHalo, *mRequiredPlan,
00574                               sourceHalo, *mProvidesPlan );
00575 }
00576 
00577 } // namespace
00578 
00579 #endif // LAMA_REDISTRIBUTOR_HPP_