LAMA
|
00001 00034 #ifndef LAMA_REDISTRIBUTOR_HPP_ 00035 #define LAMA_REDISTRIBUTOR_HPP_ 00036 00037 // for dll_import 00038 #include <lama/config.hpp> 00039 00040 // base classes 00041 #include <lama/Printable.hpp> 00042 00043 // others 00044 #include <lama/distribution/Distribution.hpp> 00045 #include <lama/distribution/Halo.hpp> 00046 00047 #include <lama/LAMAArray.hpp> 00048 #include <lama/HostReadAccess.hpp> 00049 #include <lama/HostWriteAccess.hpp> 00050 00051 namespace lama 00052 { 00053 00061 class LAMA_DLL_IMPORTEXPORT Redistributor : public Printable 00062 { 00063 public: 00064 00073 Redistributor( DistributionPtr targetDistribution, DistributionPtr sourceDistribution ); 00074 00077 DistributionPtr getTargetDistributionPtr() const; 00078 00079 DistributionPtr getSourceDistributionPtr() const; 00080 00081 IndexType getSourceLocalSize() const { return mSourceSize; } 00082 00083 IndexType getTargetLocalSize() const { return mTargetSize; } 00084 00098 template<typename ValueType> 00099 void redistribute ( LAMAArray<ValueType>& targetArray, const LAMAArray<ValueType>& sourceArray ) const; 00100 00114 template<typename ValueType> 00115 void redistributeN ( LAMAArray<ValueType>& targetArray, 00116 const LAMAArray<ValueType>& sourceArray, 00117 IndexType n ) const; 00118 00121 template<typename ValueType> 00122 void redistributeV ( LAMAArray<ValueType>& targetArray, 00123 const LAMAArray<IndexType>& targetOffsets, 00124 const LAMAArray<ValueType>& sourceArray, 00125 const LAMAArray<IndexType>& sourceOffsets ) const; 00126 00127 template<typename ValueType> 00128 static void gather ( LAMAArray<ValueType>& targetArray, 00129 const LAMAArray<ValueType>& sourceArray, const LAMAArray<IndexType>& sourceIndexes ) 00130 { 00131 HostWriteAccess<ValueType> target( targetArray ); 00132 HostReadAccess<ValueType> source( sourceArray ); 00133 HostReadAccess<IndexType> indexes( sourceIndexes ); 00134 00135 for ( IndexType i = 0; i < indexes.size(); i++) 00136 { 00137 LAMA_LOG_DEBUG( logger, "target[" << i << "] = source[" << indexes[i] 00138 << "] = " << source[indexes[i]] ); 00139 00140 target[i] = source[ indexes[i] ]; 00141 } 00142 } 00143 00144 template<typename ValueType> 00145 static void gatherN ( LAMAArray<ValueType>& targetArray, 00146 const LAMAArray<ValueType>& sourceArray, 00147 const LAMAArray<IndexType>& sourceIndexes, 00148 const IndexType n ) 00149 { 00150 HostWriteAccess<ValueType> target( targetArray ); 00151 HostReadAccess<ValueType> source( sourceArray ); 00152 HostReadAccess<IndexType> indexes( sourceIndexes ); 00153 00154 #pragma omp parallel for 00155 for ( IndexType i = 0; i < indexes.size(); i++) 00156 { 00157 LAMA_LOG_DEBUG( logger, "targetN[" << i << "] = sourceN[" << indexes[i] 00158 << "] = " << source[indexes[i] * n] << " ..." ); 00159 00160 for ( IndexType j = 0; j < n; j++) 00161 { 00162 target[i * n + j] = source[ indexes[i] * n + j]; 00163 } 00164 } 00165 } 00166 00167 template<typename ValueType> 00168 static void gatherV ( LAMAArray<ValueType>& targetArray, 00169 const LAMAArray<ValueType>& sourceArray, 00170 const LAMAArray<IndexType>& sourceOffsets, 00171 const LAMAArray<IndexType>& sourceIndexes ); 00172 00173 template<typename ValueType> 00174 static void scatter ( LAMAArray<ValueType>& targetArray, 00175 const LAMAArray<IndexType>& targetIndexes, const LAMAArray<ValueType>& sourceArray ) 00176 { 00177 HostWriteAccess<ValueType> target( targetArray ); 00178 HostReadAccess<IndexType> indexes( targetIndexes ); 00179 HostReadAccess<ValueType> source( sourceArray ); 00180 00181 for ( IndexType i = 0; i < indexes.size(); i++) 00182 { 00183 LAMA_LOG_DEBUG( logger, "target[" << indexes[i] << "] = source[" << i 00184 << "] = " << source[i] ); 00185 00186 target[ indexes[i] ] = source[ i ]; 00187 } 00188 } 00189 00190 template<typename ValueType> 00191 static void scatterN ( LAMAArray<ValueType>& targetArray, 00192 const LAMAArray<IndexType>& targetIndexes, 00193 const LAMAArray<ValueType>& sourceArray, 00194 const IndexType n ) 00195 { 00196 HostWriteAccess<ValueType> target( targetArray ); 00197 HostReadAccess<IndexType> indexes( targetIndexes ); 00198 HostReadAccess<ValueType> source( sourceArray ); 00199 00200 #pragma omp parallel for 00201 for ( IndexType i = 0; i < indexes.size(); i++) 00202 { 00203 LAMA_LOG_DEBUG( logger, "targetN[" << indexes[i] << "] = sourceN[" << i 00204 << "] = " << source[i * n] << " ..." ); 00205 00206 for ( IndexType j = 0; j < n; j++) 00207 { 00208 target[ indexes[i] * n + j ] = source[ i * n + j]; 00209 } 00210 } 00211 } 00212 00213 template<typename ValueType> 00214 static void scatterV ( LAMAArray<ValueType>& targetArray, 00215 const LAMAArray<IndexType>& targetOffsets, 00216 const LAMAArray<IndexType>& targetIndexes, 00217 const LAMAArray<ValueType>& sourceArray ); 00218 00219 template<typename ValueType> 00220 static void copy ( LAMAArray<ValueType>& targetArray, const LAMAArray<IndexType>& targetIndexes, 00221 const LAMAArray<ValueType>& sourceArray, const LAMAArray<IndexType>& sourceIndexes ) 00222 { 00223 HostWriteAccess<ValueType> target( targetArray ); 00224 HostReadAccess<ValueType> source( sourceArray ); 00225 HostReadAccess<IndexType> tindexes( targetIndexes ); 00226 HostReadAccess<IndexType> sindexes( sourceIndexes ); 00227 00228 LAMA_ASSERT_ERROR( tindexes.size() == sindexes.size(), "index size mismatch" ); 00229 00230 for ( IndexType i = 0; i < tindexes.size(); i++) 00231 { 00232 LAMA_LOG_DEBUG( logger, "target[" << tindexes[i] << "] = source[" << sindexes[i] 00233 << "] = " << source[ sindexes[i] ] ); 00234 00235 target[ tindexes[i] ] = source[ sindexes[i] ]; 00236 } 00237 } 00238 00239 template<typename ValueType> 00240 static void copyN ( LAMAArray<ValueType>& targetArray, 00241 const LAMAArray<IndexType>& targetIndexes, 00242 const LAMAArray<ValueType>& sourceArray, 00243 const LAMAArray<IndexType>& sourceIndexes, 00244 IndexType n ) 00245 { 00246 HostWriteAccess<ValueType> target( targetArray ); 00247 HostReadAccess<ValueType> source( sourceArray ); 00248 HostReadAccess<IndexType> tindexes( targetIndexes ); 00249 HostReadAccess<IndexType> sindexes( sourceIndexes ); 00250 00251 LAMA_ASSERT_ERROR( tindexes.size() == sindexes.size(), "index size mismatch" ); 00252 00253 #pragma omp parallel for 00254 for ( IndexType i = 0; i < tindexes.size(); i++) 00255 { 00256 LAMA_LOG_DEBUG( logger, "targetN[" << tindexes[i] << "] = sourceN[" << sindexes[i] 00257 << "] = " << source[ sindexes[i] * n ] << " ..." ); 00258 00259 for ( IndexType j = 0; j < n; j++ ) 00260 { 00261 target[ tindexes[i] * n + j ] = source[ sindexes[i] * n + j ]; 00262 } 00263 } 00264 } 00265 00266 template<typename ValueType> 00267 static void copyV ( LAMAArray<ValueType>& targetArray, 00268 const LAMAArray<IndexType>& targetOffsets, 00269 const LAMAArray<IndexType>& targetIndexes, 00270 const LAMAArray<ValueType>& sourceArray, 00271 const LAMAArray<IndexType>& sourceOffsets, 00272 const LAMAArray<IndexType>& sourceIndexes ); 00273 00274 IndexType getHaloSourceSize() const { return mHaloSourceIndexes.size(); } 00275 IndexType getHaloTargetSize() const { return mHaloTargetIndexes.size(); } 00276 00277 template<typename ValueType> 00278 void exchangeHalo ( LAMAArray<ValueType>& targetHalo, const LAMAArray<ValueType>& sourceHalo ) const; 00279 00280 template<typename ValueType> 00281 void exchangeHaloN ( LAMAArray<ValueType>& targetHalo, 00282 const LAMAArray<ValueType>& sourceHalo, 00283 const IndexType n) const; 00284 00285 void buildVPlans( const IndexType haloSourceSizes[], const IndexType haloTargetSizes[] ) const; 00286 00291 void buildRowPlans( const LAMAArray<IndexType>& targetSizes, const LAMAArray<IndexType>& sourceSizes ) const; 00292 00293 IndexType getVHaloSourceSize() const { return mProvidesPlan->totalQuantity(); } 00294 IndexType getVHaloTargetSize() const { return mRequiredPlan->totalQuantity(); } 00295 00296 template<typename ValueType> 00297 void exchangeVHalo ( LAMAArray<ValueType>& targetHalo, const LAMAArray<ValueType>& sourceHalo ) const; 00298 00299 const LAMAArray<IndexType>& getLocalSourceIndexes()const { return mLocalSourceIndexes; }; 00300 const LAMAArray<IndexType>& getLocalTargetIndexes()const { return mLocalTargetIndexes; }; 00301 const LAMAArray<IndexType>& getHaloSourceIndexes() const { return mHaloSourceIndexes; }; 00302 const LAMAArray<IndexType>& getHaloTargetIndexes() const { return mHaloTargetIndexes; }; 00303 00304 private: 00305 00308 virtual void writeAt( std::ostream& stream ) const; 00309 00310 DistributionPtr mSourceDistribution; 00311 DistributionPtr mTargetDistribution; 00312 00313 IndexType mSourceSize; // = mSourceDistribution->getLocalSize() 00314 IndexType mTargetSize; // = mTargetDistribution->getLocalSize() 00315 00316 LAMAArray<IndexType> mLocalSourceIndexes; 00317 LAMAArray<IndexType> mLocalTargetIndexes; 00318 00319 LAMAArray<IndexType> mHaloSourceIndexes; 00320 LAMAArray<IndexType> mHaloTargetIndexes; 00321 00322 IndexType mNumLocalValues; // common number of local values 00323 00324 Halo mHalo; // Halo structure for exchanging non-local values 00325 00326 mutable std::auto_ptr<CommunicationPlan> mProvidesPlan; 00327 mutable std::auto_ptr<CommunicationPlan> mRequiredPlan; 00328 00329 LAMA_LOG_DECL_STATIC_LOGGER(logger); 00330 }; 00331 00332 /* ------------------------------------------------------------------------------- */ 00333 00334 template<typename ValueType> 00335 void Redistributor::redistribute ( LAMAArray<ValueType>& targetArray, 00336 const LAMAArray<ValueType>& sourceArray ) const 00337 { 00338 { 00339 // make sure that target array has sufficient memory 00340 00341 HostWriteOnlyAccess<ValueType> target( targetArray, mTargetSize ); 00342 } 00343 00344 // allocate memory for source (provides) and target (required) halo 00345 00346 LAMAArray<ValueType> sourceHalo( getHaloSourceSize() ); 00347 LAMAArray<ValueType> targetHalo( getHaloTargetSize() ); 00348 00349 LAMA_LOG_DEBUG( logger, "gather: sourceHalo " << mHaloSourceIndexes.size() << " values" ); 00350 00351 gather( sourceHalo, sourceArray, mHaloSourceIndexes ); 00352 00353 LAMA_LOG_DEBUG( logger, "copy: source -> target " << mLocalTargetIndexes.size() << " values" ); 00354 00355 copy( targetArray, mLocalTargetIndexes, sourceArray, mLocalSourceIndexes ); 00356 00357 exchangeHalo( targetHalo, sourceHalo ); 00358 00359 LAMA_LOG_DEBUG( logger, "scatter: targetHalo " << mHaloTargetIndexes.size() << " values" ); 00360 00361 scatter( targetArray, mHaloTargetIndexes, targetHalo ); 00362 } 00363 00364 /* ------------------------------------------------------------------------------- */ 00365 00366 template<typename ValueType> 00367 void Redistributor::redistributeN ( LAMAArray<ValueType>& targetArray, 00368 const LAMAArray<ValueType>& sourceArray, 00369 IndexType n ) const 00370 { 00371 { 00372 // make sure that target array has sufficient memory 00373 00374 HostWriteOnlyAccess<ValueType> target( targetArray, mTargetSize * n ); 00375 } 00376 00377 // allocate memory for source (provides) and target (required) halo 00378 00379 LAMAArray<ValueType> sourceHalo( n * getHaloSourceSize() ); 00380 LAMAArray<ValueType> targetHalo( n * getHaloTargetSize() ); 00381 00382 LAMA_LOG_DEBUG( logger, "gather: sourceHalo " << mHaloSourceIndexes.size() 00383 << " * " << n << " values" ); 00384 00385 gatherN( sourceHalo, sourceArray, mHaloSourceIndexes, n ); 00386 00387 LAMA_LOG_DEBUG( logger, "copy: source -> target " << mLocalTargetIndexes.size() 00388 << " * " << n << " values" ); 00389 00390 copyN( targetArray, mLocalTargetIndexes, sourceArray, mLocalSourceIndexes, n ); 00391 00392 exchangeHaloN( targetHalo, sourceHalo, n ); 00393 00394 LAMA_LOG_DEBUG( logger, "scatter: targetHalo " << mHaloTargetIndexes.size() 00395 << " * " << n << " values" ); 00396 00397 scatterN( targetArray, mHaloTargetIndexes, targetHalo, n ); 00398 00399 } 00400 00401 /* ------------------------------------------------------------------------------- */ 00402 00403 template<typename ValueType> 00404 void Redistributor::redistributeV ( LAMAArray<ValueType>& targetArray, 00405 const LAMAArray<IndexType>& targetOffsets, 00406 const LAMAArray<ValueType>& sourceArray, 00407 const LAMAArray<IndexType>& sourceOffsets ) const 00408 { 00409 // allocate memory for source (provides) and target (required) halo 00410 00411 LAMAArray<ValueType> sourceHalo( getVHaloSourceSize() ); 00412 LAMAArray<ValueType> targetHalo( getVHaloTargetSize() ); 00413 00414 gatherV( sourceHalo, sourceArray, sourceOffsets, getHaloSourceIndexes() ); 00415 00416 copyV( targetArray, targetOffsets, mLocalTargetIndexes, 00417 sourceArray, sourceOffsets, mLocalSourceIndexes ); 00418 00419 exchangeVHalo( targetHalo, sourceHalo ); 00420 00421 scatterV( targetArray, targetOffsets, mHaloTargetIndexes, targetHalo ); 00422 } 00423 00424 /* ------------------------------------------------------------------------------- */ 00425 00426 template<typename ValueType> 00427 void Redistributor::gatherV ( LAMAArray<ValueType>& targetArray, 00428 const LAMAArray<ValueType>& sourceArray, 00429 const LAMAArray<IndexType>& sourceOffsets, 00430 const LAMAArray<IndexType>& sourceIndexes ) 00431 { 00432 const IndexType n = sourceIndexes.size(); 00433 00434 HostWriteAccess<ValueType> wTargetArray( targetArray ); 00435 HostReadAccess<ValueType> rSourceArray( sourceArray ); 00436 HostReadAccess<IndexType> rSourceOffsets( sourceOffsets ); 00437 HostReadAccess<IndexType> rSourceIndexes( sourceIndexes ); 00438 00439 // Note: we have no target offsets array 00440 00441 IndexType targetOffset = 0; 00442 00443 for ( IndexType ii = 0; ii < n; ii++) 00444 { 00445 IndexType i = rSourceIndexes[ii]; 00446 00447 for ( IndexType j = rSourceOffsets[i]; j < rSourceOffsets[i + 1]; ++j ) 00448 { 00449 wTargetArray[targetOffset++] = rSourceArray[j]; 00450 } 00451 } 00452 } 00453 00454 /* ------------------------------------------------------------------------------- */ 00455 00456 template<typename ValueType> 00457 void Redistributor::scatterV ( LAMAArray<ValueType>& targetArray, 00458 const LAMAArray<IndexType>& targetOffsets, 00459 const LAMAArray<IndexType>& targetIndexes, 00460 const LAMAArray<ValueType>& sourceArray ) 00461 { 00462 const IndexType n = targetIndexes.size(); 00463 00464 HostWriteAccess<ValueType> wTargetArray( targetArray ); 00465 HostReadAccess<IndexType> rTargetOffsets( targetOffsets ); 00466 HostReadAccess<IndexType> rTargetIndexes( targetIndexes ); 00467 HostReadAccess<ValueType> rSourceArray( sourceArray ); 00468 00469 // Note: we have no source offsets array, no parallelization possible 00470 00471 IndexType sourceOffset = 0; 00472 00473 for ( IndexType ii = 0; ii < n; ii++) 00474 { 00475 IndexType i = rTargetIndexes[ii]; 00476 00477 for ( IndexType j = rTargetOffsets[i]; j < rTargetOffsets[i + 1]; ++j ) 00478 { 00479 wTargetArray[j] = rSourceArray[sourceOffset++]; 00480 } 00481 } 00482 } 00483 00484 /* ------------------------------------------------------------------------------- */ 00485 00486 template<typename ValueType> 00487 void Redistributor::copyV ( LAMAArray<ValueType>& targetArray, 00488 const LAMAArray<IndexType>& targetOffsets, 00489 const LAMAArray<IndexType>& targetIndexes, 00490 const LAMAArray<ValueType>& sourceArray, 00491 const LAMAArray<IndexType>& sourceOffsets, 00492 const LAMAArray<IndexType>& sourceIndexes ) 00493 { 00494 LAMA_ASSERT_EQUAL_ERROR( targetIndexes.size(), sourceIndexes.size() ); 00495 00496 const IndexType n = targetIndexes.size(); 00497 00498 HostWriteAccess<ValueType> wTargetArray( targetArray ); 00499 HostReadAccess<IndexType> rTargetOffsets( targetOffsets ); 00500 HostReadAccess<IndexType> rTargetIndexes( targetIndexes ); 00501 HostReadAccess<ValueType> rSourceArray( sourceArray ); 00502 HostReadAccess<IndexType> rSourceOffsets( sourceOffsets ); 00503 HostReadAccess<IndexType> rSourceIndexes( sourceIndexes ); 00504 00505 for ( IndexType ii = 0; ii < n; ii++) 00506 { 00507 IndexType sourceI = rSourceIndexes[ii]; 00508 IndexType targetI = rTargetIndexes[ii]; 00509 00510 IndexType k = rTargetOffsets[ targetI ]; 00511 00512 for ( IndexType j = rSourceOffsets[sourceI]; j < rSourceOffsets[ sourceI + 1 ]; ++j ) 00513 { 00514 wTargetArray[k] = rSourceArray[j]; 00515 ++k; 00516 } 00517 00518 LAMA_ASSERT_EQUAL_DEBUG( k, rTargetOffsets[ targetI + 1 ] ); 00519 } 00520 } 00521 00522 /* ------------------------------------------------------------------------------- */ 00523 00524 template<typename ValueType> 00525 void Redistributor::exchangeHalo ( LAMAArray<ValueType>& targetHalo, 00526 const LAMAArray<ValueType>& sourceHalo ) const 00527 { 00528 const Communicator& comm = mSourceDistribution->getCommunicator(); 00529 00530 // use asynchronous communication to avoid deadlocks 00531 00532 comm.exchangeByPlanAsync( targetHalo, mHalo.getRequiredPlan(), 00533 sourceHalo, mHalo.getProvidesPlan() ); 00534 00535 // synchronization is done implicitly 00536 } 00537 00538 /* ------------------------------------------------------------------------------- */ 00539 00540 template<typename ValueType> 00541 void Redistributor::exchangeHaloN ( LAMAArray<ValueType>& targetHalo, 00542 const LAMAArray<ValueType>& sourceHalo, 00543 const IndexType n ) const 00544 { 00545 const Communicator& comm = mSourceDistribution->getCommunicator(); 00546 00547 // Communication plans are built by multiplication with n 00548 00549 CommunicationPlan requiredN ( mHalo.getRequiredPlan(), n ); 00550 CommunicationPlan providesN ( mHalo.getProvidesPlan(), n ); 00551 00552 LAMA_LOG_DEBUG( logger, "requiredN ( n = " << n << "): " << requiredN ); 00553 LAMA_LOG_DEBUG( logger, "providesN ( n = " << n << "): " << providesN ); 00554 00555 // use asynchronous communication to avoid deadlocks 00556 00557 comm.exchangeByPlan( targetHalo, requiredN, 00558 sourceHalo, providesN ); 00559 00560 // synchronization is done implicitly at the end of this scope 00561 } 00562 00563 /* ------------------------------------------------------------------------------- */ 00564 00565 template<typename ValueType> 00566 void Redistributor::exchangeVHalo ( LAMAArray<ValueType>& targetHalo, 00567 const LAMAArray<ValueType>& sourceHalo ) const 00568 { 00569 const Communicator& comm = mSourceDistribution->getCommunicator(); 00570 00571 LAMA_ASSERT_ERROR( mRequiredPlan.get(), "There was no previous call of buildVPlan" ); 00572 00573 comm.exchangeByPlanAsync( targetHalo, *mRequiredPlan, 00574 sourceHalo, *mProvidesPlan ); 00575 } 00576 00577 } // namespace 00578 00579 #endif // LAMA_REDISTRIBUTOR_HPP_