Note that automatically replacing every STL algorithm with its parallel equivalent isn't a good idea, so in any case you will have to update your code yourself.
In the following example, I define a small lambda and apply it to a 1D array using std::transform.
I then call the parallel version from the PPL (shipped with Visual Studio 2012). I only have 2 cores, but it seems to lead to the expected speed-up factor of 2.
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <ctime>
#include <iostream>
#include <vector>

#include <ppl.h>
/// Small benchmark: applies the same per-element lambda to a large float
/// array, first with the sequential std::transform and then with
/// concurrency::parallel_transform, and prints the elapsed time (in seconds)
/// of each run.
///
/// @return 0 once both runs have completed and a key has been read from stdin.
int main()
{
    // Number of elements processed by each run. A typed constant replaces the
    // former `#define size`, which leaked a macro named `size` into the rest
    // of the translation unit.
    const std::size_t size = 59999999;
    const float otherScalingFactor = 0.3f;

    // std::vector owns both buffers, so they are released on every exit path
    // (including an exception thrown by the second allocation).
    std::vector<float> local_arr(size, 1.5f);
    std::vector<float> out(size);

    // Both algorithms are fed plain pointers, exactly as in the original
    // snippet, so the two timings compare the algorithms and not iterator
    // overhead.
    const float* const first = local_arr.data();
    const float* const last = first + size;
    float* const dest = out.data();

    // The scaling factor is captured by value: the lambda is invoked from
    // several threads in the parallel run and must not depend on a reference
    // to a local. The std:: overloads keep the whole expression in float.
    const auto normalize = [otherScalingFactor](float x) {
        return std::fabs(x / otherScalingFactor) * otherScalingFactor + std::exp(x) * std::cos(x);
    };

    // Sequential run. steady_clock is used instead of time(), whose one-second
    // resolution is too coarse to compare the two runs.
    const auto seqStart = std::chrono::steady_clock::now();
    std::transform(first, last, dest, normalize);
    const auto seqEnd = std::chrono::steady_clock::now();
    // std::endl flushes so the figure is visible before the next long run.
    std::cout << "end in ..." << std::chrono::duration<double>(seqEnd - seqStart).count() << std::endl;

    // Reset the output buffer before the second run.
    std::fill(out.begin(), out.end(), 0.0f);

    // Parallel run over the same input with the same lambda.
    const auto parStart = std::chrono::steady_clock::now();
    concurrency::parallel_transform(first, last, dest, normalize);
    const auto parEnd = std::chrono::steady_clock::now();
    std::cout << "end in ..." << std::chrono::duration<double>(parEnd - parStart).count() << std::endl;

    // Keep the console window open until a key is pressed.
    std::getchar();
    return 0;
}
Aucun commentaire :
Enregistrer un commentaire