Divide and Conquer
A general paradigm for parallel algorithms in which the master and slaves are doing the following:
| Master | Slaves |
loop while needed
divide the data among the slaves
compute something
retrieve and combine the results
from the slaves
process the final result
|
loop while needed receive data from the master compute something send the result to the master |
Data Exchange
Communication Models

Linear communication
Each process sends the data to process number 0.
Master() // id == 0
{
for (i=1; i<no_proc; i++) {
Collect(recv_data, i);
Process_data(recv_data, my_data);
}
Process_final_result(my_data);
}
Slave()
{
Report(my_data, 0);
}
Optimization: in which the process 0 collects the data in any order. This way the other processes report the data in the order in which they finish their job.
Master() // id == 0
{
for (i=1; i<no_proc; i++) {
Collect_any(recv_data);
Process_data(recv_data, my_data);
}
Process_final_result(my_data);
}
Slave()
{
Report(my_data, 0);
}
Examples:
Computing an integral.
Divide_receive_data()
{
if (id == 0) {
input a, b;
Global_to_local(a, b);
{
}
Computation() // Function Compute_partial_integral
{
my_intg = 0;
low = id / no_proc;
dx = 1.0 / (n * no_proc);
for (i=0; i<n; i++)
my_intg += (F(low+(i+1)*dx) - F(low+i*dx)) / dx;
}
Process_data(recv_intg, my_intg)
{
my_intg += recv_intg;
}
Process_final_result(my_intg)
{
output "The integral is equal to ", my_intg;
}
Computing the Sum
In a shared memory model. Using no_threads for the number of
threads and id for the thread id, going from 0
to no_threads-1.
void *Thread(void *arg) {
int id = Get_id(), n, step, sum=0;
Global_to_local(n);
step = n/no_threads; // no_threads is global
for (int i=id*step; i<(id+1)*step; i++)
sum += a[i];
Report(sum, id);
}
// Linear Report Data
void Report(int sum, int id) {
lock(&data_mutex);
global_sum += sum;
threads_done++;
unlock(&data_mutex);
}
The data does not need to be collected in order. The master can check when the number of threads that are done is equal to no_threads to report the result.
// Hierarchical Report
void Report(int sum, int id) {
result[id] = sum; // the result array is global
int step = 1;
while (2*step <= no_threads) {
Barrier(no_threads);
if (id % (2*step) == 0) {
result[id] += result[id+step];
}
step *= 2;
}
if (id == 0)
cout << result[0];
}

Finding if a number is prime
bool Is_prime(int n)
{
for (int i=2; i<= sqrt(n); i++)
if (n%i == 0)
return false;
return true;
}
Divide the interval [2, sqrt(n)] into no_threads intervals.
Parallel Prime Number
d = ceil((sqrt(n)-2)/no_threads);
Computation()
{
my_res = true;
for (i=id*d+2; i<(id+1)*d+2 && i<= sqrt(n) && my_res; i++)
if (n%i == 0)
my_res = false;
}
Report_data(my_res)
{
lock(&mutex);
if (!my_res)
global_res= false;
unlock(&mutex);
}
Master_prime() // for id = 0
{
input n; // global
d = ceil((sqrt(n)-2)/no_threads); // global
global_res = true;
Barrier(no_threads);
int my_res=true; // local
for (i=2; i<=d+1 && my_res; i++)
if (n%i == 0)
my_res = false;
Report_data(my_res);
Barrier(no_threads);
output global_res;
}
Slave_prime(id) {
Barrier(no_threads);
int my_res=true; // locals
int lim = min((id+1)*d+1, sqrt(n));
for (i=id*d+2; i<=lim && my_res; i++)
if (n%i == 0)
my_res = false;
Report_data(my_res);
Barrier(no_threads);
}
Computing with Interruption
Determining If a Number Is Prime
Global_to_local_res(int & my_res)
{
lock(&mutex);
if (!global_res)
my_res = false;
unlock(&mutex);
}
Master_prime() // for id = 0
{
input n; // global
d = ceil((sqrt(n)-2)/no_threads);
global_res = true;
Barrier(no_threads);
int my_res=true; // local
for (i=2; i<=d+1 && my_res; i++) {
Global_to_local_res(my_res);
if (n%i == 0)
my_res = false;
}
Report_data(my_res);
Barrier(no_threads);
output my_res;
}
Slave_prime(id)
{
Barrier(no_threads);
int my_res=true; // locals
int lim = min((id+1)*d+1, sqrt(n));
for (i=id*d+2; i<=lim && my_res; i++) {
Global_to_local_res(my_res);
if (n%i == 0)
my_res = false;
}
Report_data(my_res);
Barrier(no_threads);
}