并行归并排序Word格式.docx

资源描述

并行归并排序Word格式.docx

《并行归并排序Word格式.docx》由会员分享，可在线阅读，更多相关《并行归并排序Word格式.docx（21页珍藏版）》请在冰点文库上搜索。

并行归并排序Word格式.docx

=end）{if（array[i]<

array[j]）other[k++]=array[i++];

else

other[k++]=array[j++];

while（i<

=（begin+end）/2）

other[k++]=array[i++];

while（j<

=end）other[k++]=array[j++];

for（k=begin;

=end;

++k）array[k]=other[k];

}else

if（array[end]<

array[begin]）Swap（array[end],array[begin]）;

}

voidOutput（int*array,intn）

for（inti=0;

++i）

cout<

array[i]<

;

cout«

endl;

intmain（）

串行归并排序算法"

endl«

thenumbersare:

Output（array,N）;

MergeSort（array,0,N-1）;

thesortedresult:

inti;

cin>

return0;

1、3运行结果

Ic:

sandSeftiDLKsVAdBinistratorVBjrDomentsWisnuaJL

thenumbersare•46?

45623178262223443212thesortedresultil4122326346?

782234324S6

1、4复杂度分析

通过算法分析很容易发现串行归并排序算法的时间复杂度地推公式为：

（1）

丿if（n=1）

T（n）=n

[2T（3）+o（n）if（n>

1）

这是一个时间复杂度的递推公式，我们需要消去等号右侧的T（n）,把T（n）

写成n的函数。

其实符合一定条件的Recurrenee的展开有数学公式可以套，这里我们略去严格的数学证明，只是从直观上看一下这个递推公式的结果。

当n=1时可以设T

（1）=g，当n>

1时可以设T（n）=2丁（"

厂3，我们取C1和C2中较大

的一个设为c,把原来的公式改为：

T（n）=」

if（n=1）

n、.,八

2匕）cnif（nJ

参照主定理公式T（n）=aTf（n），可以知道当n>

1时，有以下

等式成立：

b=2

f（n）二cn

同时又有下面的等式成立：

f（n）=cn=&

（nlogb）

则有T（n）满足主定理公式的第二种情况，也即是T（n）的时间复杂度为：

T（n）=°

（nlo“lgn）=°

（nlgn）

并行归并排序算法

由串行归并排序算法可知，归并的各个数据区间都是独立的，没有依赖关系。

因此归并排序是一种很容易进行并行化的算法。

其方案是先将待排序区间划分成若干个相等的小区间，然后并行地对这些小区间进行排序，最后再通过归并的方法将所有排好序的小区间归并成一个有序系列。

由于归并时是一层层向上进行的，因此需要将区间划分成2k个小区间，这样第1轮归并时，可以将2k个小区间归并成2kJ个区间。

这样过k轮归并操作后就归并成一个有序的大区间。

这也正是并行归并排序的算法思想。

2、1算法流程图

并行归并排序算法的思想可以通过下面的流程图表示：

开始

并行归并排序算发流程图

2、2代码分析

功能函数头文件：

MergeSort.h

#defineMAX_MERGE_TURN24

#defineMIN_PARALLEL_SORT_COUNT3

#defineMIN_MERGE_COUNT2

#defineCACHE_LINE_SIZE64typedefunsignedintUINT;

omp.h>

windows.h>

cstdlib>

ctime>

usingnamespacestd;

intcompare（int*one,int*two）{

if（*one>

*two）

return1;

elseif（*two>

*one）

return-1;

elsereturn0;

void*GetCacheAlignedAddr（void*pAddr）

intm=CACHE_LINE_SIZE;

void*pRet=（void*）（（（UINT）pAddr+m-1）&

（-m））;

returnpRet;

/**串行归并函数

归并好的数据存放在输出参数ppNewDat中

@paramvoid**ppData-待归并的数据

@paramintnStart1-

@paramintnEnd1-

第个区间的开始位置（包括nStart1）

第个区间的结束位置（包括nEnd1）

@paramintnStart2-第个区间的开始位置（包括nStart2）

@paramintnEnd2-第个区间的结束位置（包括nEnd2）

@paramCOMPAREFUNCfunc比-较函数

@paramvoid**ppNewData-存放归并后的数据

@returnvoid**-返回归并后的数据指针（等于ppNewData）

int**Serial_Merge（int**ppData,intnStart1,intnEnd1,intnStart2,intnEnd2,int（*func）（int*,int*）,int**ppNewData）

inti,j,k;

i=nStart1;

j=nStart2;

k=nStart1;

for（i=nStart1;

=nEnd1;

）

for（;

=nEnd2;

j++）

if（（*func）（ppData[i],ppData[j]）<

0）

ppNewData[k]=ppData[i];

++k;

i++;

break;

ppNewData[k]=ppData[j];

++k;

//如果j已经到了尾部

if（j>

nEnd2）

i++）

if（i>

nEnd1）

for（i=nStart1;

i++）{

ppData[i]=ppNewData[i];

returnppData;

/**串行归并排序函数

@paramvoid**ppData-待排序数据@paramintnBegin-排序区间的开始位置

@paramintnEnd-排序区间的结束位置

@paramCOMPAREFUNCfunc数-据大小比较函数

@returnvoid-无

voidSerial_MergeSort（int**ppData,intnBegin,intnEnd,int（*func）（int*,int*））

if（nEnd-nBegin<

MIN_MERGE_COUNT）{

int*temp;

if（*ppData[nBegin]>

*ppData[nEnd]）

temp=ppData[nBegin];

ppData[nBegin]=ppData[nEnd];

ppData[nEnd]=temp;

return;

int**tempData=newint*[nEnd-nBegin+1];

inti;

inttmpInt=0;

for（i=0;

nEnd-nBegin+1;

i++）

tempData[i]=&

tmpInt;

intnMid=（nBegin+nEnd）>

//除以

Serial_MergeSort（ppData,nBegin,nMid,func）;

Serial_MergeSort（ppData,nMid+1,nEnd,func）;

Serial_Merge（ppData,nBegin,nMid,nMid+1,nEnd,func,tempData）;

/**并行归并快速排序函数

@paramvoid**ppData-待排序数据

@paramintnLen-待排序数据长度

@paramCOMPAREFUNCfunc数-据比较回调函数@returnvoid-无

voidParallel_MergeSort（int**ppData,intnLen,int（*func）（int*,int*））

inti,k;

intnStep;

intnLoopCount;

intnCore;

intnStart1,nEnd1;

if（nLen<

MIN_PARALLEL_SORT_COUNT）

Serial_MergeSort（ppData,0,nLen-1,func）;

return;

nCore=omp_get_num_procs（）;

intnCount=nLen/MIN_PARALLEL_SORT_COUNT;

intnTurn=MAX_MERGE_TURN;

nLoopCount=1<

nTurn;

〃nLoopCount等于的nTurn次方

while（nLoopCount>

nCount）

nLoopCount=nLoopCount>

--nTurn;

//判断nTurn是否为奇数

if（（nLoopCount>

nCore）&

（nTurn>

0x1）&

（（nTurn&

0x1）==0x1））

//把nTurn变成偶数，便于后面不拷贝数据nLoopCount=nLoopCount>

nStep=nLen/nLoopCount;

int*p=newint[nLoopCount*2+CACHE_LINE_SIZE];

int*pnPos=（int*）GetCacheAlignedAddr（p）;

//将数据分成nLoopCount个小区间，并行地对每个区间进行串行排序

#pragmaompparallelforprivate（nStart1,nEnd1）num_threads（nCore）

for（i=0;

nLoopCount;

nStart1=i*nStep;

nEnd1=nStart1+nStep-1;

if（i==nLoopCount-1）

{nEnd1=nLen-1;

Serial_MergeSort（ppData,nStart1,nEnd1,func）;

pnPos[i+i]=nStart1;

pnPos[i+i+1]=nEnd1;

//对排好序的各个相邻区间进行并行归并操作

int**pp=newint*[nLen+CACHE_LINE_SIZE];

int**ppOutData=（int**）GetCacheAlignedAddr（（int*）pp）;

int**ppMergeData=ppData;

int**ppTempOutData=ppOutData;

int**ppSwap;

nStep=2;

for（k=0;

k++）

#pragmaompparallelfornum_threads（nCore）

nLoopCount-1;

i+=2）

intnPos=i*nStep;

Serial_Merge（ppMergeData,pnPos[nPos],pnPos[nPos+1],pnPos[nPos+nStep],pnPos[nPos+nStep+1],func,ppTempOutData）;

pnPos[nPos+1]=pnPos[nPos+nStep+1];

nStep+=nStep;

ppSwap=ppMergeData;

ppMergeData=ppTempOutData;

ppTempOutData=ppSwap;

//将数据写回到ppData中，如果nTurn为偶数则下面的判断内的循环不会被

执行

if（ppMergeData==ppOutData）

#pragmaompparallelfornum_threads（nCore）for（i=0;

nLen;

ppData[i]=ppOutData[i];

delete[]p;

delete[]pp;

测试文件：

Test.cpp

#include"

MergeSort.h"

//testMergeSort

voidtestFunc（intsize）

Sleep（IOOO）;

//防止srand取同样的值

intcost;

SYSTEMTIMElpsystimeStr;

SYSTEMTIMElpsystimeEnd;

newint*[size];

newint[size];

int**ppDataForSerial=int**ppDataForParallel=int*tempArrForSerial=int*tempArrForParallel=

srand（（unsignedint）time（O））;

for（i=O;

size;

tempArrForSerial[i]=rand（）;

tempArrForParallel[i]=tempArrForSerial[i];

ppDataForSerial[i]=tempArrForSerial+i;

ppDataForParallel[i]=tempArrForParallel+i;

cout<

测试"

size<

个数字串行归并算法：

endl;

GetLocalTime（&

lpsystimeStr）;

printf（"

开始时间：

％年％月％日星

期%u%u:

%u:

%u\n：

lpsystimeStr.wYear,lpsystimeStr.wMonth,lpsystimeStr.wDay,lpsystimeStr.wDayOfWeek,lpsystimeStr.wHour,lpsystimeStr.wMinute,lpsystimeStr.wSecond,lpsystimeStr.wMilliseconds）;

Serial_MergeSort（ppDataForSerial,0,size-1,compare）;

lpsystimeEnd）;

结束时间：

%1年%i月％日星

lpsystimeEnd.wYear,lpsystimeEnd.wMonth,lpsystimeEnd.wDay,lpsystimeEnd.wDayOfWeek,lpsystimeEnd.wHour,lpsystimeEnd.wMinute,lpsystimeEnd.wSecond,lpsystimeEnd.wMilliseconds）;

cost=lpsystimeEnd.wMilliseconds-lpsystimeStr.wMilliseconds;

if（cost<

=0）

cost=cost+（lpsystimeEnd.wSecond-lpsystimeStr.wSecond）*

1000;

共耗时：

cost<

ms。

串行归并排序后前个数字：

10;

if（0==i%6）

*ppDataForSerial[i]<

;

个数字并行归并算法：

%U年%i月％日星

Parallel_MergeSort（ppDataForParallel,size,compare）;

%1年%月%日星

IIII

共耗时：

并行归并排序后前个数字：

for（i=0;

*ppDataForParallel[i]<

delete[]tempArrForSerial;

delete[]tempArrForParallel;

delete[]ppDataForSerial;

delete[]ppDataForParallel;

testFunc（500）;

testFunc（10000）;

testFunc（50000）;

testFunc（100000）;

testFunc（500000）;

testFunc（800000）;

testFunc（1000000）;

return0;

2、3运行结果

90耶109911VS&

G1995127G473158^

1411H715351155631787124889

25730261763005636583

洌试10fl呼个数窃任归并算

2012^3g!

片间:

泗12年3月即日

（7V耗日7hlt0nso

舁轩归并排序石前⑷个数字

测试50©

个数宇幷jf归并尊注旺始时㉑汕曄年m月汐日J结朿日寸间,2“2年3月貂日J共耗时・^Gna□

坪忏归井排序石前加个数字:

7710

展开阅读全文