00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef _MERGE_SORT
00024 #define _MERGE_SORT
00025
00026
00027
00028
00029
00030 #include <memory>
00031 #include <algorithm>
00032 #include <fstream>
00033 #include <cassert>
00034
00035 using namespace std;
00036
00037
00038
00041 template<typename T>
00042 class CStreamPos{
00044 T mContent;
00045 public:
00046 CStreamPos(T inContent):mContent(inContent){
00047
00048 }
00049
00050 CStreamPos(long int inContent):mContent(inContent){
00051
00052 }
00053
00054 operator T()const{
00055 return mContent;
00056 }
00057 operator long int()const{
00058 return mContent;
00059 }
00060
00061 CStreamPos<T>& operator++ (){
00062 mContent=mContent+static_cast<T>(1);
00063 return *this;
00064 }
00065 CStreamPos<T> operator++ (int){
00066 CStreamPos<T> lReturnValue=*this;
00067 mContent=mContent+static_cast<T>(1);
00068 return lReturnValue;
00069 }
00070
00071 CStreamPos<T> operator% (int inMod)const{
00072 return mContent % inMod;
00073 }
00074 CStreamPos<T> operator/ (int inMod)const{
00075 return mContent / inMod;
00076 }
00077 bool operator< (CStreamPos<T>& inThan)const{
00078 return this->mContent < inThan.mContent;
00079 }
00080 bool operator! ()const{
00081 return !(this->mContent);
00082 }
00083 CStreamPos<T> operator+ (CStreamPos<T>& inSummand){
00084 return CStreamPos<T>(mContent + inSummand.mContent);
00085 }
00086 };
00087
00088 #if __GNUC__==2
00089 #define STREAMPOS_T long long int
00090 #else
00091 #define STREAMPOS_T CStreamPos<fstream::pos_type>
00092 #endif
00093
00094
00105 template<class T>
00106 void merge_streams(istream& in1, const STREAMPOS_T inCount1,
00107 istream& in2, const STREAMPOS_T inCount2,
00108 ostream& out,
00109 int inNumberOfPageElements=1){
00110
00111 {
00112
00113
00114
00115
00116
00117 const STREAMPOS_T lPageSize(sizeof(T)*inNumberOfPageElements);
00118
00119
00120
00121 if(!(inCount1)){
00122 for(STREAMPOS_T i=0;
00123 i<inCount2;
00124 i++){
00125 T l2;
00126 in2.read((char*)&l2,
00127 sizeof(l2));
00128 assert(in2);
00129
00130 out.write((char*)&l2,
00131 sizeof(l2));
00132 assert(out);
00133 }
00134 return;
00135 }
00136 if(!inCount2){
00137 for(STREAMPOS_T i=0;
00138 i<inCount1;
00139 i++){
00140 T l1;
00141 in1.read((char*)&l1,
00142 sizeof(l1));
00143 assert(in1);
00144 out.write((char*)&l1,
00145 sizeof(l1));
00146 assert(out);
00147 }
00148 return;
00149 }
00150
00151 STREAMPOS_T lI1(0);
00152 STREAMPOS_T lI2(0);
00153
00154
00155 T l1;
00156 in1.read((char*)&l1,
00157 sizeof(l1));
00158 assert(in1);
00159 T l2;
00160 in2.read((char*)&l2,
00161 sizeof(l2));
00162 assert(in2);
00163
00164 while((lI1<inCount1)
00165 &&(lI2<inCount2)){
00166 if(l1<l2){
00167
00168 out.write((char*)&l1,
00169 sizeof(l1));
00170 assert(out);
00171
00172
00173 lI1++;
00174 if(lI1<inCount1){
00175 in1.read((char*)&l1,
00176 sizeof(l1));
00177 assert(in1);
00178 }
00179 }else{
00180
00181 out.write((char*)&l2,
00182 sizeof(l2));
00183
00184 lI2++;
00185 if(lI2<inCount2){
00186 in2.read((char*)&l2,
00187 sizeof(l2));
00188 assert(in2);
00189 }
00190 }
00191 }
00192 while(lI1<inCount1){
00193
00194 out.write((char*)&l1,
00195 sizeof(l1));
00196
00197 lI1++;
00198 if(lI1<inCount1){
00199 in1.read((char*)&l1,
00200 sizeof(l1));
00201 assert(in1);
00202 }
00203 }
00204 while(lI2<inCount2){
00205
00206 out.write((char*)&l2,
00207 sizeof(l2));
00208 assert(out);
00209
00210 lI2++;
00211 if(lI2<inCount2){
00212 in2.read((char*)&l2,
00213 sizeof(l2));
00214 assert(in2);
00215 }
00216 }
00217 }
00218 }
00219 template<typename T>
00220 void first_level_quicksort(int inNumberOfPageElements,
00221 const char* inFileToBeSortedName,
00222 const char* inTemporaryName){
00223
00224 cout << "Starting quicksort: "
00225 << inNumberOfPageElements
00226 << " elements per page." << endl
00227 << "Sorting files " << inFileToBeSortedName << endl
00228 << "to " << inTemporaryName << endl;
00229 cout << "NOW ALLOCATING A PAGE" << inNumberOfPageElements << endl;
00230 auto_ptr<T> lPage(new T[inNumberOfPageElements]);
00231
00232 cout << "H" << flush;
00233
00234 const STREAMPOS_T lPageSize(sizeof(T)*inNumberOfPageElements);
00235
00236 cout << "I" << flush;
00237
00238 STREAMPOS_T lFileSize(0);
00239 ifstream lToBeSorted1(inFileToBeSortedName);
00240 assert(lToBeSorted1);
00241 lToBeSorted1.seekg(0,
00242 ios::end);
00243 lFileSize=lToBeSorted1.tellg();
00244 lToBeSorted1.clear();
00245 lToBeSorted1.seekg(0,
00246 ios::beg);
00247 cout << "E" << flush;
00248
00249 ofstream lTemporary(inTemporaryName);
00250 assert(lTemporary);
00251 cout << "R" << flush;
00252
00253 STREAMPOS_T lSum(0);
00254
00255 T* lBegin(lPage.get());
00256 T* lEnd(lPage.get());
00257
00258 cout << "FIRSTLEVELQUICK" << lFileSize << ";" << lSum<< endl;
00259 while((lSum<lFileSize)
00260 && lToBeSorted1){
00261
00262 cout << "." << flush;
00263
00264 int lRead(0);
00265 if(lSum+lPageSize < lFileSize){
00266 lToBeSorted1.read((char*)lPage.get(),
00267 lPageSize);
00268 lRead=lPageSize;
00269 }else{
00270 lToBeSorted1.read((char*)lPage.get(),
00271 lFileSize-lSum);
00272 lRead=lFileSize-lSum;
00273 }
00274 if(lRead){
00275 lEnd=lBegin+(lRead)/sizeof(T);
00276 sort(lBegin,lEnd);
00277 lTemporary.write((char*)lPage.get(),lRead);
00278 assert(lTemporary);
00279 }
00280 }
00281 cout << "." << endl;
00282
00283 }
00284
00295 template<class T>
00296 char* merge_sort_streams(const char* inFileToBeSortedName,
00297 const char* inTemporaryName,
00298 int inNumberOfPageElements=(1 << 20)
00299 ){
00300
00301 const char* lFileToBeSortedName(inFileToBeSortedName);
00302 const char* lTemporaryName(inTemporaryName);
00303
00304 STREAMPOS_T lFileSize(0);
00305 ifstream lToBeSorted1(inFileToBeSortedName);
00306 lToBeSorted1.seekg(0,
00307 ios::end);
00308 lFileSize=lToBeSorted1.tellg();
00309 lToBeSorted1.close();
00310
00311 ofstream lTemporary;
00312 ifstream lToBeSorted2;
00313
00314 #ifdef first_level_quick
00315 first_level_quicksort<T>(inNumberOfPageElements,
00316 inFileToBeSortedName,
00317 inTemporaryName);
00318 swap(lFileToBeSortedName,
00319 lTemporaryName);
00320 #else
00321 cout << "STARTING mit MERGESIZE1" << endl;
00322 inNumberOfPageElements=1;
00323 #endif
00324 STREAMPOS_T lCount(0);
00325 for(STREAMPOS_T iMergeSize(sizeof(T)*inNumberOfPageElements);
00326 (iMergeSize < lFileSize)
00327 || !(lCount%2)
00328
00329
00330
00331
00332 ;
00333 (iMergeSize = iMergeSize << 1),
00334 (lCount=lCount+static_cast<STREAMPOS_T>(1))){
00335
00336 cout << "MERGESORT MergeSize "
00337 << iMergeSize
00338 << endl;
00339
00340 lToBeSorted1.open(lFileToBeSortedName);
00341 lToBeSorted1.clear();
00342
00343 lToBeSorted2.open(lFileToBeSortedName);
00344 lToBeSorted2.clear();
00345
00346 lTemporary.open(lTemporaryName);
00347 lTemporary.clear();
00348
00349
00350 for(STREAMPOS_T i(0);
00351 i<lFileSize;
00352 i = i + iMergeSize*2){
00353 lToBeSorted1.seekg(i);
00354
00355 if(!lToBeSorted1){
00356 cerr << __FILE__ << ":" << __LINE__ << " lToBeSorted false, after seekg("
00357 << static_cast<long int>(i)
00358 << ")"
00359 << endl;
00360 }
00361
00362 assert(lToBeSorted1);
00363
00364 if(i+iMergeSize<lFileSize){
00365 lToBeSorted2.seekg(i+iMergeSize);
00366 assert(lToBeSorted2);
00367 }
00368
00369 STREAMPOS_T lMergeSize1=lFileSize-i;
00370 if(lFileSize < i){
00371 lMergeSize1=0;
00372 }
00373 STREAMPOS_T lMergeSize2=lFileSize-i-iMergeSize;
00374 if(lFileSize < i + iMergeSize){
00375 lMergeSize2=0;
00376 }
00377
00378
00379
00380
00381 if(lMergeSize1>iMergeSize){
00382 lMergeSize1=iMergeSize;
00383 }
00384 if(lMergeSize2>iMergeSize){
00385 lMergeSize2=iMergeSize;
00386 }
00387
00388 #if __GNUC__==2
00389 merge_streams<T>(lToBeSorted1,
00390 lMergeSize1/sizeof(T),
00391 lToBeSorted2,
00392 lMergeSize2/sizeof(T),
00393 lTemporary,
00394 inNumberOfPageElements
00395 );
00396 #else
00397 merge_streams<T>(lToBeSorted1,
00398 lMergeSize1.operator/(sizeof(T)),
00399 lToBeSorted2,
00400 lMergeSize2.operator/(sizeof(T)),
00401 lTemporary,
00402 inNumberOfPageElements
00403 );
00404 #endif
00405 }
00406
00407 lTemporary.close();
00408 lToBeSorted1.close();
00409 lToBeSorted2.close();
00410 swap(lFileToBeSortedName,
00411 lTemporaryName);
00412 cout << "endmerge" << endl;
00413 }
00414 return strdup(lFileToBeSortedName);
00415 }
00416 #endif