#include #include #include #include #include #include #include #include #include using namespace std; void readData(vector& lines, const string& filename) { ifstream file(filename); if (!file.is_open()) { cerr << "Failed to open file: " << filename << endl; return; } string header; getline(file, header); // Read and discard the header string line; while (getline(file, line)) { if (!line.empty()) { lines.push_back(line); } } } void readIntegerData(vector& numbers, const string& filename) { ifstream file(filename); if (!file.is_open()) { cerr << "Failed to open file: " << filename << endl; return; } string header; getline(file, header); // Read and discard the header string line; while (getline(file, line)) { stringstream ss(line); int number; while (ss >> number) { numbers.push_back(number); } } } void sortChunk(vector& lines, int start, int end) { sort(lines.begin() + start, lines.begin() + end); } void sortStringBuckets(vector& lines, int numThreads) { int n = lines.size(); int chunkSize = n / numThreads; // Calculate chunk size vector threads; // To hold all threads for (int i = 0; i < numThreads; i++) { int start = i * chunkSize; int end = (i == numThreads - 1) ? n : (start + chunkSize); // Handle last chunk threads.emplace_back(sortChunk, ref(lines), start, end); // Create thread to sort the chunk } for (auto& thread : threads) { thread.join(); // Wait for all threads to finish } } void sortIntegerBuckets(vector& numbers, int numThreads) { int n = numbers.size(); int chunkSize = n / numThreads; vector threads; for (int i = 0; i < numThreads; i++) { int start = i * chunkSize; int end = (i == numThreads - 1) ? n : (start + chunkSize); threads.push_back(thread([&numbers, start, end]() { sort(numbers.begin() + start, numbers.begin() + end); })); } for (auto& t : threads) { t.join(); } } void iterativeParallelMerge(vector& lines) { int n = lines.size(); vector temp(n); for (int size = 1; size < n; size *= 2) { #pragma omp parallel for schedule(static, 1) for (int i = 0; i < n - size; i += 2 * size) { int left = i; int mid = min(i + size - 1, n - 1); int right = min(i + 2 * size - 1, n - 1); if (size > 256) { inplace_merge(lines.begin() + left, lines.begin() + mid + 1, lines.begin() + right + 1); } else { int k = left, j = mid + 1; for (int l = left; l <= right; ++l) { if (k > mid) temp[l] = move(lines[j++]); else if (j > right) temp[l] = move(lines[k++]); else if (lines[k] < lines[j]) temp[l] = move(lines[k++]); else temp[l] = move(lines[j++]); } move(temp.begin() + left, temp.begin() + right + 1, lines.begin() + left); } } } } void mergeIntegers(vector& arr, int left, int mid, int right) { vector temp(right - left + 1); int i = left, j = mid + 1, k = 0; while (i <= mid && j <= right) { if (arr[i] <= arr[j]) { temp[k++] = arr[i++]; } else { temp[k++] = arr[j++]; } } while (i <= mid) { temp[k++] = arr[i++]; } while (j <= right) { temp[k++] = arr[j++]; } for (i = left, k = 0; i <= right; ++i, ++k) { arr[i] = temp[k]; } } void iterativeParallelMergeInteger(vector& numbers) { int n = numbers.size(); const int inplaceMergeThreshold = 1024; for (int size = 1; size < n; size *= 2) { #pragma omp parallel for schedule(static, 1) for (int i = 0; i < n - size; i += 2 * size) { int left = i; int mid = std::min(i + size - 1, n - 1); int right = std::min(i + 2 * size - 1, n - 1); if (size > inplaceMergeThreshold) { std::inplace_merge(numbers.begin() + left, numbers.begin() + mid + 1, numbers.begin() + right + 1); } else { mergeIntegers(numbers, left, mid, right); } } } } template void printLines(const vector& data, const string& label, int count = 5) { cout << label << endl; int n = data.size(); for (int i = 0; i < min(count, n); i++) { cout << data[i] << endl; } if (n > count) { cout << "... (omitting middle lines) ..." << endl; for (int i = n - min(count, n); i < n; i++) { cout << data[i] << endl; } } cout << endl; } int main(int argc, char **argv) { if (argc < 2) { cerr << "Usage: " << argv[0] << " " << endl; return 1; } string filename = argv[1]; // Determining sort type based on some condition or user input int numThreads = 8; int sort_type = 1; // Assume this is decided beforehand vector lines; vector numbers; if (sort_type == 1) { cout << "reading data.." << endl; readData(lines, filename); printLines(lines, "First 5 and last 5 lines before sort:"); cout << "sorting data as strings.." << endl; auto start_t1 = chrono::high_resolution_clock::now(); sortStringBuckets(lines, numThreads); iterativeParallelMerge(lines); auto end_t1 = std::chrono::high_resolution_clock::now(); chrono::duration sort_duration = end_t1 - start_t1; printLines(lines, "First 5 and last 5 lines after sort:"); cout << "\nsorting time: " << sort_duration.count() << " ms\n"; } else { cout << "reading data.." << endl; readIntegerData(numbers, filename); printLines(lines, "First 5 and last 5 lines before sort:"); cout << "sorting data as ints.." << endl; auto start_t1 = chrono::high_resolution_clock::now(); sortIntegerBuckets(numbers, numThreads); iterativeParallelMergeInteger(numbers); auto end_t1 = std::chrono::high_resolution_clock::now(); chrono::duration sort_duration = end_t1 - start_t1; printLines(lines, "First 5 and last 5 lines after sort:"); cout << "\nsorting time: " << sort_duration.count() << " ms\n"; } return 0; }