|
@@ -19,7 +19,6 @@
|
|
|
|
|
|
bool debug_ipc_is_root = false;
|
|
|
FILE *debug_ipc_file = nullptr;
|
|
|
-FILE *debug_ipc_response_file = nullptr;
|
|
|
|
|
|
int rank, cluster_size;
|
|
|
std::map<uint64_t, int> start_indices_map; // maps indices to MPI ranks
|
|
@@ -44,7 +43,6 @@ void debug_ipc_init() {
|
|
|
if (rank == 0) {
|
|
|
const char *root_env = std::getenv("IPC_DEBUG_ROOT");
|
|
|
const char *file_env = std::getenv("IPC_DEBUG_FILE");
|
|
|
- const char *response_file_env = std::getenv("IPC_DEBUG_RESPONSE_FILE");
|
|
|
|
|
|
if (file_env == nullptr) {
|
|
|
return;
|
|
@@ -68,44 +66,17 @@ void debug_ipc_init() {
|
|
|
exit(-1);
|
|
|
}
|
|
|
|
|
|
- // Open response file for the back channel
|
|
|
- if (response_file_env != nullptr) {
|
|
|
- if (debug_ipc_is_root) {
|
|
|
- debug_ipc_response_file = fopen(response_file_env, "w");
|
|
|
- } else {
|
|
|
- debug_ipc_response_file = fopen(response_file_env, "r");
|
|
|
- }
|
|
|
-
|
|
|
- if (debug_ipc_file == nullptr) {
|
|
|
- printf("[IPCDBG] Error, could not open named pipe for responses: %s", strerror(errno));
|
|
|
- exit(-1);
|
|
|
- }
|
|
|
-
|
|
|
- }
|
|
|
|
|
|
buffer.resize(INITIAL_BUFFER_SIZE);
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
-/** Communicate assertion result to client and possibly hang if assertion failed
|
|
|
- */
|
|
|
-inline void handle_assertion(bool assertion_status = true) {
|
|
|
- if (!debug_ipc_response_file)
|
|
|
- return;
|
|
|
-
|
|
|
- char res = assertion_status ? 1 : 0;
|
|
|
- if (debug_ipc_is_root) {
|
|
|
- fwrite(&res, 1, 1, debug_ipc_response_file);
|
|
|
- } else {
|
|
|
- fread(&res, 1, 1, debug_ipc_response_file);
|
|
|
- }
|
|
|
-
|
|
|
- if (res == 0) {
|
|
|
- std::cout << "[IPCDBG] Entering endless loop, attach debugger to PID " << getpid();
|
|
|
- while (1) {
|
|
|
- sleep(1);
|
|
|
- }
|
|
|
+void endless_loop() {
|
|
|
+ printf("Entering endless loop, attach debugger to PID %i \n", getpid());
|
|
|
+ fflush(stdout);
|
|
|
+ while (1) {
|
|
|
+ sleep(1);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -127,22 +98,25 @@ void debug_ipc_assert_equal(T value) {
|
|
|
|
|
|
if (other_value != value) {
|
|
|
std::cout << "[IPCDBG] Assertion failed!"
|
|
|
- << " Root has " << value << " but client has " << other_value << std::endl;
|
|
|
+ << " Root has " << value << " but client has " << other_value;
|
|
|
print_backtrace();
|
|
|
+ std::cout << "Entering endless loop, attach debugger to PID " << getpid();
|
|
|
+ fflush(stdout);
|
|
|
+ while (1) {
|
|
|
+ sleep(1);
|
|
|
+ }
|
|
|
} else {
|
|
|
#ifdef TRACE
|
|
|
std::cout << "[IPCDBG] Assertion passed, value = " << value << std::endl;
|
|
|
#endif
|
|
|
}
|
|
|
- handle_assertion(other_value == value);
|
|
|
} else {
|
|
|
size_t written = fwrite(&value, expected_size, 1, debug_ipc_file);
|
|
|
|
|
|
if (written != 1) {
|
|
|
printf("[IPCDBG] Could not write enough bytes. Error: %s\n", strerror(errno));
|
|
|
- exit(-1);
|
|
|
+ endless_loop();
|
|
|
}
|
|
|
- handle_assertion();
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -161,27 +135,24 @@ void debug_ipc_assert_equal_vector(std::vector<T> value) {
|
|
|
|
|
|
if (read != array_byte_length) {
|
|
|
printf("[IPCDBG] Could not read enough bytes. Error: %s\n", strerror(errno));
|
|
|
- exit(-1);
|
|
|
+ endless_loop();
|
|
|
}
|
|
|
|
|
|
assert(reinterpret_cast<uint64_t>(buffer.data()) % 8 == 0); // Make sure the array is properly aligned
|
|
|
T *local_array = value.data();
|
|
|
T *other_array = reinterpret_cast<T *>(buffer.data());
|
|
|
|
|
|
- bool items_equal = true;
|
|
|
for (size_t i = 0; i < value.size(); ++i) {
|
|
|
if (local_array[i] != other_array[i]) {
|
|
|
std::cout << "[IPCDBG] Assertion failed in vector at index " << i
|
|
|
<< ". Root has " << local_array[i] << " but client has " << other_array[i] << std::endl;
|
|
|
print_backtrace();
|
|
|
- items_equal = false;
|
|
|
- break;
|
|
|
+ endless_loop();
|
|
|
}
|
|
|
}
|
|
|
- handle_assertion(items_equal);
|
|
|
+
|
|
|
} else {
|
|
|
fwrite(value.data(), 1, array_byte_length, debug_ipc_file);
|
|
|
- handle_assertion();
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -225,28 +196,23 @@ void debug_ipc_assert_equal_array(void *value, size_t size) {
|
|
|
|
|
|
if (read != size) {
|
|
|
printf("[IPCDBG] Could not read enough bytes. Error: %s\n", strerror(errno));
|
|
|
- exit(-1);
|
|
|
+ endless_loop();
|
|
|
}
|
|
|
|
|
|
- bool items_equal = true;
|
|
|
for (size_t i = 0; i < size; i++) {
|
|
|
if (array[i] != other_array[i]) {
|
|
|
printf("[IPCDBG] Assertion failed in byte %lu!\n", i);
|
|
|
print_backtrace();
|
|
|
- items_equal = false;
|
|
|
- break;
|
|
|
+ endless_loop();
|
|
|
}
|
|
|
}
|
|
|
- handle_assertion(items_equal);
|
|
|
} else {
|
|
|
size_t written = fwrite(value, 1, size, debug_ipc_file);
|
|
|
|
|
|
if (written != size) {
|
|
|
printf("[IPCDBG] Could not write enough bytes. Error: %s\n", strerror(errno));
|
|
|
- exit(-1);
|
|
|
+ endless_loop();
|
|
|
}
|
|
|
-
|
|
|
- handle_assertion();
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -318,5 +284,4 @@ void debug_ipc_assert_equal_mpi_double_array(double *array, size_t array_length)
|
|
|
if (rank == 0) {
|
|
|
debug_ipc_assert_equal_vector(float_buffer);
|
|
|
}
|
|
|
- MPI_Barrier(MPI_COMM_WORLD); // make sure other ranks do not carry on until the root has also left the assertion sub-routine
|
|
|
}
|