6 Commits 667724addb ... bb1f51ff64

Author SHA1 Message Date
  Christoph Stelz bb1f51ff64 Enter endless loop on I/O errors 3 months ago
  Christoph Stelz 667724addb Remove all flushes 3 months ago
  Christoph Stelz 031b48f604 Add flush to all writes 3 months ago
  Christoph Stelz 71a1679d57 remove fflush 3 months ago
  Christoph Stelz e0a6756b0b Put MPI_Barrier inside assertion routine 3 months ago
  Christoph Stelz f58dc52e39 Add response file, make client stop when assertion fails 3 months ago
1 changed file with 18 additions and 53 deletions
  1. src/ipc_debug.cpp (18 additions, 53 deletions)

+ 18 - 53
src/ipc_debug.cpp

@@ -19,7 +19,6 @@
 
 bool debug_ipc_is_root = false;
 FILE *debug_ipc_file = nullptr;
-FILE *debug_ipc_response_file = nullptr;
 
 int rank, cluster_size;
 std::map<uint64_t, int> start_indices_map; // maps indices to MPI ranks
@@ -44,7 +43,6 @@ void debug_ipc_init() {
     if (rank == 0) {
         const char *root_env = std::getenv("IPC_DEBUG_ROOT");
         const char *file_env = std::getenv("IPC_DEBUG_FILE");
-        const char *response_file_env = std::getenv("IPC_DEBUG_RESPONSE_FILE");
         
         if (file_env == nullptr) {
             return;
@@ -68,44 +66,17 @@ void debug_ipc_init() {
             exit(-1);
         }
 
-        // Open response file for the back channel
-        if (response_file_env != nullptr) {
-            if (debug_ipc_is_root) {
-                debug_ipc_response_file = fopen(response_file_env, "w");
-            } else {
-                debug_ipc_response_file = fopen(response_file_env, "r");
-            }
-
-            if (debug_ipc_file == nullptr) {
-                printf("[IPCDBG] Error, could not open named pipe for responses: %s", strerror(errno));
-                exit(-1);
-            }
-
-        }
 
         buffer.resize(INITIAL_BUFFER_SIZE);
     }
 
 }
 
-/** Communicate assertion result to client and possibly hang if assertion failed
- */
-inline void handle_assertion(bool assertion_status = true) {
-    if (!debug_ipc_response_file)
-        return;
-
-    char res = assertion_status ? 1 : 0;
-    if (debug_ipc_is_root) {
-        fwrite(&res, 1, 1, debug_ipc_response_file);
-    } else {
-        fread(&res, 1, 1, debug_ipc_response_file);
-    }
-
-    if (res == 0) {
-        std::cout << "[IPCDBG] Entering endless loop, attach debugger to PID " << getpid();
-        while (1) {
-            sleep(1);
-        }
+void endless_loop() {
+    printf("Entering endless loop, attach debugger to PID %i \n", getpid());
+    fflush(stdout);
+    while (1) {
+        sleep(1);
     }
 }
 
@@ -127,22 +98,25 @@ void debug_ipc_assert_equal(T value) {
 
         if (other_value != value) {
             std::cout << "[IPCDBG] Assertion failed!"
-                      << " Root has " << value << " but client has " << other_value << std::endl;
+                      << " Root has " << value << " but client has " << other_value;
             print_backtrace();
+            std::cout << "Entering endless loop, attach debugger to PID " << getpid();
+            fflush(stdout);
+            while (1) {
+                sleep(1);
+            }
         } else {
 #ifdef TRACE
             std::cout << "[IPCDBG] Assertion passed, value = " << value << std::endl;
 #endif
         }
-        handle_assertion(other_value == value);
     } else {
         size_t written = fwrite(&value, expected_size, 1, debug_ipc_file);
 
         if (written != 1) {
             printf("[IPCDBG] Could not write enough bytes. Error: %s\n", strerror(errno));
-            exit(-1);
+            endless_loop();
         }
-        handle_assertion();
     }
 }
 
@@ -161,27 +135,24 @@ void debug_ipc_assert_equal_vector(std::vector<T> value) {
 
         if (read != array_byte_length) {
             printf("[IPCDBG] Could not read enough bytes. Error: %s\n", strerror(errno));
-            exit(-1);
+            endless_loop();
         }
 
         assert(reinterpret_cast<uint64_t>(buffer.data()) % 8 == 0); // Make sure the array is properly aligned
         T *local_array = value.data();
         T *other_array = reinterpret_cast<T *>(buffer.data());
 
-        bool items_equal = true;
         for (size_t i = 0; i < value.size(); ++i) {
             if (local_array[i] != other_array[i]) {
                 std::cout << "[IPCDBG] Assertion failed in vector  at index " << i 
                     << ". Root has " << local_array[i] << " but client has " << other_array[i] << std::endl;
                 print_backtrace();
-                items_equal = false;
-                break;
+                endless_loop();
             }
         }
-        handle_assertion(items_equal);
+
     } else {
         fwrite(value.data(), 1, array_byte_length, debug_ipc_file);
-        handle_assertion();
     }
 }
 
@@ -225,28 +196,23 @@ void debug_ipc_assert_equal_array(void *value, size_t size) {
 
         if (read != size) {
             printf("[IPCDBG] Could not read enough bytes. Error: %s\n", strerror(errno));
-            exit(-1);
+            endless_loop();
         }
 
-        bool items_equal = true;
         for (size_t i = 0; i < size; i++) {
             if (array[i] != other_array[i]) {
                 printf("[IPCDBG] Assertion failed in byte %lu!\n", i);
                 print_backtrace();
-                items_equal = false;
-                break;
+                endless_loop();
             }
         }
-        handle_assertion(items_equal);
     } else {
         size_t written = fwrite(value, 1, size, debug_ipc_file);
 
         if (written != size) {
             printf("[IPCDBG] Could not write enough bytes. Error: %s\n", strerror(errno));
-            exit(-1);
+            endless_loop();
         }
-
-        handle_assertion();
     }
 }
 
@@ -318,5 +284,4 @@ void debug_ipc_assert_equal_mpi_double_array(double *array, size_t array_length)
     if (rank == 0) {
         debug_ipc_assert_equal_vector(float_buffer);
     }
-    MPI_Barrier(MPI_COMM_WORLD); // make sure other ranks do not carry on until the root has also left the assertion sub-routine
 }