fix Can't pickle local object 'check_correctness.<locals>.unsafe_execute' #30

Open · wants to merge 1 commit into master
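For context (not part of the patch): under multiprocessing's "spawn" start method, which is the default on Windows and, since Python 3.8, on macOS, the Process target has to be pickled, and a function defined inside check_correctness cannot be. A minimal sketch of the failure, with a hypothetical function name:

    import multiprocessing

    def check_correctness_broken():
        def unsafe_execute():  # nested: defined inside another function
            pass

        # Under "spawn" the child must unpickle the target, and pickle cannot
        # serialize a local (nested) function, so p.start() raises:
        #   AttributeError: Can't pickle local object
        #   'check_correctness_broken.<locals>.unsafe_execute'
        p = multiprocessing.Process(target=unsafe_execute)
        p.start()
        p.join()

    if __name__ == "__main__":
        # Forcing "spawn" reproduces the failure on Linux as well, where the
        # default "fork" start method would otherwise mask it.
        multiprocessing.set_start_method("spawn", force=True)
        check_correctness_broken()

Moving unsafe_execute to module scope, as the diff below does, lets the child process import the target by name instead of pickling a closure.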
human_eval/execution.py (105 changes: 57 additions & 48 deletions)
@@ -9,42 +9,31 @@
 import signal
 import tempfile
 
 
-def check_correctness(problem: Dict, completion: str, timeout: float,
-                      completion_id: Optional[int] = None) -> Dict:
-    """
-    Evaluates the functional correctness of a completion by running the test
-    suite provided in the problem.
-
-    :param completion_id: an optional completion ID so we can match
-        the results later even if execution finishes asynchronously.
-    """
-
-    def unsafe_execute():
-
-        with create_tempdir():
-
-            # These system calls are needed when cleaning up tempdir.
-            import os
-            import shutil
-            rmtree = shutil.rmtree
-            rmdir = os.rmdir
-            chdir = os.chdir
-
-            # Disable functionalities that can make destructive changes to the test.
-            reliability_guard()
-
-            # Construct the check program and run it.
-            check_program = (
-                problem["prompt"] + completion + "\n" +
-                problem["test"] + "\n" +
-                f"check({problem['entry_point']})"
-            )
-
-            try:
-                exec_globals = {}
-                with swallow_io():
-                    with time_limit(timeout):
+def unsafe_execute(problem, completion, result, timeout):
+
+    with create_tempdir():
+
+        # These system calls are needed when cleaning up tempdir.
+        import os
+        import shutil
+        rmtree = shutil.rmtree
+        rmdir = os.rmdir
+        chdir = os.chdir
+
+        # Disable functionalities that can make destructive changes to the test.
+        reliability_guard()
+
+        # Construct the check program and run it.
+        check_program = (
+            problem["prompt"] + completion + "\n" +
+            problem["test"] + "\n" +
+            f"check({problem['entry_point']})"
+        )
+
+        try:
+            exec_globals = {}
+            with swallow_io():
+                with time_limit(timeout):
 # WARNING
 # This program exists to execute untrusted model-generated code. Although
 # it is highly unlikely that model-generated code will do something overtly
@@ -55,22 +44,42 @@ def unsafe_execute():
 # information on how OpenAI sandboxes its code, see the accompanying paper.
 # Once you have read this disclaimer and taken appropriate precautions,
 # uncomment the following line and proceed at your own risk:
-#                         exec(check_program, exec_globals)
-                        result.append("passed")
-            except TimeoutException:
-                result.append("timed out")
-            except BaseException as e:
-                result.append(f"failed: {e}")
-
-            # Needed for cleaning up.
-            shutil.rmtree = rmtree
-            os.rmdir = rmdir
-            os.chdir = chdir
+                    exec(check_program, exec_globals)
+                    result.append("passed")
+        except TimeoutException:
+            result.append("timed out")
+        except BaseException as e:
+            result.append(f"failed: {e}")
+
+        # Needed for cleaning up.
+        shutil.rmtree = rmtree
+        os.rmdir = rmdir
+        os.chdir = chdir
+
+
+def check_correctness(problem: Dict, completion: str, timeout: float,
+                      completion_id: Optional[int] = None) -> Dict:
+    """
+    Evaluates the functional correctness of a completion by running the test
+    suite provided in the problem.
+
+    :param completion_id: an optional completion ID so we can match
+        the results later even if execution finishes asynchronously.
+    """
 
     manager = multiprocessing.Manager()
     result = manager.list()
 
-    p = multiprocessing.Process(target=unsafe_execute)
+    # p = multiprocessing.Process(target=unsafe_execute)
+    p = multiprocessing.Process(
+        target=unsafe_execute,
+        args=(
+            problem,
+            completion,
+            result,
+            timeout
+        ),
+    )
     p.start()
     p.join(timeout=timeout + 1)
     if p.is_alive():
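Overall, the patch hoists unsafe_execute to module level and passes problem, completion, result, and timeout explicitly through Process(..., args=...). A rough, self-contained sketch of that pattern (not the real harness: the worker body is a stub and the problem data is dummy, with "has_close_elements" used only as an example entry point):

    import multiprocessing

    def unsafe_execute(problem, completion, result, timeout):
        # Module-level, so the "spawn" start method can pickle it by name.
        # Here it only records what it was asked to do; the real harness
        # builds and (optionally) executes the check program instead.
        result.append(f"would check {problem['entry_point']} with timeout={timeout}")

    if __name__ == "__main__":
        manager = multiprocessing.Manager()
        result = manager.list()  # proxy list shared between parent and child

        p = multiprocessing.Process(
            target=unsafe_execute,
            args=({"entry_point": "has_close_elements"}, "    return True\n", result, 3.0),
        )
        p.start()
        p.join(timeout=3.0 + 1)
        if p.is_alive():
            p.kill()
        print(list(result))  # e.g. ['would check has_close_elements with timeout=3.0']

The manager.list() proxy is what carries "passed" / "timed out" / "failed: ..." back from the child; a plain Python list would not be visible to the parent after the child appends to it.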