Merge branch 'main' into AIK-3829

AikidoSec · Nov 14, 2024 · 685b390 · 685b390
2 parents e34a1f7 + b13ccc0
commit 685b390
Show file tree

Hide file tree

Showing 41 changed files with 728 additions and 122 deletions.
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -65,11 +65,8 @@ jobs:
           docker compose -f docker-compose.yml -f docker-compose.benchmark.yml up --build -d
       - name: Install wrk
         run: |
-          sudo apt-get install build-essential libssl-dev git -y
-          git clone https://github.com/wg/wrk.git wrk
-          cd wrk
-          sudo make
-          sudo cp wrk /usr/local/bin
+          sudo apt-get update
+          sudo apt-get install -y wrk
       - name: Set up Python 3.9
         uses: actions/setup-python@v2
         with:

diff --git a/README.md b/README.md
@@ -18,7 +18,7 @@ Zen protects your Python apps by preventing user input containing dangerous stri
 Zen will autonomously protect your Python applications from the inside against:
 
 * 🛡️ [NoSQL injection attacks](https://www.aikido.dev/blog/web-application-security-vulnerabilities)
-* 🛡️ [SQL injection attacks]([https://www.aikido.dev/blog/web-application-security-vulnerabilities](https://owasp.org/www-community/attacks/SQL_Injection))
+* 🛡️ [SQL injection attacks](https://www.aikido.dev/blog/the-state-of-sql-injections)
 * 🛡️ [Command injection attacks](https://owasp.org/www-community/attacks/Command_Injection)
 * 🛡️ [Path traversal attacks](https://owasp.org/www-community/attacks/Path_Traversal)
 * 🛡️ [Server-side request forgery (SSRF)](./docs/ssrf.md)

diff --git a/aikido_zen/api_discovery/README.md b/aikido_zen/api_discovery/README.md
@@ -1,3 +1,2 @@
 # Feature flag
-
-This feature is currently disabled by default. Enable it by setting the environment variable `AIKIDO_FEATURE_COLLECT_API_SCHEMA` to `true`.
+This feature is now on by default.
diff --git a/aikido_zen/api_discovery/get_api_info.py b/aikido_zen/api_discovery/get_api_info.py
@@ -10,9 +10,6 @@
 def get_api_info(context):
     """Generates an apispec based on the context passed along"""
     try:
-        # Check if feature flag COLLECT_API_SCHEMA is enabled
-        if not is_feature_enabled("COLLECT_API_SCHEMA"):
-            return {}
         body_info = get_body_info(context)
         query_info = get_query_info(context)
         auth_info = get_auth_types(context)

diff --git a/aikido_zen/background_process/cloud_connection_manager/on_detected_attack.py b/aikido_zen/background_process/cloud_connection_manager/on_detected_attack.py
@@ -5,6 +5,7 @@
 from aikido_zen.helpers.logging import logger
 from aikido_zen.helpers.limit_length_metadata import limit_length_metadata
 from aikido_zen.helpers.get_ua_from_context import get_ua_from_context
+from aikido_zen.helpers.serialize_to_json import serialize_to_json
 
 
 def on_detected_attack(connection_manager, attack, context, blocked, stack):
@@ -37,7 +38,7 @@ def on_detected_attack(connection_manager, attack, context, blocked, stack):
                 "route": context.route,
             },
         }
-        logger.debug(json.dumps(payload))
+        logger.debug(serialize_to_json(payload))
         result = connection_manager.api.report(
             connection_manager.token,
             payload,

diff --git a/aikido_zen/config.py b/aikido_zen/config.py
@@ -1,3 +1,3 @@
 """Contains package versions"""
 
-PKG_VERSION = "1.0.12"
+PKG_VERSION = "1.0.13"
diff --git a/aikido_zen/context/__init__.py b/aikido_zen/context/__init__.py
@@ -10,8 +10,9 @@
 from aikido_zen.helpers.logging import logger
 from .wsgi import set_wsgi_attributes_on_context
 from .asgi import set_asgi_attributes_on_context
+from .extract_route_params import extract_route_params
 
-UINPUT_SOURCES = ["body", "cookies", "query", "headers", "xml"]
+UINPUT_SOURCES = ["body", "cookies", "query", "headers", "xml", "route_params"]
 current_context = contextvars.ContextVar("current_context", default=None)
 
 WSGI_SOURCES = ["django", "flask"]
@@ -56,6 +57,7 @@ def __init__(self, context_obj=None, body=None, req=None, source=None):
 
         # Define variables using parsed request :
         self.route = build_route_from_url(self.url)
+        self.route_params = extract_route_params(self.url)
         self.subdomains = get_subdomains_from_url(self.url)
 
         self.executed_middleware = False
@@ -79,6 +81,7 @@ def __reduce__(self):
                     "xml": self.xml,
                     "outgoing_req_redirects": self.outgoing_req_redirects,
                     "executed_middleware": self.executed_middleware,
+                    "route_params": self.route_params,
                 },
                 None,
                 None,

diff --git a/aikido_zen/context/extract_route_params.py b/aikido_zen/context/extract_route_params.py
@@ -0,0 +1,31 @@
+"""Exports extract_route_params function"""
+
+from urllib.parse import quote, unquote
+from aikido_zen.helpers.try_parse_url_path import try_parse_url_path
+from aikido_zen.helpers.build_route_from_url import replace_url_segment_with_param
+
+
+def extract_route_params(url):
+    """Returns a list of URL path pieces to treat as user input ([] if nothing stands out)"""
+    results = []
+    try:
+        path = try_parse_url_path(url)
+        segments = path.split("/")
+        for segment in segments:
+            segment = unquote(segment)  # Inspect the percent-decoded form of each segment
+            if segment.isalnum():
+                continue  # Ignore purely alphanumerical parts of the url
+            # NOTE(review): both checks below use `is not` (object identity), not `!=` - confirm intended
+            if segment is not quote(segment):
+                results.append(segment)  # This is not a standard piece of the URL
+            elif replace_url_segment_with_param(segment) is not segment:
+                results.append(segment)  # Might be a secret, a hash, ...
+
+        if len(results) > 0 or "." in unquote(path):
+            # There are already suspicious parts of the url OR
+            # urldecoded path contains dots, which is uncommon and could point to path traversal.
+            results.append(path[1:])  # Add path minus its first char (presumably the leading slash)
+
+    except Exception:
+        pass  # Best effort: on any failure, return whatever was collected so far
+    return results
diff --git a/aikido_zen/context/extract_route_params_test.py b/aikido_zen/context/extract_route_params_test.py
@@ -0,0 +1,112 @@
+import pytest
+from .extract_route_params import extract_route_params
+
+
+def test_with_urlencoded_urls():
+    url1 = "http://localhost:8080/app/shell/ls%20-la"
+    assert extract_route_params(url1) == ["ls -la", "app/shell/ls%20-la"]
+
+    url2 = "http://localhost:8080/app/shell/ls -la"
+    assert extract_route_params(url2) == ["ls -la", "app/shell/ls -la"]
+
+
+def test_uses_keys():
+    url = "http://localhost:8080/app/shell/[email protected]/017shell/127.0.0.1/"
+    assert extract_route_params(url) == [
+        "[email protected]",
+        "127.0.0.1",
+        "app/shell/[email protected]/017shell/127.0.0.1/",
+    ]
+
+
+def test_normal_urls():
+    assert extract_route_params("http://localhost:8080/a/b/abc2393027def/def") == []
+
+
+def test_with_empty_route():
+    url1 = "http://localhost:8080"
+    assert extract_route_params(url1) == []
+
+    url2 = "http://localhost:8080"  # NOTE(review): identical to url1 - was a trailing "/" variant intended?
+    assert extract_route_params(url2) == []
+
+
+def test_special_characters():
+    url1 = "http://localhost:8080/app/shell/!@#$%^&*()"  # "#" starts the fragment, so the path ends at "!@"
+    assert extract_route_params(url1) == ["!@", "app/shell/!@"]
+
+    url2 = "http://localhost:8080/app/shell/space test"
+    assert extract_route_params(url2) == ["space test", "app/shell/space test"]
+
+    url3 = "http://localhost:8080/app/shell/hello%20world"
+    assert extract_route_params(url3) == ["hello world", "app/shell/hello%20world"]
+
+
+def test_numeric_segments():
+    # Alphanum is ignored:
+    url1 = "http://localhost:8080/app/shell/12345"
+    assert extract_route_params(url1) == []
+
+    url2 = "http://localhost:8080/app/shell/67890/abc"
+    assert extract_route_params(url2) == []
+
+
+def test_mixed_segments():
+    url1 = "http://localhost:8080/app/shell/abc123/!@#"
+    assert extract_route_params(url1) == ["!@", "app/shell/abc123/!@"]
+
+    url2 = "http://localhost:8080/app/shell/abc/123/!@#"
+    assert extract_route_params(url2) == ["!@", "app/shell/abc/123/!@"]
+
+
+def test_encoded_and_unencoded():
+    url1 = "http://localhost:8080/app/shell/%E2%9C%93"
+    assert extract_route_params(url1) == ["✓", "app/shell/%E2%9C%93"]
+
+    url2 = "http://localhost:8080/app/shell/%E2%9C%93/normal"
+    assert extract_route_params(url2) == ["✓", "app/shell/%E2%9C%93/normal"]
+
+
+def test_no_params():
+    url1 = "http://localhost:8080/app/shell/"
+    assert extract_route_params(url1) == []
+
+    url2 = "http://localhost:8080/app/"
+    assert extract_route_params(url2) == []
+
+
+def test_edge_cases():
+    url1 = "http://localhost:8080/app/shell/.."
+    assert extract_route_params(url1) == ["..", "app/shell/.."]
+
+    url2 = "http://localhost:8080/app/shell/./"
+    assert extract_route_params(url2) == ["app/shell/./"]
+
+
+def test_long_urls():
+    url1 = "http://localhost:8080/app./shell/" + "a" * 1000
+    assert extract_route_params(url1) == ["app.", "app./shell/" + "a" * 1000]
+
+    url2 = "http://localhost:8080/app./shell/" + "b" * 1000 + "/c" * 1000
+    assert extract_route_params(url2) == [
+        "app.",
+        "app./shell/" + "b" * 1000 + "/c" * 1000,
+    ]
+
+
+def test_query_parameters():
+    # Test query parameters are ignored:
+    url1 = "http://localhost:8080/app/./shell/?param=value"
+    assert extract_route_params(url1) == ["app/./shell/"]
+
+    url2 = "http://localhost:8080/app/./shell/?key1=value1&key2=value2"
+    assert extract_route_params(url2) == ["app/./shell/"]
+
+
+def test_fragment_identifiers():
+    # Fragments should be ignored:
+    url1 = "http://localhost:8080/app/./shell/#section1"
+    assert extract_route_params(url1) == ["app/./shell/"]
+
+    url2 = "http://localhost:8080/app/shell/#/path/to/resource"
+    assert extract_route_params(url2) == []
diff --git a/aikido_zen/context/init_test.py b/aikido_zen/context/init_test.py
@@ -1,7 +1,15 @@
 import pytest
 import pickle
 import json
-from aikido_zen.context import Context, get_current_context
+from aikido_zen.context import Context, get_current_context, current_context
+
+
+@pytest.fixture(autouse=True)
+def run_around_tests():
+    yield
+    # Make sure to reset context after every test so it does not
+    # interfere with other tests
+    current_context.set(None)
 
 
 def test_get_current_context_no_context():
@@ -46,6 +54,7 @@ def test_wsgi_context_1():
         "xml": {},
         "outgoing_req_redirects": [],
         "executed_middleware": False,
+        "route_params": [],
     }
 
 
@@ -86,6 +95,7 @@ def test_wsgi_context_2():
         "xml": {},
         "outgoing_req_redirects": [],
         "executed_middleware": False,
+        "route_params": [],
     }
 
 

diff --git a/aikido_zen/helpers/extract_data_from_xml_body.py b/aikido_zen/helpers/extract_data_from_xml_body.py
@@ -1,18 +1,26 @@
 """Exports extract_data_from_xml_body helper function"""
 
 import aikido_zen.context as ctx
+from aikido_zen.helpers.logging import logger
 
 
 def extract_data_from_xml_body(user_input, root_element):
     """Extracts all attributes from the xml and adds them to context"""
-    context = ctx.get_current_context()
-    if not isinstance(context.body, str) or user_input != context.body:
-        return
+    try:
+        context = ctx.get_current_context()
+        if (
+            not context
+            or not isinstance(context.body, str)
+            or user_input != context.body
+        ):
+            return
 
-    extracted_xml_attrs = context.xml
-    for el in root_element:
-        for k, v in el.items():
-            if not extracted_xml_attrs.get(k):
-                extracted_xml_attrs[k] = set()
-            extracted_xml_attrs[k].add(v)
-    context.set_as_current_context()
+        extracted_xml_attrs = context.xml
+        for el in root_element:
+            for k, v in el.items():
+                if not extracted_xml_attrs.get(k):
+                    extracted_xml_attrs[k] = set()
+                extracted_xml_attrs[k].add(v)
+        context.set_as_current_context()
+    except Exception as e:
+        logger.debug("Exception occured when extracting XML: %s", e)
diff --git a/aikido_zen/helpers/extract_data_from_xml_body_test.py b/aikido_zen/helpers/extract_data_from_xml_body_test.py
@@ -14,6 +14,17 @@ def mock_context():
     return mock_ctx
 
 
+def test_does_not_crash_when_context_none(mock_context):
+    with patch("aikido_zen.context.get_current_context", return_value=None):
+        user_input = "valid_input"
+        root_element = [
+            {"attr1": "value1", "attr2": "value2"},
+            {"attr1": "value3", "attr3": "value4"},
+        ]
+
+        extract_data_from_xml_body(user_input, root_element)
+
+
 def test_extract_data_from_xml_body_valid_input(mock_context):
     with patch("aikido_zen.context.get_current_context", return_value=mock_context):
         user_input = "valid_input"

diff --git a/aikido_zen/helpers/get_argument.py b/aikido_zen/helpers/get_argument.py
@@ -0,0 +1,10 @@
+"""Exports get_argument"""
+
+
+def get_argument(args, kwargs, pos, name):
+    """Returns kwargs[name] if name is a key (even if None), else args[pos], else None"""
+    if name in kwargs:  # Keyword form takes priority over the positional form
+        return kwargs.get(name)
+    if args and len(args) > pos:  # Only index args when position pos actually exists
+        return args[pos]
+    return None
diff --git a/aikido_zen/helpers/get_argument_test.py b/aikido_zen/helpers/get_argument_test.py
@@ -0,0 +1,62 @@
+import pytest
+from .get_argument import get_argument
+
+
+def test_get_argument_with_only_kwargs():
+    """Test when only kwargs are provided."""
+    result = get_argument((), {"arg1": "value1"}, 0, "arg1")
+    assert result == "value1", f"Expected 'value1', got {result}"
+
+
+def test_get_argument_with_only_args():
+    """Test when only args are provided."""
+    result = get_argument(("value2",), {}, 0, "arg1")
+    assert result == "value2", f"Expected 'value2', got {result}"
+
+
+def test_get_argument_with_args_and_kwargs():
+    """Test when both args and kwargs are provided, with priority to kwargs."""
+    result = get_argument(("value2",), {"arg1": "value1"}, 0, "arg1")
+    assert result == "value1", f"Expected 'value1', got {result}"
+
+
+def test_get_argument_with_positional_index():
+    """Test when args are provided and a specific position is requested."""
+    result = get_argument(("value2", "value3"), {}, 1, "arg1")
+    assert result == "value3", f"Expected 'value3', got {result}"
+
+
+def test_get_argument_with_positional_index_out_of_bounds():
+    """Test when the positional index is out of bounds."""
+    result = get_argument(("value2",), {}, 1, "arg1")
+    assert result is None, f"Expected None, got {result}"
+
+
+def test_get_argument_with_none_in_kwargs():
+    """Test when the argument in kwargs is None."""
+    result = get_argument((), {"arg1": None}, 0, "arg1")
+    assert result is None, f"Expected None, got {result}"
+
+
+def test_get_argument_with_none_in_args():
+    """Test when the argument in args is None."""
+    result = get_argument((None,), {}, 0, "arg1")
+    assert result is None, f"Expected None, got {result}"
+
+
+def test_get_argument_with_empty_args_and_kwargs():
+    """Test when both args and kwargs are empty."""
+    result = get_argument((), {}, 0, "arg1")
+    assert result is None, f"Expected None, got {result}"
+
+
+def test_get_argument_with_multiple_kwargs():
+    """Test when multiple kwargs are provided."""
+    result = get_argument((), {"arg1": "value1", "arg2": "value2"}, 0, "arg1")
+    assert result == "value1", f"Expected 'value1', got {result}"
+
+
+def test_get_argument_with_positional_index_and_kwargs():
+    """Test when both args and kwargs are provided, with positional index."""
+    result = get_argument(("value2", "value3"), {"arg1": "value1"}, 0, "arg1")
+    assert result == "value1", f"Expected 'value1', got {result}"