From 6af6de90eb8d43de5c2f947854b86cb707bed95d Mon Sep 17 00:00:00 2001
From: Ted Conbeer <tconbeer@users.noreply.github.com>
Date: Mon, 28 Nov 2022 17:55:44 +0000
Subject: [PATCH] fix #326: do not match unsupported ddl followed by open
 parens

---
 CHANGELOG.md                                  |  1 +
 src/sqlfmt/rules/__init__.py                  |  5 +-
 .../unformatted/214_get_unique_attributes.sql | 54 +++++++++++++++++++
 .../test_general_formatting.py                |  1 +
 tests/unit_tests/test_rule.py                 |  1 +
 5 files changed, 60 insertions(+), 2 deletions(-)
 create mode 100644 tests/data/unformatted/214_get_unique_attributes.sql

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 37bc774d..9fea527a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file.
 -   sqlfmt now supports `alter function` and `drop function` statements ([#310](https://github.com/tconbeer/sqlfmt/issues/310), [#311](https://github.com/tconbeer/sqlfmt/issues/311)), and Snowflake's `create external function` statements ([#322](https://github.com/tconbeer/sqlfmt/issues/322)).
 -   sqlfmt better supports numeric constants (number literals), including those using scientific notation (e.g., `1.5e-9`) and the unary `+` or `-` operators (e.g., `+3`), and is now smarter about when the `-` symbol is the unary negative or binary subtraction operator. ([#321](https://github.com/tconbeer/sqlfmt/issues/321) - thank you [@liaopeiyuan](https://github.com/liaopeiyuan)!).
 -   fixed a bug where we added extra whitespace to the end of empty comment lines ([#319](https://github.com/tconbeer/sqlfmt/issues/319) - thank you [@eherde](https://github.com/eherde)!).
+-   fixed an issue where wrapping unsupported DDL in jinja would cause a parsing error ([#326](https://github.com/tconbeer/sqlfmt/issues/326) - thank you [@ETG-msimons](https://github.com/ETG-msimons)!).
 -   fixed a bug where we could have unsafely run *black* against jinja that contained Python keywords and their safe alternatives (e.g., `return(return_())`).
 
 ## [0.13.0] - 2022-11-01
diff --git a/src/sqlfmt/rules/__init__.py b/src/sqlfmt/rules/__init__.py
index eda2b07b..505c7aa0 100644
--- a/src/sqlfmt/rules/__init__.py
+++ b/src/sqlfmt/rules/__init__.py
@@ -266,7 +266,7 @@
                 # spark: "insert overwrite" without the trailing "into"
                 # redshift/pg: "insert into" only
                 # bigquery: bare "insert" is okay
-                r"insert(\s+overwrite)?(\s+into)?(?!\()",
+                r"insert(\s+overwrite)?(\s+into)?",
                 r"list",
                 r"lock",
                 r"merge",
@@ -286,7 +286,8 @@
                 r"update",
                 r"validate",
             )
-            + rf"\b({SQL_COMMENT}|{SQL_QUOTED_EXP}|[^'`\"$;])*?"
+            + r"(?!\()"
+            + rf"\b({SQL_COMMENT}|{SQL_QUOTED_EXP}|[^'`\"$;])*?"
         )
         + rf"{NEWLINE}*"
         + group(r";", r"$"),
diff --git a/tests/data/unformatted/214_get_unique_attributes.sql b/tests/data/unformatted/214_get_unique_attributes.sql
new file mode 100644
index 00000000..22e4103e
--- /dev/null
+++ b/tests/data/unformatted/214_get_unique_attributes.sql
@@ -0,0 +1,54 @@
+{# Source: https://github.com/tconbeer/sqlfmt/issues/326 #}
+{% macro get_unique_attributes(source_table, node_col) %}
+
+{% set attribute_query %} 
+    select
+        distinct x.key as attributes
+    from {{ source_table }} x
+    where startswith(x.key, '@')  -- attributes get parsed as keys that start with '@'
+        and length(x.key) > 1 -- but keys of just '@' designate the node itself
+        and regexp_count(x.path, '\\[') > 1 -- we don't need the attributes from the xml root node
+        and not startswith(x.key, '@xmlns') -- we don't need attributed data about xml namespaces
+{% endset %}
+
+{% set results = run_query(attribute_query) %}
+
+{% if execute %}
+    {% set results_list = results.columns[0].values() %}
+{% else %}
+    {% set results_list = [] %}
+{% endif %}
+
+{% for attribute in results_list %}
+    , get({{ node_col }}, '{{ attribute }}')::varchar(256) as attribute_{{ dbt_utils.slugify(attribute) | replace("@", "") }}
+{% endfor %}
+
+{% endmacro %}
+)))))__SQLFMT_OUTPUT__(((((
+{# Source: https://github.com/tconbeer/sqlfmt/issues/326 #}
+{% macro get_unique_attributes(source_table, node_col) %}
+
+{% set attribute_query %} 
+    select
+        distinct x.key as attributes
+    from {{ source_table }} x
+    where startswith(x.key, '@')  -- attributes get parsed as keys that start with '@'
+        and length(x.key) > 1 -- but keys of just '@' designate the node itself
+        and regexp_count(x.path, '\\[') > 1 -- we don't need the attributes from the xml root node
+        and not startswith(x.key, '@xmlns') -- we don't need attributed data about xml namespaces
+{% endset %}
+
+{% set results = run_query(attribute_query) %}
+
+{% if execute %} {% set results_list = results.columns[0].values() %}
+{% else %} {% set results_list = [] %}
+{% endif %}
+
+{% for attribute in results_list %}
+,
+get({{ node_col }}, '{{ attribute }}')::varchar(
+    256
+) as attribute_{{ dbt_utils.slugify(attribute) | replace("@", "") }}
+{% endfor %}
+
+{% endmacro %}
diff --git a/tests/functional_tests/test_general_formatting.py b/tests/functional_tests/test_general_formatting.py
index bc31fc2a..5374f10c 100644
--- a/tests/functional_tests/test_general_formatting.py
+++ b/tests/functional_tests/test_general_formatting.py
@@ -53,6 +53,7 @@
         "unformatted/211_http_2019_cdn_17_20.sql",
         "unformatted/212_http_2019_cms_14_02.sql",
         "unformatted/213_gitlab_fct_sales_funnel_target.sql",
+        "unformatted/214_get_unique_attributes.sql",
         "unformatted/300_jinjafmt.sql",
         "unformatted/400_create_fn_and_select.sql",
         "unformatted/401_explain_select.sql",
diff --git a/tests/unit_tests/test_rule.py b/tests/unit_tests/test_rule.py
index ba1f34d8..5b2811b7 100644
--- a/tests/unit_tests/test_rule.py
+++ b/tests/unit_tests/test_rule.py
@@ -351,6 +351,7 @@ def test_regex_exact_match(
         (MAIN, "unterm_keyword", "MAINion"),
         (MAIN, "unterm_keyword", "delete"),
         (MAIN, "unsupported_ddl", "insert('abc', 1, 2, 'Z')"),
+        (MAIN, "unsupported_ddl", "get(foo, 'bar')"),
         (JINJA, "jinja_set_block_start", "{% set foo = 'baz' %}"),
         (GRANT, "unterm_keyword", "select"),
         (FUNCTION, "unterm_keyword", "secure"),