From 6af6de90eb8d43de5c2f947854b86cb707bed95d Mon Sep 17 00:00:00 2001 From: Ted Conbeer Date: Mon, 28 Nov 2022 17:55:44 +0000 Subject: [PATCH] fix #326: do not match unsupported ddl followed by open parens --- CHANGELOG.md | 1 + src/sqlfmt/rules/__init__.py | 5 +- .../unformatted/214_get_unique_attributes.sql | 54 +++++++++++++++++++ .../test_general_formatting.py | 1 + tests/unit_tests/test_rule.py | 1 + 5 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 tests/data/unformatted/214_get_unique_attributes.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 37bc774d..9fea527a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file. - sqlfmt now supports `alter function` and `drop function` statements ([#310](https://github.com/tconbeer/sqlfmt/issues/310), [#311](https://github.com/tconbeer/sqlfmt/issues/311)), and Snowflake's `create external function` statements ([#322](https://github.com/tconbeer/sqlfmt/issues/322)). - sqlfmt better supports numeric constants (number literals), including those using scientific notation (e.g., `1.5e-9`) and the unary `+` or `-` operators (e.g., `+3`), and is now smarter about when the `-` symbol is the unary negative or binary subtraction operator. ([#321](https://github.com/tconbeer/sqlfmt/issues/321) - thank you [@liaopeiyuan](https://github.com/liaopeiyuan)!). - fixed a bug where we added extra whitespace to the end of empty comment lines ([#319](https://github.com/tconbeer/sqlfmt/issues/319) - thank you [@eherde](https://github.com/eherde)!). +- fixed an issue where wrapping unsupported DDL in jinja would cause a parsing error ([#326](https://github.com/tconbeer/sqlfmt/issues/326) - thank you [@ETG-msimons](https://github.com/ETG-msimons)!). - fixed a bug where we could have unsafely run *black* against jinja that contained Python keywords and their safe alternatives (e.g., `return(return_())`). ## [0.13.0] - 2022-11-01 diff --git a/src/sqlfmt/rules/__init__.py b/src/sqlfmt/rules/__init__.py index eda2b07b..505c7aa0 100644 --- a/src/sqlfmt/rules/__init__.py +++ b/src/sqlfmt/rules/__init__.py @@ -266,7 +266,7 @@ # spark: "insert overwrite" without the trailing "into" # redshift/pg: "insert into" only # bigquery: bare "insert" is okay - r"insert(\s+overwrite)?(\s+into)?(?!\()", + r"insert(\s+overwrite)?(\s+into)?", r"list", r"lock", r"merge", @@ -286,7 +286,8 @@ r"update", r"validate", ) - + rf"\b({SQL_COMMENT}|{SQL_QUOTED_EXP}|[^'`\"$;])*?" + + r"(?!\()" + + rf"\b({SQL_COMMENT}|{SQL_QUOTED_EXP}|[^'`\"$;w])*?" ) + rf"{NEWLINE}*" + group(r";", r"$"), diff --git a/tests/data/unformatted/214_get_unique_attributes.sql b/tests/data/unformatted/214_get_unique_attributes.sql new file mode 100644 index 00000000..22e4103e --- /dev/null +++ b/tests/data/unformatted/214_get_unique_attributes.sql @@ -0,0 +1,54 @@ +{# Source: https://github.com/tconbeer/sqlfmt/issues/326 #} +{% macro get_unique_attributes(source_table, node_col) %} + +{% set attribute_query %} + select + distinct x.key as attributes + from {{ source_table }} x + where startswith(x.key, '@') -- attributes get parsed as keys that start with '@' + and length(x.key) > 1 -- but keys of just '@' designate the node itself + and regexp_count(x.path, '\\[') > 1 -- we don't need the attributes from the xml root node + and not startswith(x.key, '@xmlns') -- we don't need attributed data about xml namespaces +{% endset %} + +{% set results = run_query(attribute_query) %} + +{% if execute %} + {% set results_list = results.columns[0].values() %} +{% else %} + {% set results_list = [] %} +{% endif %} + +{% for attribute in results_list %} + , get({{ node_col }}, '{{ attribute }}')::varchar(256) as attribute_{{ dbt_utils.slugify(attribute) | replace("@", "") }} +{% endfor %} + +{% endmacro %} +)))))__SQLFMT_OUTPUT__((((( +{# Source: https://github.com/tconbeer/sqlfmt/issues/326 #} +{% macro get_unique_attributes(source_table, node_col) %} + +{% set attribute_query %} + select + distinct x.key as attributes + from {{ source_table }} x + where startswith(x.key, '@') -- attributes get parsed as keys that start with '@' + and length(x.key) > 1 -- but keys of just '@' designate the node itself + and regexp_count(x.path, '\\[') > 1 -- we don't need the attributes from the xml root node + and not startswith(x.key, '@xmlns') -- we don't need attributed data about xml namespaces +{% endset %} + +{% set results = run_query(attribute_query) %} + +{% if execute %} {% set results_list = results.columns[0].values() %} +{% else %} {% set results_list = [] %} +{% endif %} + +{% for attribute in results_list %} +, +get({{ node_col }}, '{{ attribute }}')::varchar( + 256 +) as attribute_{{ dbt_utils.slugify(attribute) | replace("@", "") }} +{% endfor %} + +{% endmacro %} diff --git a/tests/functional_tests/test_general_formatting.py b/tests/functional_tests/test_general_formatting.py index bc31fc2a..5374f10c 100644 --- a/tests/functional_tests/test_general_formatting.py +++ b/tests/functional_tests/test_general_formatting.py @@ -53,6 +53,7 @@ "unformatted/211_http_2019_cdn_17_20.sql", "unformatted/212_http_2019_cms_14_02.sql", "unformatted/213_gitlab_fct_sales_funnel_target.sql", + "unformatted/214_get_unique_attributes.sql", "unformatted/300_jinjafmt.sql", "unformatted/400_create_fn_and_select.sql", "unformatted/401_explain_select.sql", diff --git a/tests/unit_tests/test_rule.py b/tests/unit_tests/test_rule.py index ba1f34d8..5b2811b7 100644 --- a/tests/unit_tests/test_rule.py +++ b/tests/unit_tests/test_rule.py @@ -351,6 +351,7 @@ def test_regex_exact_match( (MAIN, "unterm_keyword", "MAINion"), (MAIN, "unterm_keyword", "delete"), (MAIN, "unsupported_ddl", "insert('abc', 1, 2, 'Z')"), + (MAIN, "unsupported_ddl", "get(foo, 'bar')"), (JINJA, "jinja_set_block_start", "{% set foo = 'baz' %}"), (GRANT, "unterm_keyword", "select"), (FUNCTION, "unterm_keyword", "secure"),