Skip to content

Commit

Permalink
Merge pull request #831 from matouma/pending-version-change/0.2.3
Browse files Browse the repository at this point in the history
Pending version change/0.2.3
  • Loading branch information
touma-I authored Nov 26, 2024
2 parents 5a018e6 + 0587637 commit a10ef0a
Show file tree
Hide file tree
Showing 116 changed files with 219 additions and 182 deletions.
12 changes: 10 additions & 2 deletions .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ DPK_MINOR_VERSION=2
DPK_MICRO_VERSION=2
# The suffix is generally set in the main/development branch and only cleared when creating release branches.
# It can be manually incremented, for example, to allow publishing a new intermediate version wheel to pypi.
DPK_VERSION_SUFFIX=.dev2
DPK_VERSION_SUFFIX=

DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX)

Expand All @@ -39,7 +39,7 @@ DPK_LIB_KFP_SHARED=$(DPK_VERSION)
KFP_DOCKER_VERSION=$(DOCKER_IMAGE_VERSION)
KFP_DOCKER_VERSION_v2=$(DOCKER_IMAGE_VERSION)

DPK_CONNECTOR_VERSION=0.2.3.dev0
DPK_CONNECTOR_VERSION=0.2.3

################## ################## ################## ################## ################## ##################
# Begin versions that the repo depends on.
Expand All @@ -59,3 +59,11 @@ else
WORKFLOW_SUPPORT_LIB=kfp_v1_workflow_support
endif

################################################################################
# This defines the transforms' package version number as would be used
# when publishing the wheel. In general, only the micro version
# number should be advanced relative to the DPK_VERSION.
#
# If you change the version numbers, be sure to run "make set-versions" to
# update version numbers across the transforms (e.g., pyproject.toml).
TRANSFORMS_PKG_VERSION=0.2.2
2 changes: 1 addition & 1 deletion data-connector-lib/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_connector"
version = "0.2.3.dev1"
version = "0.2.3"
requires-python = ">=3.10,<3.13"
keywords = [
"data",
Expand Down
2 changes: 1 addition & 1 deletion data-processing-lib/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit"
version = "0.2.2.dev2"
version = "0.2.2"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
requires-python = ">=3.10,<3.13"
description = "Data Preparation Toolkit Library for Ray and Python"
Expand Down
4 changes: 2 additions & 2 deletions data-processing-lib/spark/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_spark"
version = "0.2.2.dev2"
version = "0.2.2"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
requires-python = ">=3.10,<3.13"
description = "Data Preparation Toolkit Library for Spark"
Expand All @@ -11,7 +11,7 @@ authors = [
{ name = "Boris Lublinsky", email = "[email protected]" },
]
dependencies = [
"data-prep-toolkit==0.2.2.dev2",
"data-prep-toolkit==0.2.2",
"pyspark>=3.5.2",
"psutil>=6.0.0",
"PyYAML>=6.0.2"
Expand Down
2 changes: 1 addition & 1 deletion kfp/kfp_ray_components/createRayClusterComponent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ inputs:

implementation:
container:
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
# command is a list of strings (command-line arguments).
# The YAML language has two syntaxes for lists and you can use either of them.
# Here we use the "flow syntax" - comma-separated strings inside square brackets.
Expand Down
2 changes: 1 addition & 1 deletion kfp/kfp_ray_components/deleteRayClusterComponent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ inputs:

implementation:
container:
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
# command is a list of strings (command-line arguments).
# The YAML language has two syntaxes for lists and you can use either of them.
# Here we use the "flow syntax" - comma-separated strings inside square brackets.
Expand Down
2 changes: 1 addition & 1 deletion kfp/kfp_ray_components/executeRayJobComponent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ inputs:

implementation:
container:
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
# command is a list of strings (command-line arguments).
# The YAML language has two syntaxes for lists and you can use either of them.
# Here we use the "flow syntax" - comma-separated strings inside square brackets.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ inputs:

implementation:
container:
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
# command is a list of strings (command-line arguments).
# The YAML language has two syntaxes for lists and you can use either of them.
# Here we use the "flow syntax" - comma-separated strings inside square brackets.
Expand Down
2 changes: 1 addition & 1 deletion kfp/kfp_ray_components/executeSubWorkflowComponent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ outputs:

implementation:
container:
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
image: "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"
# command is a list of strings (command-line arguments).
# The YAML language has two syntaxes for lists, and you can use either of them.
# Here we use the "flow syntax" - comma-separated strings inside square brackets.
Expand Down
4 changes: 2 additions & 2 deletions kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_kfp_v1"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
Expand All @@ -13,7 +13,7 @@ authors = [
]
dependencies = [
"kfp==1.8.22",
"data-prep-toolkit-kfp-shared==0.2.2.dev2",
"data-prep-toolkit-kfp-shared==0.2.2",
]

[build-system]
Expand Down
4 changes: 2 additions & 2 deletions kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_kfp_v2"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
Expand All @@ -14,7 +14,7 @@ authors = [
dependencies = [
"kfp==2.8.0",
"kfp-kubernetes==1.2.0",
"data-prep-toolkit-kfp-shared==0.2.2.dev2",
"data-prep-toolkit-kfp-shared==0.2.2",
]

[build-system]
Expand Down
4 changes: 2 additions & 2 deletions kfp/kfp_support_lib/shared_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_kfp_shared"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
Expand All @@ -14,7 +14,7 @@ authors = [
dependencies = [
"requests",
"kubernetes",
"data-prep-toolkit[ray]==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2",
]

[build-system]
Expand Down
37 changes: 37 additions & 0 deletions release-notes.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,42 @@
# Data Prep Kit Release notes

## Release 0.2.2 - 11/25/2024

### General
1. Update RAG example to use granite model
1. Updated transforms with Docling 2
1. Added single package for dpk with extra for \[spark\] and \[ray\]
1. Added single package for transforms with extra for \[all\] or \[individual-transform-name\]


### data-prep-toolkit libraries (python, ray, spark)

1. Fix metadata logging even when actors crash
1. Add multilock for ray workers downloads/cleanup
1. Multiple updates to spark runtime
1. Added support for python 3.12
1. Refactored data access code


### KFP Workloads

1. Modify superpipeline params type Str/json
1. Set kuberay apiserver version
1. Add Super pipeline for code transforms


### Transforms

1. Enhance pdf2parquet with docling2 support for extracting HTML, DOCS, etc.
1. Added web2parquet transform
1. Added HAP transform

### HTTP Connector 0.2.3

1. Enhanced parameters/configuration allow the user to customize crawler settings
1. Implemented subdomain focus feature in data-prep-connector


## Release 0.2.2 - HTTP Connector Module - 10/23/2024

### General
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code2parquet/kfp_ray/code2parquet_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


# components
base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"

# path to kfp component specifications files
component_spec_path = "../../../../kfp/kfp_ray_components/"
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code2parquet/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code2parquet_transform_python"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "code2parquet Python Transform"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code2parquet/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
data-prep-toolkit==0.2.2.dev2
data-prep-toolkit==0.2.2
parameterized
pandas
6 changes: 3 additions & 3 deletions transforms/code/code2parquet/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code2parquet_transform_ray"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "code2parquet Ray Transform"
license = {text = "Apache-2.0"}
Expand All @@ -10,8 +10,8 @@ authors = [
{ name = "Boris Lublinsky", email = "[email protected]" },
]
dependencies = [
"data-prep-toolkit[ray]==0.2.2.dev2",
"dpk-code2parquet-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2",
"dpk-code2parquet-transform-python==0.2.2",
"parameterized",
"pandas",
]
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_profiler/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code_profiler_transform_python"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "Code Profiler Python Transform"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_profiler/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
data-prep-toolkit==0.2.2.dev2
data-prep-toolkit==0.2.2
parameterized
pandas
aiolimiter==1.1.0
Expand Down
6 changes: 3 additions & 3 deletions transforms/code/code_profiler/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code_profiler_transform_ray"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "Code Profiler Ray Transform"
license = {text = "Apache-2.0"}
Expand All @@ -9,8 +9,8 @@ authors = [
{ name = "Pankaj Thorat", email = "[email protected]" },
]
dependencies = [
"dpk-code-profiler-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
"dpk-code-profiler-transform-python==0.2.2",
"data-prep-toolkit[ray]==0.2.2",
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_quality/kfp_ray/code_quality_wf.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:latest"

# components
base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"

# path to kfp component specifications files
component_spec_path = "../../../../kfp/kfp_ray_components/"
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_quality/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code_quality_transform_python"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "Code Quality Python Transform"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_quality/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
data-prep-toolkit==0.2.2.dev2
data-prep-toolkit==0.2.2
bs4==0.0.2
transformers==4.38.2
6 changes: 3 additions & 3 deletions transforms/code/code_quality/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code_quality_transform_ray"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "Code Quality Ray Transform"
license = {text = "Apache-2.0"}
Expand All @@ -9,8 +9,8 @@ authors = [
{ name = "Shivdeep Singh", email = "[email protected]" },
]
dependencies = [
"dpk-code-quality-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
"dpk-code-quality-transform-python==0.2.2",
"data-prep-toolkit[ray]==0.2.2",
]

[build-system]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
task_image = "quay.io/dataprep1/data-prep-kit/header_cleanser-ray:latest"

# components
base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"

# path to kfp component specifications files
component_spec_path = "../../../../kfp/kfp_ray_components/"
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/header_cleanser/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_header_cleanser_transform_python"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "License and Copyright Removal Transform for Python"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/header_cleanser/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
data-prep-toolkit==0.2.2.dev2
data-prep-toolkit==0.2.2
scancode-toolkit==32.1.0 ; platform_system != 'Darwin'

6 changes: 3 additions & 3 deletions transforms/code/header_cleanser/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_header_cleanser_transform_ray"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "License and copyright removal Transform for Ray"
license = {text = "Apache-2.0"}
Expand All @@ -9,8 +9,8 @@ authors = [
{ name = "Yash kalathiya", email = "[email protected]" },
]
dependencies = [
"dpk-header-cleanser-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
"dpk-header-cleanser-transform-python==0.2.2",
"data-prep-toolkit[ray]==0.2.2",
"scancode-toolkit==32.1.0",
]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


# components
base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:latest"
base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.2"

# path to kfp component specifications files
component_spec_path = "../../../../kfp/kfp_ray_components/"
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/license_select/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_license_select_transform_python"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "License Select Python Transform"
license = {text = "Apache-2.0"}
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/license_select/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
data-prep-toolkit==0.2.2.dev2
data-prep-toolkit==0.2.2
6 changes: 3 additions & 3 deletions transforms/code/license_select/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_license_select_transform_ray"
version = "0.2.2.dev2"
version = "0.2.2"
requires-python = ">=3.10,<3.13"
description = "License Select Transform"
license = {text = "Apache-2.0"}
Expand All @@ -10,8 +10,8 @@ authors = [
{ name = "Mark Lewis", email = "[email protected]" },
]
dependencies = [
"dpk-license-select-transform-python==0.2.2.dev2",
"data-prep-toolkit[ray]==0.2.2.dev2",
"dpk-license-select-transform-python==0.2.2",
"data-prep-toolkit[ray]==0.2.2",
]

[build-system]
Expand Down
Loading

0 comments on commit a10ef0a

Please sign in to comment.