Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: get PR description from googleapis commits #2531

Merged
merged 42 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
eb93ac3
feat: get commit message from googleapis
JoeWang1127 Feb 26, 2024
df8fe50
add comments
JoeWang1127 Mar 1, 2024
eb0da6b
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 1, 2024
9a1be4c
move to integration test
JoeWang1127 Mar 1, 2024
c1f556c
add paths from generation_config
JoeWang1127 Mar 1, 2024
ccb1a21
add generate_pr_description.py
JoeWang1127 Mar 2, 2024
057da91
add an integration test
JoeWang1127 Mar 2, 2024
da3fc4c
add an unit test
JoeWang1127 Mar 2, 2024
1e6ab5e
fix unit tests
JoeWang1127 Mar 2, 2024
a1c786f
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 2, 2024
d128a2d
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 3, 2024
939c513
change func name
JoeWang1127 Mar 3, 2024
7079327
compare result with golden file
JoeWang1127 Mar 4, 2024
a08a866
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 4, 2024
b77aa1d
fix integration tests
JoeWang1127 Mar 4, 2024
eecaba0
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 4, 2024
259c4ef
move get_commit_messages to surface
JoeWang1127 Mar 5, 2024
f09fb3b
format pr description
JoeWang1127 Mar 5, 2024
5e1d85e
remove common protos
JoeWang1127 Mar 5, 2024
7e9be85
add integration test for split repo
JoeWang1127 Mar 5, 2024
9f3bbca
format code
JoeWang1127 Mar 5, 2024
8324776
format pr description
JoeWang1127 Mar 5, 2024
cc9088e
change parameter comments
JoeWang1127 Mar 5, 2024
334298b
change parameter comments
JoeWang1127 Mar 5, 2024
aabd9c1
add generator version in pr description
JoeWang1127 Mar 5, 2024
176871d
find the versioned proto_path from a file path
JoeWang1127 Mar 5, 2024
93e60de
add library name in pr description
JoeWang1127 Mar 5, 2024
d4d16f6
format pr description
JoeWang1127 Mar 5, 2024
a279549
do not include library_name in split repo
JoeWang1127 Mar 5, 2024
1946d64
bring back PiperOrigin-RevId
JoeWang1127 Mar 6, 2024
be189c5
use NESTED_COMMIT
JoeWang1127 Mar 6, 2024
188a632
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 6, 2024
5d7c915
add comments
JoeWang1127 Mar 6, 2024
240038f
format pr description
JoeWang1127 Mar 6, 2024
33df40b
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 6, 2024
cd7f639
format multi-line commit messages
JoeWang1127 Mar 6, 2024
1715714
add unit test
JoeWang1127 Mar 6, 2024
6cf0045
add unit tests
JoeWang1127 Mar 7, 2024
b473438
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 7, 2024
2b507aa
refactor class and unit tests
JoeWang1127 Mar 7, 2024
2ce846a
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 7, 2024
c03d6dd
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions library_generation/generate_pr_description.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import click

from library_generation.model.generation_config import from_yaml
from library_generation.utilities import get_commit_messages
from library_generation.utilities import get_file_paths


@click.group(invoke_without_command=False)
@click.pass_context
@click.version_option(message="%(version)s")
def main(ctx):
pass


@main.command()
@click.option(
"--generation-config-yaml",
required=True,
type=str,
help="""
Path to generation_config.yaml that contains the metadata about
library generation.
""",
)
@click.option(
"--baseline-commit",
required=True,
type=str,
help="""
The baseline commit from which the commit message is considered.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is baseline-commit inclusive?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No.

The baseline commit is included in the previous generation.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add comments to make it clear?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed the comments.

This commit should be an ancestor of googleapis commit in configuration.
""",
)
@click.option(
"--repo-url",
type=str,
default="https://github.com/googleapis/googleapis.git",
show_default=True,
help="""
GitHub repository URL.
""",
)
def generate(
generation_config_yaml: str,
repo_url: str,
baseline_commit: str,
) -> str:
return generate_pr_descriptions(
generation_config_yaml=generation_config_yaml,
repo_url=repo_url,
baseline_commit=baseline_commit,
)


def generate_pr_descriptions(
generation_config_yaml: str,
repo_url: str,
baseline_commit: str,
) -> str:
config = from_yaml(generation_config_yaml)
paths = get_file_paths(config)
return get_commit_messages(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add another commit for the generator version update? This is currently done as
fix(deps): [Many APIs] Update the Java code generator (gapic-generator-java) to 2.35.0 and duplicated in the release notes.
Ideally, if there is a generator version update, then we should generate a commit like
feat: Regenerate with the Java code generator (gapic-generator-java) v2.35.0

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generator updates will not use GitHub action as it only happens once in two weeks.

I'll setup a meeting to discuss this issue.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added in the PR description.

repo_url=repo_url,
latest_commit=config.googleapis_commitish,
baseline_commit=baseline_commit,
paths=paths,
)


if __name__ == "__main__":
main()
30 changes: 28 additions & 2 deletions library_generation/test/integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@
import unittest
from distutils.dir_util import copy_tree
from distutils.file_util import copy_file
from filecmp import cmp
from filecmp import dircmp

from git import Repo
from pathlib import Path
from typing import List
from typing import Dict

from library_generation.generate_pr_description import generate_pr_descriptions
from library_generation.generate_repo import generate_from_yaml
from library_generation.model.generation_config import from_yaml, GenerationConfig
from library_generation.test.compare_poms import compare_xml
Expand All @@ -49,6 +51,28 @@


class IntegrationTest(unittest.TestCase):
def test_get_commit_message_success(self):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this as an integration test because it required network connection for git clone and checkout of googleapis repository.

repo_url = "https://github.com/googleapis/googleapis.git"
baseline_commit = "a17d4caf184b050d50cacf2b0d579ce72c31ce74"
description = generate_pr_descriptions(
f"{config_dir}/google-cloud-java/{config_name}",
JoeWang1127 marked this conversation as resolved.
Show resolved Hide resolved
repo_url=repo_url,
baseline_commit=baseline_commit,
)
description_file = f"{config_dir}/pr-description.txt"
if os.path.isfile(f"{description_file}"):
os.remove(f"{description_file}")
with open(f"{description_file}", "w+") as f:
f.write(description)
# There are five commits from googleapis commitish to baseline commit,
# inclusively. Only two of the commits contain changes in proto_path
# that are in configuration. Therefore, only two commit messages should
# be included in the PR description.
self.assertTrue(
cmp(f"{config_dir}/pr-description-golden.txt", f"{description_file}")
)
os.remove(f"{description_file}")

def test_generate_repo(self):
shutil.rmtree(f"{golden_dir}", ignore_errors=True)
os.makedirs(f"{golden_dir}", exist_ok=True)
Expand Down Expand Up @@ -150,7 +174,7 @@ def __pull_repo_to(cls, default_dest: Path, repo: str, committish: str) -> str:
repo = Repo(dest)
else:
dest = default_dest
repo_dest = f"{golden_dir}/{repo}"
shutil.rmtree(dest, ignore_errors=True)
repo_url = f"{repo_prefix}/{repo}"
print(f"Cloning repository {repo_url}")
repo = Repo.clone_from(repo_url, dest)
Expand All @@ -169,6 +193,8 @@ def __get_library_names_from_config(cls, config: GenerationConfig) -> List[str]:
def __get_config_files(cls, path: str) -> List[tuple[str, str]]:
config_files = []
for sub_dir in Path(path).resolve().iterdir():
if sub_dir.is_file():
continue
repo = sub_dir.name
if repo == "golden":
continue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,17 @@ libraries:
- proto_path: google/cloud/alloydb/connectors/v1
- proto_path: google/cloud/alloydb/connectors/v1alpha
- proto_path: google/cloud/alloydb/connectors/v1beta

- api_shortname: documentai
name_pretty: Document AI
product_documentation: https://cloud.google.com/compute/docs/documentai/
api_description: allows developers to unlock insights from your documents with machine
learning.
library_name: document-ai
release_level: stable
issue_tracker: https://issuetracker.google.com/savedsearches/559755
GAPICs:
- proto_path: google/cloud/documentai/v1
- proto_path: google/cloud/documentai/v1beta1
- proto_path: google/cloud/documentai/v1beta2
- proto_path: google/cloud/documentai/v1beta3
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the commits description should be wrapped between BEGIN_COMMIT_OVERRIDE and END_COMMIT_OVERRIDE so that release please can recognize them, see README of release-please for details.

So in this example, the golden files should at least include

BEGIN_COMMIT_OVERRIDE

feat: expose model_type in v1 processor, so that user can see the model_type after get or list processor version
feat: add model_type in v1beta3 processor proto

END_COMMIT_OVERRIDE

And we can add any other metadata info before BEGIN_COMMIT_OVERRIDE to help us identify this PR, for example, maybe something like "This PR is generated with proto changes between googleapis commits x and y".

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
7a9a855287b5042410c93e5a510f40efd4ce6cb1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know we don't have the logic for telling exactly which library is affected, but maybe in the meantime we can format this commit hash to be an actual link to the commit in googleapis for further inspection. WDYT?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, that's a good idea.

I'll create a follow up PR to format the commit message.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a good idea, but we don't want to include them in the release notes, so please put it outside of BEGIN_COMMIT_OVERRIDE and END_COMMIT_OVERRIDE if you want to include them in the PR description.

Copy link
Collaborator Author

@JoeWang1127 JoeWang1127 Mar 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. The commit link is outside of BEGIN_NESTED_COMMIT and END_NESTED_COMMIT.

feat: expose model_type in v1 processor, so that user can see the model_type after get or list processor version
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we include the service name in the PR description? Something like

feat: [service_name] expose model_type in v1 processor, so that user can see the model_type after get or list processor version

Similar to what we are doing today for the title of the owlbot PR, e.g. googleapis/google-cloud-java#10422.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can do it.

What are the benefit of doing so and what happens if a commit has multiple service names (generally one commit should only have changes in one service, but we don't have guarantee that).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The benefit is that release notes would be much clearer, see the latest release notes. In my experience, I never seen a commit affect multiple services, maybe a service and a common proto. I don't think we have to worry about it right now, but in case it happens, let's not fail the process and just pick the first service name.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


PiperOrigin-RevId: 603727585
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can remove this in the PR description.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is part of the commit message and it is included in Owlbot PR.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is, but I don't think they provide much value and we don't want the release notes to include this message, can we try to parse the commit message and remove this part? Again, take a look at the existing release notes, we should try to make our PR description as close as possible.

In addition, if we really want to keep the existing commit history, we can put it outside of the BEGIN_COMMIT_OVERRIDE and END_COMMIT_OVERRIDE.

Copy link
Collaborator Author

@JoeWang1127 JoeWang1127 Mar 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about I only keep the first line of each commit message? The first line is usually the commit summary.

I've changed the golden file, PTAL.



c7fd8bd652ac690ca84f485014f70b52eef7cb9e
feat: add model_type in v1beta3 processor proto

PiperOrigin-RevId: 603726122
19 changes: 19 additions & 0 deletions library_generation/test/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from library_generation.model.gapic_inputs import parse as parse_build_file
from library_generation.model.generation_config import from_yaml
from library_generation.model.library_config import LibraryConfig
from library_generation.utilities import get_file_paths

script_dir = os.path.dirname(os.path.realpath(__file__))
resources_dir = os.path.join(script_dir, "resources")
Expand Down Expand Up @@ -214,6 +215,24 @@ def test_from_yaml_succeeds(self):
self.assertEqual("google/cloud/asset/v1p5beta1", gapics[3].proto_path)
self.assertEqual("google/cloud/asset/v1p7beta1", gapics[4].proto_path)

def test_get_file_paths_from_yaml_success(self):
paths = get_file_paths(from_yaml(f"{test_config_dir}/generation_config.yaml"))
self.assertEqual(
{
"google/cloud/asset/v1",
"google/cloud/asset/v1p1beta1",
"google/cloud/asset/v1p2beta1",
"google/cloud/asset/v1p5beta1",
"google/cloud/asset/v1p7beta1",
"google/api",
"google/longrunning",
"google/rpc",
"google/shopping/type",
"google/type",
},
paths,
)

@parameterized.expand(
[
("BUILD_no_additional_protos.bazel", " "),
Expand Down
65 changes: 65 additions & 0 deletions library_generation/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import os
import shutil
import re
from git import Commit, Repo
from pathlib import Path
from lxml import etree
from library_generation.model.bom_config import BomConfig
Expand All @@ -33,6 +34,13 @@
group_id_tag = "groupId"
artifact_tag = "artifactId"
version_tag = "version"
common_protos = {
"google/api",
"google/longrunning",
"google/rpc",
"google/shopping/type",
"google/type",
}


def __render(template_name: str, output_name: str, **kwargs):
Expand Down Expand Up @@ -134,6 +142,14 @@ def __handle_special_bom(
]


def __is_qualified_commit(paths: set[str], commit: Commit) -> bool:
for file in commit.stats.files.keys():
idx = file.rfind("/")
if file[:idx] in paths:
return True
return False


def create_argument(arg_key: str, arg_container: object) -> List[str]:
"""
Generates a list of two elements [argument, value], or returns
Expand Down Expand Up @@ -483,3 +499,52 @@ def get_version_from(
for line in f.readlines():
if artifact_id in line:
return line.split(":")[index].strip()


def get_file_paths(config: GenerationConfig) -> set[str]:
"""
Get versioned proto_path from configuration file, plus known paths of
common protos.

:param config:
:return: versioned proto_path plus paths of common protos
"""
paths = set()
for library in config.libraries:
for gapic_config in library.gapic_configs:
paths.add(gapic_config.proto_path)
return paths.union(common_protos)
JoeWang1127 marked this conversation as resolved.
Show resolved Hide resolved


def get_commit_messages(
JoeWang1127 marked this conversation as resolved.
Show resolved Hide resolved
repo_url: str, latest_commit: str, baseline_commit: str, paths: set[str]
) -> str:
"""
Combine commit messages of a repository from latest_commit to
baseline_commit. Only commits which change files in a pre-defined
file paths will be considered.
Note that baseline_commit should be an ancestor of latest_commit.

:param repo_url: the url of the repository.
:param latest_commit: the newest commit to be considered in
selecting commit message.
:param baseline_commit: the oldest commit to be considered in
selecting commit message. This commit should be an ancestor of
:param paths: a set of file paths
:return: commit messages.
"""
tmp_dir = "/tmp/repo"
shutil.rmtree(tmp_dir, ignore_errors=True)
os.mkdir(tmp_dir)
messages = []
repo = Repo.clone_from(repo_url, tmp_dir)
commit = repo.commit(latest_commit)
while str(commit.hexsha) != baseline_commit:
if __is_qualified_commit(paths, commit):
messages.append(f"{commit.hexsha}\n{commit.message}")
commit_parents = commit.parents
if len(commit_parents) == 0:
break
commit = commit_parents[0]
shutil.rmtree(tmp_dir, ignore_errors=True)
return "\n\n".join(messages)
Loading