Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Increase PyPI parsing flexibility #3423

Merged
merged 12 commits into from
Aug 26, 2023
80 changes: 67 additions & 13 deletions cmd/package_managers.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,46 @@ package cmd
import (
"encoding/json"
"fmt"
"io"
"regexp"

ngt "github.com/ossf/scorecard/v4/cmd/internal/nuget"
pmc "github.com/ossf/scorecard/v4/cmd/internal/packagemanager"
sce "github.com/ossf/scorecard/v4/errors"
)

// Patterns that recognize source-hosting URLs found in PyPI metadata. In every
// pattern capture group 1 is the owner and capture group 2 is the repository
// name.
//
// The repository group must be followed by "/", "?", "#" or the end of the
// input. Dots are allowed inside the name (e.g. "zope.interface"); excluding
// "." from the capture would cut such names off at the first dot. For the
// github.com and gitlab.com patterns a trailing ".git" is matched outside the
// capture so clone URLs normalize to the same repository name.
var (
	// _GITHUB_DOMAIN_REGEXP matches http(s)://github.com/<owner>/<repo>.
	_GITHUB_DOMAIN_REGEXP = regexp.MustCompile(`^https?://github[.]com/([^/]+)/([^/?#]+?)(?:[.]git)?(?:[/?#]|$)`)
	// _GITHUB_SUBDOMAIN_REGEXP matches http(s)://<owner>.github.io/<repo>.
	_GITHUB_SUBDOMAIN_REGEXP = regexp.MustCompile(`^https?://([^.]+)[.]github[.]io/([^/?#]+)`)
	// _GITLAB_DOMAIN_REGEXP matches http(s)://gitlab.com/<owner>/<repo>.
	_GITLAB_DOMAIN_REGEXP = regexp.MustCompile(`^https?://gitlab[.]com/([^/]+)/([^/?#]+?)(?:[.]git)?(?:[/?#]|$)`)
)

// makeGithubRepo builds a canonical https://github.com/<owner>/<repo> URL from
// a regexp submatch slice laid out as [full match, owner, repo, ...].
//
// It returns "" when the slice holds fewer than three elements (which includes
// the nil slice produced by a failed match) or when the owner element is
// "sponsors", so that github.com/sponsors/<user> links are not reported as
// repositories.
func makeGithubRepo(urlAndPathParts []string) string {
	const (
		ownerIdx = 1
		repoIdx  = 2
	)

	switch {
	case len(urlAndPathParts) <= repoIdx:
		return ""
	case urlAndPathParts[ownerIdx] == "sponsors":
		return ""
	}

	owner, repo := urlAndPathParts[ownerIdx], urlAndPathParts[repoIdx]
	return fmt.Sprintf("https://github.com/%s/%s", owner, repo)
}

var _PYPI_MATCHERS = []func(string) string{
spencerschrock marked this conversation as resolved.
Show resolved Hide resolved
func(url string) string {
return makeGithubRepo(_GITHUB_DOMAIN_REGEXP.FindStringSubmatch(url))
},

func(url string) string {
return makeGithubRepo(_GITHUB_SUBDOMAIN_REGEXP.FindStringSubmatch(url))
},

func(url string) string {
match := _GITLAB_DOMAIN_REGEXP.FindStringSubmatch(url)
if len(match) >= 3 {
return fmt.Sprintf("https://gitlab.com/%s/%s", match[1], match[2])
}
return ""
},
}

type packageMangerResponse struct {
associatedRepo string
exists bool
Expand Down Expand Up @@ -77,9 +111,8 @@ type npmSearchResults struct {

// pypiSearchResults holds the fields of a PyPI package JSON document that are
// needed to locate the package's source repository.
type pypiSearchResults struct {
	Info struct {
		// ProjectUrls is the legacy typed view of "project_urls". It is
		// excluded from JSON decoding: encoding/json silently ignores every
		// field when two fields at the same level claim the same JSON name,
		// so leaving the "project_urls" tag here would also prevent
		// ProjectURLs from ever being populated.
		//
		// Deprecated: read ProjectURLs["Source"] instead.
		ProjectUrls struct {
			Source string `json:"Source"`
		} `json:"-"`
		// ProjectURL is the value of the "project_url" key.
		ProjectURL string `json:"project_url"`
		// ProjectURLs maps each label under "project_urls" to its URL. It is
		// nil when the key is absent or null.
		ProjectURLs map[string]string `json:"project_urls"`
	} `json:"info"`
}

Expand Down Expand Up @@ -108,6 +141,36 @@ func fetchGitRepositoryFromNPM(packageName string, packageManager pmc.Client) (s
return v.Objects[0].Package.Links.Repository, nil
}

func findGitRepositoryInPYPIResponse(packageName string, response io.Reader) (string, error) {
v := &pypiSearchResults{}
err := json.NewDecoder(response).Decode(v)
if err != nil {
return "", sce.WithMessage(sce.ErrScorecardInternal, fmt.Sprintf("failed to parse pypi package json: %v", err))
}
joshgc marked this conversation as resolved.
Show resolved Hide resolved

v.Info.ProjectURLs["key_not_used"] = v.Info.ProjectURL
joshgc marked this conversation as resolved.
Show resolved Hide resolved
validURL := ""
joshgc marked this conversation as resolved.
Show resolved Hide resolved
for _, url := range v.Info.ProjectURLs {
for _, matcher := range _PYPI_MATCHERS {
if repo := matcher(url); repo != "" {
if validURL == "" {
validURL = repo
} else if validURL != repo {
return "", sce.WithMessage(sce.ErrScorecardInternal,
fmt.Sprintf("found too many possible source repos for pypi package: %s", packageName))
}
}
}
}

if validURL == "" {
return "", sce.WithMessage(sce.ErrScorecardInternal,
fmt.Sprintf("could not find source repo for pypi package: %s", packageName))
} else {
return validURL, nil
}
}

// fetchGitRepositoryFromPYPI gets the source repository URL (GitHub or GitLab) for the pypi package.
func fetchGitRepositoryFromPYPI(packageName string, manager pmc.Client) (string, error) {
pypiSearchURL := "https://pypi.org/pypi/%s/json"
Expand All @@ -117,16 +180,7 @@ func fetchGitRepositoryFromPYPI(packageName string, manager pmc.Client) (string,
}

defer resp.Body.Close()
v := &pypiSearchResults{}
err = json.NewDecoder(resp.Body).Decode(v)
if err != nil {
return "", sce.WithMessage(sce.ErrScorecardInternal, fmt.Sprintf("failed to parse pypi package json: %v", err))
}
if v.Info.ProjectUrls.Source == "" {
return "", sce.WithMessage(sce.ErrScorecardInternal,
fmt.Sprintf("could not find source repo for pypi package: %s", packageName))
}
return v.Info.ProjectUrls.Source, nil
return findGitRepositoryInPYPIResponse(packageName, resp.Body)
}

// Gets the GitHub repository URL for the rubygems package.
Expand Down
236 changes: 122 additions & 114 deletions cmd/package_managers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"errors"
"io"
"net/http"
"strings"
"testing"

"github.com/golang/mock/gomock"
Expand Down Expand Up @@ -161,6 +162,121 @@ func Test_fetchGitRepositoryFromNPM(t *testing.T) {
}
}

// Test_findGitRepositoryInPYPIResponse feeds trimmed-down PyPI JSON documents
// to findGitRepositoryInPYPIResponse and checks both the returned repository
// URL and, for failing cases, that the error text contains the expected
// message.
func Test_findGitRepositoryInPYPIResponse(t *testing.T) {
	t.Parallel()
	type testcase struct {
		name                string
		partialPYPIResponse string
		want                string
		wantErrStr          string
	}
	testcases := []testcase{
		{
			// No recognizable URL under a key the parser reads.
			name: "findGitRepositoryInPYPIResponse_none",
			partialPYPIResponse: `
{
"info": {
"platform": "UNKNOWN",
"not_a_project_url": "https://github.com/htaslan/color",
"project_urls": {
"Homepage": "http://git_NOT_VALID_hub.com/htaslan/color"
}
}
}
`,
			want:       "",
			wantErrStr: "could not find source repo for pypi package: somePackage",
		},
		{
			// The repository is only present in "project_url".
			name: "findGitRepositoryInPYPIResponse_project_url",
			partialPYPIResponse: `
{
"info": {
"platform": "UNKNOWN",
"project_url": "https://github.com/htaslan/color/",
"project_urls": {
"Homepage": "http://git_NOT_VALID_hub.com/htaslan/color"
}
}
}
`,
			want:       "https://github.com/htaslan/color",
			wantErrStr: "",
		},
		{
			// The repository is only present under "project_urls"; the
			// sponsors link must not count as a second repository.
			name: "findGitRepositoryInPYPIResponse_project_urls",
			partialPYPIResponse: `
{
"info": {
"platform": "UNKNOWN",

"project_url": "http://git_NOT_VALID_hub.com/htaslan/color",
"project_urls": {
"RandomKey": "https://github.com/htaslan/color/",
"SponsorsIgnored": "https://github.com/sponsors/htaslan",
"AnotherRandomKey": "http://git_NOT_VALID_hub.com/htaslan/color"
}
}
}
`,
			want:       "https://github.com/htaslan/color",
			wantErrStr: "",
		},
		{
			// github.com and github.io URLs for the same repo collapse to one.
			name: "findGitRepositoryInPYPIResponse_dedup",
			partialPYPIResponse: `
{
"info": {
"platform": "UNKNOWN",
"project_url": "foo",
"project_urls": {
"RandomKey": "https://github.com/htaslan/color/",
"AnotherRandomKey": "http://htaslan.github.io/color"
}
}
}
`,
			want:       "https://github.com/htaslan/color",
			wantErrStr: "",
		},
		{
			// Two different repositories are ambiguous and must fail.
			name: "findGitRepositoryInPYPIResponse_toomany",
			partialPYPIResponse: `
{
"info": {
"platform": "UNKNOWN",
"project_url": "foo",
"project_urls": {
"RandomKey": "https://github.com/htaslan/color/",
"AnotherRandomKey": "https://gitlab.com/htaslan/color"
}
}
}
`,
			want:       "",
			wantErrStr: "found too many possible source repos for pypi package: somePackage",
		},
	}
	for _, tt := range testcases {
		tt := tt
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()
			body := strings.NewReader(tt.partialPYPIResponse)
			got, err := findGitRepositoryInPYPIResponse("somePackage", body)

			switch {
			case err != nil && (tt.wantErrStr == "" || !strings.Contains(err.Error(), tt.wantErrStr)):
				// Either no error was expected or its text is wrong.
				t.Errorf("findGitRepositoryInPYPIResponse() error = \"%v\" did not contain wantErrStr = \"%v\" testcase name %v", err, tt.wantErrStr, tt.name)
				return
			case err == nil && tt.wantErrStr != "":
				t.Errorf("findGitRepositoryInPYPIResponse() had nil error, but wanted wantErrStr = \"%v\" testcase name %v", tt.wantErrStr, tt.name)
				return
			}

			if got != tt.want {
				t.Errorf("findGitRepositoryInPYPIResponse() = %v, want %v", got, tt.want)
			}
		})
	}
}

func Test_fetchGitRepositoryFromPYPI(t *testing.T) {
t.Parallel()
type args struct {
Expand All @@ -177,7 +293,7 @@ func Test_fetchGitRepositoryFromPYPI(t *testing.T) {
name: "fetchGitRepositoryFromPYPI",
//nolint
args: args{
packageName: "npm-package",
packageName: "some-package",
//nolint
result: `
{
Expand Down Expand Up @@ -279,137 +395,29 @@ func Test_fetchGitRepositoryFromPYPI(t *testing.T) {

`,
},
want: "foo",
want: "https://github.com/htaslan/color",
wantErr: false,
},
{
name: "fetchGitRepositoryFromNPM_error",
name: "fetchGitRepositoryFromPYPI_error",

args: args{
packageName: "npm-package",
packageName: "pypi-package",
result: "",
},
want: "",
wantErr: true,
},
{
name: "fetchGitRepositoryFromNPM_error",
name: "fetchGitRepositoryFromPYPI_error",

args: args{
packageName: "npm-package",
packageName: "pypi-package",
result: "foo",
},
want: "",
wantErr: true,
},
{
name: "empty project url",
//nolint
args: args{
packageName: "npm-package",
//nolint
result: `
{
"info": {
"author": "Hüseyin Tekinaslan",
"author_email": "[email protected]",
"bugtrack_url": null,
"classifiers": [
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: Implementation :: CPython",
"Topic :: Software Development :: Libraries :: Python Modules"
],
"description": "UNKNOWN",
"description_content_type": null,
"docs_url": null,
"downoad_url": null,
"downloads": {
"last_day": -1,
"last_month": -1,
"last_week": -1
},
"home_page": "http://github.com/htaslan/color",
"keywords": "colorize pycolorize color pycolor",
"license": "MIT",
"maintainer": null,
"maintainer_email": null,
"name": "color",
"package_url": "https://pypi.org/project/color/",
"platform": "UNKNOWN",
"project_url": "https://pypi.org/project/color/",
"project_urls": {
"Homepage": "http://github.com/htaslan/color",
"Source": ""
},
"release_url": "https://pypi.org/project/color/0.1/",
"requires_dist": null,
"requires_python": null,
"summary": "python module for colorize string",
"version": "0.1",
"yanked": false,
"yanked_reason": null
},
"last_serial": 2041956,
"releases": {
"0.1": [
{
"comment_text": "a python module of colorize string",
"digests": {
"md5": "1a4577069c636b28d85052db9a384b95",
"sha256": "de5b51fea834cb067631beaa1ec11d7753f1e3615e836e2e4c34dcf2b343eac2"
},
"downloads": -1,
"filename": "color-0.1.1.tar.gz",
"has_sig": false,
"md5_digest": "1a4577069c636b28d85052db9a384b95",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 3568,
"upload_time": "2016-04-01T13:23:25",
"upload_time_iso_8601": "2016-04-01T13:23:25.284973Z",
"url": "https://files.pythonhosted.org/packages/88/04/0defd6f424e5bafb5abc75510cbe119a85d80b5505f1de5cd9a16d89ba8c/color-0.1.1.tar.gz",
"yanked": false,
"yanked_reason": null
}
]
},
"urls": [
{
"comment_text": "a python module of colorize string",
"digests": {
"md5": "1a4577069c636b28d85052db9a384b95",
"sha256": "de5b51fea834cb067631beaa1ec11d7753f1e3615e836e2e4c34dcf2b343eac2"
},
"downloads": -1,
"filename": "color-0.1.1.tar.gz",
"has_sig": false,
"md5_digest": "1a4577069c636b28d85052db9a384b95",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 3568,
"upload_time": "2016-04-01T13:23:25",
"upload_time_iso_8601": "2016-04-01T13:23:25.284973Z",
"url": "https://files.pythonhosted.org/packages/88/04/0defd6f424e5bafb5abc75510cbe119a85d80b5505f1de5cd9a16d89ba8c/color-0.1.1.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"vulnerabilities": []
}
`,
},
want: "",
wantErr: true,
},
}
for _, tt := range tests {
tt := tt
Expand Down