Skip to content

Commit

Permalink
Make PyPI parsing more flexible to find any github or gitlab url, and…
Browse files Browse the repository at this point in the history
… hope it's unique
  • Loading branch information
joshgc committed Aug 24, 2023
1 parent d6ed810 commit 83d46c4
Show file tree
Hide file tree
Showing 2 changed files with 172 additions and 127 deletions.
64 changes: 51 additions & 13 deletions cmd/package_managers.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,18 @@ package cmd
import (
"encoding/json"
"fmt"
"io"
"regexp"

ngt "github.com/ossf/scorecard/v4/cmd/internal/nuget"
pmc "github.com/ossf/scorecard/v4/cmd/internal/packagemanager"
sce "github.com/ossf/scorecard/v4/errors"
)

// Patterns used to recognise source-repository URLs. Each one captures the
// owner (or Pages subdomain) as group 1 and the repository name as group 2.
//
// The dots in the host names are escaped so that look-alike hosts such as
// "githubXcom" are not accepted. The repository capture stops at the first
// '/' or '.', which drops a trailing ".git" but also truncates repository
// names that themselves contain a dot.
//
// The identifiers keep their original spelling because other code in this
// file refers to them by these names.
var (
	// _GITHUB_DOMAIN_REGEXP matches http(s)://github.com/<owner>/<repo>.
	_GITHUB_DOMAIN_REGEXP = regexp.MustCompile(`^https?://github\.com/([^/]+)/([^/.]+)`)

	// _GITHUB_SUBDOMAIN_REGEXP matches http(s)://<owner>.github.io/<repo>.
	// The owner capture excludes '/' so the ".github.io" part must belong to
	// the host rather than to a path segment of some other site.
	_GITHUB_SUBDOMAIN_REGEXP = regexp.MustCompile(`^https?://([^/.]+)\.github\.io/([^/.]+).*`)

	// _GITLAB_DOMAIN_REGEXP matches http(s)://gitlab.com/<owner>/<repo>.
	_GITLAB_DOMAIN_REGEXP = regexp.MustCompile(`^https?://gitlab\.com/([^/]+)/([^/.]+)`)
)

type packageMangerResponse struct {
associatedRepo string
exists bool
Expand Down Expand Up @@ -77,9 +83,8 @@ type npmSearchResults struct {

// pypiSearchResults holds the part of a PyPI package JSON document that is
// decoded when looking for the package's source repository.
//
// NOTE(review): this span appears to show removed and added diff lines side
// by side — ProjectUrls and ProjectURLs both carry the `project_urls` tag.
// If both fields really coexisted, encoding/json would treat the name as
// ambiguous and populate neither; confirm only one is present in the file.
type pypiSearchResults struct {
Info struct {
ProjectUrls struct {
Source string `json:"Source"`
} `json:"project_urls"`
// ProjectURL is decoded from the "project_url" key of "info".
ProjectURL string `json:"project_url"`
// ProjectURLs is decoded from the "project_urls" object of "info";
// its values are the URLs that get scanned for a repository.
ProjectURLs map[string]string `json:"project_urls"`
} `json:"info"`
}

Expand Down Expand Up @@ -108,6 +113,48 @@ func fetchGitRepositoryFromNPM(packageName string, packageManager pmc.Client) (s
return v.Objects[0].Package.Links.Repository, nil
}

// addRepoIfValid inspects url and, for every known hosting pattern it
// matches, stores the canonical repository URL as a key in validURLs.
//
// validURLs is used as a set: only the keys matter and every value is nil.
// GitHub Pages addresses (<owner>.github.io/<repo>) are rewritten to the
// corresponding https://github.com/<owner>/<repo> form, so a project page and
// its repository collapse to a single entry. A url that matches no pattern
// leaves validURLs untouched.
func addRepoIfValid(validURLs map[string]any, url string) {
	// record adds one canonical URL built from the owner and repository
	// capture groups, provided the pattern actually matched.
	record := func(groups []string, format string) {
		if len(groups) < 3 {
			return
		}
		validURLs[fmt.Sprintf(format, groups[1], groups[2])] = nil
	}

	record(_GITHUB_DOMAIN_REGEXP.FindStringSubmatch(url), "https://github.com/%s/%s")
	record(_GITHUB_SUBDOMAIN_REGEXP.FindStringSubmatch(url), "https://github.com/%s/%s")
	record(_GITLAB_DOMAIN_REGEXP.FindStringSubmatch(url), "https://gitlab.com/%s/%s")
}

// findGitRepositoryInPYPIResponse decodes a PyPI package JSON document from
// response and returns the single repository URL referenced by it.
//
// Both info.project_url and every value of info.project_urls are passed to
// addRepoIfValid, which de-duplicates them into canonical repository URLs.
// packageName is only used to build error messages.
//
// An error wrapping sce.ErrScorecardInternal is returned when the JSON cannot
// be decoded, when no candidate repository is found, or when more than one
// distinct candidate is found.
func findGitRepositoryInPYPIResponse(packageName string, response io.Reader) (string, error) {
	parsed := &pypiSearchResults{}
	if err := json.NewDecoder(response).Decode(parsed); err != nil {
		return "", sce.WithMessage(sce.ErrScorecardInternal, fmt.Sprintf("failed to parse pypi package json: %v", err))
	}

	// Collect the distinct candidate repositories; the map acts as a set.
	candidates := make(map[string]any)
	addRepoIfValid(candidates, parsed.Info.ProjectURL)
	for _, link := range parsed.Info.ProjectURLs {
		addRepoIfValid(candidates, link)
	}

	switch len(candidates) {
	case 0:
		return "", sce.WithMessage(sce.ErrScorecardInternal,
			fmt.Sprintf("could not find source repo for pypi package: %s", packageName))
	case 1:
		// Exactly one key exists, so the first iteration yields the answer.
		for repo := range candidates {
			return repo, nil
		}
	}
	return "", sce.WithMessage(sce.ErrScorecardInternal,
		fmt.Sprintf("found too many possible source repos for pypi package: %s", packageName))
}

// fetchGitRepositoryFromPYPI gets the GitHub or GitLab repository URL for the pypi package.
func fetchGitRepositoryFromPYPI(packageName string, manager pmc.Client) (string, error) {
pypiSearchURL := "https://pypi.org/pypi/%s/json"
Expand All @@ -117,16 +164,7 @@ func fetchGitRepositoryFromPYPI(packageName string, manager pmc.Client) (string,
}

defer resp.Body.Close()
v := &pypiSearchResults{}
err = json.NewDecoder(resp.Body).Decode(v)
if err != nil {
return "", sce.WithMessage(sce.ErrScorecardInternal, fmt.Sprintf("failed to parse pypi package json: %v", err))
}
if v.Info.ProjectUrls.Source == "" {
return "", sce.WithMessage(sce.ErrScorecardInternal,
fmt.Sprintf("could not find source repo for pypi package: %s", packageName))
}
return v.Info.ProjectUrls.Source, nil
return findGitRepositoryInPYPIResponse(packageName, resp.Body)
}

// Gets the GitHub repository URL for the rubygems package.
Expand Down
235 changes: 121 additions & 114 deletions cmd/package_managers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"errors"
"io"
"net/http"
"strings"
"testing"

"github.com/golang/mock/gomock"
Expand Down Expand Up @@ -161,6 +162,120 @@ func Test_fetchGitRepositoryFromNPM(t *testing.T) {
}
}

// Test_findGitRepositoryInPYPIResponse feeds trimmed-down PyPI JSON payloads
// to findGitRepositoryInPYPIResponse and checks either the repository URL it
// returns or a substring of the error it reports.
func Test_findGitRepositoryInPYPIResponse(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name                string
		partialPYPIResponse string
		want                string
		wantErrStr          string
	}

	cases := []testCase{
		{
			// No recognised repository URL anywhere in the payload.
			name: "findGitRepositoryInPYPIResponse_none",
			partialPYPIResponse: `
{
"info": {
"platform": "UNKNOWN",
"not_a_project_url": "https://pypi.org/project/color/",
"project_urls": {
"Homepage": "http://git_NOT_VALID_hub.com/htaslan/color"
}
}
}
`,
			want:       "",
			wantErrStr: "could not find source repo for pypi package: somePackage",
		},
		{
			// The repository is found through the top-level project_url.
			name: "findGitRepositoryInPYPIResponse_project_url",
			partialPYPIResponse: `
{
"info": {
"platform": "UNKNOWN",
"project_url": "https://github.com/htaslan/color/",
"project_urls": {
"Homepage": "http://git_NOT_VALID_hub.com/htaslan/color"
}
}
}
`,
			want:       "https://github.com/htaslan/color",
			wantErrStr: "",
		},
		{
			// The repository is found under an arbitrary project_urls key.
			name: "findGitRepositoryInPYPIResponse_project_urls",
			partialPYPIResponse: `
{
"info": {
"platform": "UNKNOWN",
"project_url": "http://git_NOT_VALID_hub.com/htaslan/color",
"project_urls": {
"RandomKey": "https://github.com/htaslan/color/",
"AnotherRandomKey": "http://htaslan.github.io/color"
}
}
}
`,
			want:       "https://github.com/htaslan/color",
			wantErrStr: "",
		},
		{
			// A repository URL and its GitHub Pages URL count as one repo.
			name: "findGitRepositoryInPYPIResponse_dedup",
			partialPYPIResponse: `
{
"info": {
"platform": "UNKNOWN",
"project_url": "foo",
"project_urls": {
"RandomKey": "https://github.com/htaslan/color/",
"AnotherRandomKey": "http://htaslan.github.io/color"
}
}
}
`,
			want:       "https://github.com/htaslan/color",
			wantErrStr: "",
		},
		{
			// Two different hosts yield two candidates, which is an error.
			name: "findGitRepositoryInPYPIResponse_toomany",
			partialPYPIResponse: `
{
"info": {
"platform": "UNKNOWN",
"project_url": "foo",
"project_urls": {
"RandomKey": "https://github.com/htaslan/color/",
"AnotherRandomKey": "https://gitlab.com/htaslan/color"
}
}
}
`,
			want:       "",
			wantErrStr: "found too many possible source repos for pypi package: somePackage",
		},
	}

	for i := range cases {
		// Copy the case so each parallel subtest works on its own value.
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			got, err := findGitRepositoryInPYPIResponse("somePackage", strings.NewReader(tc.partialPYPIResponse))

			switch {
			case err != nil && (tc.wantErrStr == "" || !strings.Contains(err.Error(), tc.wantErrStr)):
				// An error that was not expected, or one with the wrong text.
				t.Errorf("findGitRepositoryInPYPIResponse() error = \"%v\" did not contain wantErrStr = \"%v\" testcase name %v", err, tc.wantErrStr, tc.name)
				return
			case err == nil && tc.wantErrStr != "":
				// An error was expected but the call succeeded.
				t.Errorf("findGitRepositoryInPYPIResponse() had nil error, but wanted wantErrStr = \"%v\" testcase name %v", tc.wantErrStr, tc.name)
				return
			}

			if got != tc.want {
				t.Errorf("findGitRepositoryInPYPIResponse() = %v, want %v", got, tc.want)
			}
		})
	}
}

func Test_fetchGitRepositoryFromPYPI(t *testing.T) {
t.Parallel()
type args struct {
Expand All @@ -177,7 +292,7 @@ func Test_fetchGitRepositoryFromPYPI(t *testing.T) {
name: "fetchGitRepositoryFromPYPI",
//nolint
args: args{
packageName: "npm-package",
packageName: "some-package",
//nolint
result: `
{
Expand Down Expand Up @@ -279,137 +394,29 @@ func Test_fetchGitRepositoryFromPYPI(t *testing.T) {
`,
},
want: "foo",
want: "https://github.com/htaslan/color",
wantErr: false,
},
{
name: "fetchGitRepositoryFromNPM_error",
name: "fetchGitRepositoryFromPYPI_error",

args: args{
packageName: "npm-package",
packageName: "pypi-package",
result: "",
},
want: "",
wantErr: true,
},
{
name: "fetchGitRepositoryFromNPM_error",
name: "fetchGitRepositoryFromPYPI_error",

args: args{
packageName: "npm-package",
packageName: "pypi-package",
result: "foo",
},
want: "",
wantErr: true,
},
{
name: "empty project url",
//nolint
args: args{
packageName: "npm-package",
//nolint
result: `
{
"info": {
"author": "Hüseyin Tekinaslan",
"author_email": "[email protected]",
"bugtrack_url": null,
"classifiers": [
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: Implementation :: CPython",
"Topic :: Software Development :: Libraries :: Python Modules"
],
"description": "UNKNOWN",
"description_content_type": null,
"docs_url": null,
"downoad_url": null,
"downloads": {
"last_day": -1,
"last_month": -1,
"last_week": -1
},
"home_page": "http://github.com/htaslan/color",
"keywords": "colorize pycolorize color pycolor",
"license": "MIT",
"maintainer": null,
"maintainer_email": null,
"name": "color",
"package_url": "https://pypi.org/project/color/",
"platform": "UNKNOWN",
"project_url": "https://pypi.org/project/color/",
"project_urls": {
"Homepage": "http://github.com/htaslan/color",
"Source": ""
},
"release_url": "https://pypi.org/project/color/0.1/",
"requires_dist": null,
"requires_python": null,
"summary": "python module for colorize string",
"version": "0.1",
"yanked": false,
"yanked_reason": null
},
"last_serial": 2041956,
"releases": {
"0.1": [
{
"comment_text": "a python module of colorize string",
"digests": {
"md5": "1a4577069c636b28d85052db9a384b95",
"sha256": "de5b51fea834cb067631beaa1ec11d7753f1e3615e836e2e4c34dcf2b343eac2"
},
"downloads": -1,
"filename": "color-0.1.1.tar.gz",
"has_sig": false,
"md5_digest": "1a4577069c636b28d85052db9a384b95",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 3568,
"upload_time": "2016-04-01T13:23:25",
"upload_time_iso_8601": "2016-04-01T13:23:25.284973Z",
"url": "https://files.pythonhosted.org/packages/88/04/0defd6f424e5bafb5abc75510cbe119a85d80b5505f1de5cd9a16d89ba8c/color-0.1.1.tar.gz",
"yanked": false,
"yanked_reason": null
}
]
},
"urls": [
{
"comment_text": "a python module of colorize string",
"digests": {
"md5": "1a4577069c636b28d85052db9a384b95",
"sha256": "de5b51fea834cb067631beaa1ec11d7753f1e3615e836e2e4c34dcf2b343eac2"
},
"downloads": -1,
"filename": "color-0.1.1.tar.gz",
"has_sig": false,
"md5_digest": "1a4577069c636b28d85052db9a384b95",
"packagetype": "sdist",
"python_version": "source",
"requires_python": null,
"size": 3568,
"upload_time": "2016-04-01T13:23:25",
"upload_time_iso_8601": "2016-04-01T13:23:25.284973Z",
"url": "https://files.pythonhosted.org/packages/88/04/0defd6f424e5bafb5abc75510cbe119a85d80b5505f1de5cd9a16d89ba8c/color-0.1.1.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"vulnerabilities": []
}
`,
},
want: "",
wantErr: true,
},
}
for _, tt := range tests {
tt := tt
Expand Down

0 comments on commit 83d46c4

Please sign in to comment.