Skip to content

Commit

Permalink
feat: configurable ignore paths and agents
Browse files Browse the repository at this point in the history
  • Loading branch information
josip committed Nov 22, 2023
1 parent a48b153 commit 0190b69
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 92 deletions.
29 changes: 29 additions & 0 deletions internal/kerotest/kerotest.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const DashPass = "pass"
const PixelPath = "/px.gif"
const pixelReferrerPath = "/blog/hello-mars"
const pixelReferrer = "http://localhost:1234" + pixelReferrerPath
const PrefixToIgnore = "/hello"

var WaitRequest = httptest.NewRequest("GET", WaitPath, nil)

Expand Down Expand Up @@ -154,4 +155,32 @@ func ExpectPixelToTrack(t *testing.T, k *kero.Kero) {
if len(res) != 1 {
t.Fatal("expected pixel to track paths but it did not")
}

res, err = k.Query(
kero.HttpReqMetricName,
kero.MetricLabels{
kero.HttpPathLabel: PixelPath,
},
0,
time.Now().Unix(),
)
if err != nil {
t.Fatal("failed to query db", err)
}

if len(res) != 0 {
t.Fatal("expected request to pixel path not to be tracked")
}
}

// IgnoredHelloRequest builds a GET request for "/hello/mars", a path that
// starts with PrefixToIgnore, for tests of custom ignored prefixes.
func IgnoredHelloRequest() *http.Request {
	return httptest.NewRequest("GET", "/hello/mars", nil)
}

// ExpectHelloIgnored fails the test when k.Count reports a non-zero value
// for HttpReqMetricName over the range from 0 to the current Unix time,
// i.e. when a request that should have been ignored was recorded.
func ExpectHelloIgnored(t *testing.T, k *kero.Kero) {
	now := time.Now().Unix()
	if n := k.Count(kero.HttpReqMetricName, 0, now); n != 0 {
		t.Fatal("expected request to ignore prefix not to be tracked")
	}
}
98 changes: 98 additions & 0 deletions kero.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ type Kero struct {
IgnoreCommonPaths bool
IgnoreBots bool
IgnoreDNT bool

// path prefixes to which requests will be ignored. see file for default list.
IgnoredPrefixes []string
// path suffixes to which requests will be ignored. see file for default list.
IgnoredSuffixes []string
// user-agent values to be ignored. see file for default list.
IgnoredAgents []string
}

type MetricLabels map[string]string
Expand Down Expand Up @@ -55,6 +62,93 @@ const CityLabel = "$city"
const IsBotLabel = "$is_bot"
const VisitorIdLabel = "$visitor_id"

// defaultIgnoredPathPrefixes is the list New assigns to Kero.IgnoredPrefixes.
// ShouldTrackHttpRequest returns false for any path that starts with one of
// these strings (strings.HasPrefix, so matching is case-sensitive).
var defaultIgnoredPathPrefixes = []string{
// dot-prefixed and underscore-prefixed paths
"/.",
"/_",
// various bad bots testing for wordpress
"//",
"/wp",
"/public",
"/wordpress",
}

// defaultIgnoredPathSuffixes is the list New assigns to Kero.IgnoredSuffixes.
// ShouldTrackHttpRequest returns false for any path that ends with one of
// these strings (strings.HasSuffix, so matching is case-sensitive).
var defaultIgnoredPathSuffixes = []string{
	// scripts, styles and their source maps
	".js",
	".js.map",
	".css",
	".css.map",
	// images
	".png",
	".jpg",
	".jpeg",
	".webp",
	".gif",
	".svg",
	// fonts
	".woff",
	".woff2",
	".otf",
	".ttf",
	".ico",
	// audio/video
	".mov",
	".mpg",
	".mpg3",
	".mpg4",
	// the common MPEG extensions; ".mpg3"/".mpg4" above are kept only for
	// backward compatibility
	".mp3",
	".mp4",
	".wav",
	".ogg",
	// various bad bots
	".php",
	".asp",
	".aspx",
	// No leading dot: bots probe ".../wp-includes/wlwmanifest.xml", where the
	// file name is preceded by "/" and a ".wlwmanifest.xml" suffix would
	// never match. This form still matches everything the dotted form did.
	"wlwmanifest.xml",
}

var defaultIgnoredAgents = []string{
// go
"go-http-client",
"github.com/monaco-io",
"gentleman",
// node.js
"node-fetch",
"undici",
"axios",
// objective-c + swift
"alamofire",
"nsurlconnection",
"nsurlsession",
"urlsession",
"swifthttp",
// python
"python-", //-urlib3, -requests
// java
"apache-httpclient",
// php requests
"php-",
"zend",
"laminas",
"guzzlehttp",
// c#/.net todo
// C/c++ todo
// apps
"curl",
"wget",
"rapidapi",
"postman",
// Apple App Site Association
"aasa",
// RSS readers
"linkship",
"feedbin",
"feedly",
"artykul",
// others
"x11",
// render.com health check
"render",
"dataprovider.com",
"researchscan",
"zgrab",
"NetcraftSurveyAgent",
}

type KeroOption func(*Kero) error

// New automatically creates a new Kero database on-disk if one doesn't exist already.
Expand Down Expand Up @@ -86,6 +180,10 @@ func New(options ...KeroOption) (*Kero, error) {
k.DashboardPath = "/_kero"
}

k.IgnoredPrefixes = defaultIgnoredPathPrefixes
k.IgnoredSuffixes = defaultIgnoredPathSuffixes
k.IgnoredAgents = defaultIgnoredAgents

return k, nil
}

Expand Down
12 changes: 12 additions & 0 deletions kerofibermw/kero_fiber_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,15 @@ func TestPixel(t *testing.T) {
ktest.ExpectPixelToTrack(t, k)
}
}

// TestIgnoreCustomPath appends ktest.PrefixToIgnore to the server's ignored
// prefixes, sends a request under that prefix through the Fiber app, and
// checks that nothing was tracked.
func TestIgnoreCustomPath(t *testing.T) {
	app, k := createServer(t)
	defer k.Close()

	k.IgnoredPrefixes = append(k.IgnoredPrefixes, ktest.PrefixToIgnore)

	_, err := app.Test(ktest.IgnoredHelloRequest())
	if err != nil {
		t.Fatal("request failed", err)
	}
	ktest.ExpectHelloIgnored(t, k)
}
10 changes: 10 additions & 0 deletions keroginmw/kero_gin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,13 @@ func TestPixel(t *testing.T) {

ktest.ExpectPixelToTrack(t, k)
}

// TestIgnoreCustomPath appends ktest.PrefixToIgnore to the server's ignored
// prefixes, serves a request under that prefix through the Gin router, and
// checks that nothing was tracked.
func TestIgnoreCustomPath(t *testing.T) {
	r, k := createServer(t)
	defer k.Close()

	k.IgnoredPrefixes = append(k.IgnoredPrefixes, ktest.PrefixToIgnore)

	// The response itself is not inspected; only the tracking side effect is.
	req := ktest.IgnoredHelloRequest()
	r.ServeHTTP(httptest.NewRecorder(), req)
	ktest.ExpectHelloIgnored(t, k)
}
97 changes: 5 additions & 92 deletions track_http.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,45 +17,6 @@ import (
var Pixel, _ = base64.StdEncoding.DecodeString("R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==")
var PixelSize = int64(len(Pixel))

// commonAssetPrefixes lists request-path prefixes that are not tracked:
// ShouldTrackHttpRequest returns false for any path that starts with one of
// these strings (strings.HasPrefix, so matching is case-sensitive).
var commonAssetPrefixes = []string{
// dot-prefixed and underscore-prefixed paths
"/.",
"/_",
// various bad bots testing for wordpress
"//",
"/wp",
"/public",
"/wordpress",
}

// commonAssetSuffixes lists request-path suffixes that are not tracked:
// ShouldTrackHttpRequest returns false for any path that ends with one of
// these strings (strings.HasSuffix, so matching is case-sensitive).
var commonAssetSuffixes = []string{
	// scripts, styles and their source maps
	".js",
	".js.map",
	".css",
	".css.map",
	// images
	".png",
	".jpg",
	".jpeg",
	".webp",
	".gif",
	".svg",
	// fonts
	".woff",
	".woff2",
	".otf",
	".ttf",
	".ico",
	// audio/video
	".mov",
	".mpg",
	".mpg3",
	".mpg4",
	// the common MPEG extensions; ".mpg3"/".mpg4" above are kept only for
	// backward compatibility
	".mp3",
	".mp4",
	".wav",
	".ogg",
	// various bad bots
	".php",
	".asp",
	".aspx",
	// No leading dot: bots probe ".../wp-includes/wlwmanifest.xml", where the
	// file name is preceded by "/" and a ".wlwmanifest.xml" suffix would
	// never match. This form still matches everything the dotted form did.
	"wlwmanifest.xml",
}

type TrackedHttpReq struct {
Method string
Path string
Expand Down Expand Up @@ -90,13 +51,13 @@ func (k *Kero) ShouldTrackHttpRequest(path string) bool {
return false
}

for _, prefix := range commonAssetPrefixes {
for _, prefix := range k.IgnoredPrefixes {
if strings.HasPrefix(path, prefix) {
return false
}
}

for _, suffix := range commonAssetSuffixes {
for _, suffix := range k.IgnoredSuffixes {
if strings.HasSuffix(path, suffix) {
return false
}
Expand Down Expand Up @@ -224,7 +185,7 @@ func (k *Kero) userAgentLabels(headers http.Header) MetricLabels {
if ua.Tablet {
formFactor = FormFactorTablet
}
if ua.Bot || isHttpClientLibrary(uaString) {
if ua.Bot || k.isHttpClientLibrary(uaString) {
formFactor = FormFactorBot
}

Expand Down Expand Up @@ -265,61 +226,13 @@ func (k *Kero) utmLabels(queryParams url.Values) MetricLabels {
}
}

var commonHttpClientLibraries = []string{
// go
"go-http-client",
"github.com/monaco-io",
"gentleman",
// node.js
"node-fetch",
"undici",
"axios",
// objective-c + swift
"alamofire",
"nsurlconnection",
"nsurlsession",
"urlsession",
"swifthttp",
// python
"python-", //-urlib3, -requests
// java
"apache-httpclient",
// php requests
"php-",
"zend",
"laminas",
"guzzlehttp",
// c#/.net todo
// C/c++ todo
// apps
"curl",
"wget",
"rapidapi",
"postman",
// Apple App Site Association
"aasa",
// RSS readers
"linkship",
"feedbin",
"feedly",
"artykul",
// others
"x11",
// render.com health check
"render",
"dataprovider.com",
"researchscan",
"zgrab",
"NetcraftSurveyAgent",
}

func isHttpClientLibrary(ua string) bool {
func (k *Kero) isHttpClientLibrary(ua string) bool {
if len(ua) == 0 {
return true
}

ua = strings.ToLower(ua)
for _, clientName := range commonHttpClientLibraries {
for _, clientName := range k.IgnoredAgents {
if strings.HasPrefix(ua, clientName) {
return true
}
Expand Down

0 comments on commit 0190b69

Please sign in to comment.