Skip to content

Commit

Permalink
Added support for proxy to closest unmodified Memento, closes #91
Browse files Browse the repository at this point in the history
  • Loading branch information
ibnesayeed committed Jan 29, 2017
1 parent c3a6882 commit 77663d0
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 7 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ When run as a Web Service, MemGator exposes following customizable endpoints:
$ memgator [options] server
TimeMap : http://localhost:1208/timemap/{FORMAT}/{URI-R}
TimeGate : http://localhost:1208/timegate/{URI-R} [Accept-Datetime]
Memento : http://localhost:1208/memento[/{FORMAT}]/{DATETIME}/{URI-R}
Memento : http://localhost:1208/memento[/{FORMAT}|proxy]/{DATETIME}/{URI-R}
Benchmark : http://localhost:1208/monitor [SSE]
# FORMAT => link|json|cdxj
Expand All @@ -52,9 +52,10 @@ Benchmark : http://localhost:1208/monitor [SSE]

* `TimeMap` endpoint serves an aggregated TimeMap for a given URI-R in accordance with the [Memento RFC](http://tools.ietf.org/html/rfc7089). Additionally, it makes sure that the Mementos are chronologically ordered. It also provides the TimeMap data serialized in additional experimental formats.
* `TimeGate` endpoint allows datetime negotiation via the `Accept-Datetime` header in accordance with the [Memento RFC](http://tools.ietf.org/html/rfc7089). A successful response redirects to the closest Memento (to the given datetime) using the `Location` header. The default datetime is the current time. A successful response also includes a `Link` header which provides links to the first, last, next, and previous Mementos.
* `Memento` endpoint allows datetime negotiation in the request URL itself for clients that cannot easily send custom request headers (as opposed to the `TimeGate` which requires the `Accept-Datetime` header). This endpoint behaves differently based on whether the `format` was specified in the request. It essentially splits the functionality of the `TimeGate` endpoint in two parts as follows:
* `Memento` endpoint allows datetime negotiation in the request URL itself for clients that cannot easily send custom request headers (as opposed to the `TimeGate` which requires the `Accept-Datetime` header). This endpoint behaves differently based on whether the `format` was specified in the request. It essentially splits the functionality of the `TimeGate` endpoint as follows:
* If a format is specified, it returns the description of the closest Memento (to the given datetime) in the specified format. It is essentially the same data that is available in the `Link` header of the `TimeGate` response, but as the payload in the format requested by the client.
* If a format is not specified, it redirects to the closest Memento (to the given datetime) using the `Location` header.
* If the term `proxy` is used instead of a format, the endpoint acts as a proxy for the closest original, unmodified Memento, with CORS headers added to the response.
* `Benchmark` is an optional endpoint that can be enabled by the `--monitor` flag when the server is started. If enabled, it provides a stream of the benchmark log over [SSE](http://www.html5rocks.com/en/tutorials/eventsource/basics/) for realtime visualization and monitoring.

**NOTE:** A fallback endpoint `/api` is added for compatibility with [Time Travel APIs](http://timetravel.mementoweb.org/guide/api/#memento-json) to allow drop-in replacement in existing tools. This endpoint is an alias to the `/memento` endpoint that returns the description of a Memento.
Expand Down
31 changes: 26 additions & 5 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"math/rand"
"net"
"net/http"
"net/http/httputil"
"net/url"
"os"
"regexp"
Expand Down Expand Up @@ -47,6 +48,7 @@ var (
transport http.Transport
client http.Client
broker *sse.Broker
reverseProxy *httputil.ReverseProxy
baseURL string
)

Expand Down Expand Up @@ -149,10 +151,11 @@ var regs = map[string]*regexp.Regexp{
"attrdlm": regexp.MustCompile(`\s*>?"?\s*;\s*`),
"kvaldlm": regexp.MustCompile(`\s*=\s*"?\s*`),
"memento": regexp.MustCompile(`\bmemento\b`),
"memdttm": regexp.MustCompile(`/(\d{14})/`),
"dttmstr": regexp.MustCompile(`^(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?$`),
"tmappth": regexp.MustCompile(`^timemap/(link|json|cdxj)/.+`),
"tgatpth": regexp.MustCompile(`^timegate/.+`),
"descpth": regexp.MustCompile(`^(memento|api)/(link|json|cdxj)/(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?/.+`),
"descpth": regexp.MustCompile(`^(memento|api)/(link|json|cdxj|proxy)/(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?/.+`),
"rdrcpth": regexp.MustCompile(`^memento/(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?/.+`),
}

Expand Down Expand Up @@ -601,6 +604,17 @@ func memgatorService(w http.ResponseWriter, r *http.Request, urir string, format
http.Redirect(w, r, closest, http.StatusFound)
return
}
if format == "proxy" {
nr, err := http.NewRequest(http.MethodGet, closest, nil)
if err != nil {
logError.Printf("Error creating proxy request (%s): %v", closest, err)
http.Error(w, "Error creating proxy request for "+closest, http.StatusInternalServerError)
return
}
logInfo.Printf("Serving as proxy for: %s", closest)
reverseProxy.ServeHTTP(w, nr)
return
}
dataCh := make(chan string, 1)
if format == "timegate" {
go serializeLinks(urir, basetm, "link", dataCh, navonly, sess)
Expand Down Expand Up @@ -672,7 +686,7 @@ func router(w http.ResponseWriter, r *http.Request) {
rawdtm = p[2]
rawuri = p[3]
} else {
err = fmt.Errorf("/memento[/{FORMAT}]/{DATETIME}/{URI-R} (FORMAT => %s, DATETIME => %s)", responseFormats, validDatetimes)
err = fmt.Errorf("/memento[/{FORMAT}|proxy]/{DATETIME}/{URI-R} (FORMAT => %s, DATETIME => %s)", responseFormats, validDatetimes)
}
case "monitor":
if *monitor {
Expand Down Expand Up @@ -798,17 +812,24 @@ func initLoggers() {
// initNetwork sets up the package-level transport, client, and reverseProxy
// variables used for outbound HTTP traffic.
//
// NOTE(review): this listing appears to be a rendered diff hunk, so the
// repeated Timeout / IdleConnTimeout / MaxIdleConnsPerHost lines below are
// presumably the removed and added versions of the same field — confirm
// against the actual main.go before relying on this text.
func initNetwork() {
// Transport whose dialer and header/idle timeouts are taken from the
// values pointed to by contimeout, restimeout, and hdrtimeout.
transport = http.Transport{
DialContext: (&net.Dialer{
Timeout: *contimeout,
Timeout: *contimeout,
KeepAlive: *restimeout,
}).DialContext,
ResponseHeaderTimeout: *hdrtimeout,
MaxIdleConnsPerHost: 5,
IdleConnTimeout: *restimeout,
IdleConnTimeout: *restimeout,
MaxIdleConnsPerHost: 5,
}
// The client uses the transport configured above and applies *restimeout
// as its overall request timeout.
client = http.Client{
Transport: &transport,
Timeout: *restimeout,
}
// The reverse proxy uses the same transport as the client and flushes
// buffered response data to the caller every 100ms.
reverseProxy = &httputil.ReverseProxy{
Transport: &transport,
FlushInterval: time.Duration(100*time.Millisecond),
// Director rewrites the outgoing request path: every match of the
// "memdttm" pattern gets "id_" appended to its captured group.
// Presumably this is the Wayback-style modifier that requests the
// unmodified capture — TODO confirm for all target archives.
Director: func(r *http.Request) {
r.URL.Path = regs["memdttm"].ReplaceAllString(r.URL.Path, "/${1}id_/")
},
}
}

func main() {
Expand Down

0 comments on commit 77663d0

Please sign in to comment.