// Copyright (c) Wikimedia Foundation and contributors.
// All Rights Reserved.
//
// This file is part of GitLab Content Proxy.
//
// GitLab Content Proxy is free software: you can redistribute it and/or
// modify it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or (at your
// option) any later version.
//
// GitLab Content Proxy is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
// more details.
//
// You should have received a copy of the GNU General Public License along
// with this program. If not, see <https://www.gnu.org/licenses/>.
A reverse proxy to serve gitlab.wikimedia.org files with appropriate
mime types so they can be imported by mw.loader.load.
https://%[1]s/{REPO}/-/raw/{PATH}?mime={MIME}&maxage={MAXAGE}
{REPO}{PATH}{MIME}{MAXAGE}max-age HTTP cache control header to this many seconds. Errors are never cached. (optional)The URL format matches gitlab's internal URLs for raw content. This should help folks find the right URL by browsing in gitlab.wikimedia.org, changing the hostname to gitlab-content.toolforge.org, and adding the desired mime type specification.
` func main() { // Output structured logs to stderr as JSON lines. logger := slog.New(slogenv.NewHandler(slog.NewJSONHandler(os.Stderr, nil))) slog.SetDefault(logger) slog.Info("Creating reverse proxy", "upstream", UPSTREAM_URL) upstream, err := url.Parse(UPSTREAM_URL) if err != nil { log.Fatal("Invalid origin server URL") } proxy := httputil.NewSingleHostReverseProxy(upstream) // Use proxy.Director to modify upstream requests. // - Replace User-Agent header with our own origDirector := proxy.Director proxy.Director = func(req *http.Request) { origDirector(req) req.Header.Set("User-Agent", USER_AGENT) // Drop Accept-Encoding header so the upstream doesn't compress its // response. The Toolforge reverse proxy adds gzip automatically based // on mime-type and double encoding makes everyone sad. req.Header.Del("Accept-Encoding") } headerSafe := regexp.MustCompile(RE_HEADER_SAFE) permalink := regexp.MustCompile(RE_PERMALINK) allowedResponseCodes := []int{ 200, 204, 304, 403, 404, 408, 410, 412, 413, 414, 415, 416, 417, 421, 429, 431, 451, 500, 501, 502, 503, 504, } allowedHeaders := []string{ "Cache-Control", "Content-Disposition", "Content-Length", "Content-Location", "Content-Type", "Date", "Etag", "Expires", "Retry-After", "Transfer-Encoding", "Vary", } // Use proxy.ModifyResponse to modify response to client. // - Turn unexpected status codes into 404s // - Add user supplied Content-Type or default "text/plain" proxy.ModifyResponse = func(resp *http.Response) error { req := resp.Request slog.Debug( "Upstream response", "url", req.URL.String(), "status", resp.StatusCode, ) if !slices.Contains(allowedResponseCodes, resp.StatusCode) { // We got some upstream response that isn't content or something // that we feel like we can just hand back to the caller. // Return a 404 because the actual code may be a 301 to some // login screen or something. 
resp.StatusCode = http.StatusNotFound resp.Status = http.StatusText(http.StatusNotFound) resp.Body = io.NopCloser(bytes.NewBufferString(BODY_NOTFOUND)) resp.ContentLength = int64(len(BODY_NOTFOUND)) resp.Header = make(http.Header, 0) resp.Header.Set("Content-Length", strconv.Itoa(len(BODY_NOTFOUND))) resp.Header.Set("Content-Type", "text/plain; charset=utf-8") resp.Header.Set("Cache-Control", "no-store") slog.Debug("Not Found", "url", req.URL.String()) return nil } // Pass through headers in the allow list respHeaders := resp.Header resp.Header = make(http.Header, 0) for _, header := range allowedHeaders { for _, value := range respHeaders.Values(header) { resp.Header.Add(header, value) } } if resp.StatusCode == 200 { // Change Content-Type header for found content mime := "text/plain; charset=utf-8" if req.URL.Query().Get("mime") != "" { mime = headerSafe.ReplaceAllString( req.URL.Query().Get("mime"), "", ) } resp.Header.Set("Content-Type", mime) // Change Cache-Control header if requested if permalink.MatchString(req.URL.Path) { resp.Header.Set( "Cache-Control", "public, max-age=31536000, immutable", ) } else if req.URL.Query().Get("maxage") != "" { maxage, err := strconv.Atoi(req.URL.Query().Get("maxage")) if err == nil { resp.Header.Set( "Cache-Control", fmt.Sprintf(FMT_CACHECONTROL, maxage), ) } } // T397571: Allow use from any origin resp.Header.Set("Access-Control-Allow-Origin", "*") resp.Header.Set("Access-Control-Expose-Headers", "*") } slog.Debug( "Request", "url", req.URL.String(), "status", resp.StatusCode, ) return nil } // Serve INDEX_HTML static page for GET / route http.HandleFunc("GET /{$}", func(w http.ResponseWriter, r *http.Request) { slog.Debug("Request", "url", r.URL.String()) w.WriteHeader(http.StatusOK) w.Header().Set("Content-Type", "text/html; charset=utf-8") w.Header().Set("Cache-Control", "public, max-age=3600, must-revalidate") _, err := io.WriteString(w, fmt.Sprintf(INDEX_HTML, r.Host)) if err != nil { log.Fatal(err) } }) // T397571: 
Handle CORS preflight requests http.HandleFunc("OPTIONS /", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Access-Control-Allow-Origin", "*") w.Header().Set("Access-Control-Allow-Methods", "GET, OPTIONS") w.Header().Set("Access-Control-Expose-Headers", "*") w.Header().Set("Access-Control-Max-Age", "86400") w.WriteHeader(http.StatusNoContent) }) // Handle all unhandled GET requests. // - Reverse proxy to UPSTREAM_URL if path contains `/-/raw/` // - Return 400 Bad Request otherwise http.HandleFunc("GET /", func(w http.ResponseWriter, r *http.Request) { if strings.Contains(r.URL.Path, "/-/raw/") { proxy.ServeHTTP(w, r) } else { slog.Debug("Bad request", "url", r.URL.String()) w.WriteHeader(http.StatusBadRequest) w.Header().Set("Content-Type", "text/plain; charset=utf-8") _, err := io.WriteString(w, BODY_BADREQUEST) if err != nil { log.Fatal(err) } } }) // Listen for HTTP requests on port 8000 log.Fatal(http.ListenAndServe(":8000", nil)) }