Skip to content

Commit 6eb99e3

Browse files
authored
Merge branch 'efixler:main' into main
2 parents 3ca641f + 9009fdc commit 6eb99e3

File tree

11 files changed

+769
-640
lines changed

11 files changed

+769
-640
lines changed

cmd/scrape-server/main.go

+13-8
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,12 @@ import (
1919
"github.com/efixler/envflags"
2020
"github.com/efixler/scrape/fetch"
2121
"github.com/efixler/scrape/fetch/trafilatura"
22+
"github.com/efixler/scrape/internal"
2223
"github.com/efixler/scrape/internal/auth"
2324
"github.com/efixler/scrape/internal/cmd"
2425
"github.com/efixler/scrape/internal/headless"
2526
"github.com/efixler/scrape/internal/server"
27+
"github.com/efixler/scrape/internal/storage"
2628
"github.com/efixler/scrape/resource"
2729
"github.com/efixler/scrape/ua"
2830
"github.com/efixler/webutil/graceful"
@@ -50,23 +52,26 @@ func main() {
5052
ctx, cancel := context.WithCancel(context.Background())
5153
dbh := dbFlags.MustDatabase()
5254
dbFlags = nil
53-
normalClient := fetch.MustClient(fetch.WithUserAgent(userAgent.Get().String()))
55+
directClient := fetch.MustClient(fetch.WithUserAgent(userAgent.Get().String()))
5456
var headlessFetcher fetch.URLFetcher = nil
5557
if headlessEnabled.Get() {
5658
headlessClient := headless.MustChromeClient(ctx, userAgent.Get().String(), 6)
5759
headlessFetcher = trafilatura.MustNew(headlessClient)
5860
}
5961

60-
// TODO: Implement options pattern for NewScrapeServer
61-
ss, _ := server.NewScrapeServer(
62+
ss := server.MustScrapeServer(
6263
ctx,
63-
dbh,
64-
trafilatura.MustNew(normalClient),
65-
headlessFetcher,
64+
server.WithURLFetcher(
65+
internal.NewStorageBackedFetcher(
66+
trafilatura.MustNew(directClient),
67+
storage.NewURLDataStore(dbh),
68+
),
69+
),
70+
server.WithHeadlessIf(headlessFetcher),
71+
server.WithAuthorizationIf(*signingKey.Get()),
6672
)
6773

68-
if sk := *signingKey.Get(); len(sk) > 0 {
69-
ss.SigningKey = sk
74+
if ss.AuthEnabled() {
7075
slog.Info("scrape-server authorization via JWT is enabled")
7176
} else {
7277
slog.Info("scrape-server authorization is disabled, running in open access mode")

cmd/scrape/main.go

+2-4
Original file line numberDiff line numberDiff line change
@@ -185,13 +185,11 @@ func initFetcher(dbh *database.DBHandle) (*internal.StorageBackedFetcher, error)
185185
fetch.WithUserAgent(userAgent.Get().String()),
186186
)
187187
}
188-
fetcher, err := internal.NewStorageBackedFetcher(
188+
fetcher := internal.NewStorageBackedFetcher(
189189
trafilatura.MustNew(client),
190190
storage.NewURLDataStore(dbh),
191191
)
192-
if err != nil {
193-
return nil, fmt.Errorf("error creating storage backed fetcher: %s", err)
194-
}
192+
195193
err = fetcher.Open(dbh.Ctx)
196194
if err != nil {
197195
return nil, fmt.Errorf("error opening storage backed fetcher: %s", err)

internal/benchmark_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ func makeFetcher(dbPath string, ctx context.Context) (*StorageBackedFetcher, err
6060
topts := *trafilatura.DefaultOptions
6161
topts.Transport = t
6262

63-
fetcher, err := NewStorageBackedFetcher(
63+
fetcher := NewStorageBackedFetcher(
6464
trafilatura.Factory(topts),
6565
sqlite.Factory(dbPath),
6666
)

internal/scrape.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ type StorageBackedFetcher struct {
2828
func NewStorageBackedFetcher(
2929
fetcher fetch.URLFetcher,
3030
storage store.URLDataStore,
31-
) (*StorageBackedFetcher, error) {
31+
) *StorageBackedFetcher {
3232
return &StorageBackedFetcher{
3333
Fetcher: fetcher,
3434
Storage: storage,
3535
saving: new(sync.WaitGroup),
36-
}, nil
36+
}
3737
}
3838

3939
// The context passed to Open() will be passed on to child components

internal/scrape_test.go

+4-8
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,8 @@ func TestFetchStoresAndRetrieves(t *testing.T) {
4343
dbh := database.New(sqlite.MustNew(sqlite.InMemoryDB()))
4444
storage := storage.NewURLDataStore(dbh)
4545

46-
fetcher, err := NewStorageBackedFetcher(tf, storage)
47-
if err != nil {
48-
t.Fatal(err)
49-
}
46+
fetcher := NewStorageBackedFetcher(tf, storage)
47+
5048
ctx, cancel := context.WithCancel(context.Background())
5149
defer cancel()
5250
err = fetcher.Open(ctx)
@@ -168,10 +166,8 @@ func TestFetchUnstored(t *testing.T) {
168166
}
169167
dbh := database.New(sqlite.MustNew(sqlite.InMemoryDB()))
170168
storage := storage.NewURLDataStore(dbh)
171-
fetcher, err := NewStorageBackedFetcher(tf, storage)
172-
if err != nil {
173-
t.Fatal(err)
174-
}
169+
fetcher := NewStorageBackedFetcher(tf, storage)
170+
175171
ctx, cancel := context.WithCancel(context.Background())
176172
defer cancel()
177173
err = fetcher.Open(ctx)

internal/server/home.go

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package server
2+
3+
import (
4+
"bytes"
5+
"embed"
6+
"html/template"
7+
"log/slog"
8+
"net/http"
9+
"time"
10+
11+
"github.com/efixler/scrape/internal/auth"
12+
)
13+
14+
//go:embed templates/index.html
15+
var home embed.FS
16+
17+
func mustHomeTemplate(ss *scrapeServer) *template.Template {
18+
tmpl := template.New("home")
19+
var authTokenF = func() string { return "" }
20+
var authEnabledF = func() bool { return ss.AuthEnabled() }
21+
if authEnabledF() {
22+
authTokenF = func() string {
23+
c, err := auth.NewClaims(
24+
auth.WithSubject("home"),
25+
auth.ExpiresIn(60*time.Minute),
26+
)
27+
if err != nil {
28+
slog.Error("Error creating claims for home view", "error", err)
29+
return ""
30+
}
31+
s, err := c.Sign(ss.SigningKey())
32+
if err != nil {
33+
slog.Error("Error signing claims for home view", "error", err)
34+
return ""
35+
}
36+
return s
37+
}
38+
}
39+
funcMap := template.FuncMap{
40+
"AuthToken": authTokenF,
41+
"AuthEnabled": authEnabledF,
42+
}
43+
tmpl = tmpl.Funcs(funcMap)
44+
homeSource, _ := home.ReadFile("templates/index.html")
45+
tmpl = template.Must(tmpl.Parse(string(homeSource)))
46+
return tmpl
47+
}
48+
49+
func homeHandler(ss *scrapeServer) http.HandlerFunc {
50+
tmpl := mustHomeTemplate(ss)
51+
return func(w http.ResponseWriter, r *http.Request) {
52+
var buf bytes.Buffer
53+
if err := tmpl.Execute(&buf, nil); err != nil {
54+
http.Error(w, "Error rendering home page", http.StatusInternalServerError)
55+
return
56+
}
57+
w.Header().Set("Content-Type", "text/html; charset=utf-8")
58+
w.WriteHeader(http.StatusOK)
59+
w.Write(buf.Bytes())
60+
}
61+
}

internal/server/home_test.go

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package server
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"testing"
7+
8+
"github.com/efixler/scrape/internal/auth"
9+
)
10+
11+
func TestMustTemplate(t *testing.T) {
12+
tests := []struct {
13+
name string
14+
key auth.HMACBase64Key
15+
expectToken bool
16+
}{
17+
{
18+
name: "with key",
19+
key: auth.MustNewHS256SigningKey(),
20+
expectToken: true,
21+
},
22+
{
23+
name: "no key",
24+
key: nil,
25+
expectToken: false,
26+
},
27+
{
28+
name: "empty key",
29+
key: auth.HMACBase64Key([]byte{}),
30+
expectToken: false,
31+
},
32+
}
33+
for _, test := range tests {
34+
ss := MustScrapeServer(
35+
context.Background(),
36+
WithURLFetcher(&mockUrlFetcher{}),
37+
WithAuthorizationIf(test.key),
38+
)
39+
tmpl := mustHomeTemplate(ss)
40+
tmpl, err := tmpl.Parse("{{AuthToken}}")
41+
if err != nil {
42+
t.Fatalf("[%s] Error parsing template: %s", test.name, err)
43+
}
44+
var buf bytes.Buffer
45+
err = tmpl.Execute(&buf, nil)
46+
if err != nil {
47+
t.Fatalf("[%s] Error executing template: %s", test.name, err)
48+
}
49+
output := buf.String()
50+
if !test.expectToken && output != "" {
51+
t.Fatalf("[%s] Expected empty output, got %s", test.name, output)
52+
}
53+
if test.expectToken {
54+
switch output {
55+
case "":
56+
t.Fatalf("[%s] Expected non-empty token, got empty", test.name)
57+
default:
58+
_, err := auth.VerifyToken(test.key, output)
59+
if err != nil {
60+
t.Fatalf("[%s] Error verifying token: %s", test.name, err)
61+
}
62+
}
63+
}
64+
}
65+
}

0 commit comments

Comments
 (0)