added catawiki scrape
ci.vdhsn.com/push Build is failing Details

feat/add-catawiki
Adam Veldhousen 2023-07-11 22:55:58 -05:00
parent 2643817e93
commit abb6183e88
Signed by: adam
GPG Key ID: 6DB29003C6DD1E4B
4 changed files with 73 additions and 8 deletions

View File

@ -36,10 +36,15 @@ func (cw CatawikiAuctionFinder) Find(ctx context.Context, limit int, results cha
limit = 1000
}
if limit <= 0 {
limit = 1000
}
errg, errgCtx := errgroup.WithContext(ctx)
errg.SetLimit(5)
for p := 1; p < limit/pageSize; p++ {
kernel.TraceLog.Printf("[Catawiki] fetching %d pages or up to %d results in %d batches", limit/pageSize, limit, pageSize)
for p := 1; p <= limit/pageSize; p++ {
pageIdx := p
errg.Go(func() error {
auctionR, err := GetUpcomingAuctions(errgCtx, limit, pageIdx)
@ -53,8 +58,11 @@ func (cw CatawikiAuctionFinder) Find(ctx context.Context, limit int, results cha
for _, r := range auctionR {
select {
case <-errgCtx.Done():
kernel.TraceLog.Println("[Catawiki] group context exited early")
return nil
case results <- r:
default:
results <- r
kernel.TraceLog.Printf("[Catawiki]: %+v", r)
}
}
return nil
@ -62,6 +70,7 @@ func (cw CatawikiAuctionFinder) Find(ctx context.Context, limit int, results cha
}
if err = errg.Wait(); err != nil {
kernel.ErrorLog.Printf("[Catawiki] could not get results f: %v", err)
return
}
@ -81,15 +90,15 @@ func GetUpcomingAuctions(ctx context.Context, limit int, page int) (results []ca
page = 1
}
url := fmt.Sprintf("https://www.catawiki.com/buyer/api/v1/auctions?locale=en&per_page=%d&page=%d", limit, page)
url := fmt.Sprintf("https://www.catawiki.com/buyer/api/v1/auctions?locale=en&=published_at_desc&per_page=%d&page=%d", limit, page)
req, _ := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
req.Header.Add("Accept", "application/json")
req.Header.Add("Accept-Language", "en-US,en;q=0.5")
req.Header.Add("Accept-Encoding", "gzip, deflate, br")
req.Header.Add("User-Agent", "barretthousen.com Auction Search Engine")
kernel.TraceLog.Printf("[Catawiki] Loading page %d of %d results", page, limit)
var res *http.Response
if res, err = http.DefaultClient.Do(req); err != nil {
return
@ -97,6 +106,11 @@ func GetUpcomingAuctions(ctx context.Context, limit int, page int) (results []ca
defer res.Body.Close()
if res.StatusCode != http.StatusOK {
err = fmt.Errorf("bad response code: %d", res.StatusCode)
return
}
type Payload struct {
Auctions []struct {
ID int `json:"id"`
@ -125,7 +139,6 @@ func GetUpcomingAuctions(ctx context.Context, limit int, page int) (results []ca
results = make([]catalog.Auction, len(p.Auctions))
for idx, auction := range p.Auctions {
kernel.TraceLog.Printf("%+v", auction.Title)
results[idx] = catalog.Auction{
Title: auction.Title,
Description: "",

View File

@ -0,0 +1,44 @@
package catawiki
import (
"context"
"reflect"
"testing"
catalog "git.vdhsn.com/barretthousen/barretthousen/src/catalog/api"
)
// TestGetUpcomingAuctions runs GetUpcomingAuctions against a table of
// limit/page combinations.
//
// For every case it checks that:
//   - the presence of an error matches wantErr,
//   - the number of returned auctions equals len(wantResults), and
//   - no returned auction is the zero value of catalog.Auction.
//
// wantResults is used only for its length. The previous implementation
// deep-compared the returned auctions against a slice of zero-value
// placeholders, which cannot match any populated result.
//
// The test is skipped when `go test -short` is used because
// GetUpcomingAuctions is invoked directly with no stubbed transport.
func TestGetUpcomingAuctions(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping GetUpcomingAuctions test in short mode: no stubbed transport")
	}

	type args struct {
		limit int
		page  int
	}
	tests := []struct {
		name        string
		args        args
		wantResults []catalog.Auction // only len(wantResults) is asserted
		wantErr     bool
	}{
		{
			name: "1 Page, 25 Results",
			args: args{
				limit: 25,
				page:  1,
			},
			wantResults: make([]catalog.Auction, 25),
			wantErr:     false,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			gotResults, err := GetUpcomingAuctions(context.Background(), tt.args.limit, tt.args.page)
			if (err != nil) != tt.wantErr {
				t.Errorf("GetUpcomingAuctions() error = %v, wantErr %v", err, tt.wantErr)
				return
			}

			if len(gotResults) != len(tt.wantResults) {
				t.Errorf("GetUpcomingAuctions() returned %d results, want %d", len(gotResults), len(tt.wantResults))
			}

			// A zero-value entry means a slot in the result slice was never
			// filled in from the response payload.
			for idx, got := range gotResults {
				if reflect.DeepEqual(got, catalog.Auction{}) {
					t.Errorf("GetUpcomingAuctions() result %d is an empty catalog.Auction", idx)
				}
			}
		})
	}
}

View File

@ -2,6 +2,7 @@ package domain
import (
"context"
"errors"
"fmt"
"strings"
"time"
@ -60,7 +61,12 @@ type (
func (domain Domain) StartSync(ctx context.Context, in FindNewUpcomingInput) (out FindNewUpcomingOutput, err error) {
kernel.TraceLog.Printf("%+v", in)
finder := targetsImpls["liveauctioneers"]
finder, ok := targetsImpls[in.TargetSite]
if !ok {
kernel.TraceLog.Println("could not find target")
err = errors.New("No scrape job found by name")
return
}
if out.Job, err = domain.Storage.CreateScrapeJob(ctx, in.TargetSite); err != nil {
err = fmt.Errorf("could not create new scrape job record: %w", err)
@ -116,13 +122,13 @@ func (domain *Domain) executeScrapeJob(finder UpcomingAuctionFinder, jobID int)
errs := &strings.Builder{}
for auction := range found {
total++
if !auction.Start.After(time.Now()) {
if !auction.Start.After(time.Now().Add(-24 * time.Hour)) {
continue
}
ace, err := domain.CatalogService.UpdateUpcomingAuction(ctx, auction)
if err != nil {
kernel.TraceLog.Printf("could not import upcoming auction: %s", err.Error())
kernel.ErrorLog.Printf("could not import upcoming auction: %s", err.Error())
fmt.Fprintf(errs, "{ \"AuctionFingerprint\": \"%s\", \"error\": \"%s\" }\n", ace.Fingerprint, err.Error())
continue
}
@ -132,6 +138,7 @@ func (domain *Domain) executeScrapeJob(finder UpcomingAuctionFinder, jobID int)
}
}
kernel.TraceLog.Println("waiting for results...")
if err := errGroup.Wait(); err != nil {
err = fmt.Errorf("an issue occurred while finding upcoming items iteration: %w", err)
fmt.Fprintf(errs, "{ \"error\": \"%s\" }", err.Error())

View File

@ -16,6 +16,7 @@ import (
"go.uber.org/dig"
"google.golang.org/grpc"
_ "git.vdhsn.com/barretthousen/barretthousen/src/runner/internal/domain/catawiki"
_ "git.vdhsn.com/barretthousen/barretthousen/src/runner/internal/domain/liveauctioneers"
)