Skip to content

Commit 0427204

Browse files
committed
cmd/go/internal/modfetch: retry failed fetches
Attempts to fetch go modules are not retried, impacting the reliability of many go toolchain commands, especially when a go module proxy is used. This adds a repo implementation to cmd/go/internal/modfetch that retries failed operations with exponential backoff unless errors are known to be permanent. Fixes golang#28194.
1 parent fba83cd commit 0427204

File tree

2 files changed

+162
-3
lines changed

2 files changed

+162
-3
lines changed

src/cmd/go/internal/modfetch/proxy.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ type proxySpec struct {
5454
// fallBackOnError is true if a request should be attempted on the next proxy
5555
// in the list after any error from this proxy. If fallBackOnError is false,
5656
// the request will only be attempted on the next proxy if the error is
57-
// equivalent to os.ErrNotFound, which is true for 404 and 410 responses.
57+
// equivalent to fs.ErrNotExist, which is true for 404 and 410 responses.
5858
fallBackOnError bool
5959
}
6060

src/cmd/go/internal/modfetch/repo.go

+161-2
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@ package modfetch
66

77
import (
88
"context"
9+
"errors"
910
"fmt"
1011
"io"
1112
"io/fs"
13+
mathrand "math/rand"
1214
"os"
1315
"strconv"
1416
"time"
@@ -210,13 +212,13 @@ func Lookup(ctx context.Context, proxy, path string) Repo {
210212
}
211213

212214
return lookupCache.Do(lookupCacheKey{proxy, path}, func() Repo {
213-
return newCachingRepo(ctx, path, func(ctx context.Context) (Repo, error) {
215+
return newRetryingRepo(6, newCachingRepo(ctx, path, func(ctx context.Context) (Repo, error) {
214216
r, err := lookup(ctx, proxy, path)
215217
if err == nil && traceRepo {
216218
r = newLoggingRepo(r)
217219
}
218220
return r, err
219-
})
221+
}))
220222
})
221223
}
222224

@@ -436,3 +438,160 @@ func (notExistError) Is(target error) bool {
436438
func (e notExistError) Unwrap() error {
437439
return e.err
438440
}
441+
442+
// A retryingRepo is a wrapper around an underlying Repo
443+
// that retries each operation if it fails with a temporary error.
444+
type retryingRepo struct {
445+
maxRetries int
446+
r Repo
447+
}
448+
449+
func (r *retryingRepo) ModulePath() string {
450+
return r.r.ModulePath()
451+
}
452+
453+
func (r *retryingRepo) CheckReuse(ctx context.Context, old *codehost.Origin) error {
454+
_, err := wrapWithRetries(ctx, r.maxRetries, func() (any, error) {
455+
return nil, r.r.CheckReuse(ctx, old)
456+
})
457+
return err
458+
}
459+
460+
func (r *retryingRepo) Versions(ctx context.Context, prefix string) (*Versions, error) {
461+
return wrapWithRetries(ctx, r.maxRetries, func() (*Versions, error) {
462+
return r.r.Versions(ctx, prefix)
463+
})
464+
}
465+
466+
func (r *retryingRepo) Stat(ctx context.Context, rev string) (*RevInfo, error) {
467+
return wrapWithRetries(ctx, r.maxRetries, func() (*RevInfo, error) {
468+
return r.r.Stat(ctx, rev)
469+
})
470+
}
471+
472+
func (r *retryingRepo) Latest(ctx context.Context) (*RevInfo, error) {
473+
return wrapWithRetries(ctx, r.maxRetries, func() (*RevInfo, error) {
474+
return r.r.Latest(ctx)
475+
})
476+
}
477+
478+
func (r *retryingRepo) GoMod(ctx context.Context, version string) (data []byte, err error) {
479+
return wrapWithRetries(ctx, r.maxRetries, func() ([]byte, error) {
480+
return r.r.GoMod(ctx, version)
481+
})
482+
}
483+
484+
// permanentError is a wrapper around an error to indicate that it is not temporary.
485+
type permanentError struct {
486+
error
487+
}
488+
489+
func (permanentError) Temporary() bool {
490+
return false
491+
}
492+
493+
func (r *retryingRepo) Zip(ctx context.Context, dst io.Writer, version string) error {
494+
// This is a little trickier because we need to avoid partial writes to dst.
495+
// In the case of a partial write,
496+
// we attempt to truncate and rewind the file to the beginning.
497+
// If we can't do that, we do not retry.
498+
_, err := wrapWithRetries(ctx, r.maxRetries, func() (any, error) {
499+
err := r.r.Zip(ctx, dst, version)
500+
if err == nil { // if NO error
501+
return nil, nil
502+
}
503+
504+
// If there is an error downloading the file,
505+
// there is little we can do without exposing implementation details of the caller.
506+
// We can retry if we can rewind and truncate dst (true if dst is a temp file).
507+
type truncateSeeker interface {
508+
Truncate(size int64) error
509+
io.Seeker
510+
}
511+
512+
f, ok := dst.(truncateSeeker)
513+
if !ok {
514+
return nil, permanentError{err}
515+
}
516+
517+
// Truncate the file back to empty.
518+
if truncErr := f.Truncate(0); truncErr != nil {
519+
return nil, permanentError{truncErr}
520+
}
521+
// Seek back to the beginning of the file.
522+
if _, seekErr := f.Seek(0, io.SeekStart); seekErr != nil {
523+
return nil, permanentError{seekErr}
524+
}
525+
return nil, err // maybe we can retry
526+
})
527+
return err
528+
}
529+
530+
func newRetryingRepo(maxRetries int, r Repo) *retryingRepo {
531+
return &retryingRepo{
532+
maxRetries: maxRetries,
533+
r: r,
534+
}
535+
}
536+
537+
func wrapWithRetries[T any](ctx context.Context, maxRetries int, f func() (T, error)) (v T, err error) {
538+
for retry := 0; ; retry++ {
539+
v, err = f()
540+
if err == nil {
541+
return
542+
}
543+
544+
if retry >= maxRetries || !shouldRetry(err) {
545+
return
546+
}
547+
548+
// After the first retry,
549+
// do exponential backoff with 10% jitter starting at 1s.
550+
if retry == 0 {
551+
continue
552+
}
553+
backoff := float64(uint(1) << (uint(retry) - 1))
554+
backoff += backoff * (0.1 * mathrand.Float64())
555+
tm := time.NewTimer(time.Second * time.Duration(backoff))
556+
select {
557+
case <-tm.C:
558+
case <-ctx.Done():
559+
}
560+
tm.Stop()
561+
}
562+
}
563+
564+
func shouldRetry(err error) bool {
565+
if err == nil {
566+
return false
567+
}
568+
569+
if errors.Is(err, fs.ErrNotExist) {
570+
return false
571+
}
572+
573+
var isTimeout interface{ Timeout() bool }
574+
if errors.As(err, &isTimeout) && isTimeout.Timeout() {
575+
return true
576+
}
577+
578+
var httpError *web.HTTPError
579+
if errors.As(err, &httpError) {
580+
switch httpError.StatusCode {
581+
case 0, 429: // No HTTP response, Too Many Requests
582+
return true
583+
case 404, 410, 501: // Not Found, Gone, Not Implemented
584+
return false
585+
}
586+
587+
if httpError.StatusCode >= 500 {
588+
return true
589+
}
590+
}
591+
592+
var isTemporary interface{ Temporary() bool }
593+
if errors.As(err, &isTemporary) {
594+
return isTemporary.Temporary()
595+
}
596+
return false
597+
}

0 commit comments

Comments
 (0)