From 10cb5b3e54f41bf4d8d323226fb7895cfc98ab7d Mon Sep 17 00:00:00 2001 From: J23 Date: Sat, 23 Aug 2025 13:52:23 +0800 Subject: [PATCH 1/7] Introduced sha256 support for git-sizer --- git/git.go | 27 ++++++++++++-- git/obj_iter.go | 6 ++-- git/obj_resolver.go | 4 +-- git/oid.go | 63 +++++++++++++++++++++++++++------ git/tree.go | 19 +++++----- git_sizer_test.go | 37 +++++++++++++++++++ internal/testutils/repoutils.go | 2 +- sizes/graph.go | 2 +- sizes/output.go | 4 +-- 9 files changed, 135 insertions(+), 29 deletions(-) diff --git a/git/git.go b/git/git.go index 096ce81..ef3cbc6 100644 --- a/git/git.go +++ b/git/git.go @@ -24,6 +24,8 @@ type Repository struct { // gitBin is the path of the `git` executable that should be used // when running commands in this repository. gitBin string + // hashAlgo is the repository's hash algorithm + hashAlgo HashAlgo } // smartJoin returns `relPath` if it is an absolute path. If not, it @@ -49,9 +51,18 @@ func NewRepositoryFromGitDir(gitDir string) (*Repository, error) { ) } + hashAlgo := HashSHA1 + cmd := exec.Command(gitBin, "--git-dir", gitDir, "rev-parse", "--show-object-format") //nolint:gosec + if out, err := cmd.Output(); err == nil { + if string(bytes.TrimSpace(out)) == "sha256" { + hashAlgo = HashSHA256 + } + } + repo := Repository{ - gitDir: gitDir, - gitBin: gitBin, + gitDir: gitDir, + gitBin: gitBin, + hashAlgo: hashAlgo, } full, err := repo.IsFull() @@ -170,3 +181,15 @@ func (repo *Repository) GitPath(relPath string) (string, error) { // current directory, we can use it as-is: return string(bytes.TrimSpace(out)), nil } + +func (repo *Repository) HashAlgo() HashAlgo { + return repo.hashAlgo +} + +func (repo *Repository) HashSize() int { + return repo.hashAlgo.HashSize() +} + +func (repo *Repository) NullOID() OID { + return repo.hashAlgo.NullOID() +} diff --git a/git/obj_iter.go b/git/obj_iter.go index cecdc2a..c367f11 100644 --- a/git/obj_iter.go +++ b/git/obj_iter.go @@ -30,7 +30,7 @@ func (repo *Repository) NewObjectIter(ctx 
context.Context) (*ObjectIter, error) errCh: make(chan error), headerCh: make(chan BatchHeader), } - + hashHexSize := repo.HashSize() * 2 iter.p.Add( // Read OIDs from `iter.oidCh` and write them to `git // rev-list`: @@ -68,10 +68,10 @@ func (repo *Repository) NewObjectIter(ctx context.Context) (*ObjectIter, error) pipe.LinewiseFunction( "copy-oids", func(_ context.Context, _ pipe.Env, line []byte, stdout *bufio.Writer) error { - if len(line) < 40 { + if len(line) < hashHexSize { return fmt.Errorf("line too short: '%s'", line) } - if _, err := stdout.Write(line[:40]); err != nil { + if _, err := stdout.Write(line[:hashHexSize]); err != nil { return fmt.Errorf("writing OID to 'git cat-file': %w", err) } if err := stdout.WriteByte('\n'); err != nil { diff --git a/git/obj_resolver.go b/git/obj_resolver.go index 418e293..fbeb246 100644 --- a/git/obj_resolver.go +++ b/git/obj_resolver.go @@ -9,12 +9,12 @@ func (repo *Repository) ResolveObject(name string) (OID, error) { cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name) output, err := cmd.Output() if err != nil { - return NullOID, fmt.Errorf("resolving object %q: %w", name, err) + return repo.NullOID(), fmt.Errorf("resolving object %q: %w", name, err) } oidString := string(bytes.TrimSpace(output)) oid, err := NewOID(oidString) if err != nil { - return NullOID, fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) + return repo.NullOID(), fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err) } return oid, nil } diff --git a/git/oid.go b/git/oid.go index 2aefbcb..2a2bdfc 100644 --- a/git/oid.go +++ b/git/oid.go @@ -1,32 +1,75 @@ package git import ( + "bytes" + "crypto/sha1" //nolint:gosec + "crypto/sha256" "encoding/hex" "errors" ) +const ( + HashSizeSHA256 = sha256.Size + HashSizeSHA1 = sha1.Size + HashSizeMax = HashSizeSHA256 +) + +type HashAlgo int + +const ( + HashUnknown HashAlgo = iota + HashSHA1 + HashSHA256 +) + // OID represents the SHA-1 object ID of a Git 
object, in binary // format. type OID struct { - v [20]byte + v [HashSizeMax]byte + hashSize int } -// NullOID is the null object ID; i.e., all zeros. -var NullOID OID +func (h HashAlgo) NullOID() OID { + switch h { + case HashSHA1: + return OID{hashSize: HashSizeSHA1} + case HashSHA256: + return OID{hashSize: HashSizeSHA256} + } + return OID{} +} + +func (h HashAlgo) HashSize() int { + switch h { + case HashSHA1: + return HashSizeSHA1 + case HashSHA256: + return HashSizeSHA256 + } + return 0 +} + +// defaultNullOID is the null object ID; i.e., all zeros. +var defaultNullOID OID + +func IsNullOID(o OID) bool { + return bytes.Equal(o.v[:], defaultNullOID.v[:]) +} // OIDFromBytes converts a byte slice containing an object ID in // binary format into an `OID`. func OIDFromBytes(oidBytes []byte) (OID, error) { var oid OID - if len(oidBytes) != len(oid.v) { + oidSize := len(oidBytes) + if oidSize != HashSizeSHA1 && oidSize != HashSizeSHA256 { return OID{}, errors.New("bytes oid has the wrong length") } - copy(oid.v[0:20], oidBytes) + oid.hashSize = oidSize + copy(oid.v[0:oidSize], oidBytes) return oid, nil } -// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40}`) -// into an `OID`. +// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40,64}`) into an `OID`. func NewOID(s string) (OID, error) { oidBytes, err := hex.DecodeString(s) if err != nil { @@ -37,18 +80,18 @@ func NewOID(s string) (OID, error) { // String formats `oid` as a string in hex format. func (oid OID) String() string { - return hex.EncodeToString(oid.v[:]) + return hex.EncodeToString(oid.v[:oid.hashSize]) } // Bytes returns a byte slice view of `oid`, in binary format. func (oid OID) Bytes() []byte { - return oid.v[:] + return oid.v[:oid.hashSize] } // MarshalJSON expresses `oid` as a JSON string with its enclosing // quotation marks. 
func (oid OID) MarshalJSON() ([]byte, error) { - src := oid.v[:] + src := oid.v[:oid.hashSize] dst := make([]byte, hex.EncodedLen(len(src))+2) dst[0] = '"' dst[len(dst)-1] = '"' diff --git a/git/tree.go b/git/tree.go index c31fa78..18cb3ee 100644 --- a/git/tree.go +++ b/git/tree.go @@ -10,13 +10,14 @@ import ( // Tree represents a Git tree object. type Tree struct { - data string + data string + hashSize int } // ParseTree parses the tree object whose contents are contained in // `data`. `oid` is currently unused. func ParseTree(oid OID, data []byte) (*Tree, error) { - return &Tree{string(data)}, nil + return &Tree{string(data), oid.hashSize}, nil } // Size returns the size of the tree object. @@ -36,13 +37,15 @@ type TreeEntry struct { // TreeIter is an iterator over the entries in a Git tree object. type TreeIter struct { // The as-yet-unread part of the tree's data. - data string + data string + hashSize int } // Iter returns an iterator over the entries in `tree`. func (tree *Tree) Iter() *TreeIter { return &TreeIter{ - data: tree.data, + data: tree.data, + hashSize: tree.hashSize, } } @@ -74,12 +77,12 @@ func (iter *TreeIter) NextEntry() (TreeEntry, bool, error) { entry.Name = iter.data[:nulAt] iter.data = iter.data[nulAt+1:] - if len(iter.data) < 20 { + if len(iter.data) < iter.hashSize { return TreeEntry{}, false, errors.New("tree entry ends unexpectedly") } - - copy(entry.OID.v[0:20], iter.data[0:20]) - iter.data = iter.data[20:] + entry.OID.hashSize = iter.hashSize + copy(entry.OID.v[0:iter.hashSize], iter.data[0:iter.hashSize]) + iter.data = iter.data[iter.hashSize:] return entry, true, nil } diff --git a/git_sizer_test.go b/git_sizer_test.go index 8a7a2d2..c74b459 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -849,3 +849,40 @@ func TestSubmodule(t *testing.T) { assert.Equal(t, counts.Count32(2), h.UniqueBlobCount, "unique blob count") assert.Equal(t, counts.Count32(3), h.MaxExpandedBlobCount, "max expanded blob count") } + +func TestSHA256(t 
*testing.T) { + t.Parallel() + + ctx := context.Background() + + t.Helper() + + path, err := os.MkdirTemp("", "sha256") + require.NoError(t, err) + + testRepo := testutils.TestRepo{Path: path} + defer testRepo.Remove(t) + + // Don't use `GitCommand()` because the directory might not + // exist yet: + cmd := exec.Command("git", "init", "--object-format", "sha256", testRepo.Path) + cmd.Env = testutils.CleanGitEnv() + err = cmd.Run() + require.NoError(t, err) + + timestamp := time.Unix(1112911993, 0) + + testRepo.AddFile(t, "hello.txt", "Hello, world!\n") + cmd = testRepo.GitCommand(t, "commit", "-m", "initial") + testutils.AddAuthorInfo(cmd, &timestamp) + require.NoError(t, cmd.Run(), "creating initial commit") + + cmd = testRepo.GitCommand(t, "commit", "-m", "initial", "--allow-empty") + testutils.AddAuthorInfo(cmd, &timestamp) + require.NoError(t, cmd.Run(), "creating commit") + + repo := testRepo.Repository(t) + + _, err = sizes.CollectReferences(ctx, repo, refGrouper{}) + require.NoError(t, err) +} diff --git a/internal/testutils/repoutils.go b/internal/testutils/repoutils.go index 48a8759..e14e487 100644 --- a/internal/testutils/repoutils.go +++ b/internal/testutils/repoutils.go @@ -165,7 +165,7 @@ func (repo *TestRepo) UpdateRef(t *testing.T, refname string, oid git.OID) { var cmd *exec.Cmd - if oid == git.NullOID { + if git.IsNullOID(oid) { cmd = repo.GitCommand(t, "update-ref", "-d", refname) } else { cmd = repo.GitCommand(t, "update-ref", refname, oid.String()) diff --git a/sizes/graph.go b/sizes/graph.go index 0fb1c8a..2101a00 100644 --- a/sizes/graph.go +++ b/sizes/graph.go @@ -134,7 +134,7 @@ func ScanRepositoryUsingGraph( case "tree": trees = append(trees, ObjectHeader{obj.OID, obj.ObjectSize}) case "commit": - commits = append(commits, CommitHeader{ObjectHeader{obj.OID, obj.ObjectSize}, git.NullOID}) + commits = append(commits, CommitHeader{ObjectHeader{obj.OID, obj.ObjectSize}, repo.NullOID()}) case "tag": tags = append(tags, ObjectHeader{obj.OID, 
obj.ObjectSize}) default: diff --git a/sizes/output.go b/sizes/output.go index 933cc05..037f905 100644 --- a/sizes/output.go +++ b/sizes/output.go @@ -155,7 +155,7 @@ func (i *item) Emit(t *table) { } func (i *item) Footnote(nameStyle NameStyle) string { - if i.path == nil || i.path.OID == git.NullOID { + if i.path == nil || git.IsNullOID(i.path.OID) { return "" } switch nameStyle { @@ -214,7 +214,7 @@ func (i *item) MarshalJSON() ([]byte, error) { LevelOfConcern: float64(value) / i.scale, } - if i.path != nil && i.path.OID != git.NullOID { + if i.path != nil && !git.IsNullOID(i.path.OID) { stat.ObjectName = i.path.OID.String() stat.ObjectDescription = i.path.Path() } From cf4ba45f9251b113a46f6636da087cb3a9d126a0 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Fri, 21 Nov 2025 18:47:53 +0000 Subject: [PATCH 2/7] workflows: add document header This is a best practice and yamllint warns about omitting it. --- .github/workflows/lint.yml | 1 + .github/workflows/release.yml | 1 + .github/workflows/test.yml | 1 + 3 files changed, 3 insertions(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 52a9f07..0b08cfe 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,3 +1,4 @@ +--- name: Lint on: push: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 58af3d6..b35a733 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,3 +1,4 @@ +--- name: Release on: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f658b81..9340467 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,3 +1,4 @@ +--- on: [push, pull_request] name: Test jobs: From 3ca5f0e3dcf46c416dbea72976aa225575ee650a Mon Sep 17 00:00:00 2001 From: "brian m. 
carlson" Date: Fri, 21 Nov 2025 18:50:42 +0000 Subject: [PATCH 3/7] workflows: add permissions block We'd like to run GitHub Actions with the least possible permissions assigned to the token for security reasons. To make this possible, let's add a permissions block to each workflow that lacks one. --- .github/workflows/lint.yml | 3 +++ .github/workflows/test.yml | 2 ++ 2 files changed, 5 insertions(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 0b08cfe..f8cfb4b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -12,6 +12,9 @@ on: - go.mod - go.sum +permissions: + contents: read + jobs: lint: runs-on: ubuntu-latest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9340467..8efc5ea 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,6 +1,8 @@ --- on: [push, pull_request] name: Test +permissions: + contents: read jobs: test: strategy: From 9d29e5a1b5bdf415f0ba81b711a42fa28b470be0 Mon Sep 17 00:00:00 2001 From: Victoria Dye Date: Mon, 1 Dec 2025 15:24:57 -0800 Subject: [PATCH 4/7] install-vendored-go: update download link The Google storage account appears to no longer be valid, so let's use the official download link from https://go.dev. --- script/install-vendored-go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/install-vendored-go b/script/install-vendored-go index 45ace01..76d2195 100755 --- a/script/install-vendored-go +++ b/script/install-vendored-go @@ -39,7 +39,7 @@ fi ROOTDIR="$( cd "$( dirname "$0" )/.." 
&& pwd )" VENDORDIR="$ROOTDIR/vendor" -DOWNLOAD_URL=https://storage.googleapis.com/golang/$GO_PKG +DOWNLOAD_URL=https://go.dev/dl/$GO_PKG ARCHIVE="$VENDORDIR/$GO_PKG" INSTALLDIR="$VENDORDIR/$GO_VERSION" export GOROOT="$INSTALLDIR/go" From dba52c5e298c0d9966af9aa87969bde1dc481cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 16 Jan 2026 12:41:00 +0100 Subject: [PATCH 5/7] Skip the SHA256 test if git has no support for it If you are building and running the tests in an environment with an older version of git, it might not have SHA256 support. This should not cause the git-sizer test suite to fail as it's not an issue with git-sizer. Detect this situation and skip the test. --- git_sizer_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index c74b459..09f088f 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -867,7 +867,11 @@ func TestSHA256(t *testing.T) { // exist yet: cmd := exec.Command("git", "init", "--object-format", "sha256", testRepo.Path) cmd.Env = testutils.CleanGitEnv() - err = cmd.Run() + output, err := cmd.CombinedOutput() + + if err != nil && strings.HasPrefix(string(output), "error: unknown option `object-format'") { + t.Skip("skipping due to lack of SHA256 support") + } require.NoError(t, err) timestamp := time.Unix(1112911993, 0) From 0579f1812beaf09679e0651fbd0b36047759f5e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 16 Jan 2026 12:48:24 +0100 Subject: [PATCH 6/7] ci: update the setup-go version Version 2 wants to use the old URL so that also fails to run. The latest is version 6 so let's update to that and at the same time update to the same Go version that we want to download in the build script. 
--- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8efc5ea..542f410 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,9 +12,9 @@ jobs: runs-on: ${{ matrix.os }} steps: - name: Set up Go - uses: actions/setup-go@v2 + uses: actions/setup-go@v6 with: - go-version: '1.17' + go-version: '1.21.3' - name: Check out code uses: actions/checkout@v2 From 37ca70f5f033785298587bb642b83fce66616322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mart=C3=ADn=20Nieto?= Date: Fri, 16 Jan 2026 15:23:00 +0100 Subject: [PATCH 7/7] test: loosen the object-format error matching As pointed out by the robot, this can be an issue with different locales. It is enough for our purposes to know that the error message includes "object-format" so we know it's unhappy with it. --- git_sizer_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_sizer_test.go b/git_sizer_test.go index 09f088f..f5c8006 100644 --- a/git_sizer_test.go +++ b/git_sizer_test.go @@ -869,7 +869,7 @@ func TestSHA256(t *testing.T) { cmd.Env = testutils.CleanGitEnv() output, err := cmd.CombinedOutput() - if err != nil && strings.HasPrefix(string(output), "error: unknown option `object-format'") { + if err != nil && strings.Contains(string(output), "object-format") { t.Skip("skipping due to lack of SHA256 support") } require.NoError(t, err)