Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 60 additions & 6 deletions internal/app/azldev/core/sources/sourceprep.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ import (
"errors"
"fmt"
"log/slog"
"os"
"path/filepath"
"slices"
"strings"
"time"
"unicode"

gogit "github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing/filemode"
"github.com/go-git/go-git/v5/plumbing/object"
"github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components"
"github.com/microsoft/azure-linux-dev-tools/internal/global/opctx"
Expand Down Expand Up @@ -396,13 +396,12 @@ func (p *sourcePreparerImpl) trySyntheticHistory(

gitDirPath := filepath.Join(sourcesDirPath, ".git")

_, statErr := os.Stat(gitDirPath)

if statErr != nil && !os.IsNotExist(statErr) {
return fmt.Errorf("failed to check for .git directory at %#q:\n%w", gitDirPath, statErr)
gitDirExists, err := fileutils.Exists(p.fs, gitDirPath)
if err != nil {
return fmt.Errorf("failed to check for .git directory at %#q:\n%w", gitDirPath, err)
}

if os.IsNotExist(statErr) {
if !gitDirExists {
slog.Info("No .git directory in sources; initializing repository",
"component", componentName)

Expand All @@ -417,13 +416,68 @@ func (p *sourcePreparerImpl) trySyntheticHistory(
return fmt.Errorf("failed to open sources repository at %#q:\n%w", sourcesDirPath, err)
}

// Strip bogus submodule entries from the index before staging. Some upstream
// repos have gitlink entries without .gitmodules that break go-git's staging.
if err := removeSubmoduleEntries(p.fs, sourcesRepo, sourcesDirPath); err != nil {
return fmt.Errorf("failed to remove submodule entries:\n%w", err)
}

if err := CommitInterleavedHistory(sourcesRepo, changes, importCommit); err != nil {
return fmt.Errorf("failed to commit synthetic history:\n%w", err)
}

return nil
}

// removeSubmoduleEntries strips gitlink (mode 160000) entries from the repository
// index and removes the corresponding directories from disk. Some upstream
// dist-git repositories (e.g., Fedora's "at" package) contain bogus submodule
// references — gitlink entries without a .gitmodules file — that leave empty
// directories after cloning. go-git's [gogit.Worktree.AddWithOptions] fails when
// it encounters these because it tries to read the directory path as a file.
//
// Directory removal is best-effort: a failure is logged as a warning and does not
// abort processing. Removal is recursive, so a directory that is not empty is
// deleted together with its contents.
//
// Entry names come from the cloned repository and are therefore not trusted. An
// entry whose name is empty, absolute, escapes repoDir via "..", or resolves to
// repoDir itself is still dropped from the index, but nothing is deleted on disk
// for it.
//
// The index is only written back when at least one entry was removed. An error is
// returned only when the index cannot be read or written.
func removeSubmoduleEntries(fs opctx.FS, repo *gogit.Repository, repoDir string) error {
	idx, err := repo.Storer.Index()
	if err != nil {
		return fmt.Errorf("failed to read git index:\n%w", err)
	}

	originalLen := len(idx.Entries)
	kept := 0

	// Filter in place: surviving entries are compacted toward the front of the slice.
	for _, entry := range idx.Entries {
		if entry.Mode != filemode.Submodule {
			idx.Entries[kept] = entry
			kept++

			continue
		}

		slog.Info("Removing bogus submodule entry from index",
			"path", entry.Name)

		// Guard against names that would make RemoveAll act on repoDir itself
		// ("." or "a/..") or on anything outside it ("", "../x", "/abs").
		if !filepath.IsLocal(entry.Name) || filepath.Clean(entry.Name) == "." {
			slog.Warn("Skipping removal of submodule directory with non-local path",
				"path", entry.Name)

			continue
		}

		// Remove the directory left by the uninitialized submodule.
		if removeErr := fs.RemoveAll(filepath.Join(repoDir, entry.Name)); removeErr != nil {
			slog.Warn("Failed to remove submodule directory",
				"path", entry.Name, "error", removeErr)
		}
	}

	// Nothing was filtered out, so leave the stored index alone.
	if kept == originalLen {
		return nil
	}

	idx.Entries = idx.Entries[:kept]

	if err := repo.Storer.SetIndex(idx); err != nil {
		return fmt.Errorf("failed to update git index:\n%w", err)
	}

	slog.Info("Removed submodule entries from git index",
		"count", originalLen-kept)

	return nil
}

// DiffSources implements the [SourcePreparer] interface.
// It fetches the component's sources once, copies them to a second directory, applies overlays
// to the copy, then diffs the two trees. This avoids fetching the sources twice.
Expand Down
165 changes: 165 additions & 0 deletions internal/app/azldev/core/sources/sourceprep_internal_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

Comment thread
dmcilvaney marked this conversation as resolved.
package sources

import (
"path/filepath"
"testing"

gogit "github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/filemode"
"github.com/go-git/go-git/v5/plumbing/format/index"
"github.com/go-git/go-git/v5/storage/memory"
"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms"
"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestRemoveSubmoduleEntries_StripsGitlinks(t *testing.T) {
const repoDir = "/fakerepo"

memFS := afero.NewMemMapFs()
storer := memory.NewStorage()

// Initialize a repo with in-memory storage only; this test exercises the
// index/storer and uses memFS separately for directory cleanup assertions.
repo, err := gogit.Init(storer, nil)
require.NoError(t, err)

// Manually build an index with a normal file entry and a submodule entry.
idx := &index.Index{
Version: 2,
Entries: []*index.Entry{
{
Name: "regular-file.spec",
Mode: filemode.Regular,
Hash: plumbing.NewHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
},
{
Name: "tests/at",
Mode: filemode.Submodule,
Hash: plumbing.NewHash("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
},
},
}

require.NoError(t, storer.SetIndex(idx))

// Create the empty directory that the bogus submodule leaves behind.
submoduleDir := filepath.Join(repoDir, "tests/at")
require.NoError(t, memFS.MkdirAll(submoduleDir, fileperms.PublicDir))

Comment thread
dmcilvaney marked this conversation as resolved.
// Verify the directory exists before calling removeSubmoduleEntries.
dirExists, err := fileutils.Exists(memFS, submoduleDir)
require.NoError(t, err)
require.True(t, dirExists, "submodule directory should exist before removal")

// Act
err = removeSubmoduleEntries(memFS, repo, repoDir)
require.NoError(t, err)

// Assert: index should only have the regular file.
updatedIdx, err := storer.Index()
require.NoError(t, err)
require.Len(t, updatedIdx.Entries, 1)
assert.Equal(t, "regular-file.spec", updatedIdx.Entries[0].Name)
assert.Equal(t, filemode.Regular, updatedIdx.Entries[0].Mode)

// Assert: empty directory was removed.
dirExists, err = fileutils.Exists(memFS, submoduleDir)
require.NoError(t, err)
assert.False(t, dirExists, "submodule directory should be removed")
}

// TestRemoveSubmoduleEntries_NoOpWithoutSubmodules verifies that an index holding
// only regular file entries comes back from removeSubmoduleEntries with the same
// entries, in the same order, with the same modes.
func TestRemoveSubmoduleEntries_NoOpWithoutSubmodules(t *testing.T) {
	const repoDir = "/fakerepo"

	memFS := afero.NewMemMapFs()
	storer := memory.NewStorage()

	repo, err := gogit.Init(storer, nil)
	require.NoError(t, err)

	// Index with only normal entries.
	idx := &index.Index{
		Version: 2,
		Entries: []*index.Entry{
			{
				Name: "file-a.spec",
				Mode: filemode.Regular,
				Hash: plumbing.NewHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
			},
			{
				Name: "file-b.patch",
				Mode: filemode.Regular,
				Hash: plumbing.NewHash("cccccccccccccccccccccccccccccccccccccccc"),
			},
		},
	}

	require.NoError(t, storer.SetIndex(idx))

	err = removeSubmoduleEntries(memFS, repo, repoDir)
	require.NoError(t, err)

	// Index should be untouched: checking the length alone would not catch an
	// entry being replaced or reordered, so names and modes are asserted as well.
	updatedIdx, err := storer.Index()
	require.NoError(t, err)
	require.Len(t, updatedIdx.Entries, 2)
	assert.Equal(t, "file-a.spec", updatedIdx.Entries[0].Name)
	assert.Equal(t, filemode.Regular, updatedIdx.Entries[0].Mode)
	assert.Equal(t, "file-b.patch", updatedIdx.Entries[1].Name)
	assert.Equal(t, filemode.Regular, updatedIdx.Entries[1].Mode)
}

// TestRemoveSubmoduleEntries_PreservesNormalEntriesWithMixedModes verifies that,
// when gitlink entries are interleaved with executable and regular file entries,
// only the gitlinks are dropped: the remaining entries keep their relative order
// and their modes, and the directories of both gitlinks are removed.
func TestRemoveSubmoduleEntries_PreservesNormalEntriesWithMixedModes(t *testing.T) {
	const repoDir = "/fakerepo"

	memFS := afero.NewMemMapFs()
	storer := memory.NewStorage()

	repo, err := gogit.Init(storer, nil)
	require.NoError(t, err)

	// Mix of regular files, executable, and submodule entries.
	idx := &index.Index{
		Version: 2,
		Entries: []*index.Entry{
			{
				Name: "build.sh",
				Mode: filemode.Executable,
				Hash: plumbing.NewHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
			},
			{
				Name: "tests/submod1",
				Mode: filemode.Submodule,
				Hash: plumbing.NewHash("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
			},
			{
				Name: "pkg.spec",
				Mode: filemode.Regular,
				Hash: plumbing.NewHash("cccccccccccccccccccccccccccccccccccccccc"),
			},
			{
				Name: "tests/submod2",
				Mode: filemode.Submodule,
				Hash: plumbing.NewHash("dddddddddddddddddddddddddddddddddddddddd"),
			},
		},
	}

	require.NoError(t, storer.SetIndex(idx))

	// Create empty dirs for both submodules.
	submoduleDirs := []string{
		filepath.Join(repoDir, "tests/submod1"),
		filepath.Join(repoDir, "tests/submod2"),
	}
	for _, dir := range submoduleDirs {
		require.NoError(t, memFS.MkdirAll(dir, fileperms.PublicDir))
	}

	err = removeSubmoduleEntries(memFS, repo, repoDir)
	require.NoError(t, err)

	// Only the two non-gitlink entries remain, in their original relative order
	// and with their original modes.
	updatedIdx, err := storer.Index()
	require.NoError(t, err)
	require.Len(t, updatedIdx.Entries, 2)
	assert.Equal(t, "build.sh", updatedIdx.Entries[0].Name)
	assert.Equal(t, filemode.Executable, updatedIdx.Entries[0].Mode)
	assert.Equal(t, "pkg.spec", updatedIdx.Entries[1].Name)
	assert.Equal(t, filemode.Regular, updatedIdx.Entries[1].Mode)

	// Every gitlink directory must have been cleaned up, not just the first one.
	for _, dir := range submoduleDirs {
		dirExists, existsErr := fileutils.Exists(memFS, dir)
		require.NoError(t, existsErr)
		assert.False(t, dirExists, "submodule directory %q should be removed", dir)
	}
}
Loading