diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go index 71ef5681..4a9ac83d 100644 --- a/internal/app/azldev/core/sources/sourceprep.go +++ b/internal/app/azldev/core/sources/sourceprep.go @@ -8,7 +8,6 @@ import ( "errors" "fmt" "log/slog" - "os" "path/filepath" "slices" "strings" @@ -16,6 +15,7 @@ import ( "unicode" gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing/filemode" "github.com/go-git/go-git/v5/plumbing/object" "github.com/microsoft/azure-linux-dev-tools/internal/app/azldev/core/components" "github.com/microsoft/azure-linux-dev-tools/internal/global/opctx" @@ -396,13 +396,12 @@ func (p *sourcePreparerImpl) trySyntheticHistory( gitDirPath := filepath.Join(sourcesDirPath, ".git") - _, statErr := os.Stat(gitDirPath) - - if statErr != nil && !os.IsNotExist(statErr) { - return fmt.Errorf("failed to check for .git directory at %#q:\n%w", gitDirPath, statErr) + gitDirExists, err := fileutils.Exists(p.fs, gitDirPath) + if err != nil { + return fmt.Errorf("failed to check for .git directory at %#q:\n%w", gitDirPath, err) } - if os.IsNotExist(statErr) { + if !gitDirExists { slog.Info("No .git directory in sources; initializing repository", "component", componentName) @@ -417,6 +416,12 @@ func (p *sourcePreparerImpl) trySyntheticHistory( return fmt.Errorf("failed to open sources repository at %#q:\n%w", sourcesDirPath, err) } + // Strip bogus submodule entries from the index before staging. Some upstream + // repos have gitlink entries without .gitmodules that break go-git's staging. + if err := removeSubmoduleEntries(p.fs, sourcesRepo, sourcesDirPath); err != nil { + return fmt.Errorf("failed to remove submodule entries:\n%w", err) + } + if err := CommitInterleavedHistory(sourcesRepo, changes, importCommit); err != nil { return fmt.Errorf("failed to commit synthetic history:\n%w", err) } @@ -424,6 +429,55 @@ func (p *sourcePreparerImpl) trySyntheticHistory( return nil } +// removeSubmoduleEntries strips gitlink (mode 160000) entries from the repository +// index and removes the corresponding empty directories from disk. Some upstream +// dist-git repositories (e.g., Fedora's "at" package) contain bogus submodule +// references — gitlink entries without a .gitmodules file — that leave empty +// directories after cloning. go-git's [gogit.Worktree.AddWithOptions] fails when +// it encounters these because it tries to read the directory path as a file. +func removeSubmoduleEntries(fs opctx.FS, repo *gogit.Repository, repoDir string) error { + idx, err := repo.Storer.Index() + if err != nil { + return fmt.Errorf("failed to read git index:\n%w", err) + } + + originalLen := len(idx.Entries) + kept := 0 + + for _, entry := range idx.Entries { + if entry.Mode == filemode.Submodule { + slog.Info("Removing bogus submodule entry from index", + "path", entry.Name) + + // Remove the directory left by the uninitialized submodule. + if removeErr := fs.RemoveAll(filepath.Join(repoDir, entry.Name)); removeErr != nil { + slog.Warn("Failed to remove submodule directory", + "path", entry.Name, "error", removeErr) + } + + continue + } + + idx.Entries[kept] = entry + kept++ + } + + if kept == originalLen { + return nil + } + + idx.Entries = idx.Entries[:kept] + + if err := repo.Storer.SetIndex(idx); err != nil { + return fmt.Errorf("failed to update git index:\n%w", err) + } + + slog.Info("Removed submodule entries from git index", + "count", originalLen-kept) + + return nil +} + // DiffSources implements the [SourcePreparer] interface. // It fetches the component's sources once, copies them to a second directory, applies overlays // to the copy, then diffs the two trees. This avoids fetching the sources twice. diff --git a/internal/app/azldev/core/sources/sourceprep_internal_test.go b/internal/app/azldev/core/sources/sourceprep_internal_test.go new file mode 100644 index 00000000..491c3a49 --- /dev/null +++ b/internal/app/azldev/core/sources/sourceprep_internal_test.go @@ -0,0 +1,165 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +package sources + +import ( + "path/filepath" + "testing" + + gogit "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/filemode" + "github.com/go-git/go-git/v5/plumbing/format/index" + "github.com/go-git/go-git/v5/storage/memory" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestRemoveSubmoduleEntries_StripsGitlinks(t *testing.T) { + const repoDir = "/fakerepo" + + memFS := afero.NewMemMapFs() + storer := memory.NewStorage() + + // Initialize a repo with in-memory storage only; this test exercises the + // index/storer and uses memFS separately for directory cleanup assertions. + repo, err := gogit.Init(storer, nil) + require.NoError(t, err) + + // Manually build an index with a normal file entry and a submodule entry. + idx := &index.Index{ + Version: 2, + Entries: []*index.Entry{ + { + Name: "regular-file.spec", + Mode: filemode.Regular, + Hash: plumbing.NewHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + }, + { + Name: "tests/at", + Mode: filemode.Submodule, + Hash: plumbing.NewHash("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"), + }, + }, + } + + require.NoError(t, storer.SetIndex(idx)) + + // Create the empty directory that the bogus submodule leaves behind. + submoduleDir := filepath.Join(repoDir, "tests/at") + require.NoError(t, memFS.MkdirAll(submoduleDir, fileperms.PublicDir)) + + // Verify the directory exists before calling removeSubmoduleEntries. + dirExists, err := fileutils.Exists(memFS, submoduleDir) + require.NoError(t, err) + require.True(t, dirExists, "submodule directory should exist before removal") + + // Act + err = removeSubmoduleEntries(memFS, repo, repoDir) + require.NoError(t, err) + + // Assert: index should only have the regular file. + updatedIdx, err := storer.Index() + require.NoError(t, err) + require.Len(t, updatedIdx.Entries, 1) + assert.Equal(t, "regular-file.spec", updatedIdx.Entries[0].Name) + assert.Equal(t, filemode.Regular, updatedIdx.Entries[0].Mode) + + // Assert: empty directory was removed. + dirExists, err = fileutils.Exists(memFS, submoduleDir) + require.NoError(t, err) + assert.False(t, dirExists, "submodule directory should be removed") +} + +func TestRemoveSubmoduleEntries_NoOpWithoutSubmodules(t *testing.T) { + const repoDir = "/fakerepo" + + memFS := afero.NewMemMapFs() + storer := memory.NewStorage() + + repo, err := gogit.Init(storer, nil) + require.NoError(t, err) + + // Index with only normal entries. + idx := &index.Index{ + Version: 2, + Entries: []*index.Entry{ + { + Name: "file-a.spec", + Mode: filemode.Regular, + Hash: plumbing.NewHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + }, + { + Name: "file-b.patch", + Mode: filemode.Regular, + Hash: plumbing.NewHash("cccccccccccccccccccccccccccccccccccccccc"), + }, + }, + } + + require.NoError(t, storer.SetIndex(idx)) + + err = removeSubmoduleEntries(memFS, repo, repoDir) + require.NoError(t, err) + + // Index should be untouched. + updatedIdx, err := storer.Index() + require.NoError(t, err) + require.Len(t, updatedIdx.Entries, 2) +} + +func TestRemoveSubmoduleEntries_PreservesNormalEntriesWithMixedModes(t *testing.T) { + const repoDir = "/fakerepo" + + memFS := afero.NewMemMapFs() + storer := memory.NewStorage() + + repo, err := gogit.Init(storer, nil) + require.NoError(t, err) + + // Mix of regular files, executable, and submodule entries. + idx := &index.Index{ + Version: 2, + Entries: []*index.Entry{ + { + Name: "build.sh", + Mode: filemode.Executable, + Hash: plumbing.NewHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"), + }, + { + Name: "tests/submod1", + Mode: filemode.Submodule, + Hash: plumbing.NewHash("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"), + }, + { + Name: "pkg.spec", + Mode: filemode.Regular, + Hash: plumbing.NewHash("cccccccccccccccccccccccccccccccccccccccc"), + }, + { + Name: "tests/submod2", + Mode: filemode.Submodule, + Hash: plumbing.NewHash("dddddddddddddddddddddddddddddddddddddddd"), + }, + }, + } + + require.NoError(t, storer.SetIndex(idx)) + + // Create empty dirs for both submodules. + require.NoError(t, memFS.MkdirAll(filepath.Join(repoDir, "tests/submod1"), fileperms.PublicDir)) + require.NoError(t, memFS.MkdirAll(filepath.Join(repoDir, "tests/submod2"), fileperms.PublicDir)) + + err = removeSubmoduleEntries(memFS, repo, repoDir) + require.NoError(t, err) + + updatedIdx, err := storer.Index() + require.NoError(t, err) + require.Len(t, updatedIdx.Entries, 2) + assert.Equal(t, "build.sh", updatedIdx.Entries[0].Name) + assert.Equal(t, "pkg.spec", updatedIdx.Entries[1].Name) +}