Skip to content
This repository was archived by the owner on Mar 27, 2024. It is now read-only.

Layered analysis for single version packages #248

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ func getExtractPathForName(name string) (string, error) {

func includeLayers() bool {
for _, t := range types {
if t == "layer" {
if strings.Contains(t, "layer") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should keep this the way it was, this won't match up with the logic to retrieve the analyzers from the map in checkIfValidAnalyzer(). I'm guessing you did this so we could allow --types layers? We could add that as a valid string as well, but this would also match --types asdfasdfbnlayer which we probably shouldn't do.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well I included it to distinguish the Analyzers that require layer extraction from the ones that don't require it. I noticed the layers were only extracted for the layer analyzer, thus I though that enabling the layer extraction for any analyzer including layer in its name could be a generic way to distinguish them from the rest in a simple string test. Obviously that assumes that any analyzer requiring extracted layers is named using the .*layer.* pattern. At the time of writing it I was having in mind layer, aptlayer and rpmlayer analyzers. After all checkIfValidAnalyzer runs first, thus at the time of running includeLayers we can realy on having an already valid list of types to check if there is one requiring layers extraction.

It would be also fine for me to use a static list of types that do require layer extraction.

return true
}
}
Expand Down
63 changes: 62 additions & 1 deletion differs/apt_diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ import (
"github.com/sirupsen/logrus"
)

//APT package database location
const dpkgStatusFile string = "var/lib/dpkg/status"

type AptAnalyzer struct {
}

Expand All @@ -53,7 +56,7 @@ func (a AptAnalyzer) getPackages(image pkgutil.Image) (map[string]util.PackageIn
// invalid image directory path
return packages, err
}
statusFile := filepath.Join(path, "var/lib/dpkg/status")
statusFile := filepath.Join(path, dpkgStatusFile)
if _, err := os.Stat(statusFile); err != nil {
// status file does not exist in this layer
return packages, nil
Expand Down Expand Up @@ -120,3 +123,61 @@ func parseLine(text string, currPackage string, packages map[string]util.Package
}
return currPackage
}

type AptLayerAnalyzer struct {
}

func (a AptLayerAnalyzer) Name() string {
return "AptLayerAnalyzer"
}

// AptDiff compares the packages installed by apt-get.
func (a AptLayerAnalyzer) Diff(image1, image2 pkgutil.Image) (util.Result, error) {
diff, err := singleVersionLayerDiff(image1, image2, a)
return diff, err
}

func (a AptLayerAnalyzer) Analyze(image pkgutil.Image) (util.Result, error) {
analysis, err := singleVersionLayerAnalysis(image, a)
return analysis, err
}

func (a AptLayerAnalyzer) getPackages(image pkgutil.Image) ([]map[string]util.PackageInfo, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like this shares a lot of code with AptAnalyzer.getPackages() in apt_diff.go. Could you pull this out into a shared method between the two implementations?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure you are right.

var packages []map[string]util.PackageInfo
if _, err := os.Stat(image.FSPath); err != nil {
// invalid image directory path
return packages, err
}
statusFile := filepath.Join(image.FSPath, dpkgStatusFile)
if _, err := os.Stat(statusFile); err != nil {
// status file does not exist in this image
return packages, nil
}
for _, layer := range image.Layers {
layerPackages := make(map[string]util.PackageInfo)
if _, err := os.Stat(layer.FSPath); err != nil {
// invalid layer directory path
return packages, err
}
statusFile := filepath.Join(layer.FSPath, dpkgStatusFile)
if _, err := os.Stat(statusFile); err == nil {
// this layer has a package database
if file, err := os.Open(statusFile); err == nil {
// make sure it gets closed
defer file.Close()

// create a new scanner and read the file line by line
scanner := bufio.NewScanner(file)
var currPackage string
for scanner.Scan() {
currPackage = parseLine(scanner.Text(), currPackage, layerPackages)
}
} else {
return packages, err
}
}
packages = append(packages, layerPackages)
}

return packages, nil
}
1 change: 1 addition & 0 deletions differs/differs.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ var Analyzers = map[string]Analyzer{
"file": FileAnalyzer{},
"layer": FileLayerAnalyzer{},
"apt": AptAnalyzer{},
"aptlayer": AptLayerAnalyzer{},
"rpm": RPMAnalyzer{},
"pip": PipAnalyzer{},
"node": NodeAnalyzer{},
Expand Down
79 changes: 79 additions & 0 deletions differs/package_differs.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

pkgutil "github.com/GoogleContainerTools/container-diff/pkg/util"
"github.com/GoogleContainerTools/container-diff/util"
"github.com/sirupsen/logrus"
)

type MultiVersionPackageAnalyzer interface {
Expand All @@ -33,6 +34,11 @@ type SingleVersionPackageAnalyzer interface {
Name() string
}

type SingleVersionPackageLayerAnalyzer interface {
getPackages(image pkgutil.Image) ([]map[string]util.PackageInfo, error)
Name() string
}

func multiVersionDiff(image1, image2 pkgutil.Image, differ MultiVersionPackageAnalyzer) (*util.MultiVersionPackageDiffResult, error) {
pack1, err := differ.getPackages(image1)
if err != nil {
Expand Down Expand Up @@ -71,6 +77,43 @@ func singleVersionDiff(image1, image2 pkgutil.Image, differ SingleVersionPackage
}, nil
}

// singleVersionLayerDiff diffs the packages of each image layer by layer
func singleVersionLayerDiff(image1, image2 pkgutil.Image, differ SingleVersionPackageLayerAnalyzer) (*util.SingleVersionPackageLayerDiffResult, error) {
pack1, err := differ.getPackages(image1)
if err != nil {
return &util.SingleVersionPackageLayerDiffResult{}, err
}
pack2, err := differ.getPackages(image2)
if err != nil {
return &util.SingleVersionPackageLayerDiffResult{}, err
}
var pkgDiffs []util.PackageDiff

// Go through each layer for image1
for i := range pack1 {
if i >= len(pack2) {
// Skip diff when there is no layer to compare with in image2
continue
}

pkgDiff := util.GetMapDiff(pack1[i], pack2[i])
pkgDiffs = append(pkgDiffs, pkgDiff)
}

if len(image1.Layers) != len(image2.Layers) {
logrus.Infof("%s and %s have different number of layers, please consider using container-diff analyze to view the contents of each image in each layer", image1.Source, image2.Source)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So what does the output look like for this case? We should maybe consider either a) not running the diff at all, or b) trying to do something smart to figure out which layers "match up" between the images. I could also be convinced to just run it as normal, but I'm not sure how useful the output will be.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that this diff is likely to be useless unless you know in advance the images are somehow comparable and they are build using an equivalent procedure (same number of layers, layers organized in the same way, etc.). This is pretty unlikely to happen, agree. I included it for completeness and because this is the approach that is followed in the layer diff.

I understand the concerns, IMHO this is the same issue exposed in #239. As I already suggested in the mentioned issue, I believe that probably rather than trying find out some smart algorithm to choose the layers to diff it could be easier to just let user choose which layers to diff and let user determine which diff is actually meaningful or meaningless.

I am also opened to just not perform this diff at all as you suggest in a).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should just go ahead and not run it here, the output will likely just be meaningless so I'd rather not add functionality that can be potentially confusing. I do like the idea of letting the user choose the specific layers to diff as an alternative though, I'll comment on the other issue.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I fully agree. So for now I'll modify it to not run the diff here and in case #239 we would be still on time to discuss again if it makes sense to run this diff or not.

}

return &util.SingleVersionPackageLayerDiffResult{
Image1: image1.Source,
Image2: image2.Source,
DiffType: strings.TrimSuffix(differ.Name(), "Analyzer"),
Diff: util.PackageLayerDiff{
PackageDiffs: pkgDiffs,
},
}, nil
}

func multiVersionAnalysis(image pkgutil.Image, analyzer MultiVersionPackageAnalyzer) (*util.MultiVersionPackageAnalyzeResult, error) {
pack, err := analyzer.getPackages(image)
if err != nil {
Expand Down Expand Up @@ -98,3 +141,39 @@ func singleVersionAnalysis(image pkgutil.Image, analyzer SingleVersionPackageAna
}
return &analysis, nil
}

// singleVersionLayerAnalysis returns the packages included, deleted or
// updated in each layer
func singleVersionLayerAnalysis(image pkgutil.Image, analyzer SingleVersionPackageLayerAnalyzer) (*util.SingleVersionPackageLayerAnalyzeResult, error) {
pack, err := analyzer.getPackages(image)
if err != nil {
return &util.SingleVersionPackageLayerAnalyzeResult{}, err
}
var pkgDiffs []util.PackageDiff

// Each layer with modified packages includes a complete list of packages
// in its package database. Thus we diff the current layer with the
// previous one. Not all layers may include differences in packages, those
// are omitted.
preInd := -1
for i := range pack {
var pkgDiff util.PackageDiff
if preInd < 0 && len(pack[i]) > 0 {
pkgDiff = util.GetMapDiff(make(map[string]util.PackageInfo), pack[i])
preInd = i
} else if preInd >= 0 && len(pack[i]) > 0 {
pkgDiff = util.GetMapDiff(pack[preInd], pack[i])
preInd = i
}

pkgDiffs = append(pkgDiffs, pkgDiff)
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Each layer with modified packages includes a complete list of packages in its package database. Thus we diff the current layer with the previous one.

Could you explain this a bit more? Are you saying that each layer with modified packages contains all packages installed in the previous layer, so diffing those two gives you only the packages modified between layer i and layer i-1?

I think this block would also be a little more clear if it looked something like

for i := range pack {
	if len(pack[i] == 0) {
		continue
	}
	if i == 0 {
		pkgDiffs = append(pkgDiffs, util.GetMapDiff(make(map[string]util.PackageInfo), pack[i]))
	} else {
		pkgDiffs = append(pkgDiffs, util.GetMapDiff(pack[i-1], pack[i]))
	}
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, I haven't coded it as you suggest because we can't expect to have different packages in each layer. Image there are some layers that do not modify the package database, because they configure files or run some scripts or whatever; for those layers the reuslt of the getPackages is an empty map. I don't want to diff those empty maps with any other layer that actually modifies the package database, because the result will be the same as treating any package (regardless the layer in which they were appended) as a new addition.

So what I do here to compare only layers which actually include a modified package database. That means the diff is not with the immediate previous layer, but with the previous layer that actually includes some package database.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ahh ok, that makes sense now. Could you just update your comment slightly to explain that it's not the previous layer, but the previous layer that contains a package db?


return &util.SingleVersionPackageLayerAnalyzeResult{
Image: image.Source,
AnalyzeType: strings.TrimSuffix(analyzer.Name(), "Analyzer"),
Analysis: util.PackageLayerDiff{
PackageDiffs: pkgDiffs,
},
}, nil
}
72 changes: 72 additions & 0 deletions util/analyze_output_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,78 @@ func (r SingleVersionPackageAnalyzeResult) OutputText(diffType string, format st
return TemplateOutputFromFormat(strResult, "SingleVersionPackageAnalyze", format)
}

type SingleVersionPackageLayerAnalyzeResult AnalyzeResult

func (r SingleVersionPackageLayerAnalyzeResult) OutputStruct() interface{} {
analysis, valid := r.Analysis.(PackageLayerDiff)
if !valid {
logrus.Error("Unexpected structure of Analysis. Should be of type PackageLayerDiff")
return fmt.Errorf("Could not output %s analysis result", r.AnalyzeType)
}

type PkgDiff struct {
Packages1 []PackageOutput
Packages2 []PackageOutput
InfoDiff []Info
}

var analysisOutput []PkgDiff
for _, d := range analysis.PackageDiffs {
diffOutput := PkgDiff{
Packages1: getSingleVersionPackageOutput(d.Packages1),
Packages2: getSingleVersionPackageOutput(d.Packages2),
InfoDiff: getSingleVersionInfoDiffOutput(d.InfoDiff),
}
analysisOutput = append(analysisOutput, diffOutput)
}

output := struct {
Image string
AnalyzeType string
Analysis []PkgDiff
}{
Image: r.Image,
AnalyzeType: r.AnalyzeType,
Analysis: analysisOutput,
}
return output
}

func (r SingleVersionPackageLayerAnalyzeResult) OutputText(diffType string, format string) error {
analysis, valid := r.Analysis.(PackageLayerDiff)
if !valid {
logrus.Error("Unexpected structure of Analysis. Should be of type PackageLayerDiff")
return fmt.Errorf("Could not output %s analysis result", r.AnalyzeType)
}

type StrDiff struct {
Packages1 []StrPackageOutput
Packages2 []StrPackageOutput
InfoDiff []StrInfo
}

var analysisOutput []StrDiff
for _, d := range analysis.PackageDiffs {
diffOutput := StrDiff{
Packages1: stringifyPackages(getSingleVersionPackageOutput(d.Packages1)),
Packages2: stringifyPackages(getSingleVersionPackageOutput(d.Packages2)),
InfoDiff: stringifyPackageDiff(getSingleVersionInfoDiffOutput(d.InfoDiff)),
}
analysisOutput = append(analysisOutput, diffOutput)
}

strResult := struct {
Image string
AnalyzeType string
Analysis []StrDiff
}{
Image: r.Image,
AnalyzeType: r.AnalyzeType,
Analysis: analysisOutput,
}
return TemplateOutputFromFormat(strResult, "SingleVersionPackageLayerAnalyze", format)
}

type PackageOutput struct {
Name string
Path string `json:",omitempty"`
Expand Down
66 changes: 66 additions & 0 deletions util/diff_output_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,72 @@ func getSingleVersionInfoDiffOutput(infoDiff []Info) []Info {
return infoDiff
}

type SingleVersionPackageLayerDiffResult DiffResult

func (r SingleVersionPackageLayerDiffResult) OutputStruct() interface{} {
diff, valid := r.Diff.(PackageLayerDiff)
if !valid {
logrus.Error("Unexpected structure of Diff. Should follow the PackageLayerDiff struct")
return fmt.Errorf("Could not output %s diff result", r.DiffType)
}

type PkgDiff struct {
Packages1 []PackageOutput
Packages2 []PackageOutput
InfoDiff []Info
}

var diffOutputs []PkgDiff
for _, d := range diff.PackageDiffs {
diffOutput := PkgDiff{
Packages1: getSingleVersionPackageOutput(d.Packages1),
Packages2: getSingleVersionPackageOutput(d.Packages2),
InfoDiff: getSingleVersionInfoDiffOutput(d.InfoDiff),
}
diffOutputs = append(diffOutputs, diffOutput)
}

r.Diff = diffOutputs
return r
}

func (r SingleVersionPackageLayerDiffResult) OutputText(diffType string, format string) error {
diff, valid := r.Diff.(PackageLayerDiff)
if !valid {
logrus.Error("Unexpected structure of Diff. Should follow the PackageLayerDiff struct")
return fmt.Errorf("Could not output %s diff result", r.DiffType)
}

type StrDiff struct {
Packages1 []StrPackageOutput
Packages2 []StrPackageOutput
InfoDiff []StrInfo
}

var diffOutputs []StrDiff
for _, d := range diff.PackageDiffs {
diffOutput := StrDiff{
Packages1: stringifyPackages(getSingleVersionPackageOutput(d.Packages1)),
Packages2: stringifyPackages(getSingleVersionPackageOutput(d.Packages2)),
InfoDiff: stringifyPackageDiff(getSingleVersionInfoDiffOutput(d.InfoDiff)),
}
diffOutputs = append(diffOutputs, diffOutput)
}

strResult := struct {
Image1 string
Image2 string
DiffType string
Diff []StrDiff
}{
Image1: r.Image1,
Image2: r.Image2,
DiffType: r.DiffType,
Diff: diffOutputs,
}
return TemplateOutputFromFormat(strResult, "SingleVersionPackageLayerDiff", format)
}

type HistDiffResult DiffResult

func (r HistDiffResult) OutputStruct() interface{} {
Expand Down
26 changes: 14 additions & 12 deletions util/format_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,20 @@ import (
)

var templates = map[string]string{
"SingleVersionPackageDiff": SingleVersionDiffOutput,
"MultiVersionPackageDiff": MultiVersionDiffOutput,
"HistDiff": HistoryDiffOutput,
"MetadataDiff": MetadataDiffOutput,
"DirDiff": FSDiffOutput,
"MultipleDirDiff": FSLayerDiffOutput,
"FilenameDiff": FilenameDiffOutput,
"ListAnalyze": ListAnalysisOutput,
"FileAnalyze": FileAnalysisOutput,
"FileLayerAnalyze": FileLayerAnalysisOutput,
"MultiVersionPackageAnalyze": MultiVersionPackageOutput,
"SingleVersionPackageAnalyze": SingleVersionPackageOutput,
"SingleVersionPackageDiff": SingleVersionDiffOutput,
"SingleVersionPackageLayerDiff": SingleVersionLayerDiffOutput,
"MultiVersionPackageDiff": MultiVersionDiffOutput,
"HistDiff": HistoryDiffOutput,
"MetadataDiff": MetadataDiffOutput,
"DirDiff": FSDiffOutput,
"MultipleDirDiff": FSLayerDiffOutput,
"FilenameDiff": FilenameDiffOutput,
"ListAnalyze": ListAnalysisOutput,
"FileAnalyze": FileAnalysisOutput,
"FileLayerAnalyze": FileLayerAnalysisOutput,
"MultiVersionPackageAnalyze": MultiVersionPackageOutput,
"SingleVersionPackageAnalyze": SingleVersionPackageOutput,
"SingleVersionPackageLayerAnalyze": SingleVersionPackageLayerOutput,
}

func JSONify(diff interface{}) error {
Expand Down
6 changes: 6 additions & 0 deletions util/package_diff_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ type PackageDiff struct {
InfoDiff []Info
}

// PackageLayerDiff stores the difference information between two images
// layer by layer in PackageDiff array
type PackageLayerDiff struct {
PackageDiffs []PackageDiff
}

// Info stores the information for one package in two different images.
type Info struct {
Package string
Expand Down
Loading