-
Notifications
You must be signed in to change notification settings - Fork 237
Layered analysis for single version packages #248
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,9 @@ import ( | |
"github.com/sirupsen/logrus" | ||
) | ||
|
||
//APT package database location | ||
const dpkgStatusFile string = "var/lib/dpkg/status" | ||
|
||
type AptAnalyzer struct { | ||
} | ||
|
||
|
@@ -53,7 +56,7 @@ func (a AptAnalyzer) getPackages(image pkgutil.Image) (map[string]util.PackageIn | |
// invalid image directory path | ||
return packages, err | ||
} | ||
statusFile := filepath.Join(path, "var/lib/dpkg/status") | ||
statusFile := filepath.Join(path, dpkgStatusFile) | ||
if _, err := os.Stat(statusFile); err != nil { | ||
// status file does not exist in this layer | ||
return packages, nil | ||
|
@@ -120,3 +123,61 @@ func parseLine(text string, currPackage string, packages map[string]util.Package | |
} | ||
return currPackage | ||
} | ||
|
||
type AptLayerAnalyzer struct { | ||
} | ||
|
||
func (a AptLayerAnalyzer) Name() string { | ||
return "AptLayerAnalyzer" | ||
} | ||
|
||
// AptDiff compares the packages installed by apt-get. | ||
func (a AptLayerAnalyzer) Diff(image1, image2 pkgutil.Image) (util.Result, error) { | ||
diff, err := singleVersionLayerDiff(image1, image2, a) | ||
return diff, err | ||
} | ||
|
||
func (a AptLayerAnalyzer) Analyze(image pkgutil.Image) (util.Result, error) { | ||
analysis, err := singleVersionLayerAnalysis(image, a) | ||
return analysis, err | ||
} | ||
|
||
func (a AptLayerAnalyzer) getPackages(image pkgutil.Image) ([]map[string]util.PackageInfo, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like this shares a lot of code with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure you are right. |
||
var packages []map[string]util.PackageInfo | ||
if _, err := os.Stat(image.FSPath); err != nil { | ||
// invalid image directory path | ||
return packages, err | ||
} | ||
statusFile := filepath.Join(image.FSPath, dpkgStatusFile) | ||
if _, err := os.Stat(statusFile); err != nil { | ||
// status file does not exist in this image | ||
return packages, nil | ||
} | ||
for _, layer := range image.Layers { | ||
layerPackages := make(map[string]util.PackageInfo) | ||
if _, err := os.Stat(layer.FSPath); err != nil { | ||
// invalid layer directory path | ||
return packages, err | ||
} | ||
statusFile := filepath.Join(layer.FSPath, dpkgStatusFile) | ||
if _, err := os.Stat(statusFile); err == nil { | ||
// this layer has a package database | ||
if file, err := os.Open(statusFile); err == nil { | ||
// make sure it gets closed | ||
defer file.Close() | ||
|
||
// create a new scanner and read the file line by line | ||
scanner := bufio.NewScanner(file) | ||
var currPackage string | ||
for scanner.Scan() { | ||
currPackage = parseLine(scanner.Text(), currPackage, layerPackages) | ||
} | ||
} else { | ||
return packages, err | ||
} | ||
} | ||
packages = append(packages, layerPackages) | ||
} | ||
|
||
return packages, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ import ( | |
|
||
pkgutil "github.com/GoogleContainerTools/container-diff/pkg/util" | ||
"github.com/GoogleContainerTools/container-diff/util" | ||
"github.com/sirupsen/logrus" | ||
) | ||
|
||
type MultiVersionPackageAnalyzer interface { | ||
|
@@ -33,6 +34,11 @@ type SingleVersionPackageAnalyzer interface { | |
Name() string | ||
} | ||
|
||
type SingleVersionPackageLayerAnalyzer interface { | ||
getPackages(image pkgutil.Image) ([]map[string]util.PackageInfo, error) | ||
Name() string | ||
} | ||
|
||
func multiVersionDiff(image1, image2 pkgutil.Image, differ MultiVersionPackageAnalyzer) (*util.MultiVersionPackageDiffResult, error) { | ||
pack1, err := differ.getPackages(image1) | ||
if err != nil { | ||
|
@@ -71,6 +77,43 @@ func singleVersionDiff(image1, image2 pkgutil.Image, differ SingleVersionPackage | |
}, nil | ||
} | ||
|
||
// singleVersionLayerDiff diffs the packages of each image layer by layer | ||
func singleVersionLayerDiff(image1, image2 pkgutil.Image, differ SingleVersionPackageLayerAnalyzer) (*util.SingleVersionPackageLayerDiffResult, error) { | ||
pack1, err := differ.getPackages(image1) | ||
if err != nil { | ||
return &util.SingleVersionPackageLayerDiffResult{}, err | ||
} | ||
pack2, err := differ.getPackages(image2) | ||
if err != nil { | ||
return &util.SingleVersionPackageLayerDiffResult{}, err | ||
} | ||
var pkgDiffs []util.PackageDiff | ||
|
||
// Go through each layer for image1 | ||
for i := range pack1 { | ||
if i >= len(pack2) { | ||
// Skip diff when there is no layer to compare with in image2 | ||
continue | ||
} | ||
|
||
pkgDiff := util.GetMapDiff(pack1[i], pack2[i]) | ||
pkgDiffs = append(pkgDiffs, pkgDiff) | ||
} | ||
|
||
if len(image1.Layers) != len(image2.Layers) { | ||
logrus.Infof("%s and %s have different number of layers, please consider using container-diff analyze to view the contents of each image in each layer", image1.Source, image2.Source) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So what does the output look like for this case? We should maybe consider either a) not running the diff at all, or b) trying to do something smart to figure out which layers "match up" between the images. I could also be convinced to just run it as normal, but I'm not sure how useful the output will be. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that this diff is likely to be useless unless you know in advance the images are somehow comparable and they are build using an equivalent procedure (same number of layers, layers organized in the same way, etc.). This is pretty unlikely to happen, agree. I included it for completeness and because this is the approach that is followed in the I understand the concerns, IMHO this is the same issue exposed in #239. As I already suggested in the mentioned issue, I believe that probably rather than trying find out some smart algorithm to choose the layers to diff it could be easier to just let user choose which layers to diff and let user determine which diff is actually meaningful or meaningless. I am also opened to just not perform this diff at all as you suggest in a). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should just go ahead and not run it here, the output will likely just be meaningless so I'd rather not add functionality that can be potentially confusing. I do like the idea of letting the user choose the specific layers to diff as an alternative though, I'll comment on the other issue. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok, I fully agree. So for now I'll modify it to not run the diff here and in case #239 we would be still on time to discuss again if it makes sense to run this diff or not. |
||
} | ||
|
||
return &util.SingleVersionPackageLayerDiffResult{ | ||
Image1: image1.Source, | ||
Image2: image2.Source, | ||
DiffType: strings.TrimSuffix(differ.Name(), "Analyzer"), | ||
Diff: util.PackageLayerDiff{ | ||
PackageDiffs: pkgDiffs, | ||
}, | ||
}, nil | ||
} | ||
|
||
func multiVersionAnalysis(image pkgutil.Image, analyzer MultiVersionPackageAnalyzer) (*util.MultiVersionPackageAnalyzeResult, error) { | ||
pack, err := analyzer.getPackages(image) | ||
if err != nil { | ||
|
@@ -98,3 +141,39 @@ func singleVersionAnalysis(image pkgutil.Image, analyzer SingleVersionPackageAna | |
} | ||
return &analysis, nil | ||
} | ||
|
||
// singleVersionLayerAnalysis returns the packages included, deleted or | ||
// updated in each layer | ||
func singleVersionLayerAnalysis(image pkgutil.Image, analyzer SingleVersionPackageLayerAnalyzer) (*util.SingleVersionPackageLayerAnalyzeResult, error) { | ||
pack, err := analyzer.getPackages(image) | ||
if err != nil { | ||
return &util.SingleVersionPackageLayerAnalyzeResult{}, err | ||
} | ||
var pkgDiffs []util.PackageDiff | ||
|
||
// Each layer with modified packages includes a complete list of packages | ||
// in its package database. Thus we diff the current layer with the | ||
// previous one. Not all layers may include differences in packages, those | ||
// are omitted. | ||
preInd := -1 | ||
for i := range pack { | ||
var pkgDiff util.PackageDiff | ||
if preInd < 0 && len(pack[i]) > 0 { | ||
pkgDiff = util.GetMapDiff(make(map[string]util.PackageInfo), pack[i]) | ||
preInd = i | ||
} else if preInd >= 0 && len(pack[i]) > 0 { | ||
pkgDiff = util.GetMapDiff(pack[preInd], pack[i]) | ||
preInd = i | ||
} | ||
|
||
pkgDiffs = append(pkgDiffs, pkgDiff) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Could you explain this a bit more? Are you saying that each layer with modified packages contains all packages installed in the previous layer, so diffing those two gives you only the packages modified between layer I think this block would also be a little more clear if it looked something like for i := range pack {
if len(pack[i] == 0) {
continue
}
if i == 0 {
pkgDiffs = append(pkgDiffs, util.GetMapDiff(make(map[string]util.PackageInfo), pack[i]))
} else {
pkgDiffs = append(pkgDiffs, util.GetMapDiff(pack[i-1], pack[i]))
}
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Right, I haven't coded it as you suggest because we can't expect to have different packages in each layer. Image there are some layers that do not modify the package database, because they configure files or run some scripts or whatever; for those layers the reuslt of the getPackages is an empty map. I don't want to diff those empty maps with any other layer that actually modifies the package database, because the result will be the same as treating any package (regardless the layer in which they were appended) as a new addition. So what I do here to compare only layers which actually include a modified package database. That means the diff is not with the immediate previous layer, but with the previous layer that actually includes some package database. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ahh ok, that makes sense now. Could you just update your comment slightly to explain that it's not the previous layer, but the previous layer that contains a package db? |
||
|
||
return &util.SingleVersionPackageLayerAnalyzeResult{ | ||
Image: image.Source, | ||
AnalyzeType: strings.TrimSuffix(analyzer.Name(), "Analyzer"), | ||
Analysis: util.PackageLayerDiff{ | ||
PackageDiffs: pkgDiffs, | ||
}, | ||
}, nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should keep this the way it was, this won't match up with the logic to retrieve the analyzers from the map in
checkIfValidAnalyzer()
. I'm guessing you did this so we could allow--types layers
? We could add that as a valid string as well, but this would also match--types asdfasdfbnlayer
which we probably shouldn't do.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well I included it to distinguish the Analyzers that require layer extraction from the ones that don't require it. I noticed the layers were only extracted for the
layer
analyzer, thus I though that enabling the layer extraction for any analyzer includinglayer
in its name could be a generic way to distinguish them from the rest in a simple string test. Obviously that assumes that any analyzer requiring extracted layers is named using the.*layer.*
pattern. At the time of writing it I was having in mindlayer
,aptlayer
andrpmlayer
analyzers. After allcheckIfValidAnalyzer
runs first, thus at the time of runningincludeLayers
we can realy on having an already valid list of types to check if there is one requiring layers extraction.It would be also fine for me to use a static list of types that do require layer extraction.