Skip to content

Commit fbc52bc

Browse files
authored
Replace apache download tasks with custom ivy repository. (#1438)
Hadoop ecosystem tarballs are hosted only on Apache's download mirrors rather than in Maven repositories. Previously we downloaded these artifacts from a round robin of mirror endpoints, using securely obtained hashes to validate the downloaded files even when they came from non-HTTPS sources. This PR removes those download tasks, replacing them with a custom Ivy repository backed by an HTTPS-enabled Apache mirror. This allows us to ensure the artifacts are securely downloaded and cached by Gradle instead of relying on task outputs as the old mirror download task did.
1 parent f9fc240 commit fbc52bc

File tree

10 files changed

+215
-505
lines changed

10 files changed

+215
-505
lines changed

buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/HadoopClusterFormationTasks.groovy

Lines changed: 15 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,12 @@ import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.HadoopClusterConfigur
2828
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.InstanceConfiguration
2929
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.RoleConfiguration
3030
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.ServiceConfiguration
31-
import org.elasticsearch.hadoop.gradle.tasks.ApacheMirrorDownload
32-
import org.elasticsearch.hadoop.gradle.tasks.VerifyChecksums
3331
import org.gradle.api.AntBuilder
3432
import org.gradle.api.DefaultTask
3533
import org.gradle.api.GradleException
3634
import org.gradle.api.Project
3735
import org.gradle.api.Task
36+
import org.gradle.api.artifacts.Configuration
3837
import org.gradle.api.logging.Logger
3938
import org.gradle.api.tasks.Copy
4039
import org.gradle.api.tasks.Delete
@@ -59,14 +58,6 @@ class HadoopClusterFormationTasks {
5958
Task stopTask
6059
}
6160

62-
/**
63-
* Pairing of download and verification tasks for a distribution
64-
*/
65-
static class DistributionTasks {
66-
ApacheMirrorDownload download
67-
VerifyChecksums verify
68-
}
69-
7061
/**
7162
* Adds dependent tasks to the given task to start and stop a cluster with the given configuration.
7263
* <p>
@@ -104,7 +95,7 @@ class HadoopClusterFormationTasks {
10495
for (ServiceConfiguration serviceConfiguration : clusterConfiguration.getServices()) {
10596

10697
// Get the download task for this service's package and add it to the service's dependency tasks
107-
DistributionTasks distributionTasks = getOrConfigureDistributionDownload(project, serviceConfiguration)
98+
Configuration distributionConfiguration = getOrConfigureDistributionDownload(project, serviceConfiguration)
10899

109100
// Keep track of the start tasks in this service
110101
List<TaskPair> serviceTaskPairs = []
@@ -140,7 +131,7 @@ class HadoopClusterFormationTasks {
140131
TaskPair instanceTasks
141132
try {
142133
instanceTasks = configureNode(project, prefix, instanceDependencies, instanceInfo,
143-
distributionTasks)
134+
distributionConfiguration)
144135
} catch (Exception e) {
145136
throw new GradleException(
146137
"Exception occurred while initializing instance [${instanceInfo.toString()}]", e)
@@ -207,36 +198,21 @@ class HadoopClusterFormationTasks {
207198
* either an already created one from the root project, or a newly created download task. These also contain the
208199
* verify task to ensure the download has been securely captured.
209200
*/
210-
static DistributionTasks getOrConfigureDistributionDownload(Project project, ServiceConfiguration serviceConfiguration) {
201+
static Configuration getOrConfigureDistributionDownload(Project project, ServiceConfiguration serviceConfiguration) {
211202
Version serviceVersion = serviceConfiguration.getVersion()
212203

213-
String downloadTaskName = "download${serviceConfiguration.serviceDescriptor.packageName().capitalize()}#${serviceVersion}"
214-
String verifyTaskName = "verify${serviceConfiguration.serviceDescriptor.packageName().capitalize()}#${serviceVersion}"
215-
216-
ApacheMirrorDownload downloadTask = project.rootProject.tasks.findByName(downloadTaskName) as ApacheMirrorDownload
217-
if (downloadTask == null) {
218-
downloadTask = project.rootProject.tasks.create(name: downloadTaskName, type: ApacheMirrorDownload) as ApacheMirrorDownload
219-
serviceConfiguration.getServiceDescriptor().configureDownload(downloadTask, serviceConfiguration)
220-
downloadTask.group = 'downloads'
221-
downloadTask.onlyIf { !downloadTask.outputFile().exists() }
222-
}
223-
224-
VerifyChecksums verifyTask = project.rootProject.tasks.findByName(verifyTaskName) as VerifyChecksums
225-
if (verifyTask == null) {
226-
verifyTask = project.rootProject.tasks.create(name: verifyTaskName, type: VerifyChecksums) as VerifyChecksums
227-
verifyTask.group = 'downloads'
228-
verifyTask.dependsOn downloadTask
229-
verifyTask.inputFile downloadTask.outputFile()
230-
for (Map.Entry<String, String> hash : serviceConfiguration.serviceDescriptor.packageHashVerification(serviceVersion)) {
231-
verifyTask.checksum hash.key, hash.value
232-
}
204+
String configurationName = "download${serviceConfiguration.serviceDescriptor.packageName().capitalize()}#${serviceVersion}"
205+
Configuration configuration = project.configurations.findByName(configurationName)
206+
if (configuration == null) {
207+
configuration = project.configurations.create(configurationName)
208+
project.dependencies.add(configurationName, serviceConfiguration.getServiceDescriptor().getDependencyCoordinates(serviceConfiguration))
233209
}
234210

235-
return new DistributionTasks(download: downloadTask, verify: verifyTask)
211+
return configuration
236212
}
237213

238214
static TaskPair configureNode(Project project, String prefix, Object dependsOn, InstanceInfo node,
239-
DistributionTasks distribution) {
215+
Configuration distributionConfiguration) {
240216
Task setup = project.tasks.create(name: taskName(prefix, node, 'clean'), type: Delete, dependsOn: dependsOn) {
241217
delete node.homeDir
242218
delete node.cwd
@@ -257,7 +233,7 @@ class HadoopClusterFormationTasks {
257233
}
258234

259235
// Always extract the package contents, and configure the files
260-
setup = configureExtractTask(taskName(prefix, node, 'extract'), project, setup, node, distribution)
236+
setup = configureExtractTask(taskName(prefix, node, 'extract'), project, setup, node, distributionConfiguration)
261237
setup = configureWriteConfigTask(taskName(prefix, node, 'configure'), project, setup, node)
262238
setup = configureExtraConfigFilesTask(taskName(prefix, node, 'extraConfig'), project, setup, node)
263239

@@ -329,13 +305,13 @@ class HadoopClusterFormationTasks {
329305
return setup
330306
}
331307

332-
static Task configureExtractTask(String name, Project project, Task setup, InstanceInfo node, DistributionTasks distribution) {
333-
List extractDependsOn = [distribution.verify, setup]
308+
static Task configureExtractTask(String name, Project project, Task setup, InstanceInfo node, Configuration distributionConfiguration) {
309+
List extractDependsOn = [distributionConfiguration, setup]
334310
return project.tasks.create(name: name, type: Copy, dependsOn: extractDependsOn) {
335311
group = 'hadoopFixture'
336312
// TODO: Switch logic if a service is ever not a tar distribution
337313
from {
338-
project.tarTree(project.resources.gzip(distribution.download.outputFile()))
314+
project.tarTree(project.resources.gzip(distributionConfiguration.files.first()))
339315
}
340316
into node.baseDir
341317
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.hadoop.gradle.fixture.hadoop
21+
22+
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.HadoopClusterConfiguration
23+
import org.gradle.api.Action
24+
import org.gradle.api.NamedDomainObjectContainer
25+
import org.gradle.api.NamedDomainObjectFactory
26+
import org.gradle.api.Plugin
27+
import org.gradle.api.Project
28+
import org.gradle.api.artifacts.dsl.RepositoryHandler
29+
import org.gradle.api.artifacts.repositories.IvyArtifactRepository
30+
import org.gradle.api.artifacts.repositories.IvyPatternRepositoryLayout
31+
import org.gradle.api.publish.ivy.IvyArtifact
32+
33+
class HadoopFixturePlugin implements Plugin<Project> {
34+
35+
private static final String APACHE_MIRROR = "https://apache.osuosl.org/"
36+
37+
static class HadoopFixturePluginExtension {
38+
private NamedDomainObjectContainer<HadoopClusterConfiguration> clusters
39+
40+
HadoopFixturePluginExtension(final Project project) {
41+
this.clusters = project.container(HadoopClusterConfiguration.class, new NamedDomainObjectFactory<HadoopClusterConfiguration>() {
42+
@Override
43+
HadoopClusterConfiguration create(String name) {
44+
return new HadoopClusterConfiguration(project, name)
45+
}
46+
})
47+
}
48+
49+
HadoopClusterConfiguration cluster(String name, Closure config) {
50+
clusters.maybeCreate(name)
51+
return clusters.getByName(name, config)
52+
}
53+
54+
NamedDomainObjectContainer<HadoopClusterConfiguration> getClusters() {
55+
return clusters
56+
}
57+
}
58+
59+
@Override
60+
void apply(Project project) {
61+
HadoopFixturePluginExtension extension = project.getExtensions().create("hadoop", HadoopFixturePluginExtension.class, project)
62+
configureApacheMirrorRepository(project)
63+
project.afterEvaluate {
64+
extension.getClusters().forEach { config ->
65+
// Finish cluster setup
66+
HadoopClusterFormationTasks.setup(project, config)
67+
}
68+
}
69+
}
70+
71+
private static configureApacheMirrorRepository(Project project) {
72+
RepositoryHandler repositoryHandler = project.getRepositories()
73+
repositoryHandler.add(repositoryHandler.ivy({IvyArtifactRepository ivyArtifactRepository ->
74+
ivyArtifactRepository.setUrl(APACHE_MIRROR)
75+
ivyArtifactRepository.patternLayout({IvyPatternRepositoryLayout ivyPatternRepositoryLayout ->
76+
// The [revision] token here deliberately carries the full artifact file name, breaking the
77+
// usual convention of a strictly numerical version, because Hive's artifact naming is too
78+
// unorthodox (it has a very unusual layout) for a more robust conventional pattern to fit.
79+
ivyPatternRepositoryLayout.artifact("[organization]/[module]/[revision].[ext]")
80+
ivyPatternRepositoryLayout.setM2compatible(true)
81+
})
82+
ivyArtifactRepository.metadataSources({IvyArtifactRepository.MetadataSources metadataSources ->
83+
metadataSources.artifact()
84+
})
85+
}))
86+
}
87+
}

buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/ServiceDescriptor.groovy

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ package org.elasticsearch.hadoop.gradle.fixture.hadoop
2222
import org.elasticsearch.gradle.Version
2323
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.InstanceConfiguration
2424
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.ServiceConfiguration
25-
import org.elasticsearch.hadoop.gradle.tasks.ApacheMirrorDownload
2625

2726
import static org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.SettingsContainer.FileSettings
2827

@@ -60,9 +59,10 @@ interface ServiceDescriptor {
6059
Version defaultVersion()
6160

6261
/**
63-
* Callback to configure a download task to perform the package download.
62+
* The coordinates for this dependency that will be used with a custom Ivy Repository to download the artifact from
63+
* an Apache mirror.
6464
*/
65-
void configureDownload(ApacheMirrorDownload task, ServiceConfiguration configuration)
65+
String getDependencyCoordinates(ServiceConfiguration configuration)
6666

6767
/**
6868
* The official apache package name for the artifact.
@@ -74,11 +74,6 @@ interface ServiceDescriptor {
7474
*/
7575
String artifactName(ServiceConfiguration configuration)
7676

77-
/**
78-
* Return a mapping of hash algorithm id to hash value for an artifact of the given version.
79-
*/
80-
Map<String, String> packageHashVerification(Version version)
81-
8277
/**
8378
* The name of the directory under the base dir that contains the package contents.
8479
*/

buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/HadoopServiceDescriptor.groovy

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,11 @@ import org.elasticsearch.hadoop.gradle.fixture.hadoop.ServiceDescriptor
2727
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.InstanceConfiguration
2828
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.ServiceConfiguration
2929
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.SettingsContainer
30-
import org.elasticsearch.hadoop.gradle.tasks.ApacheMirrorDownload
31-
import org.gradle.api.GradleException
3230

3331
import static org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.SettingsContainer.FileSettings
3432

3533
class HadoopServiceDescriptor implements ServiceDescriptor {
3634

37-
static final Map<Version, Map<String, String>> VERSION_MAP = [:]
38-
static {
39-
VERSION_MAP.put(new Version(2, 7, 7),
40-
['SHA-512': '17c8917211dd4c25f78bf60130a390f9e273b0149737094e45f4ae5c917b1174b97eb90818c5df068e607835120126281bcc07514f38bd7fd3cb8e9d3db1bdde'])
41-
}
42-
4335
static final RoleDescriptor NAMENODE = RoleDescriptor.requiredProcess('namenode')
4436
static final RoleDescriptor DATANODE = RoleDescriptor.requiredProcess('datanode', [NAMENODE])
4537
static final RoleDescriptor RESOURCEMANAGER = RoleDescriptor.requiredProcess('resourcemanager')
@@ -73,12 +65,8 @@ class HadoopServiceDescriptor implements ServiceDescriptor {
7365
}
7466

7567
@Override
76-
void configureDownload(ApacheMirrorDownload task, ServiceConfiguration configuration) {
77-
Version version = configuration.getVersion()
78-
task.packagePath = 'hadoop/common'
79-
task.packageName = 'hadoop'
80-
task.artifactFileName = "hadoop-${version}.tar.gz"
81-
task.version = "${version}"
68+
String getDependencyCoordinates(ServiceConfiguration configuration) {
69+
return "hadoop.common:hadoop-${configuration.getVersion()}:${artifactName(configuration)}@tar.gz"
8270
}
8371

8472
@Override
@@ -92,15 +80,6 @@ class HadoopServiceDescriptor implements ServiceDescriptor {
9280
return "hadoop-${version}"
9381
}
9482

95-
@Override
96-
Map<String, String> packageHashVerification(Version version) {
97-
Map<String, String> hashVerifications = VERSION_MAP.get(version)
98-
if (hashVerifications == null) {
99-
throw new GradleException("Unsupported version [$version] - No download hash configured")
100-
}
101-
return hashVerifications
102-
}
103-
10483
@Override
10584
String homeDirName(InstanceConfiguration configuration) {
10685
return artifactName(configuration.getServiceConf())

buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/HiveServiceDescriptor.groovy

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,11 @@ import org.elasticsearch.hadoop.gradle.fixture.hadoop.RoleDescriptor
2626
import org.elasticsearch.hadoop.gradle.fixture.hadoop.ServiceDescriptor
2727
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.InstanceConfiguration
2828
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.ServiceConfiguration
29-
import org.elasticsearch.hadoop.gradle.tasks.ApacheMirrorDownload
30-
import org.gradle.api.GradleException
3129

3230
import static org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.SettingsContainer.FileSettings
3331

3432
class HiveServiceDescriptor implements ServiceDescriptor {
3533

36-
static final Map<Version, Map<String, String>> VERSION_MAP = [:]
37-
static {
38-
VERSION_MAP.put(new Version(1, 2, 2),
39-
['SHA-256' : '763b246a1a1ceeb815493d1e5e1d71836b0c5b9be1c4cd9c8d685565113771d1'])
40-
}
41-
4234
static RoleDescriptor HIVESERVER = RoleDescriptor.requiredProcess('hiveserver')
4335

4436
@Override
@@ -67,12 +59,8 @@ class HiveServiceDescriptor implements ServiceDescriptor {
6759
}
6860

6961
@Override
70-
void configureDownload(ApacheMirrorDownload task, ServiceConfiguration configuration) {
71-
Version version = configuration.getVersion()
72-
task.packagePath = 'hive'
73-
task.packageName = 'hive'
74-
task.artifactFileName = "apache-hive-${version}-bin.tar.gz"
75-
task.version = "${version}"
62+
String getDependencyCoordinates(ServiceConfiguration configuration) {
63+
return "hive:hive-${configuration.getVersion()}:${artifactName(configuration)}@tar.gz"
7664
}
7765

7866
@Override
@@ -86,15 +74,6 @@ class HiveServiceDescriptor implements ServiceDescriptor {
8674
return "apache-hive-${version}-bin"
8775
}
8876

89-
@Override
90-
Map<String, String> packageHashVerification(Version version) {
91-
Map<String, String> hashVerifications = VERSION_MAP.get(version)
92-
if (hashVerifications == null) {
93-
throw new GradleException("Unsupported version [$version] - No download hash configured")
94-
}
95-
return hashVerifications
96-
}
97-
9877
@Override
9978
String homeDirName(InstanceConfiguration configuration) {
10079
return artifactName(configuration.getServiceConf())

buildSrc/src/main/groovy/org/elasticsearch/hadoop/gradle/fixture/hadoop/services/PigServiceDescriptor.groovy

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -26,19 +26,11 @@ import org.elasticsearch.hadoop.gradle.fixture.hadoop.ServiceDescriptor
2626
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.HadoopClusterConfiguration
2727
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.InstanceConfiguration
2828
import org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.ServiceConfiguration
29-
import org.elasticsearch.hadoop.gradle.tasks.ApacheMirrorDownload
30-
import org.gradle.api.GradleException
3129

3230
import static org.elasticsearch.hadoop.gradle.fixture.hadoop.conf.SettingsContainer.FileSettings
3331

3432
class PigServiceDescriptor implements ServiceDescriptor {
3533

36-
static final Map<Version, Map<String, String>> VERSION_MAP = [:]
37-
static {
38-
VERSION_MAP.put(new Version(0, 17, 0),
39-
['MD5': 'da76998409fe88717b970b45678e00d4'])
40-
}
41-
4234
static RoleDescriptor GATEWAY = RoleDescriptor.requiredGateway('pig', [])
4335

4436
@Override
@@ -67,11 +59,8 @@ class PigServiceDescriptor implements ServiceDescriptor {
6759
}
6860

6961
@Override
70-
void configureDownload(ApacheMirrorDownload task, ServiceConfiguration configuration) {
71-
task.setPackagePath('pig')
72-
task.setPackageName('pig')
73-
task.setVersion(configuration.getVersion().toString())
74-
task.setArtifactFileName("${artifactName(configuration)}.tar.gz")
62+
String getDependencyCoordinates(ServiceConfiguration configuration) {
63+
return "pig:pig-${configuration.getVersion()}:${artifactName(configuration)}@tar.gz"
7564
}
7665

7766
@Override
@@ -84,15 +73,6 @@ class PigServiceDescriptor implements ServiceDescriptor {
8473
return "pig-${configuration.getVersion()}"
8574
}
8675

87-
@Override
88-
Map<String, String> packageHashVerification(Version version) {
89-
Map<String, String> hashVerifications = VERSION_MAP.get(version)
90-
if (hashVerifications == null) {
91-
throw new GradleException("Unsupported version [$version] - No download hash configured")
92-
}
93-
return hashVerifications
94-
}
95-
9676
@Override
9777
String homeDirName(InstanceConfiguration configuration) {
9878
return artifactName(configuration.getServiceConf())

0 commit comments

Comments
 (0)