@@ -173,6 +173,8 @@ object FactorGenerationStage extends DatagenStage with Logging {
173
173
date_trunc(" day" , $" creationDate" ).as(" creationDay" ),
174
174
date_trunc(" day" , $" deletionDate" ).as(" deletionDay" ),
175
175
$" MessageId" )
176
+ .orderBy($" MessageId" )
177
+
176
178
val sampleSize = 20000.0
177
179
val count = messages.count()
178
180
val sampleFraction = Math .min(sampleSize / count, 1.0 )
@@ -407,6 +409,7 @@ object FactorGenerationStage extends DatagenStage with Logging {
407
409
log.info(s " Factor people4Hops: using ${sampleSize} samples ( ${sampleFraction * 100 }%) " )
408
410
409
411
peopleInChina
412
+ .orderBy($" Person.id" )
410
413
.sample(sampleFraction, 42 )
411
414
.join(relations.alias(" knows" ), $" Person.id" === $" knows.Person1Id" )
412
415
.select($" knows.Person1Id" .alias(" Person1Id" ), $" knows.Person2Id" .alias(" Person2Id" ))
@@ -428,6 +431,7 @@ object FactorGenerationStage extends DatagenStage with Logging {
428
431
$" Person2.creationDate" .as(" Person2CreationDate" ),
429
432
$" Person2.deletionDate" .as(" Person2DeletionDate" )
430
433
)
434
+ .orderBy($" Person1Id" , $" Person2Id" )
431
435
432
436
val sampleFractionPersonPairs = Math .min(10000.0 / personPairs.count(), 1.0 )
433
437
personPairs.sample(sampleFractionPersonPairs, 42 )
@@ -455,6 +459,7 @@ object FactorGenerationStage extends DatagenStage with Logging {
455
459
log.info(s " Factor people4Hops: using ${sampleSize} samples ( ${sampleFraction * 100 }%) " )
456
460
457
461
peopleInChina
462
+ .orderBy($" Person.id" )
458
463
.sample(sampleFraction, 42 )
459
464
.join(relations.alias(" knows" ), $" Person.id" === $" knows.Person1Id" )
460
465
.select($" knows.Person1Id" .alias(" Person1Id" ), $" knows.Person2Id" .alias(" Person2Id" ))
@@ -476,6 +481,7 @@ object FactorGenerationStage extends DatagenStage with Logging {
476
481
$" Person2.creationDate" .as(" Person2CreationDate" ),
477
482
$" Person2.deletionDate" .as(" Person2DeletionDate" )
478
483
)
484
+ .orderBy($" Person1Id" , $" Person2Id" )
479
485
480
486
val sampleFractionPersonPairs = Math .min(10000.0 / personPairs.count(), 1.0 )
481
487
personPairs.sample(sampleFractionPersonPairs, 42 )
0 commit comments