@@ -58,7 +58,7 @@ def generate_synthetic_control_data(
58
58
Generates data for synthetic control example.
59
59
60
60
:param N:
61
- Number fo data points
61
+ Number of data points
62
62
:param treatment_time:
63
63
Index where treatment begins in the generated dataframe
64
64
:param grw_mu:
@@ -324,15 +324,6 @@ def generate_geolift_data():
324
324
treatment_time = pd .to_datetime ("2022-01-01" )
325
325
causal_impact = 0.2
326
326
327
- def create_series (n = 52 , amplitude = 1 , length_scale = 2 ):
328
- """
329
- Returns numpy tile with generated seasonality data repeated over
330
- multiple years
331
- """
332
- return np .tile (
333
- generate_seasonality (n = n , amplitude = amplitude , length_scale = 2 ) + 3 , n_years
334
- )
335
-
336
327
time = pd .date_range (start = "2019-01-01" , periods = 52 * n_years , freq = "W" )
337
328
338
329
untreated = [
@@ -345,7 +336,12 @@ def create_series(n=52, amplitude=1, length_scale=2):
345
336
]
346
337
347
338
df = (
348
- pd .DataFrame ({country : create_series () for country in untreated })
339
+ pd .DataFrame (
340
+ {
341
+ country : create_series (n_years = n_years , intercept = 3 )
342
+ for country in untreated
343
+ }
344
+ )
349
345
.assign (time = time )
350
346
.set_index ("time" )
351
347
)
@@ -360,6 +356,67 @@ def create_series(n=52, amplitude=1, length_scale=2):
360
356
361
357
# add treatment effect
362
358
df ["Denmark" ] += np .where (df .index < treatment_time , 0 , causal_impact )
359
+
360
+ # ensure we never see any negative sales
361
+ df = df .clip (lower = 0 )
362
+
363
+ return df
364
+
365
+
366
def generate_multicell_geolift_data():
    """Generate synthetic data for a multi-cell geolift example.

    The data consists of 12 untreated units (`u1` to `u12`) and 4 treated
    units (`t1` to `t4`). Each treated unit is a weighted combination of the
    untreated units, with its own weights drawn from a flat Dirichlet
    distribution. A constant treatment effect of 0.2 is added to every treated
    unit from `treatment_time` (2022-01-01) onwards. Gaussian observation
    noise (standard deviation 0.1) is then added to all units and the result
    is clipped at zero.

    The timeseries data is observed at weekly resolution for 4 years starting
    at 2019-01-01. Each untreated series is produced by `create_series`, i.e.
    one year of generated seasonality (plus an intercept of 3) repeated for
    every year.

    :return:
        DataFrame indexed by `time` with one column per unit, untreated units
        first, followed by the treated units
    """
    n_years = 4
    treatment_time = pd.to_datetime("2022-01-01")
    causal_impact = 0.2
    time = pd.date_range(start="2019-01-01", periods=52 * n_years, freq="W")

    untreated = [f"u{i}" for i in range(1, 13)]
    treated = [f"t{i}" for i in range(1, 5)]

    df = (
        pd.DataFrame(
            {
                country: create_series(n_years=n_years, intercept=3)
                for country in untreated
            }
        )
        .assign(time=time)
        .set_index("time")
    )

    for treated_geo in treated:
        # create treated unit as a weighted sum of the (noise-free) untreated units
        weights = np.random.dirichlet(np.ones(len(untreated)), size=1)[0]
        df[treated_geo] = np.dot(df[untreated].values, weights)
        # add treatment effect from treatment_time onwards
        df[treated_geo] += np.where(df.index < treatment_time, 0, causal_impact)

    # add observation noise to all geos; this happens after the treated units
    # are built, so each treated unit gets its own independent noise
    for col in untreated + treated:
        df[col] += np.random.normal(size=len(df), scale=0.1)

    # ensure we never see any negative sales
    df = df.clip(lower=0)

    return df
364
421
365
422
@@ -387,3 +444,14 @@ def periodic_kernel(x1, x2, period=1, length_scale=1, amplitude=1):
387
444
return amplitude ** 2 * np .exp (
388
445
- 2 * np .sin (np .pi * np .abs (x1 - x2 ) / period ) ** 2 / length_scale ** 2
389
446
)
447
+
448
+
449
def create_series(n=52, amplitude=1, length_scale=2, n_years=4, intercept=3):
    """
    Returns numpy tile with generated seasonality data repeated over
    multiple years

    :param n:
        Forwarded to `generate_seasonality` as `n` (presumably the number of
        points in one seasonal cycle; 52 matches the weekly data used by the
        callers)
    :param amplitude:
        Forwarded to `generate_seasonality` as `amplitude`
    :param length_scale:
        Forwarded to `generate_seasonality` as `length_scale`
    :param n_years:
        Number of times the generated seasonal pattern is repeated
    :param intercept:
        Constant added to the seasonal pattern before it is repeated
    """
    # Forward the caller's length_scale; it was previously hard-coded to 2,
    # which silently ignored the argument.
    seasonality = generate_seasonality(
        n=n, amplitude=amplitude, length_scale=length_scale
    )
    return np.tile(seasonality + intercept, n_years)
0 commit comments