Ingredient: Using anonymizers
This example shows a shelf with anonymizers. Anonymizers make a dataset look like it has random data by modifying the ingredient value.
[7]:
from examples_base import *
# Each ingredient carries its own anonymizer callable:
#   - state: the name is reversed character by character
#   - population: 1000 is added to the summed value
shelf = Shelf({
    'state': Dimension(Census.state, anonymizer=lambda value: value[::-1]),
    'population': Metric(
        func.sum(Census.pop2000),
        anonymizer=lambda value: value + 1000,
    ),
})

# Anonymizers only take effect when the Anonymize extension class is
# supplied and the recipe's anonymize flag is set to True.
recipe = (
    Recipe(shelf=shelf, session=oven.Session(), extension_classes=[Anonymize])
    .dimensions('state')
    .metrics('population')
    .anonymize(True)
)

# Print the generated SQL, then display the resulting dataframe.
print(recipe.to_sql())
recipe.dataset.df
SELECT census.state AS state_raw,
sum(census.pop2000) AS population_raw
FROM census
GROUP BY census.state
[7]:
| | state_raw | population_raw | state | state_id | population |
---|---|---|---|---|---|
0 | Alabama | 4438559 | amabalA | Alabama | 4439559 |
1 | Alaska | 608588 | aksalA | Alaska | 609588 |
2 | Arizona | 5143931 | anozirA | Arizona | 5144931 |
3 | Arkansas | 2672434 | sasnakrA | Arkansas | 2673434 |
4 | California | 33829442 | ainrofilaC | California | 33830442 |
5 | Colorado | 4300877 | odaroloC | Colorado | 4301877 |
6 | Connecticut | 3403620 | tucitcennoC | Connecticut | 3404620 |
7 | Delaware | 782386 | erawaleD | Delaware | 783386 |
8 | District of Columbia | 568103 | aibmuloC fo tcirtsiD | District of Columbia | 569103 |
9 | Florida | 15976093 | adirolF | Florida | 15977093 |
10 | Georgia | 8161776 | aigroeG | Georgia | 8162776 |
11 | Hawaii | 1167027 | iiawaH | Hawaii | 1168027 |
12 | Idaho | 1294560 | ohadI | Idaho | 1295560 |
13 | Illinois | 12405300 | sionillI | Illinois | 12406300 |
14 | Indiana | 6089161 | anaidnI | Indiana | 6090161 |
15 | Iowa | 2926878 | awoI | Iowa | 2927878 |
16 | Kansas | 2675929 | sasnaK | Kansas | 2676929 |
17 | Kentucky | 4028389 | ykcutneK | Kentucky | 4029389 |
18 | Louisiana | 4449489 | anaisiuoL | Louisiana | 4450489 |
19 | Maine | 1271694 | eniaM | Maine | 1272694 |
20 | Maryland | 5274039 | dnalyraM | Maryland | 5275039 |
21 | Massachusetts | 6357515 | sttesuhcassaM | Massachusetts | 6358515 |
22 | Michigan | 9951873 | nagihciM | Michigan | 9952873 |
23 | Minnesota | 4931897 | atosenniM | Minnesota | 4932897 |
24 | Mississippi | 2830133 | ippississiM | Mississippi | 2831133 |
25 | Missouri | 5588759 | iruossiM | Missouri | 5589759 |
26 | Montana | 899459 | anatnoM | Montana | 900459 |
27 | Nebraska | 1705040 | aksarbeN | Nebraska | 1706040 |
28 | Nevada | 2010272 | adaveN | Nevada | 2011272 |
29 | New Hampshire | 1239307 | erihspmaH weN | New Hampshire | 1240307 |
30 | New Jersey | 8420023 | yesreJ weN | New Jersey | 8421023 |
31 | New Mexico | 1809015 | ocixeM weN | New Mexico | 1810015 |
32 | New York | 18978668 | kroY weN | New York | 18979668 |
33 | North Carolina | 7978581 | aniloraC htroN | North Carolina | 7979581 |
34 | North Dakota | 633621 | atokaD htroN | North Dakota | 634621 |
35 | Ohio | 11355210 | oihO | Ohio | 11356210 |
36 | Oklahoma | 3430420 | amohalkO | Oklahoma | 3431420 |
37 | Oregon | 3428319 | nogerO | Oregon | 3429319 |
38 | Pennsylvania | 12276157 | ainavlysnneP | Pennsylvania | 12277157 |
39 | Rhode Island | 1047200 | dnalsI edohR | Rhode Island | 1048200 |
40 | South Carolina | 3983917 | aniloraC htuoS | South Carolina | 3984917 |
41 | South Dakota | 752231 | atokaD htuoS | South Dakota | 753231 |
42 | Tennessee | 5685230 | eessenneT | Tennessee | 5686230 |
43 | Texas | 20830810 | saxeT | Texas | 20831810 |
44 | Utah | 2238675 | hatU | Utah | 2239675 |
45 | Vermont | 609480 | tnomreV | Vermont | 610480 |
46 | Virginia | 6955790 | ainigriV | Virginia | 6956790 |
47 | Washington | 5863102 | notgnihsaW | Washington | 5864102 |
48 | West Virginia | 1805847 | ainigriV tseW | West Virginia | 1806847 |
49 | Wisconsin | 5372159 | nisnocsiW | Wisconsin | 5373159 |
50 | Wyoming | 490336 | gnimoyW | Wyoming | 491336 |
[9]:
from examples_base import *
# The state anonymizer is a format string rather than a callable:
# '{fake:name}' replaces each state with a generated fake person name.
# The population anonymizer adds 1000
shelf = Shelf({
    'state': Dimension(Census.state, anonymizer='{fake:name}'),
    'population': Metric(
        func.sum(Census.pop2000),
        anonymizer=lambda v: v + 1000,
    ),
})

# To use the anonymizer, the extension class Anonymize must be used
# and the anonymize flag must be True.
recipe = (
    Recipe(shelf=shelf, session=oven.Session(), extension_classes=[Anonymize])
    .dimensions('state')
    .metrics('population')
    .anonymize(True)
)

# Look at the output.
print(recipe.to_sql())
recipe.dataset.df
SELECT census.state AS state_raw,
sum(census.pop2000) AS population_raw
FROM census
GROUP BY census.state
[9]:
| | state_raw | population_raw | state | state_id | population |
---|---|---|---|---|---|
0 | Alabama | 4438559 | Steven Williams | Alabama | 4439559 |
1 | Alaska | 608588 | Donald Calderon | Alaska | 609588 |
2 | Arizona | 5143931 | Shannon Bean | Arizona | 5144931 |
3 | Arkansas | 2672434 | Mr. Kyle Hurst | Arkansas | 2673434 |
4 | California | 33829442 | Stephanie Mitchell | California | 33830442 |
5 | Colorado | 4300877 | Alex Graham | Colorado | 4301877 |
6 | Connecticut | 3403620 | John Newton | Connecticut | 3404620 |
7 | Delaware | 782386 | Samantha Norman | Delaware | 783386 |
8 | District of Columbia | 568103 | Justin Taylor | District of Columbia | 569103 |
9 | Florida | 15976093 | Tanya Kelley | Florida | 15977093 |
10 | Georgia | 8161776 | Jacob Koch | Georgia | 8162776 |
11 | Hawaii | 1167027 | Natalie Walsh | Hawaii | 1168027 |
12 | Idaho | 1294560 | Michael Austin | Idaho | 1295560 |
13 | Illinois | 12405300 | Mr. Paul Olson | Illinois | 12406300 |
14 | Indiana | 6089161 | Natalie Mcfarland | Indiana | 6090161 |
15 | Iowa | 2926878 | Laurie Smith | Iowa | 2927878 |
16 | Kansas | 2675929 | Robert Baker | Kansas | 2676929 |
17 | Kentucky | 4028389 | Carol Wright | Kentucky | 4029389 |
18 | Louisiana | 4449489 | Michael Harrison | Louisiana | 4450489 |
19 | Maine | 1271694 | Cassandra Berry | Maine | 1272694 |
20 | Maryland | 5274039 | Matthew Warren | Maryland | 5275039 |
21 | Massachusetts | 6357515 | Michael Bryant | Massachusetts | 6358515 |
22 | Michigan | 9951873 | Michelle Nelson | Michigan | 9952873 |
23 | Minnesota | 4931897 | Rebekah Berg | Minnesota | 4932897 |
24 | Mississippi | 2830133 | Elaine Wood | Mississippi | 2831133 |
25 | Missouri | 5588759 | Kevin Johnson | Missouri | 5589759 |
26 | Montana | 899459 | Lindsey Adams | Montana | 900459 |
27 | Nebraska | 1705040 | Linda Wade | Nebraska | 1706040 |
28 | Nevada | 2010272 | Mark Hayes | Nevada | 2011272 |
29 | New Hampshire | 1239307 | Matthew Anderson | New Hampshire | 1240307 |
30 | New Jersey | 8420023 | Kathleen Little | New Jersey | 8421023 |
31 | New Mexico | 1809015 | Edward Andrews | New Mexico | 1810015 |
32 | New York | 18978668 | Mary James | New York | 18979668 |
33 | North Carolina | 7978581 | Matthew Myers | North Carolina | 7979581 |
34 | North Dakota | 633621 | Luke Huynh | North Dakota | 634621 |
35 | Ohio | 11355210 | Jessica Guerrero | Ohio | 11356210 |
36 | Oklahoma | 3430420 | Valerie Zimmerman | Oklahoma | 3431420 |
37 | Oregon | 3428319 | Jennifer Stephenson | Oregon | 3429319 |
38 | Pennsylvania | 12276157 | Timothy Johnson | Pennsylvania | 12277157 |
39 | Rhode Island | 1047200 | Michael Garcia | Rhode Island | 1048200 |
40 | South Carolina | 3983917 | Timothy Kramer | South Carolina | 3984917 |
41 | South Dakota | 752231 | Christopher Henson | South Dakota | 753231 |
42 | Tennessee | 5685230 | Michael Rodriguez PhD | Tennessee | 5686230 |
43 | Texas | 20830810 | Eric Cross | Texas | 20831810 |
44 | Utah | 2238675 | Rachael Pratt | Utah | 2239675 |
45 | Vermont | 609480 | Michelle Schultz | Vermont | 610480 |
46 | Virginia | 6955790 | Laura Summers | Virginia | 6956790 |
47 | Washington | 5863102 | Shannon Young | Washington | 5864102 |
48 | West Virginia | 1805847 | Connie Mitchell | West Virginia | 1806847 |
49 | Wisconsin | 5372159 | Evan Lee | Wisconsin | 5373159 |
50 | Wyoming | 490336 | Marcus Williams | Wyoming | 491336 |
Using an anonymizer injects the anonymizer function as the last formatter. The original value remains available as `<ingredient>_raw` (for example, `state_raw` and `population_raw` in the tables above).