As a result of the prompt described in the previous section, we have the following Python script excerpt, which produces the profiles.json file containing the 50,000 synthetic customer profiles.
import random
import uuid
import json
from datetime import datetime, timedelta
from faker import Faker
# Shared Faker instance; used below to draw names, phone numbers and
# random timestamps.
fake = Faker()
# Constants
NUM_PROFILES = 50_000
EMAIL_DOMAIN = "@email.com"
# Numeric gender codes mapped to their display labels.
GENDER_MAP = {
    0: "Unknown",
    1: "Male",
    2: "Female",
    3: "Nonbinary",
    9: "Not Determined",
}
# Reference instant captured once so that every bound derived from it
# is measured from the same moment.
NOW = datetime.now()
ONE_YEAR_AGO = NOW - timedelta(days=365)
SIXTY_DAYS_AGO = NOW - timedelta(days=60)
# Age bounds expressed in 365-day years (leap days are not counted).
EIGHTEEN_YEARS = timedelta(days=365) * 18
NINETY_YEARS = timedelta(days=365) * 90
# Generate unique names
unique_names = set()


def generate_unique_name(gender):
    """Return a ``(first, last)`` name pair not handed out before.

    The first name comes from ``fake.first_name_male()`` when ``gender``
    is 1, from ``fake.first_name_female()`` when it is 2, and from
    ``fake.first_name()`` for any other value. Pairs are redrawn until
    one is absent from the module-level ``unique_names`` set; the
    accepted pair is recorded there before being returned.
    """
    # The gender does not change between retries, so choose the
    # first-name generator once up front.
    if gender == 1:
        draw_first = fake.first_name_male
    elif gender == 2:
        draw_first = fake.first_name_female
    else:
        draw_first = fake.first_name

    while True:
        # First name is drawn before the last name, as a tuple literal
        # evaluates left to right.
        candidate = (draw_first(), fake.last_name())
        if candidate in unique_names:
            continue
        unique_names.add(candidate)
        return candidate
# Generate a random date between two datetimes
def random_date(start, end):
    """Return a random moment between ``start`` and ``end`` as a string.

    The moment is chosen by ``fake.date_time_between`` and serialised
    with its ``isoformat()`` method.
    """
    picked = fake.date_time_between(start_date=start, end_date=end)
    return picked.isoformat()
# Generate profiles
profiles = []
profile_timestamps = []
for master_id in range(1, NUM_PROFILES + 1):
    gender_code = random.choice([0, 1, 2, 3, 9])
    given, family = generate_unique_name(gender_code)

    # Age is drawn in whole days, between 18 and 90 365-day years.
    age_in_days = random.randint(18 * 365, 90 * 365)
    born_on = (NOW - timedelta(days=age_in_days)).date()

    phone_number = fake.phone_number()

    # Profile timestamp lies between one year ago and sixty days ago.
    created_at = random_date(ONE_YEAR_AGO, SIXTY_DAYS_AGO)
    profile_timestamps.append(created_at)

    record = {
        "firstName": given,
        "lastName": family,
        "gender": gender_code,
        "birthDate": born_on.isoformat(),
        "primaryEmail": f"{given}{family}{EMAIL_DOMAIN}",
        "primaryPhone": phone_number,
        "timestamp": created_at,
        "masterDataId": str(master_id),
    }
    profiles.append(record)

# Save profiles.json
profiles_path = "profiles.json"
with open(profiles_path, "w") as f:
    json.dump(profiles, f, indent=4)
For the orders, the LLM provided us with the following piece of Python code, which generates them according to the specification.
# Generate orders based on profile segments
def _make_order(master_id, low, high, start, end):
    """Build a single order dict for the profile ``master_id``.

    The amount is drawn uniformly between ``low`` and ``high`` and rounded
    to two decimals, tax is 10% of that amount, and the timestamp is
    drawn by ``random_date(start, end)``. The product id is a random
    integer from 1 to 100 and the order id is a fresh UUID4; all numeric
    fields are emitted as strings.
    """
    amount = round(random.uniform(low, high), 2)
    tax = round(amount * 0.10, 2)
    timestamp = random_date(start, end)
    return {
        "currency": "USD",
        "tax": f"{tax:.2f}",
        "amount": f"{amount:.2f}",
        "productId": str(random.randint(1, 100)),
        "timestamp": timestamp,
        "id": str(uuid.uuid4()),
        "masterDataId": master_id,
    }


# Recent-order plan per profile segment. Each entry is
# (exclusive upper bound of idx / NUM_PROFILES, (min, max) order count,
#  (low, high) order amount). The first entry whose bound exceeds the
# profile's position applies; the last bound is infinite so it acts as
# the catch-all branch.
_RECENT_ORDER_SEGMENTS = [
    (0.10, (0, 0), (0, 0)),            # no recent orders
    (0.30, (1, 1), (10, 99)),          # 1 small order (<$100)
    (0.60, (4, 8), (10, 99)),          # 4-8 small orders (<$100)
    (0.80, (1, 1), (301, 600)),        # 1 large order (>$300)
    (float("inf"), (4, 8), (301, 600)),  # 4-8 large orders (>$300)
]

orders = []
for idx, profile in enumerate(profiles):
    master_id = profile["masterDataId"]
    profile_timestamp = datetime.fromisoformat(profile["timestamp"])

    # Old orders (5-10 per profile, $20-$80, timestamp between profile ts
    # and 60 days ago)
    for _ in range(random.randint(5, 10)):
        orders.append(
            _make_order(master_id, 20, 80, profile_timestamp, SIXTY_DAYS_AGO)
        )

    # Recent orders by segment: the profile's position in the list
    # decides which plan applies.
    segment = idx / NUM_PROFILES
    _, (min_count, max_count), (low, high) = next(
        plan for plan in _RECENT_ORDER_SEGMENTS if segment < plan[0]
    )
    # Only draw a random count when the plan really has a range, so that
    # fixed-count segments do not consume an extra random number.
    if min_count == max_count:
        count = min_count
    else:
        count = random.randint(min_count, max_count)
    for _ in range(count):
        orders.append(_make_order(master_id, low, high, SIXTY_DAYS_AGO, NOW))

# Save orders.json
orders_path = "orders.json"
with open(orders_path, "w") as f:
    json.dump(orders, f, indent=4)
The resulting contents of both profiles.json and orders.json can now be ingested into a new Business Unit in the SAP Customer Data Platform console.