Back to Blog
API & Developer Guides

How to Batch Enrich Thousands of Leads Programmatically

Published February 11, 2026

When You Have Thousands of Leads

You have a CSV with 5,000 business websites and you need emails for all of them. Doing this manually is impossible. Doing it with individual API calls is slow. The Easy Email Finder /enrich-batch endpoint lets you process up to 20 websites per call, making high-volume enrichment practical and efficient.

The Math

With the /enrich-batch endpoint processing 20 websites per call and a rate limit of 10 enrichment calls per minute, you can process approximately 200 websites per minute or 12,000 per hour. For 5,000 websites, expect a total processing time of about 25 minutes and a cost of up to $1,250 at $0.25 per email (less in practice, since not every website yields an email).

The Batch Processing Script

import os
import csv
import json
import time
import requests
from datetime import datetime
from dotenv import load_dotenv

load_dotenv()  # pull EEF_API_KEY from a local .env file into the environment

# API key is read from the environment so it never lives in source control.
API_KEY = os.getenv("EEF_API_KEY")
BASE = "https://easyemailfinder.com/api/v1"
HEADERS = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json"
}

BATCH_SIZE = 20  # max websites per /enrich-batch call (API limit per the guide)
DELAY_BETWEEN_BATCHES = 7  # seconds; keeps us under ~10 enrichment calls/minute

def load_websites(input_file: str) -> list:
    """Load website URLs from the "website" column of a CSV file.

    Rows with an empty value or a value that is not an http(s) URL are
    skipped. Duplicate URLs are removed (first occurrence wins) so the same
    website is never enriched -- and billed -- twice.

    Args:
        input_file: Path to a CSV file with a "website" column.

    Returns:
        De-duplicated list of URLs in first-seen order.
    """
    seen = {}  # dict preserves insertion order; used as an ordered set
    # newline="" is required by the csv module; utf-8 avoids locale surprises.
    with open(input_file, "r", newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            url = row.get("website", "").strip()
            # Tuple form rejects junk like "httpfoo" that bare "http" allowed.
            if url.startswith(("http://", "https://")):
                seen[url] = None
    return list(seen)

def enrich_batch_with_retry(websites: list, max_retries: int = 3) -> list:
    """POST a batch of websites to /enrich-batch, retrying transient failures.

    Rate-limit (429) responses are waited out per the Retry-After header and
    do NOT count against *max_retries* -- only timeouts and request errors do.
    (The original burned a retry per 429, so sustained rate limiting silently
    dropped the whole batch.)

    Args:
        websites: URLs to enrich (up to BATCH_SIZE per the API limit).
        max_retries: Attempts allowed for genuine failures.

    Returns:
        List of result dicts, or [] on insufficient credits (402) or after
        exhausting all retries.
    """
    attempt = 0
    while attempt < max_retries:
        try:
            resp = requests.post(
                f"{BASE}/enrich-batch",
                headers=HEADERS,
                json={"websites": websites},
                timeout=120,
            )

            if resp.status_code == 429:
                # Respect the server's pacing; this is not a failure.
                retry_after = int(resp.headers.get("Retry-After", 60))
                print(f"  Rate limited. Waiting {retry_after}s...")
                time.sleep(retry_after)
                continue

            if resp.status_code == 402:
                # Out of credits -- retrying cannot help.
                print("  Insufficient credits. Stopping.")
                return []

            resp.raise_for_status()
            return resp.json().get("results", [])

        # Timeout is a subclass of RequestException, so it must come first.
        except requests.exceptions.Timeout:
            attempt += 1
            print(f"  Timeout on attempt {attempt}")
            if attempt < max_retries:  # don't sleep after the final attempt
                time.sleep(10)
        except requests.exceptions.RequestException as e:
            attempt += 1
            print(f"  Error on attempt {attempt}: {e}")
            if attempt < max_retries:
                time.sleep(2 ** (attempt - 1))  # 1s, 2s, 4s backoff

    print(f"  Failed after {max_retries} attempts")
    return []

def save_checkpoint(processed: int, results: list, checkpoint_file: str):
    """Persist enrichment progress as JSON so an interrupted run can resume.

    Args:
        processed: Number of websites handled so far.
        results: All result dicts accumulated so far.
        checkpoint_file: Destination path for the JSON checkpoint.
    """
    payload = {"processed": processed, "results": results}
    with open(checkpoint_file, "w") as handle:
        json.dump(payload, handle)

def load_checkpoint(checkpoint_file: str):
    """Read saved progress from *checkpoint_file*.

    Returns:
        Tuple of (websites processed so far, accumulated results), or
        (0, []) when no checkpoint exists -- i.e. a fresh run.
    """
    if not os.path.exists(checkpoint_file):
        return 0, []
    with open(checkpoint_file, "r") as handle:
        state = json.load(handle)
    return state["processed"], state["results"]

def run_batch_enrichment(input_file: str, output_file: str):
    """Enrich every website in *input_file* and write a CSV to *output_file*.

    Progress is checkpointed every 5 batches; re-running with the same input
    file resumes from the last checkpoint instead of re-paying for websites
    that were already enriched.

    Args:
        input_file: CSV with a "website" column.
        output_file: Destination CSV for the enriched rows.
    """
    # Derive the checkpoint name from the input file, NOT a per-run
    # timestamp: with a timestamped name, a restarted run could never find
    # the previous checkpoint, so resuming was impossible.
    input_stem = os.path.splitext(os.path.basename(input_file))[0]
    checkpoint_file = f"checkpoint_{input_stem}.json"

    # Load websites
    all_websites = load_websites(input_file)
    print(f"Loaded {len(all_websites)} websites from {input_file}")

    # Check balance (best effort -- a balance hiccup shouldn't abort the run)
    try:
        bal_resp = requests.get(f"{BASE}/balance", headers=HEADERS, timeout=30)
        credits = bal_resp.json().get("credits", 0)
    except requests.exceptions.RequestException:
        credits = 0
    print(f"Available credits: {credits}")
    print(f"Maximum cost: {len(all_websites) * 0.25:.2f} USD")

    if credits < 10:
        print("Warning: Very low credits. Consider purchasing more first.")

    # Load checkpoint if resuming
    start_idx, all_results = load_checkpoint(checkpoint_file)
    if start_idx > 0:
        print(f"Resuming from website #{start_idx}")

    # Process in batches
    total = len(all_websites)
    emails_found = len([r for r in all_results if r.get("email")])
    start_time = time.time()

    for i in range(start_idx, total, BATCH_SIZE):
        batch = all_websites[i:i + BATCH_SIZE]
        batch_num = i // BATCH_SIZE + 1
        total_batches = (total + BATCH_SIZE - 1) // BATCH_SIZE

        print(f"\nBatch {batch_num}/{total_batches} ({i}/{total} websites)")
        results = enrich_batch_with_retry(batch)

        for r in results:
            all_results.append(r)
            if r.get("email"):
                emails_found += 1

        # Progress stats -- rate/ETA based on work done THIS run, so a
        # resumed run doesn't report an inflated rate for skipped websites.
        done_this_run = i + len(batch) - start_idx
        elapsed = time.time() - start_time
        rate = done_this_run / elapsed if elapsed > 0 else 0
        eta = (total - i - len(batch)) / rate if rate > 0 else 0
        print(f"  Emails found: {emails_found} | Rate: {rate:.1f} sites/sec | ETA: {eta/60:.1f} min")

        # Save checkpoint every 5 batches
        if batch_num % 5 == 0:
            save_checkpoint(i + len(batch), all_results, checkpoint_file)

        # Pace requests to stay under the API's enrichment rate limit.
        if i + BATCH_SIZE < total:
            time.sleep(DELAY_BETWEEN_BATCHES)

    # Write final output
    with open(output_file, "w", newline="") as f:
        fieldnames = ["website", "email", "name", "phone", "address"]
        writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(all_results)

    # Cleanup checkpoint -- the run completed, so there is nothing to resume.
    if os.path.exists(checkpoint_file):
        os.remove(checkpoint_file)

    elapsed = time.time() - start_time
    print(f"\n=== COMPLETE ===")
    print(f"Processed: {total} websites")
    print(f"Emails found: {emails_found}")
    if total:  # guard: the original divided by zero on an empty input file
        print(f"Hit rate: {emails_found/total*100:.1f}%")
    print(f"Time: {elapsed/60:.1f} minutes")
    print(f"Output: {output_file}")

if __name__ == "__main__":
    # Entry point: edit these filenames to point at your own CSV of websites.
    run_batch_enrichment("websites.csv", "enriched_results.csv")

Key Features of This Script

Checkpointing: The script saves progress every 5 batches. If it crashes or you stop it, you can resume from where you left off instead of re-processing (and re-paying for) websites you have already enriched.

Retry logic: Failed batches are retried up to 3 times with exponential backoff. Rate limit responses are handled by respecting the Retry-After header.

Credit awareness: The script checks your balance before starting and stops if it encounters a 402 (insufficient credits) response.

Optimizing for Cost

  • Deduplicate websites first: Remove duplicate URLs before processing to avoid paying twice for the same website
  • Pre-filter websites: Remove known non-business domains (gmail.com, facebook.com, etc.) that will never yield a business email
  • Start small: Run a test batch of 100 websites to gauge your hit rate before committing to thousands

For more cost optimization strategies, read our cost optimization guide.

Next Steps

Once you have your enriched CSV, you can import it directly into your CRM. See our HubSpot enrichment guide or CRM integration guide for next steps. Full API documentation is at easyemailfinder.com/developer/docs.

Ready to find business emails?

Try Easy Email Finder free — get 5 credits to start.

Start Finding Emails

Related Posts