feat: add comprehensive error handling and validation

- Add input validation for PDF files, height, and weight
- Validate PDF file exists, is a file, and has .pdf extension
- Check height range (36-96 inches) and weight range (50-500 lbs)
- Add warnings for missing critical data
- Improve user feedback with emojis and clear error messages
- Better output formatting with file descriptions
- Catch and handle PDF reading errors gracefully
2025-10-06 15:24:11 -07:00 · 2025-10-06 15:24:11 -07:00 · d6793e2572
commit d6793e2572
parent c7d0255f61
1 changed file with 68 additions and 9 deletions
--- a/dexa_extract.py
+++ b/dexa_extract.py
@@ -19,11 +19,17 @@ import json
 import math
 import os
 import re
 import sys
 from datetime import datetime
 from pathlib import Path
 import pdfplumber
 import pandas as pd
 class ValidationError(Exception):
    """Custom exception for validation errors"""
    pass
 def read_pdf_text(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        pages_text = [page.extract_text() or "" for page in pdf.pages]
@@ -324,17 +330,62 @@ def make_markdown(measured_date, d, derived, total_mass):
    return "\n".join(lines)
 def main():
-    ap = argparse.ArgumentParser()
+    ap = argparse.ArgumentParser(
-    ap.add_argument("pdf", help="Path to DEXA report PDF")
+        description="BodySpec Insights - Extract and analyze body composition data from BodySpec DEXA scan PDFs",
-    ap.add_argument("--height-in", type=float, required=True, help="Height in inches (Imperial)")
+        epilog="Example: python dexa_extract.py scan.pdf --height-in 74 --weight-lb 212 --outdir ./data/results"
-    ap.add_argument("--weight-lb", type=float, help="Body weight in lb (optional; used if DEXA total mass missing)")
+    )
-    ap.add_argument("--outdir", default="dexa_out", help="Output directory")
+    ap.add_argument("pdf", help="Path to BodySpec DEXA report PDF")
    ap.add_argument("--height-in", type=float, required=True, help="Height in inches (e.g., 6'2\" = 74)")
    ap.add_argument("--weight-lb", type=float, help="Body weight in lbs (optional; used if DEXA total mass missing)")
    ap.add_argument("--outdir", default="dexa_out", help="Output directory (default: dexa_out)")
    args = ap.parse_args()
-    ensure_outdir(args.outdir)
+    # Validate PDF file exists
    pdf_file = Path(args.pdf)
    if not pdf_file.exists():
        print(f"❌ Error: PDF file not found: {args.pdf}", file=sys.stderr)
        sys.exit(1)
    if not pdf_file.is_file():
        print(f"❌ Error: Path is not a file: {args.pdf}", file=sys.stderr)
        sys.exit(1)
    if pdf_file.suffix.lower() != '.pdf':
        print(f"❌ Error: File is not a PDF: {args.pdf}", file=sys.stderr)
        sys.exit(1)
-    d = parse_dexa_pdf(args.pdf)
+    # Validate height
    if args.height_in < 36 or args.height_in > 96:
        print(f"❌ Error: Height seems unrealistic: {args.height_in} inches (expected 36-96 inches / 3'-8')", file=sys.stderr)
        sys.exit(1)
    # Validate weight if provided
    if args.weight_lb is not None and (args.weight_lb < 50 or args.weight_lb > 500):
        print(f"❌ Error: Weight seems unrealistic: {args.weight_lb} lbs (expected 50-500 lbs)", file=sys.stderr)
        sys.exit(1)
    try:
        ensure_outdir(args.outdir)
    except PermissionError:
        print(f"❌ Error: Cannot create output directory: {args.outdir} (permission denied)", file=sys.stderr)
        sys.exit(1)
    print(f"📄 Reading PDF: {args.pdf}")
    try:
        d = parse_dexa_pdf(args.pdf)
    except Exception as e:
        print(f"❌ Error reading PDF: {e}", file=sys.stderr)
        print("This tool is specifically designed for BodySpec PDF reports.", file=sys.stderr)
        sys.exit(1)
    # Check if critical data was extracted
    if d.get("body_fat_percent") is None or d.get("total_mass_lb") is None:
        print("⚠️  Warning: Missing critical data from PDF. This may not be a BodySpec report.", file=sys.stderr)
        if d.get("body_fat_percent") is None:
            print("   - Body Fat % not found", file=sys.stderr)
        if d.get("total_mass_lb") is None:
            print("   - Total Mass not found", file=sys.stderr)
    print("📊 Computing derived metrics...")
    measured_date_raw = d.get("measured_date") or datetime.now().strftime("%m/%d/%Y")
    measured_date = convert_date_to_iso(measured_date_raw)
    total_mass, derived = compute_derived(d, height_in=args.height_in, weight_lb=args.weight_lb)
@@ -490,8 +541,16 @@ def main():
    md_text = make_markdown(measured_date, d, derived, total_mass)
    append_markdown(os.path.join(args.outdir, "summary.md"), md_text)
-    print(f"Wrote files to: {args.outdir}")
+    print(f"\n✅ Success! Wrote files to: {args.outdir}")
-    print("Files: overall.csv, regional.csv, muscle_balance.csv, overall.json, summary.md")
+    print("   📁 Files created:")
    print("      - overall.csv (time-series data)")
    print("      - regional.csv (body composition by region)")
    print("      - muscle_balance.csv (left/right symmetry)")
    print("      - overall.json (structured data)")
    print("      - summary.md (readable report)")
    print(f"\n   📈 Scan date: {measured_date}")
    print(f"   💪 Body fat: {d.get('body_fat_percent')}%")
    print(f"   🏋️  FFMI: {derived.get('ffmi')}")
 if __name__ == "__main__":
    main()