From d6793e2572389e410ca7617a4a9d014eb9eb0c6a Mon Sep 17 00:00:00 2001 From: Mac DeCourcy Date: Mon, 6 Oct 2025 15:24:11 -0700 Subject: [PATCH] feat: add comprehensive error handling and validation - Add input validation for PDF files, height, and weight - Validate PDF file exists, is a file, and has .pdf extension - Check height range (36-96 inches) and weight range (50-500 lbs) - Add warnings for missing critical data - Improve user feedback with emojis and clear error messages - Better output formatting with file descriptions - Catch and handle PDF reading errors gracefully --- dexa_extract.py | 77 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 9 deletions(-) diff --git a/dexa_extract.py b/dexa_extract.py index f229b89..240976f 100644 --- a/dexa_extract.py +++ b/dexa_extract.py @@ -19,11 +19,17 @@ import json import math import os import re +import sys from datetime import datetime +from pathlib import Path import pdfplumber import pandas as pd +class ValidationError(Exception): + """Custom exception for validation errors""" + pass + def read_pdf_text(pdf_path): with pdfplumber.open(pdf_path) as pdf: pages_text = [page.extract_text() or "" for page in pdf.pages] @@ -324,17 +330,62 @@ def make_markdown(measured_date, d, derived, total_mass): return "\n".join(lines) def main(): - ap = argparse.ArgumentParser() - ap.add_argument("pdf", help="Path to DEXA report PDF") - ap.add_argument("--height-in", type=float, required=True, help="Height in inches (Imperial)") - ap.add_argument("--weight-lb", type=float, help="Body weight in lb (optional; used if DEXA total mass missing)") - ap.add_argument("--outdir", default="dexa_out", help="Output directory") + ap = argparse.ArgumentParser( + description="BodySpec Insights - Extract and analyze body composition data from BodySpec DEXA scan PDFs", + epilog="Example: python dexa_extract.py scan.pdf --height-in 74 --weight-lb 212 --outdir ./data/results" + ) + ap.add_argument("pdf", help="Path to BodySpec DEXA report PDF") + ap.add_argument("--height-in", type=float, required=True, help="Height in inches (e.g., 6'2\" = 74)") + ap.add_argument("--weight-lb", type=float, help="Body weight in lbs (optional; used if DEXA total mass missing)") + ap.add_argument("--outdir", default="dexa_out", help="Output directory (default: dexa_out)") args = ap.parse_args() - ensure_outdir(args.outdir) + # Validate PDF file exists + pdf_file = Path(args.pdf) + if not pdf_file.exists(): + print(f"❌ Error: PDF file not found: {args.pdf}", file=sys.stderr) + sys.exit(1) + if not pdf_file.is_file(): + print(f"❌ Error: Path is not a file: {args.pdf}", file=sys.stderr) + sys.exit(1) + if pdf_file.suffix.lower() != '.pdf': + print(f"❌ Error: File is not a PDF: {args.pdf}", file=sys.stderr) + sys.exit(1) - d = parse_dexa_pdf(args.pdf) + # Validate height + if args.height_in < 36 or args.height_in > 96: + print(f"❌ Error: Height seems unrealistic: {args.height_in} inches (expected 36-96 inches / 3'-8')", file=sys.stderr) + sys.exit(1) + # Validate weight if provided + if args.weight_lb is not None and (args.weight_lb < 50 or args.weight_lb > 500): + print(f"❌ Error: Weight seems unrealistic: {args.weight_lb} lbs (expected 50-500 lbs)", file=sys.stderr) + sys.exit(1) + + try: + ensure_outdir(args.outdir) + except PermissionError: + print(f"❌ Error: Cannot create output directory: {args.outdir} (permission denied)", file=sys.stderr) + sys.exit(1) + + print(f"📄 Reading PDF: {args.pdf}") + + try: + d = parse_dexa_pdf(args.pdf) + except Exception as e: + print(f"❌ Error reading PDF: {e}", file=sys.stderr) + print("This tool is specifically designed for BodySpec PDF reports.", file=sys.stderr) + sys.exit(1) + + # Check if critical data was extracted + if d.get("body_fat_percent") is None or d.get("total_mass_lb") is None: + print("⚠️ Warning: Missing critical data from PDF. This may not be a BodySpec report.", file=sys.stderr) + if d.get("body_fat_percent") is None: + print(" - Body Fat % not found", file=sys.stderr) + if d.get("total_mass_lb") is None: + print(" - Total Mass not found", file=sys.stderr) + + print("📊 Computing derived metrics...") measured_date_raw = d.get("measured_date") or datetime.now().strftime("%m/%d/%Y") measured_date = convert_date_to_iso(measured_date_raw) total_mass, derived = compute_derived(d, height_in=args.height_in, weight_lb=args.weight_lb) @@ -490,8 +541,16 @@ def main(): md_text = make_markdown(measured_date, d, derived, total_mass) append_markdown(os.path.join(args.outdir, "summary.md"), md_text) - print(f"Wrote files to: {args.outdir}") - print("Files: overall.csv, regional.csv, muscle_balance.csv, overall.json, summary.md") + print(f"\n✅ Success! Wrote files to: {args.outdir}") + print(" 📁 Files created:") + print(" - overall.csv (time-series data)") + print(" - regional.csv (body composition by region)") + print(" - muscle_balance.csv (left/right symmetry)") + print(" - overall.json (structured data)") + print(" - summary.md (readable report)") + print(f"\n 📈 Scan date: {measured_date}") + print(f" 💪 Body fat: {d.get('body_fat_percent')}%") + print(f" 🏋️ FFMI: {derived.get('ffmi')}") if __name__ == "__main__": main()