feat: add comprehensive error handling and validation

- Add input validation for PDF files, height, and weight
- Validate PDF file exists, is a file, and has .pdf extension
- Check height range (36-96 inches) and weight range (50-500 lbs)
- Warn on stderr when body fat % or total mass cannot be extracted from the PDF
- Improve user feedback with emojis and clear error messages
- Better output formatting with file descriptions
- Catch PDF parsing errors and exit with a clear error message instead of a traceback
This commit is contained in:
Mac DeCourcy 2025-10-06 15:24:11 -07:00
parent c7d0255f61
commit d6793e2572

View file

@ -19,11 +19,17 @@ import json
import math
import os
import re
import sys
from datetime import datetime
from pathlib import Path
import pdfplumber
import pandas as pd
class ValidationError(Exception):
    """Exception type representing a validation failure.

    Adds no behavior of its own: construction, ``args`` and ``str()`` are
    inherited unchanged from ``Exception``.
    """
def read_pdf_text(pdf_path):
with pdfplumber.open(pdf_path) as pdf:
pages_text = [page.extract_text() or "" for page in pdf.pages]
@ -324,17 +330,62 @@ def make_markdown(measured_date, d, derived, total_mass):
return "\n".join(lines)
def main():
ap = argparse.ArgumentParser()
ap.add_argument("pdf", help="Path to DEXA report PDF")
ap.add_argument("--height-in", type=float, required=True, help="Height in inches (Imperial)")
ap.add_argument("--weight-lb", type=float, help="Body weight in lb (optional; used if DEXA total mass missing)")
ap.add_argument("--outdir", default="dexa_out", help="Output directory")
ap = argparse.ArgumentParser(
description="BodySpec Insights - Extract and analyze body composition data from BodySpec DEXA scan PDFs",
epilog="Example: python dexa_extract.py scan.pdf --height-in 74 --weight-lb 212 --outdir ./data/results"
)
ap.add_argument("pdf", help="Path to BodySpec DEXA report PDF")
ap.add_argument("--height-in", type=float, required=True, help="Height in inches (e.g., 6'2\" = 74)")
ap.add_argument("--weight-lb", type=float, help="Body weight in lbs (optional; used if DEXA total mass missing)")
ap.add_argument("--outdir", default="dexa_out", help="Output directory (default: dexa_out)")
args = ap.parse_args()
ensure_outdir(args.outdir)
# Validate PDF file exists
pdf_file = Path(args.pdf)
if not pdf_file.exists():
print(f"❌ Error: PDF file not found: {args.pdf}", file=sys.stderr)
sys.exit(1)
if not pdf_file.is_file():
print(f"❌ Error: Path is not a file: {args.pdf}", file=sys.stderr)
sys.exit(1)
if pdf_file.suffix.lower() != '.pdf':
print(f"❌ Error: File is not a PDF: {args.pdf}", file=sys.stderr)
sys.exit(1)
d = parse_dexa_pdf(args.pdf)
# Validate height
if args.height_in < 36 or args.height_in > 96:
print(f"❌ Error: Height seems unrealistic: {args.height_in} inches (expected 36-96 inches / 3'-8')", file=sys.stderr)
sys.exit(1)
# Validate weight if provided
if args.weight_lb is not None and (args.weight_lb < 50 or args.weight_lb > 500):
print(f"❌ Error: Weight seems unrealistic: {args.weight_lb} lbs (expected 50-500 lbs)", file=sys.stderr)
sys.exit(1)
try:
ensure_outdir(args.outdir)
except PermissionError:
print(f"❌ Error: Cannot create output directory: {args.outdir} (permission denied)", file=sys.stderr)
sys.exit(1)
print(f"📄 Reading PDF: {args.pdf}")
try:
d = parse_dexa_pdf(args.pdf)
except Exception as e:
print(f"❌ Error reading PDF: {e}", file=sys.stderr)
print("This tool is specifically designed for BodySpec PDF reports.", file=sys.stderr)
sys.exit(1)
# Check if critical data was extracted
if d.get("body_fat_percent") is None or d.get("total_mass_lb") is None:
print("⚠️ Warning: Missing critical data from PDF. This may not be a BodySpec report.", file=sys.stderr)
if d.get("body_fat_percent") is None:
print(" - Body Fat % not found", file=sys.stderr)
if d.get("total_mass_lb") is None:
print(" - Total Mass not found", file=sys.stderr)
print("📊 Computing derived metrics...")
measured_date_raw = d.get("measured_date") or datetime.now().strftime("%m/%d/%Y")
measured_date = convert_date_to_iso(measured_date_raw)
total_mass, derived = compute_derived(d, height_in=args.height_in, weight_lb=args.weight_lb)
@ -490,8 +541,16 @@ def main():
md_text = make_markdown(measured_date, d, derived, total_mass)
append_markdown(os.path.join(args.outdir, "summary.md"), md_text)
print(f"Wrote files to: {args.outdir}")
print("Files: overall.csv, regional.csv, muscle_balance.csv, overall.json, summary.md")
print(f"\n✅ Success! Wrote files to: {args.outdir}")
print(" 📁 Files created:")
print(" - overall.csv (time-series data)")
print(" - regional.csv (body composition by region)")
print(" - muscle_balance.csv (left/right symmetry)")
print(" - overall.json (structured data)")
print(" - summary.md (readable report)")
print(f"\n 📈 Scan date: {measured_date}")
print(f" 💪 Body fat: {d.get('body_fat_percent')}%")
print(f" 🏋️ FFMI: {derived.get('ffmi')}")
# Entry-point guard: call main() only when this module is executed as a
# script, so importing the module does not trigger the CLI.
if __name__ == "__main__":
    main()