feat: add comprehensive error handling and validation

- Add input validation for PDF files, height, and weight
- Validate PDF file exists, is a file, and has .pdf extension
- Check height range (36-96 inches) and weight range (50-500 lbs)
- Add warnings for missing critical data
- Improve user feedback with emojis and clear error messages
- Better output formatting with file descriptions
- Catch and handle PDF reading errors gracefully
This commit is contained in:
Mac DeCourcy 2025-10-06 15:24:11 -07:00
parent c7d0255f61
commit d6793e2572

View file

@@ -19,11 +19,17 @@ import json
import math import math
import os import os
import re import re
import sys
from datetime import datetime from datetime import datetime
from pathlib import Path
import pdfplumber import pdfplumber
import pandas as pd import pandas as pd
class ValidationError(Exception):
    """Custom exception for validation errors.

    Subclasses ``Exception`` without adding any state or behaviour, so it
    accepts the usual message argument and can be caught separately from
    unrelated failures.

    NOTE(review): the portions of ``main()`` visible in this change report
    invalid input by printing to stderr and calling ``sys.exit(1)``; nothing
    visible raises this exception yet -- confirm whether it is used elsewhere.
    """
def read_pdf_text(pdf_path): def read_pdf_text(pdf_path):
with pdfplumber.open(pdf_path) as pdf: with pdfplumber.open(pdf_path) as pdf:
pages_text = [page.extract_text() or "" for page in pdf.pages] pages_text = [page.extract_text() or "" for page in pdf.pages]
@@ -324,17 +330,62 @@ def make_markdown(measured_date, d, derived, total_mass):
return "\n".join(lines) return "\n".join(lines)
def main(): def main():
ap = argparse.ArgumentParser() ap = argparse.ArgumentParser(
ap.add_argument("pdf", help="Path to DEXA report PDF") description="BodySpec Insights - Extract and analyze body composition data from BodySpec DEXA scan PDFs",
ap.add_argument("--height-in", type=float, required=True, help="Height in inches (Imperial)") epilog="Example: python dexa_extract.py scan.pdf --height-in 74 --weight-lb 212 --outdir ./data/results"
ap.add_argument("--weight-lb", type=float, help="Body weight in lb (optional; used if DEXA total mass missing)") )
ap.add_argument("--outdir", default="dexa_out", help="Output directory") ap.add_argument("pdf", help="Path to BodySpec DEXA report PDF")
ap.add_argument("--height-in", type=float, required=True, help="Height in inches (e.g., 6'2\" = 74)")
ap.add_argument("--weight-lb", type=float, help="Body weight in lbs (optional; used if DEXA total mass missing)")
ap.add_argument("--outdir", default="dexa_out", help="Output directory (default: dexa_out)")
args = ap.parse_args() args = ap.parse_args()
ensure_outdir(args.outdir) # Validate PDF file exists
pdf_file = Path(args.pdf)
if not pdf_file.exists():
print(f"❌ Error: PDF file not found: {args.pdf}", file=sys.stderr)
sys.exit(1)
if not pdf_file.is_file():
print(f"❌ Error: Path is not a file: {args.pdf}", file=sys.stderr)
sys.exit(1)
if pdf_file.suffix.lower() != '.pdf':
print(f"❌ Error: File is not a PDF: {args.pdf}", file=sys.stderr)
sys.exit(1)
d = parse_dexa_pdf(args.pdf) # Validate height
if args.height_in < 36 or args.height_in > 96:
print(f"❌ Error: Height seems unrealistic: {args.height_in} inches (expected 36-96 inches / 3'-8')", file=sys.stderr)
sys.exit(1)
# Validate weight if provided
if args.weight_lb is not None and (args.weight_lb < 50 or args.weight_lb > 500):
print(f"❌ Error: Weight seems unrealistic: {args.weight_lb} lbs (expected 50-500 lbs)", file=sys.stderr)
sys.exit(1)
try:
ensure_outdir(args.outdir)
except PermissionError:
print(f"❌ Error: Cannot create output directory: {args.outdir} (permission denied)", file=sys.stderr)
sys.exit(1)
print(f"📄 Reading PDF: {args.pdf}")
try:
d = parse_dexa_pdf(args.pdf)
except Exception as e:
print(f"❌ Error reading PDF: {e}", file=sys.stderr)
print("This tool is specifically designed for BodySpec PDF reports.", file=sys.stderr)
sys.exit(1)
# Check if critical data was extracted
if d.get("body_fat_percent") is None or d.get("total_mass_lb") is None:
print("⚠️ Warning: Missing critical data from PDF. This may not be a BodySpec report.", file=sys.stderr)
if d.get("body_fat_percent") is None:
print(" - Body Fat % not found", file=sys.stderr)
if d.get("total_mass_lb") is None:
print(" - Total Mass not found", file=sys.stderr)
print("📊 Computing derived metrics...")
measured_date_raw = d.get("measured_date") or datetime.now().strftime("%m/%d/%Y") measured_date_raw = d.get("measured_date") or datetime.now().strftime("%m/%d/%Y")
measured_date = convert_date_to_iso(measured_date_raw) measured_date = convert_date_to_iso(measured_date_raw)
total_mass, derived = compute_derived(d, height_in=args.height_in, weight_lb=args.weight_lb) total_mass, derived = compute_derived(d, height_in=args.height_in, weight_lb=args.weight_lb)
@@ -490,8 +541,16 @@ def main():
md_text = make_markdown(measured_date, d, derived, total_mass) md_text = make_markdown(measured_date, d, derived, total_mass)
append_markdown(os.path.join(args.outdir, "summary.md"), md_text) append_markdown(os.path.join(args.outdir, "summary.md"), md_text)
print(f"Wrote files to: {args.outdir}") print(f"\n✅ Success! Wrote files to: {args.outdir}")
print("Files: overall.csv, regional.csv, muscle_balance.csv, overall.json, summary.md") print(" 📁 Files created:")
print(" - overall.csv (time-series data)")
print(" - regional.csv (body composition by region)")
print(" - muscle_balance.csv (left/right symmetry)")
print(" - overall.json (structured data)")
print(" - summary.md (readable report)")
print(f"\n 📈 Scan date: {measured_date}")
print(f" 💪 Body fat: {d.get('body_fat_percent')}%")
print(f" 🏋️ FFMI: {derived.get('ffmi')}")
if __name__ == "__main__": if __name__ == "__main__":
main() main()