commit c7d0255f6139e8500d4c15e202a52f7cff5c3d4e Author: Mac DeCourcy Date: Mon Oct 6 14:32:25 2025 -0700 Initial commit: BodySpec Insights - comprehensive DEXA analytics tool diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..404841d --- /dev/null +++ b/.gitattributes @@ -0,0 +1,14 @@ +# Normalize line endings +* text=auto + +# Python files +*.py text eol=lf + +# Shell scripts +*.sh text eol=lf + +# Data files +*.csv text eol=lf +*.json text eol=lf +*.md text eol=lf + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e6c34bb --- /dev/null +++ b/.gitignore @@ -0,0 +1,40 @@ +# Python virtual environment +venv/ +env/ +ENV/ +*.pyc +__pycache__/ +*.py[cod] +*$py.class + +# PDF files (sensitive health data) +*.pdf + +# Results and output files (exclude directories but allow README.md) +dexa_out/ +data/pdfs/*.pdf +data/results/*.csv +data/results/*.json +*.csv +*.json + +# Exclude generated markdown but keep README files +summary.md +!README.md + +# IDE and editor files +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Logs +*.log + +# Distribution / packaging +dist/ +build/ +*.egg-info/ + diff --git a/README.md b/README.md new file mode 100644 index 0000000..e3dbb3c --- /dev/null +++ b/README.md @@ -0,0 +1,304 @@ +# BodySpec Insights + +**Body composition analytics for BodySpec DEXA scan PDFs** + +A Python tool to extract and analyze body composition data from BodySpec DEXA scan reports. Automatically parses measurements, computes 30+ derived metrics, and tracks your progress over time. + +> **Note:** This tool is specifically designed for BodySpec PDF reports and may not work with other DEXA providers (DexaFit, Hologic, etc.). 
+ +## Features + +- 📊 **Comprehensive Data Extraction**: Body fat %, lean mass, bone density, regional composition, and more +- 🧮 **Derived Metrics**: Automatically calculates FFMI, FMI, LSTI, SMI, and other body composition indices +- 📁 **Multiple Output Formats**: CSV (for spreadsheet analysis), JSON (for programmatic use), and Markdown (for readable summaries) +- 📈 **Time-Series Ready**: Append mode allows tracking progress across multiple scans +- 🎯 **Regional Analysis**: Breaks down composition by Arms, Legs, Trunk, Android, and Gynoid regions +- ⚖️ **Muscle Balance**: Tracks left/right limb symmetry + +## Installation + +### Prerequisites + +- Python 3.11 or higher (required by the pinned versions in `requirements.txt`) +- pip (Python package manager) + +### Setup + +1. **Clone or download this repository** + +2. **Create a virtual environment** (recommended): + ```bash + python3 -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + ``` + +3. **Install dependencies**: + ```bash + pip install -r requirements.txt + ``` + + The script requires: + - `pdfplumber` - PDF text extraction + - `pandas` - Data manipulation and CSV handling + +## Usage + +### Basic Command + +```bash +python dexa_extract.py <PDF_PATH> --height-in <HEIGHT_IN> [--weight-lb <WEIGHT_LB>] [--outdir <OUTPUT_DIR>] +``` + +### Required Arguments + +- `PDF_PATH` - Path to your DEXA scan PDF report +- `--height-in` - Your height in inches + +### Optional Arguments + +- `--weight-lb` - Body weight in pounds (used as fallback if PDF doesn't contain total mass) +- `--outdir` - Output directory for results (default: `dexa_out`) + +### Examples + +**Single scan:** +```bash +python dexa_extract.py data/pdfs/2025-10-06-scan.pdf --height-in 74 --weight-lb 212 --outdir data/results +``` + +**Process multiple scans** (appends to existing files): +```bash +python dexa_extract.py data/pdfs/scan-2025-01.pdf --height-in 74 --outdir data/results +python dexa_extract.py data/pdfs/scan-2025-04.pdf --height-in 74 --outdir data/results +python dexa_extract.py 
data/pdfs/scan-2025-10.pdf --height-in 74 --outdir data/results +``` + +**Height conversion** (for reference): +- 5'8" = 68 inches +- 5'10" = 70 inches +- 6'0" = 72 inches +- 6'2" = 74 inches +- 6'4" = 76 inches + +## Directory Structure + +``` +bodyspec-insights/ +├── dexa_extract.py # Main extraction script +├── requirements.txt # Python dependencies +├── README.md # This file +├── .gitignore # Git ignore patterns +├── data/ # Data directory (gitignored) +│ ├── pdfs/ # Place your BodySpec PDF reports here +│ └── results/ # Results will be saved here +└── venv/ # Virtual environment (gitignored) +``` + +## Output Files + +The script generates 5 files in the specified output directory: + +### 1. `overall.csv` +Time-series data with one row per scan. Includes all primary metrics and derived indices. + +**Columns:** +- `MeasuredDate` - Scan date (YYYY-MM-DD) +- `Height_in`, `Height_ft_in` - Height measurements +- `Weight_lb_Input`, `DEXA_TotalMass_lb`, `Adjusted_Body_Weight_lb` - Weight data +- `BodyFat_percent`, `LeanMass_percent` - Body composition percentages +- `FatMass_lb`, `LeanSoftTissue_lb`, `BoneMineralContent_lb`, `FatFreeMass_lb` - Mass measurements +- `BMI`, `FFMI`, `FMI`, `LST_Index`, `SMI`, `BMDI` - Normalized indices +- `ALM_lb` - Appendicular lean mass (arms + legs) +- `VAT_Mass_lb`, `VAT_Volume_in3`, `VAT_Index` - Visceral adipose tissue +- `Android_percent`, `Gynoid_percent`, `AG_Ratio` - Fat distribution +- `Trunk_to_Limb_Fat_Ratio` - Central adiposity indicator +- `Arms_Lean_pct`, `Legs_Lean_pct`, `Trunk_Lean_pct` - Regional lean mass distribution +- `Arm_Symmetry_Index`, `Leg_Symmetry_Index` - Left/right balance (50% = perfect) +- `RMR_cal_per_day` - Resting metabolic rate + +### 2. `regional.csv` +Regional body composition breakdown (Arms, Legs, Trunk, Android, Gynoid, Total). + +**Columns:** Region, FatPercent, TotalMass_lb, FatTissue_lb, LeanTissue_lb, BMC_lb + +### 3. 
`muscle_balance.csv` +Left/right limb comparison for tracking muscle symmetry. + +**Regions:** Arms Total, Right Arm, Left Arm, Legs Total, Right Leg, Left Leg + +### 4. `overall.json` +Structured JSON format containing all extracted data in a hierarchical format. + +**Structure:** +```json +{ + "measured_date": "2025-10-06", + "anthropometrics": { ... }, + "composition": { ... }, + "regional": [ ... ], + "muscle_balance": [ ... ], + "supplemental": { ... }, + "bone_density": { ... } +} +``` + +### 5. `summary.md` +Human-readable Markdown summary of the scan results. + +## Extracted Metrics + +### Primary Measurements +- **Body Fat %** - Percentage of body weight that is fat +- **Lean Mass %** - Percentage of body weight that is lean tissue (complement of body fat %) +- **Fat Mass** - Total weight of fat tissue +- **Lean Soft Tissue** - Muscle, organs, and other non-bone lean tissue +- **Bone Mineral Content (BMC)** - Total bone mineral weight +- **Fat-Free Mass** - Total body weight minus fat mass + +### Derived Indices (Height-Normalized) +- **BMI** - Body Mass Index (standard weight-to-height ratio) +- **FFMI** - Fat-Free Mass Index (normalized muscle mass) +- **FMI** - Fat Mass Index (normalized fat mass) +- **LSTI** - Lean Soft Tissue Index (height-adjusted lean tissue) +- **SMI** - Skeletal Muscle Index (height-adjusted appendicular lean mass) +- **BMDI** - Bone Mineral Density Index (height-adjusted bone content) +- **VAT Index** - Visceral fat normalized by height + +### Regional Analysis +- **Android** - Abdominal/trunk fat (higher risk area) +- **Gynoid** - Hip/thigh fat (lower risk area) +- **A/G Ratio** - Android-to-Gynoid ratio (cardiovascular risk indicator) +- **Trunk-to-Limb Fat Ratio** - Ratio of trunk fat to limb fat (central adiposity indicator) +- **Lean Mass Distribution** - Percentage of total lean mass in arms, legs, and trunk + +### Symmetry & Balance +- **Arm Symmetry Index** - Right-to-left arm lean mass balance (50% = perfect symmetry) +- 
**Leg Symmetry Index** - Right-to-left leg lean mass balance (50% = perfect symmetry) + +### Supplemental +- **VAT (Visceral Adipose Tissue)** - Deep abdominal fat around organs +- **RMR (Resting Metabolic Rate)** - Estimated daily calorie burn at rest +- **Adjusted Body Weight** - Clinical weight used for medication dosing and nutrition calculations +- **Bone Density** - BMD (g/cm²), T-score, Z-score + +## Understanding Your Results + +### Body Fat % Ranges (by sex) + +**Men:** +- Athletes: 6-13% +- Fitness: 14-17% +- Average: 18-24% +- Above Average: 25%+ + +**Women:** +- Athletes: 14-20% +- Fitness: 21-24% +- Average: 25-31% +- Above Average: 32%+ + +### FFMI (Fat-Free Mass Index) + +Normalized measure of muscle mass: +- **16-17**: Below average +- **18-20**: Average/athletic +- **21-23**: Above average/very muscular +- **24-25**: Elite natural bodybuilder range +- **26+**: Typically requires enhanced training + +### A/G Ratio (Android/Gynoid Ratio) + +Fat distribution indicator: +- **< 1.0**: Lower risk (more fat in hips/thighs) +- **1.0-1.5**: Moderate +- **> 1.5**: Higher risk (more abdominal fat) + +### Trunk-to-Limb Fat Ratio + +Central adiposity indicator: +- **< 1.0**: More peripheral fat distribution (healthier) +- **1.0-1.5**: Moderate central fat +- **> 1.5**: High central fat (increased health risk) + +### Symmetry Indices + +Muscle balance between left and right sides: +- **50%**: Perfect symmetry +- **48-52%**: Normal range (slight asymmetry is common) +- **< 48% or > 52%**: Notable imbalance (may indicate injury, overuse, or compensation patterns) + +### VAT Index + +Visceral fat normalized by height: +- **< 0.30**: Low visceral fat +- **0.30-0.50**: Moderate +- **> 0.50**: High (increased metabolic risk) + +### Lean Mass Distribution + +Typical ranges for lean tissue distribution: +- **Arms**: 13-16% of total lean mass +- **Legs**: 32-38% of total lean mass +- **Trunk**: 46-54% of total lean mass + +Higher trunk percentage may indicate 
good core development, while higher leg percentage suggests strong lower body development. + +## Tracking Progress + +The script appends data to existing CSV files, making it easy to track changes over time: + +1. Place all your DEXA PDFs in `data/pdfs/` +2. Process each one with the same output directory +3. Open `overall.csv` in Excel/Google Sheets to visualize trends +4. Compare `muscle_balance.csv` to track left/right symmetry improvements + +## Privacy & Security + +⚠️ **Important:** DEXA reports contain protected health information (PHI). + +- All PDF files and results are excluded from git via `.gitignore` +- Keep your `data/` directory private +- Don't commit PDFs or output files to version control +- Consider encrypting your data directory if sharing the repository + +## Troubleshooting + +### "Total mass is missing" error +- Ensure your PDF contains a SUMMARY RESULTS table +- Provide `--weight-lb` as a fallback + +### No data extracted or null values +- **Verify your PDF is from BodySpec** - This tool only works with BodySpec reports +- Ensure the PDF is text-based, not a scanned image +- Check that your BodySpec report includes the "SUMMARY RESULTS" table +- Open an issue with a sample (redacted) PDF for support + +### Import errors +- Ensure virtual environment is activated: `source venv/bin/activate` +- Reinstall dependencies: `pip install -r requirements.txt` + +## Contributing + +Contributions welcome! Areas for improvement: + +- [ ] Enhanced error handling and validation +- [ ] Automatic height detection from PDF +- [ ] Data visualization/plotting features +- [ ] GUI interface for non-technical users +- [ ] Batch processing multiple PDFs at once +- [ ] Export to additional formats (Excel, SQLite, etc.) + +## License + +MIT License - feel free to use and modify for personal or commercial use. + +## Acknowledgments + +Built for personal body composition tracking with BodySpec scans. 
Thanks to BodySpec for providing detailed, consistent DEXA scan reports that make automated analysis possible. + +**Disclaimer:** This is an unofficial, independent tool and is not affiliated with or endorsed by BodySpec. + +--- + +**Questions or issues?** Open an issue on GitHub or contact the maintainer. + diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000..e2e200e --- /dev/null +++ b/data/.gitkeep @@ -0,0 +1,3 @@ +# This file ensures the data directory structure is preserved in git +# while keeping the actual PDF and results files private (see .gitignore) + diff --git a/data/pdfs/.gitkeep b/data/pdfs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/pdfs/README.md b/data/pdfs/README.md new file mode 100644 index 0000000..4239076 --- /dev/null +++ b/data/pdfs/README.md @@ -0,0 +1,17 @@ +# PDFs Directory + +Place your BodySpec DEXA scan PDF reports in this directory. + +## Example + +``` +data/pdfs/ +├── 2025-01-15-scan.pdf +├── 2025-04-20-scan.pdf +└── 2025-10-06-scan.pdf +``` + +## Note + +⚠️ **PDF files are gitignored** - They won't be committed to version control to protect your personal health information. + diff --git a/data/results/.gitkeep b/data/results/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/data/results/README.md b/data/results/README.md new file mode 100644 index 0000000..85e71f3 --- /dev/null +++ b/data/results/README.md @@ -0,0 +1,18 @@ +# Results Directory + +Your extracted DEXA data will be saved here when you run the script with `--outdir data/results` (the script's own default is `dexa_out`). 
+ +## Output Files + +When you run the extraction script with `--outdir data/results`, you'll get: + +- `overall.csv` - Time-series data (one row per scan) +- `regional.csv` - Regional body composition +- `muscle_balance.csv` - Left/right limb comparison +- `overall.json` - Structured JSON format +- `summary.md` - Human-readable summary + +## Note + +⚠️ **Result files are gitignored** - They contain your personal health data and won't be committed to version control. + diff --git a/dexa_extract.py b/dexa_extract.py new file mode 100644 index 0000000..f229b89 --- /dev/null +++ b/dexa_extract.py @@ -0,0 +1,497 @@ +#!/usr/bin/env python3 +""" +BodySpec Insights - Body composition analytics for BodySpec DEXA scan PDFs + +Extract measurements from BodySpec DEXA reports, compute 30+ derived metrics, +and output structured data for progress tracking. + +Usage: + python dexa_extract.py /path/to/bodyspec-report.pdf --height-in 74 --weight-lb 212 --outdir ./data/results + +Note: This script is specifically designed for BodySpec PDF reports. 
# Requires:
#     pip install pdfplumber pandas

import argparse
import json
import math
import os
import re
from datetime import datetime

import pandas as pd

# Pounds per kilogram; the ideal-body-weight estimate below is defined in kg.
LB_PER_KG = 2.20462

# One captured unsigned decimal token, e.g. "27.8".
_NUM = r"([\d.]+)"

# SUMMARY RESULTS row, e.g. "10/6/2025 27.8% 211.6 58.8 145.4 7.4".
_SUMMARY_ROW = re.compile(
    r"(\d{1,2}/\d{1,2}/\d{4})\s+([\d.]+)%\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)"
)

# Column order of overall.csv (one row per scan).
OVERALL_COLUMNS = [
    "MeasuredDate", "Height_in", "Height_ft_in", "Weight_lb_Input", "DEXA_TotalMass_lb", "BodyFat_percent",
    "LeanMass_percent", "FatMass_lb", "LeanSoftTissue_lb", "BoneMineralContent_lb", "FatFreeMass_lb",
    "BMI", "FFMI", "FMI", "LST_Index", "ALM_lb", "SMI", "VAT_Mass_lb", "VAT_Volume_in3", "VAT_Index",
    "BMDI", "Android_percent", "Gynoid_percent", "AG_Ratio", "Trunk_to_Limb_Fat_Ratio",
    "Arms_Lean_pct", "Legs_Lean_pct", "Trunk_Lean_pct", "Arm_Symmetry_Index", "Leg_Symmetry_Index",
    "Adjusted_Body_Weight_lb", "RMR_cal_per_day",
]
REGIONAL_COLUMNS = ["Region", "FatPercent", "TotalMass_lb", "FatTissue_lb", "LeanTissue_lb", "BMC_lb"]
MUSCLE_BALANCE_COLUMNS = ["Region", "FatPercent", "TotalMass_lb", "FatMass_lb", "LeanMass_lb", "BMC_lb"]


def read_pdf_text(pdf_path):
    """Return the text of every page of ``pdf_path`` joined by newlines.

    Pages for which pdfplumber extracts no text contribute an empty string.
    """
    # Imported lazily so the pure parsing/metric helpers below can be imported
    # (and tested) without the PDF stack installed.
    import pdfplumber

    with pdfplumber.open(pdf_path) as pdf:
        return "\n".join(page.extract_text() or "" for page in pdf.pages)


def _to_float(token):
    """Parse a numeric token (ASCII or Unicode minus); return None if malformed."""
    try:
        return float(token.replace("\u2212", "-"))
    except ValueError:
        return None


def find_one(pattern, text, cast=float, flags=re.IGNORECASE):
    """Return group 1 of the first match of ``pattern`` in ``text``, converted.

    Thousands separators are stripped before ``cast`` is applied. Returns
    ``None`` when there is no match *or* when the captured token cannot be
    converted (e.g. ``[\\d.]+`` capturing "1.2.3"); extraction is best-effort
    and ``None`` is the file-wide "value not available" signal. A falsy
    ``cast`` returns the cleaned string.
    """
    match = re.search(pattern, text, flags)
    if not match:
        return None
    value = match.group(1).replace(",", "").strip()
    if not cast:
        return value
    try:
        return cast(value)
    except ValueError:
        return None


def convert_date_to_iso(date_str):
    """Convert MM/DD/YYYY to YYYY-MM-DD.

    Falsy input yields ``None``; input that is not a valid MM/DD/YYYY date is
    returned unchanged.
    """
    if not date_str:
        return None
    try:
        return datetime.strptime(date_str, "%m/%d/%Y").strftime("%Y-%m-%d")
    except (TypeError, ValueError):
        return date_str


def inches_to_ft_in(inches):
    """Convert inches to feet'inches" format (fractional inches are truncated)."""
    if inches is None:
        return None
    feet, remainder = divmod(inches, 12)
    return f"{int(feet)}'{int(remainder)}\""


def _parse_labeled_rows(text, labels, fields, first_suffix):
    """Parse "<label> n n n n n" table rows into ``{label: {field: float}}``.

    ``first_suffix`` is the regex fragment that follows the first number
    ("%" when the percent sign is mandatory, "%?" when optional). Labels that
    are absent, or whose numbers are malformed, are omitted from the result.
    """
    rest = r"\s+".join([_NUM] * (len(fields) - 1))
    rows = {}
    for label in labels:
        # \b keeps "Arms" from matching inside a longer word.
        pattern = rf"\b{re.escape(label)}\s+{_NUM}{first_suffix}\s+{rest}"
        match = re.search(pattern, text)
        if not match:
            continue
        values = [_to_float(token) for token in match.groups()]
        if any(v is None for v in values):
            continue
        rows[label] = dict(zip(fields, values))
    return rows


def parse_regional_table(text):
    """Parse the regional composition table.

    Example line: ``Arms 22.1% 27.4 6.0 20.2 1.1``. Returns a dict keyed by
    region (Arms, Legs, Trunk, Android, Gynoid, Total) for the regions found.
    """
    return _parse_labeled_rows(
        text,
        ["Arms", "Legs", "Trunk", "Android", "Gynoid", "Total"],
        ["fat_percent", "total_mass_lb", "fat_tissue_lb", "lean_tissue_lb", "bmc_lb"],
        "%",
    )


def parse_muscle_balance(text):
    """Parse the left/right muscle balance table.

    Example line: ``Right Arm 20.4 13.7 2.8 10.3 0.6``; a percent sign after
    the first value is tolerated. Returns a dict keyed by limb label.
    """
    return _parse_labeled_rows(
        text,
        ["Arms Total", "Right Arm", "Left Arm", "Legs Total", "Right Leg", "Left Leg"],
        ["fat_percent", "total_mass_lb", "fat_mass_lb", "lean_mass_lb", "bmc_lb"],
        "%?",
    )


def parse_bone_density_total(text):
    """Parse the bone density "Total" row (BMD, T-score, Z-score).

    Example line: ``Total 1.280 0.8 0.8``. Returns ``{}`` when not found.
    """
    # The lookbehinds stop the muscle-balance rows "Arms Total ..." and
    # "Legs Total ..." from being mistaken for the bone density row.
    match = re.search(
        r"(?<!Arms )(?<!Legs )\bTotal\s+([\d.]+)\s+([-\u2212]?[\d.]+)\s+([-\u2212]?[\d.]+)",
        text,
    )
    if not match:
        return {}
    bmd, t_score, z_score = (_to_float(token) for token in match.groups())
    if bmd is None or t_score is None or z_score is None:
        return {}
    return {
        "total_bmd_g_per_cm2": bmd,
        "young_adult_t_score": t_score,
        "age_matched_z_score": z_score,
    }


def parse_dexa_text(text):
    """Extract all measurements from the already-extracted report text.

    Returns a dict with the scalar measurements (``None`` when a value is not
    found) plus the ``regional``, ``muscle_balance`` and ``bone_density``
    sub-dicts.
    """
    data = {"measured_date": find_one(r"Measured Date\s+([\d/]+)", text, cast=str)}

    # Prefer the SUMMARY RESULTS row; fall back to the individual labels.
    summary = _SUMMARY_ROW.search(text)
    summary_values = [_to_float(g) for g in summary.groups()[1:]] if summary else []
    if summary_values and all(v is not None for v in summary_values):
        (
            data["body_fat_percent"],
            data["total_mass_lb"],
            data["fat_mass_lb"],
            data["lean_soft_tissue_lb"],
            data["bmc_lb"],
        ) = summary_values
    else:
        data["body_fat_percent"] = find_one(r"Total Body Fat %\s+([\d\.]+)", text)
        data["total_mass_lb"] = find_one(r"Total Mass.*?\(lbs\)\s+([\d\.]+)", text)
        data["fat_mass_lb"] = find_one(r"Fat Tissue \(lbs\)\s+([\d\.]+)", text)
        data["lean_soft_tissue_lb"] = find_one(r"Lean Tissue \(lbs\)\s+([\d\.]+)", text)
        data["bmc_lb"] = find_one(r"Bone Mineral\s+Content \(BMC\)\s+([\d\.]+)", text)

    # Supplemental
    data["android_percent"] = find_one(r"Android.*?([\d\.]+)%", text)
    data["gynoid_percent"] = find_one(r"Gynoid.*?([\d\.]+)%", text)
    data["rmr_cal_per_day"] = find_one(r"([\d,]+)\s*cal/day", text, cast=int)

    # A/G ratio follows RMR, Android% and Gynoid% on one line:
    # "1,778 cal/day 36.5% 27.8% 1.31"
    ag_match = re.search(r"[\d,]+\s*cal/day\s+[\d\.]+%\s+[\d\.]+%\s+([\d\.]+)", text)
    ag_ratio = _to_float(ag_match.group(1)) if ag_match else None
    if ag_ratio is None:
        ag_ratio = find_one(r"A/G Ratio\s+([\d\.]+)", text)
    data["ag_ratio"] = ag_ratio

    # The lookbehind keeps "Total Mass (lbs) <n>" from being read as VAT mass.
    data["vat_mass_lb"] = find_one(r"(?<!Total )Mass \(lbs\)\s+([\d\.]+)", text)
    data["vat_volume_in3"] = find_one(r"Volume \(in3\)\s+([\d\.]+)", text)

    # Tables
    data["regional"] = parse_regional_table(text)
    data["muscle_balance"] = parse_muscle_balance(text)
    data["bone_density"] = parse_bone_density_total(text)
    return data


def parse_dexa_pdf(pdf_path):
    """Read the PDF at ``pdf_path`` and return its parsed measurements."""
    return parse_dexa_text(read_pdf_text(pdf_path))


def _right_share_pct(right, left):
    """Right side's share of (right + left) in percent; 50.0 means symmetric."""
    if right is None or left is None or right + left <= 0:
        return None
    return round(100 * right / (right + left), 1)


def compute_derived(d, height_in, weight_lb=None):
    """Compute height-normalised indices and other derived metrics.

    Args:
        d: Parsed measurements as produced by ``parse_dexa_pdf``.
        height_in: Height in inches; must be positive.
        weight_lb: Scale weight in lb, used only when ``d`` has no total mass.

    Returns:
        ``(total_mass, derived)``: the mass used for the calculations and a
        dict of derived metrics (``None`` where inputs were missing).

    Raises:
        ValueError: if the height is not positive or no total mass is known.
    """
    if height_in is None or height_in <= 0:
        raise ValueError("Height must be a positive number of inches.")

    # Prefer DEXA total mass if available
    total_mass = d.get("total_mass_lb") or weight_lb
    if total_mass is None:
        raise ValueError("Total mass is missing; pass --weight-lb if the PDF lacks it.")

    fm = d.get("fat_mass_lb")
    lst = d.get("lean_soft_tissue_lb")
    bmc = d.get("bmc_lb")
    bf_pct = d.get("body_fat_percent")
    regional = d.get("regional") or {}
    balance = d.get("muscle_balance") or {}

    if fm is not None:
        ffm = total_mass - fm
    elif lst is not None and bmc is not None:
        ffm = lst + bmc
    else:
        ffm = None

    def idx(value_lb):
        """lb / in^2 index scaled by 703 (the BMI convention); None passes through."""
        if value_lb is None:
            return None
        return round(703.0 * value_lb / (height_in ** 2), 2)

    def region_value(region, key):
        return regional.get(region, {}).get(key)

    def limb_lean(label):
        return balance.get(label, {}).get("lean_mass_lb")

    derived = {
        "height_in": height_in,
        "height_ft_in": inches_to_ft_in(height_in),
        "weight_input_lb": weight_lb,
        "bmi": round(703.0 * total_mass / (height_in ** 2), 1),
        "fat_free_mass_lb": round(ffm, 1) if ffm is not None else None,
        "ffmi": idx(ffm),
        "fmi": idx(fm),
        "lsti": idx(lst),
        "alm_lb": None,
        "smi": None,
        # Lean mass percentage is the complement of body fat %.
        "lean_mass_percent": round(100 - bf_pct, 1) if bf_pct is not None else None,
        "arms_lean_pct": None,
        "legs_lean_pct": None,
        "trunk_lean_pct": None,
        "trunk_to_limb_fat_ratio": None,
        # 50 = perfect symmetry, <50 = left side larger, >50 = right side larger.
        "arm_symmetry_index": _right_share_pct(limb_lean("Right Arm"), limb_lean("Left Arm")),
        "leg_symmetry_index": _right_share_pct(limb_lean("Right Leg"), limb_lean("Left Leg")),
        # VAT and BMC normalised by height squared, like BMI.
        "vat_index": idx(d.get("vat_mass_lb")),
        "bmdi": idx(bmc),
    }

    # Appendicular lean mass (arms + legs) and skeletal muscle index
    arms_lean = region_value("Arms", "lean_tissue_lb")
    legs_lean = region_value("Legs", "lean_tissue_lb")
    trunk_lean = region_value("Trunk", "lean_tissue_lb")
    if arms_lean is not None and legs_lean is not None:
        alm = arms_lean + legs_lean
        derived["alm_lb"] = round(alm, 1)
        derived["smi"] = idx(alm)

    # Regional lean mass distribution (skipped when lst is missing or zero)
    if lst and arms_lean is not None and legs_lean is not None and trunk_lean is not None:
        derived["arms_lean_pct"] = round(100 * arms_lean / lst, 1)
        derived["legs_lean_pct"] = round(100 * legs_lean / lst, 1)
        derived["trunk_lean_pct"] = round(100 * trunk_lean / lst, 1)

    # Trunk-to-limb fat ratio (central adiposity indicator)
    trunk_fat = region_value("Trunk", "fat_tissue_lb")
    arms_fat = region_value("Arms", "fat_tissue_lb")
    legs_fat = region_value("Legs", "fat_tissue_lb")
    if trunk_fat is not None and arms_fat is not None and legs_fat is not None:
        limb_fat = arms_fat + legs_fat
        if limb_fat > 0:
            derived["trunk_to_limb_fat_ratio"] = round(trunk_fat / limb_fat, 2)

    # Adjusted Body Weight: ABW = IBW + 0.4 * (actual - IBW) when above IBW.
    # IBW uses the Devine equations, which are defined in KILOGRAMS
    # (50 kg male / 45.5 kg female + 2.3 kg per inch over 5 ft); sex is not
    # known here, so the male/female midpoint (47.75 kg) is used and the
    # result converted to pounds.
    # NOTE(review): heights under 60 in are clamped to the 5 ft base value -
    # confirm this is the intended handling.
    ibw_lb = (47.75 + 2.3 * max(height_in - 60.0, 0.0)) * LB_PER_KG
    if total_mass > ibw_lb:
        derived["adjusted_body_weight_lb"] = round(ibw_lb + 0.4 * (total_mass - ibw_lb), 1)
    else:
        derived["adjusted_body_weight_lb"] = round(total_mass, 1)

    return total_mass, derived


def ensure_outdir(outdir):
    """Create ``outdir`` (and parents) if it does not already exist."""
    os.makedirs(outdir, exist_ok=True)


def append_rows_csv(path, rows, columns):
    """Append ``rows`` (list of dicts) to the CSV at ``path``.

    The header is written when the file is missing or empty, so a scan that
    yields no rows can never leave a headerless file behind for later appends.
    """
    frame = pd.DataFrame(rows, columns=columns)
    has_header = os.path.exists(path) and os.path.getsize(path) > 0
    if frame.empty and has_header:
        return
    frame.to_csv(path, mode="a", header=not has_header, index=False)


def write_or_append_csv(path, row_dict, columns):
    """Append one row (``row_dict`` restricted to ``columns``) to the CSV at ``path``."""
    append_rows_csv(path, [{k: row_dict.get(k) for k in columns}], columns)


def write_or_append_json(path, obj):
    """Append ``obj`` to the JSON array stored at ``path``, creating it if needed.

    An existing file holding a single value is converted to a one-element
    list first. An unreadable (invalid JSON) file is started over, as before.
    """
    data = []
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
            except json.JSONDecodeError:
                data = []
    if not isinstance(data, list):
        # convert to list of entries if previous file was a single object
        data = [data]
    data.append(obj)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)


def append_markdown(path, md_text):
    """Append ``md_text`` plus a blank line to the Markdown file at ``path``."""
    # UTF-8 is explicit: the summary contains non-ASCII characters (em dash,
    # superscript three, non-breaking hyphen) that locale encodings such as
    # cp1252 cannot all represent.
    with open(path, "a", encoding="utf-8") as f:
        f.write(md_text.strip() + "\n\n")


def make_markdown(measured_date, d, derived, total_mass):
    """Build the human-readable Markdown summary for one scan."""
    lines = [
        f"# DEXA Summary \u2014 {measured_date}",
        "",
        f"- Height: {derived['height_in']} in",
        f"- Weight: {round(total_mass, 1)} lb",
    ]
    if d.get("body_fat_percent") is not None and d.get("fat_mass_lb") is not None:
        lines.append(f"- Body fat: {d['body_fat_percent']}% ({d['fat_mass_lb']} lb)")
    if d.get("lean_soft_tissue_lb") is not None:
        lines.append(f"- Lean soft tissue: {d['lean_soft_tissue_lb']} lb")
    if d.get("bmc_lb") is not None:
        lines.append(f"- Bone mineral content: {d['bmc_lb']} lb")
    if derived.get("fat_free_mass_lb") is not None:
        lines.append(f"- Fat\u2011free mass: {derived['fat_free_mass_lb']} lb")
    lines.append(f"- BMI: {derived['bmi']}")
    lines.append(f"- FFMI: {derived.get('ffmi')}; FMI: {derived.get('fmi')}; Lean Soft Tissue Index: {derived.get('lsti')}")
    if derived.get("alm_lb") is not None:
        lines.append(f"- Appendicular Lean Mass: {derived['alm_lb']} lb; Skeletal Muscle Index: {derived['smi']}")
    if d.get("android_percent") is not None and d.get("gynoid_percent") is not None and d.get("ag_ratio") is not None:
        lines.append(f"- Android: {d['android_percent']}%; Gynoid: {d['gynoid_percent']}%; A/G ratio: {d['ag_ratio']}")
    if d.get("vat_mass_lb") is not None and d.get("vat_volume_in3") is not None:
        lines.append(f"- VAT: {d['vat_mass_lb']} lb ({d['vat_volume_in3']} in\u00b3)")
    if d.get("rmr_cal_per_day") is not None:
        lines.append(f"- RMR: {d['rmr_cal_per_day']} cal/day")
    lines.append("")
    lines.append("## Regional")
    for name, r in (d.get("regional") or {}).items():
        lines.append(f"- {name}: {r['fat_percent']}% fat; {r['total_mass_lb']} lb total; {r['fat_tissue_lb']} lb fat; {r['lean_tissue_lb']} lb lean; {r['bmc_lb']} lb BMC")
    return "\n".join(lines)


def _overall_row(measured_date, d, derived, total_mass):
    """Build the overall.csv row for one scan."""
    return {
        "MeasuredDate": measured_date,
        "Height_in": derived["height_in"],
        "Height_ft_in": derived["height_ft_in"],
        "Weight_lb_Input": derived["weight_input_lb"],
        "DEXA_TotalMass_lb": round(total_mass, 1),
        "BodyFat_percent": d.get("body_fat_percent"),
        "LeanMass_percent": derived.get("lean_mass_percent"),
        "FatMass_lb": d.get("fat_mass_lb"),
        "LeanSoftTissue_lb": d.get("lean_soft_tissue_lb"),
        "BoneMineralContent_lb": d.get("bmc_lb"),
        "FatFreeMass_lb": derived.get("fat_free_mass_lb"),
        "BMI": derived["bmi"],
        "FFMI": derived.get("ffmi"),
        "FMI": derived.get("fmi"),
        "LST_Index": derived.get("lsti"),
        "ALM_lb": derived.get("alm_lb"),
        "SMI": derived.get("smi"),
        "VAT_Mass_lb": d.get("vat_mass_lb"),
        "VAT_Volume_in3": d.get("vat_volume_in3"),
        "VAT_Index": derived.get("vat_index"),
        "BMDI": derived.get("bmdi"),
        "Android_percent": d.get("android_percent"),
        "Gynoid_percent": d.get("gynoid_percent"),
        "AG_Ratio": d.get("ag_ratio"),
        "Trunk_to_Limb_Fat_Ratio": derived.get("trunk_to_limb_fat_ratio"),
        "Arms_Lean_pct": derived.get("arms_lean_pct"),
        "Legs_Lean_pct": derived.get("legs_lean_pct"),
        "Trunk_Lean_pct": derived.get("trunk_lean_pct"),
        "Arm_Symmetry_Index": derived.get("arm_symmetry_index"),
        "Leg_Symmetry_Index": derived.get("leg_symmetry_index"),
        "Adjusted_Body_Weight_lb": derived.get("adjusted_body_weight_lb"),
        "RMR_cal_per_day": d.get("rmr_cal_per_day"),
    }


def _overall_json(measured_date, d, derived, total_mass):
    """Build the hierarchical overall.json entry for one scan."""
    return {
        "measured_date": measured_date,
        "anthropometrics": {
            "height_in": derived["height_in"],
            "height_ft_in": derived["height_ft_in"],
            "weight_input_lb": derived["weight_input_lb"],
            "dexa_total_mass_lb": round(total_mass, 1),
            "adjusted_body_weight_lb": derived.get("adjusted_body_weight_lb"),
            "bmi": derived["bmi"],
        },
        "composition": {
            "body_fat_percent": d.get("body_fat_percent"),
            "lean_mass_percent": derived.get("lean_mass_percent"),
            "fat_mass_lb": d.get("fat_mass_lb"),
            "lean_soft_tissue_lb": d.get("lean_soft_tissue_lb"),
            "bone_mineral_content_lb": d.get("bmc_lb"),
            "fat_free_mass_lb": derived.get("fat_free_mass_lb"),
            "derived_indices": {
                "ffmi": derived.get("ffmi"),
                "fmi": derived.get("fmi"),
                "lsti": derived.get("lsti"),
                "alm_lb": derived.get("alm_lb"),
                "smi": derived.get("smi"),
                "bmdi": derived.get("bmdi"),
            },
        },
        # Tables are stored as arrays of {"region": ..., <values>} objects.
        "regional": [{"region": name, **row} for name, row in (d.get("regional") or {}).items()],
        "regional_analysis": {
            "trunk_to_limb_fat_ratio": derived.get("trunk_to_limb_fat_ratio"),
            "lean_mass_distribution": {
                "arms_percent": derived.get("arms_lean_pct"),
                "legs_percent": derived.get("legs_lean_pct"),
                "trunk_percent": derived.get("trunk_lean_pct"),
            },
        },
        "muscle_balance": [{"region": name, **row} for name, row in (d.get("muscle_balance") or {}).items()],
        "symmetry_indices": {
            "arm_symmetry_index": derived.get("arm_symmetry_index"),
            "leg_symmetry_index": derived.get("leg_symmetry_index"),
        },
        "supplemental": {
            "android_percent": d.get("android_percent"),
            "gynoid_percent": d.get("gynoid_percent"),
            "ag_ratio": d.get("ag_ratio"),
            "vat": {
                "mass_lb": d.get("vat_mass_lb"),
                "volume_in3": d.get("vat_volume_in3"),
                "vat_index": derived.get("vat_index"),
            },
            "rmr_cal_per_day": d.get("rmr_cal_per_day"),
        },
        "bone_density": d.get("bone_density", {}),
    }


def main():
    """Command-line entry point: parse one report and append to the output files."""
    ap = argparse.ArgumentParser()
    ap.add_argument("pdf", help="Path to DEXA report PDF")
    ap.add_argument("--height-in", type=float, required=True, help="Height in inches (Imperial)")
    ap.add_argument("--weight-lb", type=float, help="Body weight in lb (optional; used if DEXA total mass missing)")
    ap.add_argument("--outdir", default="dexa_out", help="Output directory")
    args = ap.parse_args()

    d = parse_dexa_pdf(args.pdf)

    measured_date_raw = d.get("measured_date") or datetime.now().strftime("%m/%d/%Y")
    measured_date = convert_date_to_iso(measured_date_raw)
    try:
        total_mass, derived = compute_derived(d, height_in=args.height_in, weight_lb=args.weight_lb)
    except ValueError as exc:
        # Report bad/missing inputs as a usage error instead of a traceback.
        ap.error(str(exc))

    # Created only once the inputs are known to be usable.
    ensure_outdir(args.outdir)

    write_or_append_csv(
        os.path.join(args.outdir, "overall.csv"),
        _overall_row(measured_date, d, derived, total_mass),
        OVERALL_COLUMNS,
    )

    # Regional table
    reg_rows = [
        {
            "Region": name,
            "FatPercent": r["fat_percent"],
            "TotalMass_lb": r["total_mass_lb"],
            "FatTissue_lb": r["fat_tissue_lb"],
            "LeanTissue_lb": r["lean_tissue_lb"],
            "BMC_lb": r["bmc_lb"],
        }
        for name, r in (d.get("regional") or {}).items()
    ]
    append_rows_csv(os.path.join(args.outdir, "regional.csv"), reg_rows, REGIONAL_COLUMNS)

    # Muscle balance
    mb_rows = [
        {
            "Region": name,
            "FatPercent": r["fat_percent"],
            "TotalMass_lb": r["total_mass_lb"],
            "FatMass_lb": r["fat_mass_lb"],
            "LeanMass_lb": r["lean_mass_lb"],
            "BMC_lb": r["bmc_lb"],
        }
        for name, r in (d.get("muscle_balance") or {}).items()
    ]
    append_rows_csv(os.path.join(args.outdir, "muscle_balance.csv"), mb_rows, MUSCLE_BALANCE_COLUMNS)

    # JSON (overall structured object)
    write_or_append_json(
        os.path.join(args.outdir, "overall.json"),
        _overall_json(measured_date, d, derived, total_mass),
    )

    # Markdown summary (append)
    md_text = make_markdown(measured_date, d, derived, total_mass)
    append_markdown(os.path.join(args.outdir, "summary.md"), md_text)

    print(f"Wrote files to: {args.outdir}")
    print("Files: overall.csv, regional.csv, muscle_balance.csv, overall.json, summary.md")


if __name__ == "__main__":
    main()

# Pinned dependencies committed alongside this script (requirements.txt):
#   cffi==2.0.0
#   charset-normalizer==3.4.3
#   cryptography==46.0.2
#   numpy==2.3.3
#   pandas==2.3.3
#   pdfminer.six==20250506
#   pdfplumber==0.11.7
#   pillow==11.3.0
#   pycparser==2.23
#   pypdfium2==4.30.0
#   python-dateutil==2.9.0.post0
#   pytz==2025.2
#   six==1.17.0
#   tzdata==2025.2