Initial commit: BodySpec Insights - comprehensive DEXA analytics tool

This commit is contained in:
Mac DeCourcy 2025-10-06 14:32:25 -07:00
commit c7d0255f61
10 changed files with 907 additions and 0 deletions

497
dexa_extract.py Normal file
View file

@@ -0,0 +1,497 @@
#!/usr/bin/env python3
"""
BodySpec Insights - Body composition analytics for BodySpec DEXA scan PDFs
Extract measurements from BodySpec DEXA reports, compute 30+ derived metrics,
and output structured data for progress tracking.
Usage:
python dexa_extract.py /path/to/bodyspec-report.pdf --height-in 74 --weight-lb 212 --outdir ./data/results
Note: This script is specifically designed for BodySpec PDF reports.
Requires:
pip install pdfplumber pandas
"""
import argparse
import json
import math
import os
import re
from datetime import datetime
import pdfplumber
import pandas as pd
def read_pdf_text(pdf_path):
    """Return the text of every page of the PDF, joined with newlines.

    A page whose ``extract_text()`` result is falsy (for example ``None``)
    contributes an empty string, so each page still occupies one slot in
    the joined output.
    """
    chunks = []
    with pdfplumber.open(pdf_path) as document:
        for page in document.pages:
            extracted = page.extract_text()
            chunks.append(extracted if extracted else "")
    return "\n".join(chunks)
def find_one(pattern, text, cast=float, flags=re.IGNORECASE):
    """Return the first capture group of the first match, optionally converted.

    Args:
        pattern: Regular expression with at least one capture group.
        text: Text to search.
        cast: Callable applied to the cleaned capture. Pass ``None`` (or any
            falsy value) to get the cleaned string back unchanged.
        flags: ``re`` flags used for the search (case-insensitive by default).

    Returns:
        The converted value, or ``None`` when the pattern does not match, the
        group did not participate in the match, or the captured text cannot
        be converted by ``cast``.
    """
    match = re.search(pattern, text, flags)
    if match is None:
        return None
    captured = match.group(1)
    if captured is None:
        # An optional group that did not participate in the match.
        return None
    # Thousands separators are dropped before conversion ("1,778" -> "1778").
    cleaned = captured.replace(",", "").strip()
    if not cast:
        return cleaned
    try:
        return cast(cleaned)
    except (TypeError, ValueError):
        # Loose numeric classes such as [\d\.]+ also match fragments like
        # "..." or "1.2.3"; report them as "not found" instead of crashing,
        # which is the same contract callers already handle for no match.
        return None
def convert_date_to_iso(date_str):
    """Convert MM/DD/YYYY to YYYY-MM-DD.

    Args:
        date_str: Date text such as ``"10/6/2025"``; surrounding whitespace
            is ignored.

    Returns:
        The ISO-formatted date string; ``None`` when ``date_str`` is falsy;
        or ``date_str`` unchanged when it is not a valid MM/DD/YYYY date
        (best effort, so an unexpected format is still recorded).
    """
    if not date_str:
        return None
    try:
        parsed = datetime.strptime(date_str.strip(), "%m/%d/%Y")
    except (AttributeError, TypeError, ValueError):
        # Only parsing failures are tolerated; anything else (for example
        # KeyboardInterrupt) must propagate, unlike with a bare ``except``.
        return date_str
    return parsed.strftime("%Y-%m-%d")
def inches_to_ft_in(inches):
    """Format a length in inches as feet'inches" (e.g. 74 -> 6'2").

    Returns ``None`` for ``None``. Fractional inches are truncated, not
    rounded.
    """
    if inches is None:
        return None
    whole_feet, leftover = divmod(inches, 12)
    return "{}'{}\"".format(int(whole_feet), int(leftover))
def parse_regional_table(text):
    """Parse the regional composition rows out of the report text.

    For each known region label the first line of the form
    ``<Region> <fat>% <total> <fat> <lean> <bmc>`` is used, e.g.
    ``Arms 22.1% 27.4 6.0 20.2 1.1``.

    Returns:
        Dict mapping region name to a dict of floats keyed by
        ``fat_percent``, ``total_mass_lb``, ``fat_tissue_lb``,
        ``lean_tissue_lb`` and ``bmc_lb``. Regions that are not found are
        omitted.
    """
    field_names = (
        "fat_percent",
        "total_mass_lb",
        "fat_tissue_lb",
        "lean_tissue_lb",
        "bmc_lb",
    )
    parsed = {}
    for r in ("Arms", "Legs", "Trunk", "Android", "Gynoid", "Total"):
        hit = re.search(
            rf"{r}\s+([\d\.]+)%\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)",
            text,
        )
        if hit is None:
            continue
        parsed[r] = {
            key: float(raw) for key, raw in zip(field_names, hit.groups())
        }
    return parsed
def parse_muscle_balance(text):
    """Parse the left/right muscle balance rows out of the report text.

    For each known row label the first line of the form
    ``<Label> <fat%> <total> <fat> <lean> <bmc>`` is used, e.g.
    ``Right Arm 20.4 13.7 2.8 10.3 0.6`` (no percent sign in this table).

    Returns:
        Dict mapping row label to a dict of floats keyed by ``fat_percent``,
        ``total_mass_lb``, ``fat_mass_lb``, ``lean_mass_lb`` and ``bmc_lb``.
        Labels that are not found are omitted.
    """
    field_names = (
        "fat_percent",
        "total_mass_lb",
        "fat_mass_lb",
        "lean_mass_lb",
        "bmc_lb",
    )
    labels = (
        "Arms Total",
        "Right Arm",
        "Left Arm",
        "Legs Total",
        "Right Leg",
        "Left Leg",
    )
    parsed = {}
    for n in labels:
        hit = re.search(
            rf"{n}\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)",
            text,
        )
        if hit is None:
            continue
        parsed[n] = {
            key: float(raw) for key, raw in zip(field_names, hit.groups())
        }
    return parsed
def parse_bone_density_total(text):
    """Parse the "Total" row of the bone density table.

    Example row: ``Total 1.280 0.8 0.8`` (BMD, young-adult T-score,
    age-matched Z-score).

    The row label must start its line. An unanchored ``Total`` would also
    match the tail of muscle-balance rows such as
    ``Arms Total 20.4 13.7 2.8 ...`` and return their mass columns as bone
    density values.

    Returns:
        Dict with ``total_bmd_g_per_cm2``, ``young_adult_t_score`` and
        ``age_matched_z_score`` as floats, or ``{}`` when no such row exists.
    """
    unsigned = r"(?:\d+\.?\d*|\.\d+)"
    signed = rf"[-+]?{unsigned}"
    # Strict numeric tokens: a bare "-" or "." placeholder is not a number
    # and must not reach float().
    row = re.compile(
        rf"^[ \t]*Total[ \t]+({unsigned})[ \t]+({signed})[ \t]+({signed})(?![\d.%])",
        re.MULTILINE,
    )
    m = row.search(text)
    if m is None:
        return {}
    return {
        "total_bmd_g_per_cm2": float(m.group(1)),
        "young_adult_t_score": float(m.group(2)),
        "age_matched_z_score": float(m.group(3)),
    }
def parse_dexa_pdf(pdf_path):
    """Extract the raw measurements of one report PDF into a dict.

    The PDF text is read once and searched with regular expressions. Scalar
    values that cannot be found are stored as ``None``; the ``regional``,
    ``muscle_balance`` and ``bone_density`` entries are the dicts returned
    by their respective table parsers.
    """
    text = read_pdf_text(pdf_path)
    data = {
        "measured_date": find_one(r"Measured Date\s+([\d/]+)", text, cast=str),
    }

    # The SUMMARY RESULTS row is tried first (more reliable).
    # Example row: 10/6/2025 27.8% 211.6 58.8 145.4 7.4
    summary_row = re.search(
        r"(\d{1,2}/\d{1,2}/\d{4})\s+([\d\.]+)%\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)",
        text,
    )
    if summary_row:
        summary_keys = (
            "body_fat_percent",
            "total_mass_lb",
            "fat_mass_lb",
            "lean_soft_tissue_lb",
            "bmc_lb",
        )
        # Group 1 is the scan date, which is not taken from this row.
        for key, raw in zip(summary_keys, summary_row.groups()[1:]):
            data[key] = float(raw)
    else:
        # Otherwise each value is looked up by its own label.
        labelled_patterns = (
            ("body_fat_percent", r"Total Body Fat %\s+([\d\.]+)"),
            ("total_mass_lb", r"Total Mass.*?\(lbs\)\s+([\d\.]+)"),
            ("fat_mass_lb", r"Fat Tissue \(lbs\)\s+([\d\.]+)"),
            ("lean_soft_tissue_lb", r"Lean Tissue \(lbs\)\s+([\d\.]+)"),
            ("bmc_lb", r"Bone Mineral\s+Content \(BMC\)\s+([\d\.]+)"),
        )
        for key, pattern in labelled_patterns:
            data[key] = find_one(pattern, text)

    # Supplemental values
    data["android_percent"] = find_one(r"Android.*?([\d\.]+)%", text)
    data["gynoid_percent"] = find_one(r"Gynoid.*?([\d\.]+)%", text)
    data["rmr_cal_per_day"] = find_one(
        r"([\d,]+)\s*cal/day",
        text,
        cast=lambda s: int(s.replace(",", "")),
    )

    # The A/G ratio follows RMR, Android% and Gynoid% on one line:
    # "1,778 cal/day 36.5% 27.8% 1.31"
    combined_line = re.search(
        r"[\d,]+\s*cal/day\s+([\d\.]+)%\s+([\d\.]+)%\s+([\d\.]+)", text
    )
    data["ag_ratio"] = (
        float(combined_line.group(3))
        if combined_line
        else find_one(r"A/G Ratio\s+([\d\.]+)", text)
    )

    data["vat_mass_lb"] = find_one(r"Mass \(lbs\)\s+([\d\.]+)", text)
    data["vat_volume_in3"] = find_one(r"Volume \(in3\)\s+([\d\.]+)", text)

    # Tables
    data["regional"] = parse_regional_table(text)
    data["muscle_balance"] = parse_muscle_balance(text)
    data["bone_density"] = parse_bone_density_total(text)
    return data
def compute_derived(d, height_in, weight_lb=None):
    """Compute height-normalised indices and ratios from parsed report data.

    Args:
        d: Parsed measurements. Keys read: ``total_mass_lb``, ``fat_mass_lb``,
            ``lean_soft_tissue_lb``, ``bmc_lb``, ``body_fat_percent``,
            ``vat_mass_lb``, ``regional`` and ``muscle_balance``. Missing
            values yield ``None`` for the metrics that depend on them.
        height_in: Height in inches; must be positive.
        weight_lb: Body weight in lb, used only when ``d`` has no total mass.

    Returns:
        ``(total_mass, derived)`` where ``total_mass`` is the mass in lb that
        was used and ``derived`` is a dict of the computed metrics.

    Raises:
        ValueError: If no total mass is available or ``height_in`` is not
            positive.
    """
    # Prefer DEXA total mass if available
    total_mass = d.get("total_mass_lb") or weight_lb
    if total_mass is None:
        raise ValueError("Total mass is missing; pass --weight-lb if the PDF lacks it.")
    if height_in is None or height_in <= 0:
        # Every index below divides by height squared.
        raise ValueError("Height must be a positive number of inches.")

    fm = d.get("fat_mass_lb")
    lst = d.get("lean_soft_tissue_lb")
    bmc = d.get("bmc_lb")
    bf_pct = d.get("body_fat_percent")
    regional = d.get("regional") or {}
    mb = d.get("muscle_balance") or {}

    ffm = None
    if fm is not None:
        ffm = total_mass - fm
    elif lst is not None and bmc is not None:
        ffm = lst + bmc

    def idx(value_lb):
        # BMI-style index: 703 * lb / in^2 (equivalent to kg / m^2).
        return round(703.0 * value_lb / (height_in ** 2), 2)

    def idx_or_none(value_lb):
        return idx(value_lb) if value_lb is not None else None

    def region_value(region, key):
        return (regional.get(region) or {}).get(key)

    def symmetry(right, left):
        # Right side's share of combined lean mass, in percent:
        # 50 = balanced, <50 = left side heavier, >50 = right side heavier.
        if right is None or left is None or right + left <= 0:
            return None
        return round(100 * right / (right + left), 1)

    derived = {
        "height_in": height_in,
        "height_ft_in": inches_to_ft_in(height_in),
        "weight_input_lb": weight_lb,
        "bmi": round(703.0 * total_mass / (height_in ** 2), 1),
        "fat_free_mass_lb": round(ffm, 1) if ffm is not None else None,
        "ffmi": idx_or_none(ffm),
        "fmi": idx_or_none(fm),
        "lsti": idx_or_none(lst),
        "alm_lb": None,
        "smi": None,
    }

    # Lean mass percentage (complement of body fat %)
    derived["lean_mass_percent"] = (
        round(100 - bf_pct, 1) if bf_pct is not None else None
    )

    # ALM (appendicular lean mass) from regional lean masses
    arms_lean = region_value("Arms", "lean_tissue_lb")
    legs_lean = region_value("Legs", "lean_tissue_lb")
    trunk_lean = region_value("Trunk", "lean_tissue_lb")
    if arms_lean is not None and legs_lean is not None:
        alm = arms_lean + legs_lean
        derived["alm_lb"] = round(alm, 1)
        derived["smi"] = idx(alm)

    # Regional lean mass distribution; a zero or missing denominator gives
    # None instead of a ZeroDivisionError.
    if lst and None not in (arms_lean, legs_lean, trunk_lean):
        derived["arms_lean_pct"] = round(100 * arms_lean / lst, 1)
        derived["legs_lean_pct"] = round(100 * legs_lean / lst, 1)
        derived["trunk_lean_pct"] = round(100 * trunk_lean / lst, 1)
    else:
        derived["arms_lean_pct"] = None
        derived["legs_lean_pct"] = None
        derived["trunk_lean_pct"] = None

    # Trunk-to-limb fat ratio
    trunk_fat = region_value("Trunk", "fat_tissue_lb")
    arms_fat = region_value("Arms", "fat_tissue_lb")
    legs_fat = region_value("Legs", "fat_tissue_lb")
    ratio = None
    if None not in (trunk_fat, arms_fat, legs_fat):
        limb_fat = arms_fat + legs_fat
        if limb_fat > 0:
            ratio = round(trunk_fat / limb_fat, 2)
    derived["trunk_to_limb_fat_ratio"] = ratio

    # Limb symmetry indices (balance indicators)
    derived["arm_symmetry_index"] = symmetry(
        (mb.get("Right Arm") or {}).get("lean_mass_lb"),
        (mb.get("Left Arm") or {}).get("lean_mass_lb"),
    )
    derived["leg_symmetry_index"] = symmetry(
        (mb.get("Right Leg") or {}).get("lean_mass_lb"),
        (mb.get("Left Leg") or {}).get("lean_mass_lb"),
    )

    # VAT index and BMC index, both normalised by height squared like BMI
    derived["vat_index"] = idx_or_none(d.get("vat_mass_lb"))
    derived["bmdi"] = idx_or_none(bmc)

    # Adjusted body weight: ABW = IBW + 0.4 * (actual - IBW), applied only
    # when actual weight exceeds IBW. IBW follows the Devine formula, which
    # is defined in kilograms (50 kg for men / 45.5 kg for women, plus 2.3 kg
    # per inch over 60 in). Sex is not known here, so the midpoint base of
    # 47.75 kg is used and the result is converted to pounds. The previous
    # estimate used the kg slope directly as if it were pounds.
    lb_per_kg = 2.20462
    ibw_estimate = (47.75 + 2.3 * (height_in - 60)) * lb_per_kg
    if total_mass > ibw_estimate:
        derived["adjusted_body_weight_lb"] = round(
            ibw_estimate + 0.4 * (total_mass - ibw_estimate), 1
        )
    else:
        derived["adjusted_body_weight_lb"] = round(total_mass, 1)

    return total_mass, derived
def ensure_outdir(outdir):
    """Create ``outdir`` and any missing parents; do nothing if it exists."""
    os.makedirs(name=outdir, exist_ok=True)
def write_or_append_csv(path, row_dict, columns):
    """Append one row to the CSV at ``path``, creating the file if needed.

    Args:
        path: Destination CSV file.
        row_dict: Values for the row; keys missing from it are written as
            empty cells.
        columns: Column names, in output order.

    The header row is written when the file does not exist yet or exists
    but is empty, so a zero-byte placeholder file does not end up with data
    rows and no header.
    """
    df_row = pd.DataFrame([{k: row_dict.get(k) for k in columns}])
    needs_header = not os.path.exists(path) or os.path.getsize(path) == 0
    # Mode "a" creates the file when it is missing, so one call covers both
    # the first write and later appends.
    df_row.to_csv(path, mode="a", header=needs_header, index=False)
def write_or_append_json(path, obj):
    """Append ``obj`` to the JSON array stored at ``path``.

    The file is rewritten in full on every call. Existing content is handled
    as follows:

    * a JSON array is extended;
    * any other JSON value (for example a single object from an older run)
      becomes the first entry of a new array;
    * JSON ``null``, a missing file, or unreadable content starts a fresh
      array (best effort: unreadable history is replaced, not preserved).
    """
    entries = []
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            try:
                loaded = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                loaded = None
        if isinstance(loaded, list):
            entries = loaded
        elif loaded is not None:
            # Wrap a lone value so that .append below is always valid.
            entries = [loaded]
    entries.append(obj)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(entries, f, indent=2)
def append_markdown(path, md_text):
    """Append ``md_text`` to the Markdown file at ``path``.

    The text is stripped of surrounding whitespace and followed by one blank
    line. The file is created when missing (append mode does that itself).
    It is always written as UTF-8 because the generated summaries contain
    non-ASCII characters that the platform default encoding may not be able
    to represent.
    """
    with open(path, "a", encoding="utf-8") as f:
        f.write(md_text.strip() + "\n\n")
def make_markdown(measured_date, d, derived, total_mass):
    """Render a Markdown summary of one scan.

    Args:
        measured_date: Date text used in the heading.
        d: Parsed measurements (same dict that ``compute_derived`` reads).
        derived: Derived metrics; ``height_in`` and ``bmi`` are required,
            all other keys are optional.
        total_mass: Total mass in lb shown as the weight.

    Returns:
        The summary as a single newline-joined string. Optional measurements
        that are missing are omitted rather than printed as "None".
    """
    lines = [
        f"# DEXA Summary — {measured_date}",
        "",
        f"- Height: {derived['height_in']} in",
        f"- Weight: {round(total_mass, 1)} lb",
    ]
    if d.get("body_fat_percent") is not None and d.get("fat_mass_lb") is not None:
        lines.append(f"- Body fat: {d['body_fat_percent']}% ({d['fat_mass_lb']} lb)")
    if d.get("lean_soft_tissue_lb") is not None:
        lines.append(f"- Lean soft tissue: {d['lean_soft_tissue_lb']} lb")
    if d.get("bmc_lb") is not None:
        lines.append(f"- Bone mineral content: {d['bmc_lb']} lb")
    # Label was previously misspelled "Fatfree", had no unit, and printed
    # "None" when the value was unavailable.
    if derived.get("fat_free_mass_lb") is not None:
        lines.append(f"- Fat-free mass: {derived['fat_free_mass_lb']} lb")
    lines.append(f"- BMI: {derived['bmi']}")
    lines.append(f"- FFMI: {derived.get('ffmi')}; FMI: {derived.get('fmi')}; Lean Soft Tissue Index: {derived.get('lsti')}")
    if derived.get("alm_lb") is not None:
        lines.append(f"- Appendicular Lean Mass: {derived['alm_lb']} lb; Skeletal Muscle Index: {derived.get('smi')}")
    if d.get("android_percent") is not None and d.get("gynoid_percent") is not None and d.get("ag_ratio") is not None:
        lines.append(f"- Android: {d['android_percent']}%; Gynoid: {d['gynoid_percent']}%; A/G ratio: {d['ag_ratio']}")
    if d.get("vat_mass_lb") is not None and d.get("vat_volume_in3") is not None:
        lines.append(f"- VAT: {d['vat_mass_lb']} lb ({d['vat_volume_in3']} in³)")
    if d.get("rmr_cal_per_day") is not None:
        lines.append(f"- RMR: {d['rmr_cal_per_day']} cal/day")
    lines.append("")
    lines.append("## Regional")
    for name, r in (d.get("regional") or {}).items():
        lines.append(f"- {name}: {r['fat_percent']}% fat; {r['total_mass_lb']} lb total; {r['fat_tissue_lb']} lb fat; {r['lean_tissue_lb']} lb lean; {r['bmc_lb']} lb BMC")
    return "\n".join(lines)
def main():
    """Command-line entry point: parse one report and write all outputs.

    Reads the PDF given on the command line, computes the derived metrics and
    appends the results to ``overall.csv``, ``regional.csv``,
    ``muscle_balance.csv``, ``overall.json`` and ``summary.md`` inside the
    output directory (created if missing).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("pdf", help="Path to DEXA report PDF")
    ap.add_argument("--height-in", type=float, required=True, help="Height in inches (Imperial)")
    ap.add_argument("--weight-lb", type=float, help="Body weight in lb (optional; used if DEXA total mass missing)")
    ap.add_argument("--outdir", default="dexa_out", help="Output directory")
    args = ap.parse_args()

    ensure_outdir(args.outdir)
    d = parse_dexa_pdf(args.pdf)
    # Fall back to today's date when the report does not state one.
    measured_date_raw = d.get("measured_date") or datetime.now().strftime("%m/%d/%Y")
    measured_date = convert_date_to_iso(measured_date_raw)
    total_mass, derived = compute_derived(d, height_in=args.height_in, weight_lb=args.weight_lb)

    # Overall CSV row
    overall_cols = [
        "MeasuredDate", "Height_in", "Height_ft_in", "Weight_lb_Input", "DEXA_TotalMass_lb", "BodyFat_percent",
        "LeanMass_percent", "FatMass_lb", "LeanSoftTissue_lb", "BoneMineralContent_lb", "FatFreeMass_lb",
        "BMI", "FFMI", "FMI", "LST_Index", "ALM_lb", "SMI", "VAT_Mass_lb", "VAT_Volume_in3", "VAT_Index",
        "BMDI", "Android_percent", "Gynoid_percent", "AG_Ratio", "Trunk_to_Limb_Fat_Ratio",
        "Arms_Lean_pct", "Legs_Lean_pct", "Trunk_Lean_pct", "Arm_Symmetry_Index", "Leg_Symmetry_Index",
        "Adjusted_Body_Weight_lb", "RMR_cal_per_day",
    ]
    overall_row = {
        "MeasuredDate": measured_date,
        "Height_in": derived["height_in"],
        "Height_ft_in": derived["height_ft_in"],
        "Weight_lb_Input": derived["weight_input_lb"],
        "DEXA_TotalMass_lb": round(total_mass, 1),
        "BodyFat_percent": d.get("body_fat_percent"),
        "LeanMass_percent": derived.get("lean_mass_percent"),
        "FatMass_lb": d.get("fat_mass_lb"),
        "LeanSoftTissue_lb": d.get("lean_soft_tissue_lb"),
        "BoneMineralContent_lb": d.get("bmc_lb"),
        "FatFreeMass_lb": derived.get("fat_free_mass_lb"),
        "BMI": derived["bmi"],
        "FFMI": derived.get("ffmi"),
        "FMI": derived.get("fmi"),
        "LST_Index": derived.get("lsti"),
        "ALM_lb": derived.get("alm_lb"),
        "SMI": derived.get("smi"),
        "VAT_Mass_lb": d.get("vat_mass_lb"),
        "VAT_Volume_in3": d.get("vat_volume_in3"),
        "VAT_Index": derived.get("vat_index"),
        "BMDI": derived.get("bmdi"),
        "Android_percent": d.get("android_percent"),
        "Gynoid_percent": d.get("gynoid_percent"),
        "AG_Ratio": d.get("ag_ratio"),
        "Trunk_to_Limb_Fat_Ratio": derived.get("trunk_to_limb_fat_ratio"),
        "Arms_Lean_pct": derived.get("arms_lean_pct"),
        "Legs_Lean_pct": derived.get("legs_lean_pct"),
        "Trunk_Lean_pct": derived.get("trunk_lean_pct"),
        "Arm_Symmetry_Index": derived.get("arm_symmetry_index"),
        "Leg_Symmetry_Index": derived.get("leg_symmetry_index"),
        "Adjusted_Body_Weight_lb": derived.get("adjusted_body_weight_lb"),
        "RMR_cal_per_day": d.get("rmr_cal_per_day"),
    }
    write_or_append_csv(os.path.join(args.outdir, "overall.csv"), overall_row, overall_cols)

    # Regional table
    regional_cols = ["Region", "FatPercent", "TotalMass_lb", "FatTissue_lb", "LeanTissue_lb", "BMC_lb"]
    reg_rows = [
        {
            "Region": name,
            "FatPercent": r["fat_percent"],
            "TotalMass_lb": r["total_mass_lb"],
            "FatTissue_lb": r["fat_tissue_lb"],
            "LeanTissue_lb": r["lean_tissue_lb"],
            "BMC_lb": r["bmc_lb"],
        }
        for name, r in d.get("regional", {}).items()
    ]

    # Muscle balance
    mb_cols = ["Region", "FatPercent", "TotalMass_lb", "FatMass_lb", "LeanMass_lb", "BMC_lb"]
    mb_rows = [
        {
            "Region": name,
            "FatPercent": r["fat_percent"],
            "TotalMass_lb": r["total_mass_lb"],
            "FatMass_lb": r["fat_mass_lb"],
            "LeanMass_lb": r["lean_mass_lb"],
            "BMC_lb": r["bmc_lb"],
        }
        for name, r in d.get("muscle_balance", {}).items()
    ]

    # NOTE(review): rows in these two files carry no MeasuredDate column, so
    # rows appended from different scans cannot be told apart; adding one
    # would change the schema of existing files.
    for filename, rows, cols in (
        ("regional.csv", reg_rows, regional_cols),
        ("muscle_balance.csv", mb_rows, mb_cols),
    ):
        table_path = os.path.join(args.outdir, filename)
        # Passing the column list pins the column order and makes an empty
        # table produce a header-only file instead of a header-less one.
        frame = pd.DataFrame(rows, columns=cols)
        if os.path.exists(table_path):
            frame.to_csv(table_path, mode="a", header=False, index=False)
        else:
            frame.to_csv(table_path, index=False)

    # JSON (overall structured object)
    # Convert regional and muscle_balance dicts to arrays
    regional_array = [
        {"region": name, **data}
        for name, data in d.get("regional", {}).items()
    ]
    muscle_balance_array = [
        {"region": name, **data}
        for name, data in d.get("muscle_balance", {}).items()
    ]
    overall_json = {
        "measured_date": measured_date,
        "anthropometrics": {
            "height_in": derived["height_in"],
            "height_ft_in": derived["height_ft_in"],
            "weight_input_lb": derived["weight_input_lb"],
            "dexa_total_mass_lb": round(total_mass, 1),
            "adjusted_body_weight_lb": derived.get("adjusted_body_weight_lb"),
            "bmi": derived["bmi"],
        },
        "composition": {
            "body_fat_percent": d.get("body_fat_percent"),
            "lean_mass_percent": derived.get("lean_mass_percent"),
            "fat_mass_lb": d.get("fat_mass_lb"),
            "lean_soft_tissue_lb": d.get("lean_soft_tissue_lb"),
            "bone_mineral_content_lb": d.get("bmc_lb"),
            "fat_free_mass_lb": derived.get("fat_free_mass_lb"),
            "derived_indices": {
                "ffmi": derived.get("ffmi"),
                "fmi": derived.get("fmi"),
                "lsti": derived.get("lsti"),
                "alm_lb": derived.get("alm_lb"),
                "smi": derived.get("smi"),
                "bmdi": derived.get("bmdi"),
            },
        },
        "regional": regional_array,
        "regional_analysis": {
            "trunk_to_limb_fat_ratio": derived.get("trunk_to_limb_fat_ratio"),
            "lean_mass_distribution": {
                "arms_percent": derived.get("arms_lean_pct"),
                "legs_percent": derived.get("legs_lean_pct"),
                "trunk_percent": derived.get("trunk_lean_pct"),
            },
        },
        "muscle_balance": muscle_balance_array,
        "symmetry_indices": {
            "arm_symmetry_index": derived.get("arm_symmetry_index"),
            "leg_symmetry_index": derived.get("leg_symmetry_index"),
        },
        "supplemental": {
            "android_percent": d.get("android_percent"),
            "gynoid_percent": d.get("gynoid_percent"),
            "ag_ratio": d.get("ag_ratio"),
            "vat": {
                "mass_lb": d.get("vat_mass_lb"),
                "volume_in3": d.get("vat_volume_in3"),
                "vat_index": derived.get("vat_index"),
            },
            "rmr_cal_per_day": d.get("rmr_cal_per_day"),
        },
        "bone_density": d.get("bone_density", {}),
    }
    write_or_append_json(os.path.join(args.outdir, "overall.json"), overall_json)

    # Markdown summary (append)
    md_text = make_markdown(measured_date, d, derived, total_mass)
    append_markdown(os.path.join(args.outdir, "summary.md"), md_text)

    print(f"Wrote files to: {args.outdir}")
    print("Files: overall.csv, regional.csv, muscle_balance.csv, overall.json, summary.md")


if __name__ == "__main__":
    main()