Refactor: eliminate duplicated code in PDF processing
- Remove ~200 lines of duplicate code between single-file and batch processing
- Consolidate all PDF processing logic into the process_single_pdf() function
- Add a batch_mode parameter to control output formatting
- Single-file and batch modes now use the same code path
- Improves maintainability and reduces the chance of inconsistencies

Net reduction: 202 lines deleted, 56 lines added (-146 lines total)
This commit is contained in:
parent
37267fbf34
commit
2c17d86fe7
1 changed file with 56 additions and 202 deletions
258
dexa_extract.py
258
dexa_extract.py
|
|
@ -320,22 +320,39 @@ def append_markdown(path, md_text):
|
||||||
with open(path, mode) as f:
|
with open(path, mode) as f:
|
||||||
f.write(md_text.strip() + "\n\n")
|
f.write(md_text.strip() + "\n\n")
|
||||||
|
|
||||||
def process_single_pdf(pdf_path, height_in, weight_lb, outdir):
|
def process_single_pdf(pdf_path, height_in, weight_lb, outdir, batch_mode=False):
|
||||||
"""Process a single PDF file and return success status"""
|
"""Process a single PDF file and return success status
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pdf_path: Path to PDF file
|
||||||
|
height_in: Height in inches
|
||||||
|
weight_lb: Weight in pounds (optional)
|
||||||
|
outdir: Output directory
|
||||||
|
batch_mode: If True, use batch-style output messages
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if successful, False otherwise
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
# Validate PDF file
|
# Validate PDF file
|
||||||
pdf_file = Path(pdf_path)
|
pdf_file = Path(pdf_path)
|
||||||
if not pdf_file.exists():
|
if not pdf_file.exists():
|
||||||
print(f" ❌ Skipping {pdf_path}: File not found", file=sys.stderr)
|
msg = f" ❌ Skipping {pdf_path}: File not found" if batch_mode else f"❌ Error: PDF file not found: {pdf_path}"
|
||||||
|
print(msg, file=sys.stderr)
|
||||||
return False
|
return False
|
||||||
if not pdf_file.is_file():
|
if not pdf_file.is_file():
|
||||||
print(f" ❌ Skipping {pdf_path}: Not a file", file=sys.stderr)
|
msg = f" ❌ Skipping {pdf_path}: Not a file" if batch_mode else f"❌ Error: Path is not a file: {pdf_path}"
|
||||||
|
print(msg, file=sys.stderr)
|
||||||
return False
|
return False
|
||||||
if pdf_file.suffix.lower() != '.pdf':
|
if pdf_file.suffix.lower() != '.pdf':
|
||||||
print(f" ❌ Skipping {pdf_path}: Not a PDF", file=sys.stderr)
|
msg = f" ❌ Skipping {pdf_path}: Not a PDF" if batch_mode else f"❌ Error: File is not a PDF: {pdf_path}"
|
||||||
|
print(msg, file=sys.stderr)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
print(f"\n📄 Processing: {pdf_file.name}")
|
if batch_mode:
|
||||||
|
print(f"\n📄 Processing: {pdf_file.name}")
|
||||||
|
else:
|
||||||
|
print("📊 Computing derived metrics...")
|
||||||
|
|
||||||
# Parse PDF
|
# Parse PDF
|
||||||
d = parse_dexa_pdf(pdf_path)
|
d = parse_dexa_pdf(pdf_path)
|
||||||
|
|
@ -515,11 +532,19 @@ def process_single_pdf(pdf_path, height_in, weight_lb, outdir):
|
||||||
md_text = make_markdown(measured_date, d, derived, total_mass)
|
md_text = make_markdown(measured_date, d, derived, total_mass)
|
||||||
append_markdown(os.path.join(outdir, "summary.md"), md_text)
|
append_markdown(os.path.join(outdir, "summary.md"), md_text)
|
||||||
|
|
||||||
print(f" ✅ {pdf_file.name}: Body fat {d.get('body_fat_percent')}%, FFMI {derived.get('ffmi')}")
|
if batch_mode:
|
||||||
|
print(f" ✅ {pdf_file.name}: Body fat {d.get('body_fat_percent')}%, FFMI {derived.get('ffmi')}")
|
||||||
|
else:
|
||||||
|
# Single-file mode prints detailed success info outside this function
|
||||||
|
pass
|
||||||
return True
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f" ❌ Error processing {pdf_path}: {e}", file=sys.stderr)
|
if batch_mode:
|
||||||
|
print(f" ❌ Error processing {pdf_path}: {e}", file=sys.stderr)
|
||||||
|
else:
|
||||||
|
print(f"❌ Error reading PDF: {e}", file=sys.stderr)
|
||||||
|
print("This tool is specifically designed for BodySpec PDF reports.", file=sys.stderr)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def make_markdown(measured_date, d, derived, total_mass):
|
def make_markdown(measured_date, d, derived, total_mass):
|
||||||
|
|
@ -638,7 +663,7 @@ def main():
|
||||||
except Exception:
|
except Exception:
|
||||||
pass # If we can't extract date, try to process anyway
|
pass # If we can't extract date, try to process anyway
|
||||||
|
|
||||||
if process_single_pdf(str(pdf_file), args.height_in, args.weight_lb, args.outdir):
|
if process_single_pdf(str(pdf_file), args.height_in, args.weight_lb, args.outdir, batch_mode=True):
|
||||||
success_count += 1
|
success_count += 1
|
||||||
else:
|
else:
|
||||||
fail_count += 1
|
fail_count += 1
|
||||||
|
|
@ -657,202 +682,31 @@ def main():
|
||||||
return
|
return
|
||||||
|
|
||||||
# Single file mode
|
# Single file mode
|
||||||
pdf_file = Path(args.pdf)
|
|
||||||
if not pdf_file.exists():
|
|
||||||
print(f"❌ Error: PDF file not found: {args.pdf}", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
if not pdf_file.is_file():
|
|
||||||
print(f"❌ Error: Path is not a file: {args.pdf}", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
if pdf_file.suffix.lower() != '.pdf':
|
|
||||||
print(f"❌ Error: File is not a PDF: {args.pdf}", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
print(f"📄 Reading PDF: {args.pdf}")
|
print(f"📄 Reading PDF: {args.pdf}")
|
||||||
|
|
||||||
try:
|
# Use the shared processing function
|
||||||
d = parse_dexa_pdf(args.pdf)
|
success = process_single_pdf(args.pdf, args.height_in, args.weight_lb, args.outdir, batch_mode=False)
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ Error reading PDF: {e}", file=sys.stderr)
|
if not success:
|
||||||
print("This tool is specifically designed for BodySpec PDF reports.", file=sys.stderr)
|
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# Check if critical data was extracted
|
# Parse the result to show summary info
|
||||||
if d.get("body_fat_percent") is None or d.get("total_mass_lb") is None:
|
try:
|
||||||
print("⚠️ Warning: Missing critical data from PDF. This may not be a BodySpec report.", file=sys.stderr)
|
# Read the latest entry from overall.json to get the summary data
|
||||||
if d.get("body_fat_percent") is None:
|
json_path = os.path.join(args.outdir, "overall.json")
|
||||||
print(" - Body Fat % not found", file=sys.stderr)
|
if os.path.exists(json_path):
|
||||||
if d.get("total_mass_lb") is None:
|
with open(json_path, 'r') as f:
|
||||||
print(" - Total Mass not found", file=sys.stderr)
|
data = json.load(f)
|
||||||
|
latest = data[-1] if isinstance(data, list) and data else data
|
||||||
print("📊 Computing derived metrics...")
|
measured_date = latest.get("measured_date", "Unknown")
|
||||||
measured_date_raw = d.get("measured_date") or datetime.now().strftime("%m/%d/%Y")
|
body_fat = latest.get("composition", {}).get("body_fat_percent", "N/A")
|
||||||
measured_date = convert_date_to_iso(measured_date_raw)
|
ffmi = latest.get("composition", {}).get("derived_indices", {}).get("ffmi", "N/A")
|
||||||
total_mass, derived = compute_derived(d, height_in=args.height_in, weight_lb=args.weight_lb)
|
else:
|
||||||
|
measured_date = body_fat = ffmi = "N/A"
|
||||||
# Overall CSV row
|
except Exception:
|
||||||
overall_cols = [
|
measured_date = body_fat = ffmi = "N/A"
|
||||||
"MeasuredDate","Height_in","Height_ft_in","Weight_lb_Input","DEXA_TotalMass_lb","BodyFat_percent",
|
|
||||||
"LeanMass_percent","FatMass_lb","LeanSoftTissue_lb","BoneMineralContent_lb","FatFreeMass_lb",
|
|
||||||
"BMI","FFMI","FMI","LST_Index","ALM_lb","SMI","VAT_Mass_lb","VAT_Volume_in3","VAT_Index",
|
|
||||||
"BMDI","Android_percent","Gynoid_percent","AG_Ratio","Trunk_to_Limb_Fat_Ratio",
|
|
||||||
"Arms_Lean_pct","Legs_Lean_pct","Trunk_Lean_pct","Arm_Symmetry_Index","Leg_Symmetry_Index",
|
|
||||||
"Adjusted_Body_Weight_lb","RMR_cal_per_day"
|
|
||||||
]
|
|
||||||
overall_row = {
|
|
||||||
"MeasuredDate": measured_date,
|
|
||||||
"Height_in": derived["height_in"],
|
|
||||||
"Height_ft_in": derived["height_ft_in"],
|
|
||||||
"Weight_lb_Input": derived["weight_input_lb"],
|
|
||||||
"DEXA_TotalMass_lb": round(total_mass, 1),
|
|
||||||
"BodyFat_percent": d.get("body_fat_percent"),
|
|
||||||
"LeanMass_percent": derived.get("lean_mass_percent"),
|
|
||||||
"FatMass_lb": d.get("fat_mass_lb"),
|
|
||||||
"LeanSoftTissue_lb": d.get("lean_soft_tissue_lb"),
|
|
||||||
"BoneMineralContent_lb": d.get("bmc_lb"),
|
|
||||||
"FatFreeMass_lb": derived.get("fat_free_mass_lb"),
|
|
||||||
"BMI": derived["bmi"],
|
|
||||||
"FFMI": derived.get("ffmi"),
|
|
||||||
"FMI": derived.get("fmi"),
|
|
||||||
"LST_Index": derived.get("lsti"),
|
|
||||||
"ALM_lb": derived.get("alm_lb"),
|
|
||||||
"SMI": derived.get("smi"),
|
|
||||||
"VAT_Mass_lb": d.get("vat_mass_lb"),
|
|
||||||
"VAT_Volume_in3": d.get("vat_volume_in3"),
|
|
||||||
"VAT_Index": derived.get("vat_index"),
|
|
||||||
"BMDI": derived.get("bmdi"),
|
|
||||||
"Android_percent": d.get("android_percent"),
|
|
||||||
"Gynoid_percent": d.get("gynoid_percent"),
|
|
||||||
"AG_Ratio": d.get("ag_ratio"),
|
|
||||||
"Trunk_to_Limb_Fat_Ratio": derived.get("trunk_to_limb_fat_ratio"),
|
|
||||||
"Arms_Lean_pct": derived.get("arms_lean_pct"),
|
|
||||||
"Legs_Lean_pct": derived.get("legs_lean_pct"),
|
|
||||||
"Trunk_Lean_pct": derived.get("trunk_lean_pct"),
|
|
||||||
"Arm_Symmetry_Index": derived.get("arm_symmetry_index"),
|
|
||||||
"Leg_Symmetry_Index": derived.get("leg_symmetry_index"),
|
|
||||||
"Adjusted_Body_Weight_lb": derived.get("adjusted_body_weight_lb"),
|
|
||||||
"RMR_cal_per_day": d.get("rmr_cal_per_day"),
|
|
||||||
}
|
|
||||||
write_or_append_csv(os.path.join(args.outdir, "overall.csv"), overall_row, overall_cols)
|
|
||||||
|
|
||||||
# Regional table
|
|
||||||
regional_cols = ["Region","FatPercent","LeanPercent","TotalMass_lb","FatTissue_lb","LeanTissue_lb","BMC_lb"]
|
|
||||||
reg_rows = []
|
|
||||||
for name, r in d.get("regional", {}).items():
|
|
||||||
# Calculate lean percentage (lean tissue only, not including BMC - matches BodySpec report)
|
|
||||||
lean_pct = round(100 * r["lean_tissue_lb"] / r["total_mass_lb"], 1) if r["total_mass_lb"] > 0 else None
|
|
||||||
reg_rows.append({
|
|
||||||
"Region": name,
|
|
||||||
"FatPercent": r["fat_percent"],
|
|
||||||
"LeanPercent": lean_pct,
|
|
||||||
"TotalMass_lb": r["total_mass_lb"],
|
|
||||||
"FatTissue_lb": r["fat_tissue_lb"],
|
|
||||||
"LeanTissue_lb": r["lean_tissue_lb"],
|
|
||||||
"BMC_lb": r["bmc_lb"],
|
|
||||||
})
|
|
||||||
regional_path = os.path.join(args.outdir, "regional.csv")
|
|
||||||
df_regional = pd.DataFrame(reg_rows, columns=regional_cols)
|
|
||||||
if os.path.exists(regional_path):
|
|
||||||
df_regional.to_csv(regional_path, mode="a", header=False, index=False)
|
|
||||||
else:
|
|
||||||
df_regional.to_csv(regional_path, index=False)
|
|
||||||
|
|
||||||
# Muscle balance
|
|
||||||
mb_cols = ["Region","FatPercent","TotalMass_lb","FatMass_lb","LeanMass_lb","BMC_lb"]
|
|
||||||
mb_rows = []
|
|
||||||
for name, r in d.get("muscle_balance", {}).items():
|
|
||||||
mb_rows.append({
|
|
||||||
"Region": name,
|
|
||||||
"FatPercent": r["fat_percent"],
|
|
||||||
"TotalMass_lb": r["total_mass_lb"],
|
|
||||||
"FatMass_lb": r["fat_mass_lb"],
|
|
||||||
"LeanMass_lb": r["lean_mass_lb"],
|
|
||||||
"BMC_lb": r["bmc_lb"],
|
|
||||||
})
|
|
||||||
mb_path = os.path.join(args.outdir, "muscle_balance.csv")
|
|
||||||
if os.path.exists(mb_path):
|
|
||||||
pd.DataFrame(mb_rows).to_csv(mb_path, mode="a", header=False, index=False)
|
|
||||||
else:
|
|
||||||
pd.DataFrame(mb_rows).to_csv(mb_path, index=False)
|
|
||||||
|
|
||||||
# JSON (overall structured object)
|
|
||||||
# Convert regional and muscle_balance dicts to arrays
|
|
||||||
regional_array = []
|
|
||||||
for name, data in d.get("regional", {}).items():
|
|
||||||
lean_pct = round(100 * data["lean_tissue_lb"] / data["total_mass_lb"], 1) if data["total_mass_lb"] > 0 else None
|
|
||||||
regional_array.append({
|
|
||||||
"region": name,
|
|
||||||
"fat_percent": data["fat_percent"],
|
|
||||||
"lean_percent": lean_pct,
|
|
||||||
"total_mass_lb": data["total_mass_lb"],
|
|
||||||
"fat_tissue_lb": data["fat_tissue_lb"],
|
|
||||||
"lean_tissue_lb": data["lean_tissue_lb"],
|
|
||||||
"bmc_lb": data["bmc_lb"]
|
|
||||||
})
|
|
||||||
muscle_balance_array = [
|
|
||||||
{"region": name, **data}
|
|
||||||
for name, data in d.get("muscle_balance", {}).items()
|
|
||||||
]
|
|
||||||
|
|
||||||
overall_json = {
|
|
||||||
"measured_date": measured_date,
|
|
||||||
"anthropometrics": {
|
|
||||||
"height_in": derived["height_in"],
|
|
||||||
"height_ft_in": derived["height_ft_in"],
|
|
||||||
"weight_input_lb": derived["weight_input_lb"],
|
|
||||||
"dexa_total_mass_lb": round(total_mass, 1),
|
|
||||||
"adjusted_body_weight_lb": derived.get("adjusted_body_weight_lb"),
|
|
||||||
"bmi": derived["bmi"]
|
|
||||||
},
|
|
||||||
"composition": {
|
|
||||||
"body_fat_percent": d.get("body_fat_percent"),
|
|
||||||
"lean_mass_percent": derived.get("lean_mass_percent"),
|
|
||||||
"fat_mass_lb": d.get("fat_mass_lb"),
|
|
||||||
"lean_soft_tissue_lb": d.get("lean_soft_tissue_lb"),
|
|
||||||
"bone_mineral_content_lb": d.get("bmc_lb"),
|
|
||||||
"fat_free_mass_lb": derived.get("fat_free_mass_lb"),
|
|
||||||
"derived_indices": {
|
|
||||||
"ffmi": derived.get("ffmi"),
|
|
||||||
"fmi": derived.get("fmi"),
|
|
||||||
"lsti": derived.get("lsti"),
|
|
||||||
"alm_lb": derived.get("alm_lb"),
|
|
||||||
"smi": derived.get("smi"),
|
|
||||||
"bmdi": derived.get("bmdi")
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"regional": regional_array,
|
|
||||||
"regional_analysis": {
|
|
||||||
"trunk_to_limb_fat_ratio": derived.get("trunk_to_limb_fat_ratio"),
|
|
||||||
"lean_mass_distribution": {
|
|
||||||
"arms_percent": derived.get("arms_lean_pct"),
|
|
||||||
"legs_percent": derived.get("legs_lean_pct"),
|
|
||||||
"trunk_percent": derived.get("trunk_lean_pct")
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"muscle_balance": muscle_balance_array,
|
|
||||||
"symmetry_indices": {
|
|
||||||
"arm_symmetry_index": derived.get("arm_symmetry_index"),
|
|
||||||
"leg_symmetry_index": derived.get("leg_symmetry_index")
|
|
||||||
},
|
|
||||||
"supplemental": {
|
|
||||||
"android_percent": d.get("android_percent"),
|
|
||||||
"gynoid_percent": d.get("gynoid_percent"),
|
|
||||||
"ag_ratio": d.get("ag_ratio"),
|
|
||||||
"vat": {
|
|
||||||
"mass_lb": d.get("vat_mass_lb"),
|
|
||||||
"volume_in3": d.get("vat_volume_in3"),
|
|
||||||
"vat_index": derived.get("vat_index")
|
|
||||||
},
|
|
||||||
"rmr_cal_per_day": d.get("rmr_cal_per_day")
|
|
||||||
},
|
|
||||||
"bone_density": d.get("bone_density", {})
|
|
||||||
}
|
|
||||||
write_or_append_json(os.path.join(args.outdir, "overall.json"), overall_json)
|
|
||||||
|
|
||||||
# Markdown summary (append)
|
|
||||||
md_text = make_markdown(measured_date, d, derived, total_mass)
|
|
||||||
append_markdown(os.path.join(args.outdir, "summary.md"), md_text)
|
|
||||||
|
|
||||||
|
# Success output
|
||||||
print(f"\n✅ Success! Wrote files to: {args.outdir}")
|
print(f"\n✅ Success! Wrote files to: {args.outdir}")
|
||||||
print(" 📁 Files created:")
|
print(" 📁 Files created:")
|
||||||
print(" - overall.csv (time-series data)")
|
print(" - overall.csv (time-series data)")
|
||||||
|
|
@ -861,8 +715,8 @@ def main():
|
||||||
print(" - overall.json (structured data)")
|
print(" - overall.json (structured data)")
|
||||||
print(" - summary.md (readable report)")
|
print(" - summary.md (readable report)")
|
||||||
print(f"\n 📈 Scan date: {measured_date}")
|
print(f"\n 📈 Scan date: {measured_date}")
|
||||||
print(f" 💪 Body fat: {d.get('body_fat_percent')}%")
|
print(f" 💪 Body fat: {body_fat}%")
|
||||||
print(f" 🏋️ FFMI: {derived.get('ffmi')}")
|
print(f" 🏋️ FFMI: {ffmi}")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue