1
0
Fork 0
adhd200-cat12.8.1/code/xml2csv.py

217 lines
No EOL
6.6 KiB
Python
Executable file

#!/usr/bin/env python3
# Usage: xml2csv <basename> <path/to/subject>
#
# Rows are appended to <basename>_QC.csv and <basename>_rois_<atlas>.csv;
# the CSV header is written only when a file is newly created.
import sys
import shutil
from xml.etree.ElementTree import parse as xmlparse
import csv
from pathlib import Path
from tempfile import (
TemporaryDirectory,
TemporaryFile,
)
import re
# Leading columns of every CSV written by this script. The order of this
# list is the column order of the output, so re-ordering entries here
# re-orders the CSV columns.
csv_fieldnames = [
    'SubjectID',
    'filepath',
    'res_ECR',
    'NCR',      # noise to contrast ratio
    'ICR',      # inhomogeneity to contrast ratio
    'IQR',      # image quality rating
    'GM',       # total gray matter volume
    'WM',       # total white matter volume
    'CSF',      # total cerebral spinal fluid volume
    'TIV',      # total intracranial volume (GM+WM+CSF)
    'TSA',      # total surface area
    'EulerNr',
    'EulerR',
    'SIQR',
]
# WMH (total white matter hyperintensities volume) is not part of the
# column list above.
def val2out(str_float):
    """Render a numeric value as text with exactly four decimal places.

    ``str_float`` may be a float or anything else that ``float()`` accepts,
    for example a string holding a number.
    """
    number = float(str_float)
    return format(number, '.4f')
def get_basic_catlog(filepath, sub, outfilebase):
    """Extract the QC summary of one report XML and append it to the QC CSV.

    Parameters
    ----------
    filepath : pathlib.Path
        Report XML file to read.
    sub
        Subject identifier stored in the ``SubjectID`` column.
    outfilebase
        Prefix of the output file; the row is appended to
        ``<outfilebase>_QC.csv``. The CSV header is written only if that
        file does not exist yet.

    Returns
    -------
    dict
        The record that was written, keyed by CSV column name. Tissue
        volumes (``CSF``, ``GM``, ``WM``) are only present when their value
        is greater than zero.

    Raises
    ------
    AttributeError
        If one of the expected elements is missing from the report.
    """
    # function-scope import keeps this block independent of the file header
    from xml.etree.ElementTree import fromstring

    # Every literal 'item...' in the report is rewritten to 'item>...' before
    # parsing (presumably to repair a tag that would otherwise make the XML
    # unparsable -- TODO confirm). The document is parsed from memory; no
    # temporary file is needed.
    patched = filepath.read_bytes().replace(b'item...', b'item>...')
    catreport = fromstring(patched)

    # CSV column -> element path inside the report, in output record order
    report_paths = (
        ('res_ECR', 'qualityratings/res_ECR'),
        ('NCR', 'qualityratings/NCR'),
        ('ICR', 'qualityratings/ICR'),
        ('IQR', 'qualityratings/IQR'),
        ('EulerNr', 'qualitymeasures//SurfaceEulerNumber'),
        ('EulerR', 'qualityratings//SurfaceEulerNumber'),
        ('SIQR', 'qualityratings/SIQR'),
        ('TIV', 'subjectmeasures/vol_TIV'),
        ('TSA', 'subjectmeasures/surf_TSA'),
    )

    # build CSV record
    catlog = {
        'SubjectID': sub,
        'filepath': filepath,
    }
    for column, element_path in report_paths:
        catlog[column] = val2out(catreport.find(element_path).text)

    # vol_abs_CGW holds whitespace-separated values inside square brackets,
    # taken here in the order CSF, GM, WM; non-positive values are left out
    # of the record (the CSV cell then stays empty)
    abs_volumes = catreport.find('subjectmeasures/vol_abs_CGW').text.strip('[]')
    for tissue, volume in zip(('CSF', 'GM', 'WM'), abs_volumes.split()):
        if float(volume) > 0:
            catlog[tissue] = val2out(volume)

    # write QC only file
    destfile = Path('{}_QC.csv'.format(outfilebase))
    need_header = not destfile.is_file()
    # newline='' lets the csv module control line endings itself, as its
    # documentation requires; without it Windows output gets '\r\r\n'
    with destfile.open('a', newline='') as catlog_data:
        writer = csv.DictWriter(catlog_data, fieldnames=csv_fieldnames)
        # if there was no CSV, write the header
        if need_header:
            writer.writeheader()
        writer.writerow(catlog)
    return catlog
def xml2csv(infile, outfilebase, catlog_templ, data_tag,
            additional_extractor=None):
    """Append one CSV row per atlas found in a ROI XML file.

    For every child element of the XML root (one per atlas) the values
    stored under ``<atlas>/data/<data_tag>`` are paired with the ROI names
    from ``<atlas>/names/item`` and appended, together with the columns of
    ``catlog_templ``, to ``<outfilebase>_rois_<atlas>.csv``. The CSV header
    is written only if that file does not exist yet.

    Parameters
    ----------
    infile
        ROI XML file (path or file object accepted by ElementTree ``parse``).
    outfilebase
        Prefix of the output CSV files.
    catlog_templ : dict
        Record whose keys are the leading CSV columns; it is copied, never
        modified.
    data_tag : str
        Name of the element below ``<atlas>/data`` holding the
        ';'-separated values.
    additional_extractor : callable, optional
        Called as ``additional_extractor(root, atlas, rois, record, columns)``
        and expected to add further values to ``record`` and the matching
        column names to ``columns`` in place.

    Raises
    ------
    AttributeError
        If an atlas has no ``data/<data_tag>`` element.
    """
    # load XML
    root_node = xmlparse(infile).getroot()

    # iterate over the atlases found in the XML
    for child in root_node:
        atlas = child.tag

        # get ROI names
        rois = [
            item.text
            for item in root_node.findall(atlas + '/names/item')
        ]
        # this list will define the output columns; a copy, because the
        # additional extractor may extend it
        roi_names = list(rois)

        # Build the complete record BEFORE touching the output file: opening
        # it first would leave an empty, header-less file behind whenever
        # extraction fails, and a later run would then skip the header.
        catlog = catlog_templ.copy()
        values = root_node.find(
            atlas + '/data/' + data_tag).text.strip('[]')
        for roi, value in zip(roi_names, values.split(';')):
            catlog[roi] = val2out(value)
        if additional_extractor:
            additional_extractor(
                root_node, atlas, rois, catlog, roi_names)

        destfile = Path('{}_rois_{}.csv'.format(outfilebase, atlas))
        need_header = not destfile.is_file()
        # newline='' lets the csv module control line endings itself, as its
        # documentation requires; without it Windows output gets '\r\r\n'
        with destfile.open('a', newline='') as catlog_data:
            writer = csv.DictWriter(
                catlog_data,
                fieldnames=csv_fieldnames + roi_names,
            )
            # if there was no CSV, write the header
            if need_header:
                writer.writeheader()
            # write CSV row
            writer.writerow(catlog)
def add_WM_CSF(root_node, tag, rois, catlog, roi_names):
    """Add the white matter and CSF values of one atlas to a CSV record.

    For ``<tag>/data/Vwm`` and then ``<tag>/data/Vcsf``: if the element has
    non-empty text, its ';'-separated values are stored in ``catlog`` under
    the ROI names suffixed with ``_WM`` / ``_CSF``, and those column names
    are appended to ``roi_names``. Both ``catlog`` and ``roi_names`` are
    modified in place; nothing is returned.
    """
    for data_key, suffix in (('Vwm', '_WM'), ('Vcsf', '_CSF')):
        raw_values = root_node.findtext(tag + '/data/' + data_key)
        if not raw_values:
            # atlas does not provide this tissue class
            continue
        columns = [name + suffix for name in rois]
        for column, volume in zip(columns, raw_values.strip('[]').split(';')):
            catlog[column] = val2out(volume)
        # new columns go at the end
        roi_names.extend(columns)
def main(argv=None):
    """Convert the XML output found below one subject directory into CSV.

    Parameters
    ----------
    argv : sequence of str, optional
        Command line arguments without the program name, i.e.
        ``[<basename>, <path/to/subject>]``. Defaults to ``sys.argv[1:]``.

    The script exits with an error message when arguments are missing, or
    when ROI XML files are found but no report XML provided the QC record
    they need.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit('usage: xml2csv <basename> <path/to/subject>')

    # output base name
    base_name = args[0]
    # path to the data of one subject
    path2data = Path(args[1])
    # extract subject identifier from path
    # NOTE(review): this is the FIRST component of the given path, so the
    # path presumably has to be relative and start with the subject
    # directory -- confirm against how the script is invoked.
    sub = path2data.parts[0]

    # QC record of the report; if several report XMLs exist, the one found
    # last is used for the ROI tables
    catlog = None
    for path in path2data.glob(r'**/report/**/*'):
        if path.suffix == '.xml':
            # load report XML and write the QC table row
            catlog = get_basic_catlog(path, sub, base_name)

    def roi_xml_files(pattern):
        """Yield the '.xml' files matching ``pattern`` below ``path2data``."""
        for path in path2data.glob(pattern):
            if path.suffix != '.xml':
                continue
            if catlog is None:
                # every ROI row starts with the QC columns of the report
                sys.exit(
                    'no report XML found under {}; cannot write ROI '
                    'tables'.format(path2data))
            yield path

    # volume-based atlases: GM volume plus WM/CSF volumes where available
    for path in roi_xml_files(r'**/label/**/catROI_*'):
        xml2csv(path, base_name, catlog, 'Vgm', add_WM_CSF)

    # surface-based atlases: one row per measure, in this order
    # NOTE(review): all measures use the same output base name, so their
    # rows are appended to the same <basename>_rois_<atlas>.csv and no
    # column tells the measures apart -- confirm this is intended.
    surface_measures = (
        'thickness',
        'gyrification',
        'toroGI20mm',
        'area',
        'gmv',
        'depth',
        'fractaldimension',
    )
    for path in roi_xml_files(r'**/label/**/catROIs_*'):
        for measure in surface_measures:
            xml2csv(path, base_name, catlog, measure)


if __name__ == '__main__':
    main()