217 lines
No EOL
6.6 KiB
Python
Executable file
217 lines
No EOL
6.6 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# call with xml2csv <basename> <path/to/subject>
|
|
#
|
|
# writes header only for new files
|
|
|
|
import sys
|
|
import shutil
|
|
from xml.etree.ElementTree import parse as xmlparse
|
|
import csv
|
|
from pathlib import Path
|
|
from tempfile import (
|
|
TemporaryDirectory,
|
|
TemporaryFile,
|
|
)
|
|
import re
|
|
|
|
|
|
# define CSV columns (changing here will re-order)
|
|
csv_fieldnames = [
    # identification
    'SubjectID',
    'filepath',
    # quality ratings and tissue volumes
    'res_ECR',
    'NCR',
    'ICR',
    'IQR',
    'GM',
    'WM',
    'CSF',
    # global measures and surface quality
    'TIV',
    'TSA',
    'EulerNr',
    'EulerR',
    'SIQR',
]
|
|
|
|
# NCR: noise to contrast ratio
|
|
# ICR: inhomogeneity to contrast ratio
|
|
# IQR: image quality rating
|
|
# TIV: total intracranial volume (GM+WM+CSF)
|
|
# GM: total gray matter volume
|
|
# WM: total white matter volume
|
|
# CSF: total cerebrospinal fluid volume
|
|
# WMH: total white matter hyperintensities volume
|
|
# TSA: total surface area
|
|
|
|
def val2out(str_float):
    """Render a number as text with exactly four decimal places.

    ``str_float`` may be a float or anything ``float()`` accepts, such as
    a numeric string.
    """
    number = float(str_float)
    return format(number, '.4f')
|
|
|
|
def get_basic_catlog(filepath, sub, outfilebase):
    """Extract the QC record from one report XML and append it to the QC CSV.

    Parameters
    ----------
    filepath : pathlib.Path
        Report XML file to read. It is also stored as-is in the
        ``filepath`` column of the record.
    sub
        Subject identifier stored in the ``SubjectID`` column.
    outfilebase
        Output base name; the record is appended to
        ``<outfilebase>_QC.csv``. The header row is written only when that
        file does not exist yet or is empty.

    Returns
    -------
    dict
        The record that was written, keyed by CSV column name. The tissue
        volumes ``CSF``, ``GM`` and ``WM`` are only present when the
        reported value is greater than zero; absent columns are left empty
        in the CSV.

    Raises
    ------
    AttributeError
        If one of the expected XML elements is missing (``find`` returns
        ``None`` in that case).
    """
    # (CSV column, XML path) for every scalar copied straight from the report
    scalar_sources = (
        ('res_ECR', 'qualityratings/res_ECR'),
        ('NCR', 'qualityratings/NCR'),
        ('ICR', 'qualityratings/ICR'),
        ('IQR', 'qualityratings/IQR'),
        ('EulerNr', 'qualitymeasures//SurfaceEulerNumber'),
        ('EulerR', 'qualityratings//SurfaceEulerNumber'),
        ('SIQR', 'qualityratings/SIQR'),
        ('TIV', 'subjectmeasures/vol_TIV'),
        ('TSA', 'subjectmeasures/surf_TSA'),
    )

    # The raw file can contain the literal byte sequence ``item...``; it is
    # rewritten to ``item>...`` before parsing (presumably a truncated tag
    # that would otherwise break the XML parser -- TODO confirm).
    # The pattern is a raw bytes literal so ``\.`` is not an invalid escape.
    with TemporaryFile() as tf:
        tf.write(re.sub(rb'item\.\.\.', b'item>...', filepath.read_bytes()))
        tf.seek(0)
        catreport = xmlparse(tf).getroot()

    # build CSV record
    catlog = {'SubjectID': sub, 'filepath': filepath}
    for column, xml_path in scalar_sources:
        catlog[column] = val2out(catreport.find(xml_path).text)

    # tissue volumes: whitespace-separated values inside [...], consumed in
    # CSF, GM, WM order; non-positive values are left out of the record
    abs_volumes = catreport.find(
        'subjectmeasures/vol_abs_CGW').text.strip('[]')
    for tissue, volume in zip(('CSF', 'GM', 'WM'), abs_volumes.split()):
        if float(volume) > 0:
            catlog[tissue] = val2out(volume)

    # write QC only file
    destfile = Path('{}_QC.csv'.format(outfilebase))
    # an existing but empty file still needs its header
    need_header = not destfile.is_file() or destfile.stat().st_size == 0
    # newline='' lets the csv module control line endings itself
    with destfile.open('a', newline='') as catlog_data:
        writer = csv.DictWriter(catlog_data, fieldnames=csv_fieldnames)
        if need_header:
            writer.writeheader()
        writer.writerow(catlog)
    return catlog
|
|
|
|
|
|
def xml2csv(infile, outfilebase, catlog_templ, data_tag,
            additional_extractor=None):
    """Append one CSV row of per-ROI values for every top-level XML child.

    Each child element of the XML root is treated as one atlas. The ROI
    names under ``<atlas>/names/item`` are paired, in order, with the
    ``;``-separated values under ``<atlas>/data/<data_tag>`` and appended as
    one row to ``<outfilebase>_rois_<atlas>.csv``. The header row is written
    only when that file does not exist yet or is empty.

    Parameters
    ----------
    infile
        XML file to parse (anything ``xml.etree.ElementTree.parse`` accepts).
    outfilebase
        Base name of the output CSV files.
    catlog_templ : dict
        Record copied into every row before the ROI values are added; it is
        not modified.
    data_tag : str
        Name of the element below ``<atlas>/data`` that holds the values.
    additional_extractor : callable, optional
        Called as ``additional_extractor(root, atlas, rois, record,
        columns)`` after the ROI values were added; it may add entries to
        ``record`` and append the matching names to ``columns``.

    Raises
    ------
    AttributeError
        If an atlas has no ``data/<data_tag>`` element.
    """
    # load XML
    root_node = xmlparse(infile).getroot()
    for child in root_node:
        atlas = child.tag
        # get ROI names
        rois = [
            item.text
            for item in root_node.findall(atlas + '/names/item')
        ]
        # this list will define the output columns
        roi_names = list(rois)

        # Build the complete record BEFORE touching the output file, so a
        # failing extraction cannot leave behind an empty, header-less CSV.
        catlog = catlog_templ.copy()
        roi_values = root_node.find(
            atlas + '/data/' + data_tag).text.strip('[]')
        for roi_name, value in zip(roi_names, roi_values.split(';')):
            catlog[roi_name] = val2out(value)
        if additional_extractor:
            additional_extractor(
                root_node, atlas, rois, catlog, roi_names)

        destfile = Path('{}_rois_{}.csv'.format(outfilebase, atlas))
        # an existing but empty file still needs its header
        need_header = not destfile.is_file() or destfile.stat().st_size == 0
        # newline='' lets the csv module control line endings itself
        with destfile.open('a', newline='') as catlog_data:
            writer = csv.DictWriter(
                catlog_data,
                fieldnames=csv_fieldnames + roi_names,
            )
            if need_header:
                writer.writeheader()
            writer.writerow(catlog)
|
|
|
|
|
|
def add_WM_CSF(root_node, tag, rois, catlog, roi_names):
    """Add an atlas' per-ROI WM and CSF volumes to a record, when present.

    For ``Vwm`` and then ``Vcsf`` below ``<tag>/data``: if the element has
    text, its ``;``-separated values are stored in ``catlog`` under
    ``<roi>_WM`` / ``<roi>_CSF`` and those column names are appended to
    ``roi_names``. Both ``catlog`` and ``roi_names`` are modified in place;
    nothing is returned.
    """
    # WM first, then CSF, so the extra columns keep that order
    for data_tag, suffix in (('Vwm', '_WM'), ('Vcsf', '_CSF')):
        raw = root_node.findtext(tag + '/data/' + data_tag)
        if not raw:
            # element missing or empty: this atlas has no such volume
            continue
        columns = [name + suffix for name in rois]
        for column, volume in zip(columns, raw.strip('[]').split(';')):
            catlog[column] = val2out(volume)
        roi_names.extend(columns)
|
|
|
|
|
|
# usage: xml2csv <basename> <path/to/subject>
if len(sys.argv) < 3:
    sys.exit('usage: {} <basename> <path/to/subject>'.format(sys.argv[0]))

# output base name
base_name = sys.argv[1]

# directory that is searched for report and label XML files
path2data = Path(sys.argv[2])

# subject identifier: the FIRST component of the path as given
# NOTE(review): this only yields the subject when the path is passed relative
# to the directory that contains the subject folder -- confirm with callers.
sub = path2data.parts[0]

# QC record: every report XML is appended to the QC table; the record of the
# last one found is reused as the leading columns of all ROI rows below
catlog = None
for path in path2data.glob(r'**/report/**/*'):
    if path.suffix == '.xml':
        catlog = get_basic_catlog(path, sub, base_name)

# measures extracted from every catROIs_* file, in output order
# NOTE(review): xml2csv names its output only after the base name and the
# atlas, so the rows of all these measures end up in the same CSV without a
# column telling them apart -- confirm this is intended.
surface_measures = (
    'thickness',
    'gyrification',
    'toroGI20mm',
    'area',
    'gmv',
    'depth',
    'fractaldimension',
)

# (file, data tag, extra extractor) for every ROI table row to write:
# volume atlases first (GM plus optional WM/CSF), then the surface measures
roi_jobs = [
    (path, 'Vgm', add_WM_CSF)
    for path in path2data.glob(r'**/label/**/catROI_*')
    if path.suffix == '.xml'
]
roi_jobs += [
    (path, measure, None)
    for path in path2data.glob(r'**/label/**/catROIs_*')
    if path.suffix == '.xml'
    for measure in surface_measures
]

# ROI rows need the QC record; fail with a message instead of a NameError
if roi_jobs and catlog is None:
    sys.exit('no report XML found below {}; cannot write ROI tables'.format(
        path2data))

for path, data_tag, extractor in roi_jobs:
    xml2csv(path, base_name, catlog, data_tag, extractor)
|
|
|