1
0
Fork 0
id1000-cat12.8.1/code/xml2csv_run-1.py

202 lines
5.6 KiB
Python
Executable file

#!/usr/bin/env python3
# call with xml2csv <basename> <path/to/subject>
#
# writes header only for new files
import sys
import shutil
from xml.etree.ElementTree import parse as xmlparse
import csv
from pathlib import Path
from tempfile import (
TemporaryDirectory,
TemporaryFile,
)
import re
# Leading CSV columns of every output file, in output order (re-ordering
# this list re-orders the columns). The per-ROI columns are appended after
# these when a file is written.
csv_fieldnames = [
    'SubjectID',
    'NCR',  # noise to contrast ratio
    'ICR',  # inhomogeneity to contrast ratio
    'IQR',  # image quality rating
    'TIV',  # total intracranial volume (GM+WM+CSF)
    'GM',   # total gray matter volume
    'WM',   # total white matter volume
    'CSF',  # total cerebrospinal fluid volume
    'WMH',  # total white matter hyperintensities volume
    'TSA',  # total surface area
]
def val2out(str_float):
    """Return a value as a fixed-point string with four decimal places.

    ``str_float`` need not be a float already; anything that ``float()``
    can convert (for example a numeric string) is accepted. Used to give
    all floating point output the same formatting.
    """
    number = float(str_float)
    return format(number, '.4f')
def get_basic_catlog(report_dir, sub):
    """Build the subject-level part of a CSV record from the CAT report.

    Reads ``cat_<sub>_run-1_T1w.xml`` from ``report_dir`` (a
    ``pathlib.Path``) and returns a dict with ``SubjectID`` (set to ``sub``),
    the quality ratings ``IQR``/``NCR``/``ICR``, ``TIV`` and ``TSA``, plus
    the tissue volumes ``CSF``/``GM``/``WM``/``WMH`` for every volume that is
    greater than zero. All numeric values are formatted with ``val2out``.

    Raises ``AttributeError`` when one of the expected XML elements is
    missing (``find()`` returns ``None`` in that case).
    """
    catrep_file = report_dir / 'cat_{}_run-1_T1w.xml'.format(sub)

    # Every literal 'item...' is rewritten to 'item>...' before parsing.
    # NOTE(review): presumably this repairs an unterminated <item> tag that
    # would otherwise make the report invalid XML -- confirm on a real file.
    # A plain bytes replacement is used: the pattern is a fixed string, so
    # no regular expression (and no backslash escaping) is needed.
    patched = catrep_file.read_bytes().replace(b'item...', b'item>...')
    with TemporaryFile() as tf:
        tf.write(patched)
        tf.seek(0)
        catreport = xmlparse(tf).getroot()

    # scalar measures: CSV column -> element path in the report
    scalar_paths = (
        ('IQR', 'qualityratings/IQR'),
        ('NCR', 'qualityratings/NCR'),
        ('ICR', 'qualityratings/ICR'),
        ('TIV', 'subjectmeasures/vol_TIV'),
        ('TSA', 'subjectmeasures/surf_TSA'),
    )
    catlog = {'SubjectID': sub}
    for column, path in scalar_paths:
        catlog[column] = val2out(catreport.find(path).text)

    # vol_abs_CGW is a bracketed, whitespace separated list; its entries are
    # assigned to the tissue columns in this fixed order
    abs_volumes = catreport.find(
        'subjectmeasures/vol_abs_CGW').text.strip('[]')
    for tissue, volume in zip(('CSF', 'GM', 'WM', 'WMH'),
                              abs_volumes.split()):
        # volumes that are not positive are left out of the record
        if float(volume) > 0:
            catlog[tissue] = val2out(volume)
    return catlog
def xml2csv(infile, outfilebase, catlog_templ, data_tag,
            additional_extractor=None):
    """Append one CSV row per atlas found in a CAT ROI XML file.

    Every direct child of the XML root is treated as one atlas. For each of
    them a row is appended to ``<outfilebase>_<atlas tag>.csv``; the header
    row is only written when that file does not exist yet.

    infile -- XML file to read (anything ``ElementTree.parse`` accepts)
    outfilebase -- prefix of the output CSV file names
    catlog_templ -- dict with the subject-level columns; it is copied for
        every row and never modified
    data_tag -- name of the element below ``<atlas>/data`` that holds the
        bracketed, ';'-separated values, one per ROI
    additional_extractor -- optional callable invoked as
        ``additional_extractor(root_node, atlas_tag, rois, catlog,
        roi_names)``; it may add values to ``catlog`` and append the
        matching column names to ``roi_names``

    Raises ``AttributeError`` when an atlas has no ``data_tag`` element.
    """
    root_node = xmlparse(infile).getroot()
    # iterate over the atlases found in the XML
    for child in root_node:
        atlas = child.tag
        destfile = Path('{}_{}.csv'.format(outfilebase, atlas))
        # ROI names of this atlas
        rois = [
            name.text
            for name in root_node.findall(atlas + '/names/item')
        ]
        # this list defines the ROI output columns; the extractor may extend
        # it, so it has to be a copy of ``rois``
        roi_names = list(rois)

        # Build the complete record BEFORE touching the output file: if the
        # XML lacks the requested data, no empty CSV is left behind that
        # would suppress the header on the next run.
        catlog = catlog_templ.copy()
        roi_values = root_node.find(
            atlas + '/data/' + data_tag).text.strip('[]')
        for roi, value in zip(roi_names, roi_values.split(';')):
            catlog[roi] = val2out(value)
        if additional_extractor:
            additional_extractor(
                root_node, atlas, rois, catlog, roi_names)

        # must be checked before open(), which creates the file
        need_header = not destfile.is_file()
        # newline='' is what the csv module requires for its file objects;
        # without it line endings are mangled on platforms that translate
        # '\n' on write
        with destfile.open('a', newline='') as catlog_data:
            writer = csv.DictWriter(
                catlog_data,
                fieldnames=csv_fieldnames + roi_names,
            )
            # if there was no CSV, write the header
            if need_header:
                writer.writeheader()
            writer.writerow(catlog)
def add_WM_CSF(root_node, tag, rois, catlog, roi_names):
    """Add an atlas' white matter and CSF ROI volumes to a CSV record.

    Looks below ``<tag>/data`` for ``Vwm`` and then ``Vcsf``. For each one
    that exists and has text, the ';'-separated values are stored in
    ``catlog`` under ``<ROI name>_WM`` / ``<ROI name>_CSF`` and those column
    names are appended to ``roi_names``. Both ``catlog`` and ``roi_names``
    are modified in place; nothing is returned.
    """
    # WM columns first, CSF columns after them
    for data_tag, suffix in (('Vwm', '_WM'), ('Vcsf', '_CSF')):
        raw = root_node.findtext(tag + '/data/' + data_tag)
        if not raw:
            # element missing or empty: the atlas has no such volumes
            continue
        columns = [name + suffix for name in rois]
        for column, vol in zip(columns, raw.strip('[]').split(';')):
            catlog[column] = val2out(vol)
        roi_names.extend(columns)
# command line: <output base name> <path to the subject's data>
base_name = sys.argv[1]
path2data = Path(sys.argv[2])

# The subject identifier is the FIRST component of the given path.
# NOTE(review): for an absolute or nested path this is not the subject
# directory itself -- confirm how the script is invoked.
sub = path2data.parts[0]

# subject-level values shared by every CSV row written below
catlog = get_basic_catlog(path2data / 'report', sub)

label_dir = path2data / 'label'

# atlas ROI volumes: GM from 'Vgm'; add_WM_CSF appends WM/CSF when present
xml2csv(
    label_dir / 'catROI_{}_run-1_T1w.xml'.format(sub),
    '{}_rois'.format(base_name),
    catlog,
    'Vgm',
    add_WM_CSF,
)

# surface measures: all are read from the same XML file, one family of CSV
# files per measure (output name template, data element name)
surface_xml = label_dir / 'catROIs_{}_run-1_T1w.xml'.format(sub)
surface_measures = (
    ('{}_thickness', 'thickness'),
    ('{}_gyrification', 'gyrification'),
    ('{}_toroGI20mm', 'toroGI20mm'),
    ('{}_surfarea', 'area'),
    ('{}_surfgmvol', 'gmv'),
    ('{}_sulcusdepth', 'depth'),
    ('{}_fractaldim', 'fractaldimension'),
)
for out_template, measure_tag in surface_measures:
    xml2csv(
        surface_xml,
        out_template.format(base_name),
        catlog,
        measure_tag,
    )