# Source code for firescipy.instruments.netzsch_sta

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

from io import StringIO

import pandas as pd

from .base import InstrumentFile
from .helpers import split_line, strip_empty_edges, row_has_numeric_content, try_convert_to_float


def read_netzsch_sta_file(file_path):
    """
    Parse a NETZSCH export file in a single call.

    Builds a NetzschSTAParser for ``file_path`` and immediately runs its
    ``parse()`` method.

    Parameters
    ----------
    file_path : str or Path
        Path to the NETZSCH export file.

    Returns
    -------
    tuple
        Whatever ``NetzschSTAParser.parse()`` returns, i.e. the pair
        ``(meta, data_df)``.
    """
    return NetzschSTAParser(file_path=file_path).parse()
class NetzschSTAParser:
    """
    Parser for NETZSCH STA/DSC/TGA ASCII export files.

    Expected structure
    ------------------
    - metadata lines starting with '#'
    - table header line starting with '##'
    - data rows below the table header

    Example
    -------
        #EXPORTTYPE: ;DATA ALL ;;;
        #DECIMAL: ;COMMA ;;;
        #SEPARATOR: ;SEMICOLON;;;
        ...
        ##Temp./°C;Time/min;DSC/(mW/mg);Mass/%
        29,958;0;7,67E-02;99,99772
        ...
    """

    # Maps the keyword found in the DECIMAL metadata field to the actual character.
    DECIMAL_MAP = {
        "COMMA": ",",
        "POINT": ".",
        "DOT": ".",
    }

    # Maps the keyword found in the SEPARATOR metadata field to the actual character.
    SEPARATOR_MAP = {
        "SEMICOLON": ";",
        "COMMA": ",",
        "TAB": "\t",
    }

    # Metadata fields whose values should be converted to numbers.
    # All other fields are kept as strings.
    NUMERIC_METADATA_KEYS = {
        "SAMPLE MASS /mg",
        "REFERENCE MASS /mg",
        "SAMPLE CRUCIBLE MASS /mg",
        "REFERENCE CRUCIBLE MASS /mg",
    }

    def __init__(self, file_path, instrument_file=None):
        """
        Parameters
        ----------
        file_path : str or Path
            Path to the NETZSCH export file.
        instrument_file : InstrumentFile, optional
            Pre-loaded InstrumentFile instance. If None, one is created
            from ``file_path``.
        """
        self.file_path = file_path

        # Compare against None explicitly: a caller-supplied object that
        # happens to be falsy (e.g. it defines __len__) must still be used
        # instead of being silently replaced by a fresh read from disk.
        if instrument_file is None:
            instrument_file = InstrumentFile(file_path).read()
        self.file = instrument_file

        self.meta = dict()   # will hold all metadata key-value pairs
        self.data_df = None  # will hold the measurement table as a DataFrame

        # These are set while scanning for the '##' column header line.
        self.table_header_idx = None   # line index of the '##' row
        self.table_header_line = None  # the '##' row content (without the '##' prefix)

    def parse(self):
        """
        Parse metadata and data table.

        Returns
        -------
        meta : dict
            Parsed metadata.
        data_df : pandas.DataFrame
            Parsed measurement table.

        Raises
        ------
        ValueError
            If no table header line starting with '##' is found.
        """
        self._parse_metadata_and_find_table_header()
        self._parse_data_table()

        # Record which encoding was used so the caller can inspect it.
        self.meta["USED_ENCODING"] = self.file.used_encoding

        return self.meta, self.data_df

    def _parse_metadata_and_find_table_header(self):
        """
        Parse the metadata block and locate the table header line.

        Fills ``self.meta`` and sets ``self.table_header_idx`` /
        ``self.table_header_line``.

        Raises
        ------
        ValueError
            If no line starting with '##' exists.
        """
        # Determine the column separator up front. Without this, the
        # SEPARATOR line (and everything before it) would be split with the
        # default ';', so a non-semicolon separator could never be detected.
        # None means "not declared / not recognised": fall back per line.
        separator_hint = self._sniff_column_separator()

        for idx, raw_line in enumerate(self.file.lines):
            line = raw_line.strip()

            # Skip blank lines.
            if not line:
                continue

            # A line starting with '##' is the column header: stop scanning metadata.
            if line.startswith("##"):
                self.table_header_idx = idx
                # Strip the leading '##' to get the raw column name string.
                self.table_header_line = line[2:].strip()
                break

            # A line starting with a single '#' is a metadata line.
            if line.startswith("#"):
                parsed = self._parse_metadata_line(line, column_sep=separator_hint)
                if parsed is not None:
                    key, value = parsed
                    self.meta[key] = value

        # Convert numeric metadata fields from strings to numbers.
        self._postprocess_metadata()

        if self.table_header_idx is None:
            raise ValueError("Could not find table header line starting with '##'.")

    def _sniff_column_separator(self):
        """
        Look ahead for the ``#SEPARATOR:`` metadata line and return the
        separator character it declares.

        The line is inspected without splitting it into cells, so the result
        does not depend on already knowing the separator.

        Returns
        -------
        str or None
            The separator character from SEPARATOR_MAP, or None when no
            SEPARATOR line precedes the table header or its keyword is not
            recognised.
        """
        # Characters that may pad the keyword: whitespace plus every
        # candidate separator (e.g. " ,COMMA,,," -> "COMMA").
        filler = " \t\r\n" + "".join(self.SEPARATOR_MAP.values())

        for raw_line in self.file.lines:
            line = raw_line.strip()
            if line.startswith("##"):
                break
            if not line.startswith("#"):
                continue

            key, colon, rest = line.lstrip("#").partition(":")
            if colon and key.strip() == "SEPARATOR":
                return self.SEPARATOR_MAP.get(rest.strip(filler).upper())

        return None

    def _parse_metadata_line(self, line, column_sep=None):
        """
        Parse one metadata line of the form:

            #KEY: ;VALUE ;;;

        Parameters
        ----------
        line : str
            The metadata line, including its leading '#'.
        column_sep : str, optional
            Column separator to split with. If None, the separator is taken
            from the metadata collected so far (default ';').

        Returns
        -------
        tuple[str, str | list[str] | None] or None
            ``(key, value)``, or None if the line has no cells or its first
            cell contains no ':'.
        """
        # Remove the leading '#' characters before processing.
        cleaned = line.lstrip("#").strip()

        if column_sep is None:
            column_sep = self._get_column_separator()

        # Split on the column separator and remove empty filler cells
        # (project helpers; presumably strips empty cells at the edges).
        cells = strip_empty_edges(split_line(cleaned, sep=column_sep))
        if not cells:
            return None

        first_cell = cells[0]

        # Only lines containing ":" are treated as metadata.
        if ":" not in first_cell:
            return None

        # Split "KEY: value" into key and its first value fragment.
        key, first_value = first_cell.split(":", maxsplit=1)
        key = key.strip()
        first_value = first_value.strip()

        # Collect all non-empty value fragments (some fields span multiple cells).
        values = [first_value] if first_value else []
        values.extend(cell.strip() for cell in cells[1:] if cell.strip())

        # Store as None, a single string, or a list depending on how many
        # value fragments were found.
        if not values:
            value = None
        elif len(values) == 1:
            value = values[0]
        else:
            value = values

        return key, value

    def _postprocess_metadata(self):
        """
        Convert selected metadata values to numeric types where appropriate.
        """
        decimal_sep = self._get_decimal_separator()

        # Only process the keys listed in NUMERIC_METADATA_KEYS.
        for key in self.NUMERIC_METADATA_KEYS:
            if key not in self.meta:
                continue
            self.meta[key] = try_convert_to_float(self.meta[key], decimal_sep)

    def _parse_data_table(self):
        """
        Parse the measurement table below the table header into
        ``self.data_df``.

        Raises
        ------
        RuntimeError
            If the table header has not been located yet.
        """
        if self.table_header_idx is None or self.table_header_line is None:
            raise RuntimeError("Table header information is missing.")

        decimal_sep = self._get_decimal_separator()
        column_sep = self._get_column_separator()
        column_names = self._get_column_names(column_sep)

        # Take all lines after the column header row and join them back into
        # a single string so pandas can read it like a file.
        table_lines = self.file.lines[self.table_header_idx + 1:]
        table_text = "\n".join(table_lines)

        data_df = pd.read_csv(
            StringIO(table_text),
            sep=column_sep,
            decimal=decimal_sep,
            header=None,  # column names are provided manually via 'names'
            names=column_names,
            # Never infer an index: if rows carry more fields than there are
            # names (e.g. trailing delimiters), pandas would otherwise turn
            # the leading data columns into the index and shift all labels.
            index_col=False,
            engine="python",
            skip_blank_lines=True,
        )

        # Clean up any stray '#' prefixes that some exports add to column names.
        data_df.columns = [col.lstrip("#").strip() for col in data_df.columns]

        # Drop columns that are entirely empty (padding artefact in some exports).
        data_df = data_df.dropna(axis=1, how="all")

        self.data_df = data_df

    def _lookup_token(self, key, default_token, mapping, fallback):
        """
        Translate the metadata keyword stored under ``key`` via ``mapping``.

        A value stored as a list (several fragments on one metadata line) is
        represented by its first fragment; unknown keywords yield ``fallback``.
        """
        value = self.meta.get(key, default_token)
        if isinstance(value, (list, tuple)):
            value = value[0] if value else default_token
        return mapping.get(str(value).strip().upper(), fallback)

    def _get_decimal_separator(self):
        """
        Determine decimal separator from metadata.
        """
        # Look for a "DECIMAL" entry in metadata; fall back to "POINT" (i.e. ".").
        return self._lookup_token("DECIMAL", "POINT", self.DECIMAL_MAP, ".")

    def _get_column_separator(self):
        """
        Determine column separator from metadata.
        """
        # Look for a "SEPARATOR" entry in metadata; fall back to semicolon.
        return self._lookup_token("SEPARATOR", "SEMICOLON", self.SEPARATOR_MAP, ";")

    def _get_column_names(self, column_sep):
        """
        Parse and clean table column names.
        """
        column_names = split_line(self.table_header_line, sep=column_sep)

        # Remove any leading '#' characters from column names.
        return [name.lstrip("#").strip() for name in column_names]