Skip to content

Data Types API

src.core.data_types

Core data type definitions for diabetes data processing.

This module defines the core data types and structures used for processing diabetes device data exports. It supports multiple file formats, different units of measurement, and various data types commonly found in diabetes management tools.

The structure allows for:
  • Multiple files in a single format
  • Multiple data types per table
  • Different file types (SQLite, CSV, etc.)
  • Flexible column mapping
  • Primary/secondary data distinction

FileType

Bases: Enum

Supported file types for diabetes data.

Source code in src/core/data_types.py
class FileType(Enum):
    """Supported file types for diabetes data.

    Each member's value is the lowercase string identifier of the file type.
    """

    SQLITE = "sqlite"  # SQLite database file
    CSV = "csv"  # CSV file (FileConfig allows only one, unnamed, table for this type)
    JSON = "json"  # JSON file
    XML = "xml"  # XML file

SQLITE = 'sqlite' class-attribute instance-attribute

CSV = 'csv' class-attribute instance-attribute

JSON = 'json' class-attribute instance-attribute

XML = 'xml' class-attribute instance-attribute

DataType

Bases: Enum

Core diabetes data types.

Source code in src/core/data_types.py
class DataType(Enum):
    """Core diabetes data types.

    Member values are generated by ``auto()``, so they are integers assigned
    in definition order; use ``.name`` when a readable label is needed.
    """

    # CGM Data
    CGM = auto()  # Continuous glucose monitoring data

    # BGM Data
    BGM = auto()  # Blood glucose meter readings

    # Treatment Data
    INSULIN = auto()  # Insulin doses
    INSULIN_META = auto()  # Insulin metadata, e.g. brand
    CARBS = auto()  # Carbohydrate intake
    NOTES = auto()  # Text notes/comments

CGM = auto() class-attribute instance-attribute

BGM = auto() class-attribute instance-attribute

INSULIN = auto() class-attribute instance-attribute

INSULIN_META = auto() class-attribute instance-attribute

CARBS = auto() class-attribute instance-attribute

NOTES = auto() class-attribute instance-attribute

TimestampType

Bases: Enum

Common timestamp formats, identified so that timestamps can be converted correctly.

Source code in src/core/data_types.py
class TimestampType(Enum):
    """Common timestamp formats, identified so timestamps can be converted correctly.

    Each member's value is the lowercase string identifier of the format.
    """

    UNIX_SECONDS = "unix_seconds"  # Unix epoch time in seconds
    UNIX_MILLISECONDS = "unix_milliseconds"  # Unix epoch time in milliseconds
    UNIX_MICROSECONDS = "unix_microseconds"  # Unix epoch time in microseconds
    ISO_8601 = "iso_8601"  # ISO 8601 formatted date/time
    UNKNOWN = "unknown"  # Format not identified (presumably set by detection code elsewhere)

UNIX_SECONDS = 'unix_seconds' class-attribute instance-attribute

UNIX_MILLISECONDS = 'unix_milliseconds' class-attribute instance-attribute

UNIX_MICROSECONDS = 'unix_microseconds' class-attribute instance-attribute

ISO_8601 = 'iso_8601' class-attribute instance-attribute

UNKNOWN = 'unknown' class-attribute instance-attribute

ColumnRequirement

Bases: Enum

Defines how a column should be validated and whether its data must be read.

Source code in src/core/data_types.py
class ColumnRequirement(Enum):
    """Defines how a column should be validated and whether its data must be read.

    Member values are generated by ``auto()``.
    """

    CONFIRMATION_ONLY = auto()  # Just needs to exist - no data read
    REQUIRED_WITH_DATA = auto()  # Must exist - data read & fail if not
    REQUIRED_NULLABLE = auto()  # Must exist, can have all missing values - data read
    OPTIONAL = auto()  # May or may not exist - data read

CONFIRMATION_ONLY = auto() class-attribute instance-attribute

REQUIRED_WITH_DATA = auto() class-attribute instance-attribute

REQUIRED_NULLABLE = auto() class-attribute instance-attribute

OPTIONAL = auto() class-attribute instance-attribute

Unit

Bases: Enum

Supported units of measurement.

Source code in src/core/data_types.py
class Unit(Enum):
    """Supported units of measurement.

    Each member's value is the unit's written symbol.
    """

    MGDL = "mg/dL"  # Blood glucose in mg/dL
    MMOL = "mmol/L"  # Blood glucose in mmol/L
    UNITS = "U"  # Insulin units
    GRAMS = "g"  # Carbohydrates in grams

MGDL = 'mg/dL' class-attribute instance-attribute

MMOL = 'mmol/L' class-attribute instance-attribute

UNITS = 'U' class-attribute instance-attribute

GRAMS = 'g' class-attribute instance-attribute

ColumnMapping dataclass

Maps source columns to standardized data types.

Parameters:

Name Type Description Default
source_name str

Original column name in the data source

required
data_type Optional[DataType]

Type of data this column contains, if applicable (any column, including one without a data type, can be used to confirm the device).

None
unit Optional[Unit]

Unit of measurement (if applicable)

None
requirement ColumnRequirement

Type of requirement - default = REQUIRED_WITH_DATA

REQUIRED_WITH_DATA
is_primary bool

Whether this is the primary column - default = True

True

Examples:

>>> glucose_column = ColumnMapping(
...     source_name="calculated_value",
...     data_type=DataType.CGM,
...     unit=Unit.MGDL,
... )
>>> raw_glucose = ColumnMapping(
...     source_name="raw_data",
...     data_type=DataType.CGM,
...     requirement=ColumnRequirement.REQUIRED_NULLABLE,
...     is_primary=False
... )
Source code in src/core/data_types.py
@dataclass
class ColumnMapping:
    """Maps a source column to a standardized data type.

    Args:
        source_name: Original column name in the data source
        data_type: Type of data this column contains, or None when not
            applicable (any column can be used for confirming the device)
        unit: Unit of measurement (if applicable)
        requirement: Type of requirement - default = REQUIRED_WITH_DATA
        is_primary: Whether this is the primary column - default = True

    Note:
        ``is_primary`` defaults to True even when ``data_type`` is left as
        None, so code that reads ``data_type`` of primary columns must allow
        for None.

    Examples:
        >>> glucose_column = ColumnMapping(
        ...     source_name="calculated_value",
        ...     data_type=DataType.CGM,
        ...     unit=Unit.MGDL,
        ... )
        >>> raw_glucose = ColumnMapping(
        ...     source_name="raw_data",
        ...     data_type=DataType.CGM,
        ...     requirement=ColumnRequirement.REQUIRED_NULLABLE,
        ...     is_primary=False
        ... )
    """

    # Only source_name is mandatory; every other field has a default.
    source_name: str
    data_type: Optional[DataType] = None
    unit: Optional[Unit] = None
    requirement: ColumnRequirement = ColumnRequirement.REQUIRED_WITH_DATA
    is_primary: bool = True

source_name: str instance-attribute

data_type: Optional[DataType] = None class-attribute instance-attribute

unit: Optional[Unit] = None class-attribute instance-attribute

requirement: ColumnRequirement = ColumnRequirement.REQUIRED_WITH_DATA class-attribute instance-attribute

is_primary: bool = True class-attribute instance-attribute

__init__(source_name: str, data_type: Optional[DataType] = None, unit: Optional[Unit] = None, requirement: ColumnRequirement = ColumnRequirement.REQUIRED_WITH_DATA, is_primary: bool = True) -> None

TableStructure dataclass

Defines the structure of a data table.

Parameters:

Name Type Description Default
name str

Table name in the data source (empty string for CSV files)

required
timestamp_column str

Name of the timestamp column

required
columns List[ColumnMapping]

List of column mappings

required

Examples:

>>> bgreadings = TableStructure(
...     name="bgreadings",
...     timestamp_column="timestamp",
...     columns=[
...         ColumnMapping(
...             source_name="calculated_value",
...             data_type=DataType.CGM,
...             unit=Unit.MGDL
...         ),
...         ColumnMapping(
...             source_name="raw_data",
...             data_type=DataType.CGM,
...             requirement=ColumnRequirement.REQUIRED_NULLABLE,
...             is_primary=False
...         )
...     ]
... )
Source code in src/core/data_types.py
@dataclass
class TableStructure:
    """Defines the structure of a data table.

    The structure is validated as soon as it is constructed: it must contain
    at least one column, column source names must be unique, and each data
    type may have at most one primary column.

    Args:
        name: Table name in the data source (empty string for CSV files)
        timestamp_column: Name of the timestamp column
        columns: List of column mappings

    Raises:
        FormatValidationError: If any of the validation checks fails

    Examples:
        >>> bgreadings = TableStructure(
        ...     name="bgreadings",
        ...     timestamp_column="timestamp",
        ...     columns=[
        ...         ColumnMapping(
        ...             source_name="calculated_value",
        ...             data_type=DataType.CGM,
        ...             unit=Unit.MGDL
        ...         ),
        ...         ColumnMapping(
        ...             source_name="raw_data",
        ...             data_type=DataType.CGM,
        ...             requirement=ColumnRequirement.REQUIRED_NULLABLE,
        ...             is_primary=False
        ...         )
        ...     ]
        ... )
    """

    name: str
    timestamp_column: str
    columns: List[ColumnMapping]

    def validate_columns(self):
        """Validate that table has at least one column defined.

        Raises:
            FormatValidationError: If table has no columns defined
        """
        if not self.columns:
            raise FormatValidationError(
                f"Table {self.name} must have at least one column defined",
                details={"table_name": self.name, "columns_count": 0},
            )

    def validate_unique_source_names(self):
        """Validate that all column names are unique.

        Each duplicated name is reported once, in the order in which its
        first repeat appears in ``self.columns``, so the error details are
        the same on every run.

        Raises:
            FormatValidationError: If duplicate column names are found
        """
        seen = set()
        duplicates = []
        for col in self.columns:
            source_name = col.source_name
            if source_name not in seen:
                seen.add(source_name)
            elif source_name not in duplicates:
                # Second or later occurrence: record the name only once.
                duplicates.append(source_name)

        if duplicates:
            raise FormatValidationError(
                f"Duplicate column names in table {self.name}",
                details={"table_name": self.name, "duplicate_columns": duplicates},
            )

    def validate_primary_columns(self):
        """Validate that each data type has at most one primary column.

        Columns whose ``data_type`` is None never match a ``DataType`` member
        and are therefore not counted.

        Raises:
            FormatValidationError: If multiple primary columns exist for any data type
        """
        for data_type in DataType:
            primary_columns = [
                col.source_name
                for col in self.columns
                if col.data_type == data_type and col.is_primary
            ]
            if len(primary_columns) > 1:
                raise FormatValidationError(
                    # DataType values come from auto() and are plain integers,
                    # so the member name is used to keep the message readable.
                    f"Multiple primary columns for {data_type.name} in table {self.name}",
                    details={
                        "table_name": self.name,
                        # Left as the raw enum value so existing consumers of
                        # the details payload see no change.
                        "data_type": data_type.value,
                        "primary_columns": primary_columns,
                    },
                )

    def __post_init__(self):
        """Run all structural validations once the dataclass fields are set.

        Raises:
            FormatValidationError: If any validation check fails
        """
        self.validate_columns()
        self.validate_unique_source_names()
        self.validate_primary_columns()

name: str instance-attribute

timestamp_column: str instance-attribute

columns: List[ColumnMapping] instance-attribute

__init__(name: str, timestamp_column: str, columns: List[ColumnMapping]) -> None

validate_columns()

Validate that table has at least one column defined.

Raises:

Type Description
FormatValidationError

If table has no columns defined

Source code in src/core/data_types.py
def validate_columns(self):
    """Ensure the table declares at least one column.

    Raises:
        FormatValidationError: If table has no columns defined
    """
    # Nothing to report when any column is present.
    if self.columns:
        return

    raise FormatValidationError(
        f"Table {self.name} must have at least one column defined",
        details={"table_name": self.name, "columns_count": 0},
    )

validate_unique_source_names()

Validate that all column names are unique.

Raises:

Type Description
FormatValidationError

If duplicate column names are found

Source code in src/core/data_types.py
def validate_unique_source_names(self):
    """Validate that all column names are unique.

    Each duplicated name is reported once, in the order in which its first
    repeat appears in ``self.columns``, so the error details are the same on
    every run.

    Raises:
        FormatValidationError: If duplicate column names are found
    """
    seen = set()
    duplicates = []
    for col in self.columns:
        source_name = col.source_name
        if source_name not in seen:
            seen.add(source_name)
        elif source_name not in duplicates:
            # Second or later occurrence: record the name only once.
            duplicates.append(source_name)

    if duplicates:
        raise FormatValidationError(
            f"Duplicate column names in table {self.name}",
            details={"table_name": self.name, "duplicate_columns": duplicates},
        )

validate_primary_columns()

Validate that each data type has at most one primary column.

Raises:

Type Description
FormatValidationError

If multiple primary columns exist for any data type

Source code in src/core/data_types.py
def validate_primary_columns(self):
    """Validate that each data type has at most one primary column.

    Columns whose ``data_type`` is None never match a ``DataType`` member and
    are therefore not counted.

    Raises:
        FormatValidationError: If multiple primary columns exist for any data type
    """
    for data_type in DataType:
        primary_columns = [
            col.source_name
            for col in self.columns
            if col.data_type == data_type and col.is_primary
        ]
        if len(primary_columns) > 1:
            raise FormatValidationError(
                # DataType values come from auto() and are plain integers, so
                # the member name is used to keep the message readable.
                f"Multiple primary columns for {data_type.name} in table {self.name}",
                details={
                    "table_name": self.name,
                    # Left as the raw enum value so existing consumers of the
                    # details payload see no change.
                    "data_type": data_type.value,
                    "primary_columns": primary_columns,
                },
            )

__post_init__()

Source code in src/core/data_types.py
def __post_init__(self):
    """Run all structural validations once the dataclass fields are set.

    Raises:
        FormatValidationError: If any validation check fails
    """
    # The emptiness check runs first, followed by the uniqueness and
    # primary-column checks.
    self.validate_columns()
    self.validate_unique_source_names()
    self.validate_primary_columns()

FileConfig dataclass

Configuration for a specific file in a device format.

Parameters:

Name Type Description Default
name_pattern str

Pattern to match filename (e.g., "*.sqlite", "glucose.csv")

required
file_type FileType

Type of the data file

required
tables List[TableStructure]

List of table structures in the file

required

Examples:

>>> sqlite_file = FileConfig(
...     name_pattern="*.sqlite",
...     file_type=FileType.SQLITE,
...     tables=[bgreadings]  # TableStructure from previous example
... )
Source code in src/core/data_types.py
@dataclass
class FileConfig:
    """Describes one file that belongs to a device format.

    Args:
        name_pattern: Pattern to match filename (e.g., "*.sqlite", "glucose.csv")
        file_type: Type of the data file
        tables: List of table structures in the file

    Examples:
        >>> sqlite_file = FileConfig(
        ...     name_pattern="*.sqlite",
        ...     file_type=FileType.SQLITE,
        ...     tables=[bgreadings]  # TableStructure from previous example
        ... )
    """

    name_pattern: str
    file_type: FileType
    tables: List[TableStructure]

    def __post_init__(self):
        """Check the configuration once the dataclass fields are set.

        A file must declare at least one table. CSV files are additionally
        limited to a single table whose name is the empty string.

        Raises:
            FormatValidationError: If file configuration is invalid
        """
        if not self.tables:
            raise FormatValidationError(
                f"File {self.name_pattern} must have at least one table defined",
                details={"file_pattern": self.name_pattern},
            )

        # Only CSV files carry the extra single, unnamed table constraint.
        if self.file_type != FileType.CSV:
            return

        table_count = len(self.tables)
        if table_count > 1:
            raise FormatValidationError(
                "CSV files can only have one table structure",
                details={
                    "file_pattern": self.name_pattern,
                    "tables_count": table_count,
                },
            )

        sole_table_name = self.tables[0].name
        if sole_table_name != "":
            raise FormatValidationError(
                f"CSV file table name should be empty string for file {self.name_pattern}",
                details={
                    "file_pattern": self.name_pattern,
                    "table_name": sole_table_name,
                },
            )

name_pattern: str instance-attribute

file_type: FileType instance-attribute

tables: List[TableStructure] instance-attribute

__init__(name_pattern: str, file_type: FileType, tables: List[TableStructure]) -> None

__post_init__()

Validate file configuration after initialization.

Raises:

Type Description
FormatValidationError

If file configuration is invalid

Source code in src/core/data_types.py
def __post_init__(self):
    """Check the configuration once the dataclass fields are set.

    A file must declare at least one table. CSV files are additionally
    limited to a single table whose name is the empty string.

    Raises:
        FormatValidationError: If file configuration is invalid
    """
    if not self.tables:
        raise FormatValidationError(
            f"File {self.name_pattern} must have at least one table defined",
            details={"file_pattern": self.name_pattern},
        )

    # Only CSV files carry the extra single, unnamed table constraint.
    if self.file_type != FileType.CSV:
        return

    table_count = len(self.tables)
    if table_count > 1:
        raise FormatValidationError(
            "CSV files can only have one table structure",
            details={
                "file_pattern": self.name_pattern,
                "tables_count": table_count,
            },
        )

    sole_table_name = self.tables[0].name
    if sole_table_name != "":
        raise FormatValidationError(
            f"CSV file table name should be empty string for file {self.name_pattern}",
            details={
                "file_pattern": self.name_pattern,
                "table_name": sole_table_name,
            },
        )

DeviceFormat dataclass

Complete format specification for a diabetes device data export.

Parameters:

Name Type Description Default
name str

Name of the device/format

required
files List[FileConfig]

List of file configurations

required

Examples:

>>> xdrip_format = DeviceFormat(
...     name="xdrip_sqlite",
...     files=[sqlite_file]  # FileConfig from previous example
... )
Source code in src/core/data_types.py
@dataclass
class DeviceFormat:
    """Complete format specification for a diabetes device data export.

    Args:
        name: Name of the device/format
        files: List of file configurations

    Raises:
        FormatValidationError: If no file configuration is supplied

    Examples:
        >>> xdrip_format = DeviceFormat(
        ...     name="xdrip_sqlite",
        ...     files=[sqlite_file]  # FileConfig from previous example
        ... )
    """

    name: str
    files: List[FileConfig]

    def __post_init__(self):
        """Validate device format after initialization.

        Raises:
            FormatValidationError: If device format is invalid
        """
        if not self.files:
            raise FormatValidationError(
                f"Device format {self.name} must have at least one file defined",
                details={"format_name": self.name},
            )

    def __str__(self) -> str:
        """String representation including available data types.

        Returns:
            The format name followed by the alphabetically sorted names of
            the data types that have a primary column in any table of any
            file.
        """
        types = set()
        for file_config in self.files:
            for table in file_config.tables:
                for column in table.columns:
                    # A column is primary by default even when it has no data
                    # type (data_type=None); such a column contributes no
                    # data type and must not be dereferenced.
                    if column.is_primary and column.data_type is not None:
                        types.add(column.data_type.name)
        return f"{self.name} - Available data: {', '.join(sorted(types))}"

name: str instance-attribute

files: List[FileConfig] instance-attribute

__init__(name: str, files: List[FileConfig]) -> None

__post_init__()

Validate device format after initialization.

Raises:

Type Description
FormatValidationError

If device format is invalid

Source code in src/core/data_types.py
def __post_init__(self):
    """Check the device format once the dataclass fields are set.

    Raises:
        FormatValidationError: If device format is invalid
    """
    # A format with any file configuration is accepted as-is.
    if self.files:
        return

    raise FormatValidationError(
        f"Device format {self.name} must have at least one file defined",
        details={"format_name": self.name},
    )

__str__() -> str

String representation including available data types.

Source code in src/core/data_types.py
def __str__(self) -> str:
    """String representation including available data types.

    Returns:
        The format name followed by the alphabetically sorted names of the
        data types that have a primary column in any table of any file.
    """
    types = set()
    for file_config in self.files:
        for table in file_config.tables:
            for column in table.columns:
                # A column is primary by default even when it has no data
                # type (data_type=None); such a column contributes no data
                # type and must not be dereferenced.
                if column.is_primary and column.data_type is not None:
                    types.add(column.data_type.name)
    return f"{self.name} - Available data: {', '.join(sorted(types))}"

Module Location

src/core/data_types.py

Key Components

FileType

class FileType(Enum):
    """Supported file types for diabetes data."""
    SQLITE = "sqlite"  # SQLite database file
    CSV = "csv"        # CSV file
    JSON = "json"      # JSON file
    XML = "xml"        # XML file

Usage

Used to specify and validate input file types during format detection.

DataType

class DataType(Enum):
    """Core diabetes data types (values are assigned by ``auto()``)."""
    CGM = auto()        # Continuous glucose monitoring data
    BGM = auto()        # Blood glucose meter readings
    INSULIN = auto()    # Insulin doses
    INSULIN_META = auto()  # Insulin metadata, e.g. brand
    CARBS = auto()      # Carbohydrate intake
    NOTES = auto()      # Text notes/comments

Example

from src.core.data_types import DataType

# Check whether the column holds CGM readings.
# `column` is assumed to be a ColumnMapping; `process_cgm_data` stands in for
# the caller's own handler.
if column.data_type == DataType.CGM:
    process_cgm_data(column)

TimestampType

class TimestampType(Enum):
    """Common timestamp formats, identified so timestamps can be converted correctly."""
    UNIX_SECONDS = "unix_seconds"            # Unix epoch time in seconds
    UNIX_MILLISECONDS = "unix_milliseconds"  # Unix epoch time in milliseconds
    UNIX_MICROSECONDS = "unix_microseconds"  # Unix epoch time in microseconds
    ISO_8601 = "iso_8601"                    # ISO 8601 formatted date/time
    UNKNOWN = "unknown"                      # Format not identified

Important

All timestamps are converted to UTC during processing.

ColumnRequirement

class ColumnRequirement(Enum):
    """Defines how a column is validated and whether its data is read."""
    CONFIRMATION_ONLY = auto()    # Just needs to exist - no data read
    REQUIRED_WITH_DATA = auto()   # Must exist - data read & fail if not
    REQUIRED_NULLABLE = auto()    # Must exist, can have all missing values - data read
    OPTIONAL = auto()             # May or may not exist - data read

Unit

class Unit(Enum):
    """Supported units of measurement."""
    MGDL = "mg/dL"    # Blood glucose in mg/dL
    MMOL = "mmol/L"   # Blood glucose in mmol/L
    UNITS = "U"       # Insulin units
    GRAMS = "g"       # Carbohydrates in grams

ColumnMapping

@dataclass
class ColumnMapping:
    """Maps source columns to standardized data types.

    Args:
        source_name: Original column name (the only required field)
        data_type: Column data type (defaults to None)
        unit: Unit of measurement (defaults to None)
        requirement: Validation requirement (defaults to REQUIRED_WITH_DATA)
        is_primary: Primary column flag (defaults to True)
    """
    source_name: str
    data_type: Optional[DataType] = None
    unit: Optional[Unit] = None
    requirement: ColumnRequirement = ColumnRequirement.REQUIRED_WITH_DATA
    is_primary: bool = True

ColumnMapping Example

    # Map the "calculated_value" source column to CGM data measured in mg/dL.
    # requirement and is_primary keep their defaults
    # (REQUIRED_WITH_DATA and True).
    glucose = ColumnMapping(
        source_name="calculated_value",
        data_type=DataType.CGM,
        unit=Unit.MGDL
    )

TableStructure

@dataclass
class TableStructure:
    """Defines data table structure.

    Args:
        name: Table name (empty string for CSV files)
        timestamp_column: Timestamp column name
        columns: Column mappings

    Methods:
        validate_columns(): Ensures the table has at least one column
        validate_unique_source_names(): Rejects duplicate source column names
        validate_primary_columns(): Allows at most one primary column per data type
    """
    name: str
    timestamp_column: str
    columns: List[ColumnMapping]

Validation Methods

All validation methods raise FormatValidationError on failure.

FileConfig

@dataclass
class FileConfig:
    """Configuration for device format file.

    Args:
        name_pattern: Filename pattern (e.g., "*.sqlite", "glucose.csv")
        file_type: File type enum
        tables: Table structures

    Validates:
        - At least one table is defined
        - CSV files have exactly one table, whose name is the empty string
    """
    name_pattern: str
    file_type: FileType
    tables: List[TableStructure]

DeviceFormat

@dataclass
class DeviceFormat:
    """Complete device format specification.

    Args:
        name: Format name
        files: File configurations (at least one is required)

    Methods:
        __str__: Returns the format name followed by the sorted names of
            the data types that have a primary column
    """
    name: str
    files: List[FileConfig]

Complete Format Example

    # Complete specification, built from the outside in:
    # DeviceFormat -> FileConfig -> TableStructure -> ColumnMapping.
    xdrip_format = DeviceFormat(
        name="xdrip_sqlite",
        files=[
            # One SQLite file, matched by the "*.sqlite" pattern.
            FileConfig(
                name_pattern="*.sqlite",
                file_type=FileType.SQLITE,
                tables=[
                    # One table with a timestamp column and a single mapped column.
                    TableStructure(
                        name="BgReadings",
                        timestamp_column="timestamp",
                        columns=[
                            # CGM values in mg/dL; primary by default.
                            ColumnMapping(
                                source_name="calculated_value",
                                data_type=DataType.CGM,
                                unit=Unit.MGDL
                            )
                        ]
                    )
                ]
            )
        ]
    )