Source code for openfloor.dialog_event

from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Union, Any, Iterator, Tuple, ClassVar, Type
from enum import Enum
from abc import ABC
import json
from jsonpath_ng import jsonpath, parse
from .json_serializable import JsonSerializableList, JsonSerializableDict, JsonSerializableDataclass
import uuid

[docs] def get_isosplit(s: str, split: str) -> Tuple[int, str]: """Split string at delimiter and return number and remainder Returns 0 and the string if the delimiter is not found or the text preceding the delimiter is not a number""" if split in s: n, s = s.split(split) try: n = int(n) except: n=0 else: n = 0 return int(n), s
[docs] def parse_isoduration(s: str) -> timedelta: """Parse ISO 8601 duration format to timedelta Args: s: ISO 8601 duration string (e.g., "PT3H30M15S") Returns: datetime.timedelta object """ # Remove prefix s = s.split('P')[-1] # Step through letter dividers days, s = get_isosplit(s, 'D') _, s = get_isosplit(s, 'T') hours, s = get_isosplit(s, 'H') minutes, s = get_isosplit(s, 'M') seconds, s = get_isosplit(s, 'S') return timedelta(days=days, hours=hours, minutes=minutes, seconds=seconds)
[docs] def timedelta_to_iso_duration(td: timedelta) -> str: """Convert timedelta to ISO 8601 duration format Args: td: datetime.timedelta object Returns: ISO 8601 duration string (e.g., "PT3H30M15S") """ total_seconds = int(td.total_seconds()) hours = total_seconds // 3600 minutes = (total_seconds % 3600) // 60 seconds = total_seconds % 60 parts = [] if hours > 0: parts.append(f"{hours}H") if minutes > 0: parts.append(f"{minutes}M") if seconds > 0 or not parts: parts.append(f"{seconds}S") return f"PT{''.join(parts)}"
[docs] @dataclass class Span(JsonSerializableDataclass): """Represents a time span for a dialog event or token""" startTime: Optional[datetime] = None startOffset: Optional[timedelta] = None # Duration as timedelta endTime: Optional[datetime] = None endOffset: Optional[timedelta] = None # Duration as timedelta def __post_init__(self): """Initialize after dataclass initialization""" super().__init__() if self.endTime is None and self.startOffset is None: # Default to now if there is no startOffset self.startTime = datetime.now() if self.startTime is not None and self.startOffset is not None: raise ValueError(f"Cannot specify both startTime and startOffset: {self.startTime} and {self.startOffset}") if self.endTime is not None and self.endOffset is not None: raise ValueError(f"Cannot specify both endTime and endOffset: {self.endTime} and {self.endOffset}") if self.startTime is None and self.startOffset is None: raise ValueError(f"Must specify either startTime or startOffset: {self.startTime} and {self.startOffset}") def __iter__(self) -> Iterator[Tuple[str, Any]]: """Convert Span instance to JSON-compatible dictionary""" if self.startTime is not None: yield 'startTime', self.startTime.isoformat() if self.startOffset is not None: yield 'startOffset', timedelta_to_iso_duration(self.startOffset) if self.endTime is not None: yield 'endTime', self.endTime.isoformat() if self.endOffset is not None: yield 'endOffset', timedelta_to_iso_duration(self.endOffset)
[docs] @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'Span': """Create a Span instance from a dictionary""" if 'endTime' in data and isinstance(data['endTime'], str): data['endTime'] = datetime.fromisoformat(data['endTime']) if 'startTime' in data and isinstance(data['startTime'], str): data['startTime'] = datetime.fromisoformat(data['startTime']) if 'startOffset' in data and isinstance(data['startOffset'], str): data['startOffset'] = parse_isoduration(data['startOffset']) if 'endOffset' in data and isinstance(data['endOffset'], str): data['endOffset'] = parse_isoduration(data['endOffset']) return cls(**data)
[docs] @dataclass class Token(JsonSerializableDataclass): """Represents a single token in a feature""" value: Optional[Any] = None valueUrl: Optional[str] = None span: Optional[Span] = None confidence: Optional[float] = None links: List[str] = field(default_factory=list) # JSON Path references def __post_init__(self): """Initialize after dataclass initialization""" super().__init__() if self.value is None and self.valueUrl is None: raise ValueError("Must specify either value or valueUrl") if self.value is not None and self.valueUrl is not None: raise ValueError("Cannot specify both value and valueUrl") if self.confidence is not None and not 0 <= self.confidence <= 1: raise ValueError("Confidence must be between 0 and 1") def __iter__(self) -> Iterator[Tuple[str, Any]]: """Convert Token instance to JSON-compatible dictionary""" if self.value is not None: yield 'value', self.value if self.valueUrl is not None: yield 'valueUrl', self.valueUrl if self.span is not None: yield 'span', dict(self.span) if self.confidence is not None: yield 'confidence', self.confidence if self.links: yield 'links', self.links
[docs] @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'Token': """Create a Token instance from a dictionary""" if 'span' in data: data['span'] = Span.from_dict(data['span']) return cls(**data)
[docs] def linked_values(self,dialog_event) -> List[Tuple[str, Any]]: values=[] for l in self.links: jsonpath_expr = parse(l) features_dict=dict(dialog_event.features) for match in jsonpath_expr.find(features_dict): if match: values.append([match.full_path,match.value]) return values
[docs] @dataclass class Feature(JsonSerializableDataclass): """Represents a feature in a dialog event""" mimeType: str tokens: List[Token] = field(default_factory=list) alternates: List[List[Token]] = field(default_factory=list) lang: Optional[str] = None # BCP 47 language tag encoding: Optional[str] = None # "ISO-8859-1" or "UTF-8" tokenSchema: Optional[str] = None # e.g., "BertTokenizer.from_pretrained(bert-base-uncased)" def __post_init__(self): """Initialize after dataclass initialization""" super().__init__() if self.encoding is not None and self.encoding not in ["ISO-8859-1", "UTF-8", "iso-8859-1", "utf-8"]: raise ValueError("Encoding must be either 'ISO-8859-1', 'iso-8859-1', 'UTF-8', or 'utf-8'") def __iter__(self) -> Iterator[Tuple[str, Any]]: """Convert Feature instance to JSON-compatible dictionary""" yield 'mimeType', self.mimeType yield 'tokens', [dict(token) for token in self.tokens] if self.alternates: yield 'alternates', [[dict(token) for token in alt] for alt in self.alternates] if self.lang is not None: yield 'lang', self.lang if self.encoding is not None: yield 'encoding', self.encoding if self.tokenSchema is not None: yield 'tokenSchema', self.tokenSchema
[docs] @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'Feature': """Create a Feature instance from a dictionary""" if 'tokens' in data: data['tokens'] = [Token.from_dict(token) for token in data['tokens']] if 'alternates' in data: data['alternates'] = [[Token.from_dict(token) for token in alt] for alt in data['alternates']] return cls(**data)
[docs] @dataclass class TextFeature(Feature): """Represents a text feature in a dialog event with mime type set to text/plain by default""" mimeType: str = "text/plain" values: Optional[List[str]] = field(default=None, repr=False) def __post_init__(self): """Initialize tokens from values if provided""" if self.values is not None: self.tokens = [Token(value=value) for value in self.values] def __iter__(self) -> Iterator[Tuple[str, Any]]: """Exclude the values field from the iteration""" yield from super().__iter__()
[docs] @dataclass class DialogEvent(JsonSerializableDataclass): """Represents a dialog event according to the specification""" speakerUri: str id: Optional[str] = None span: Optional[Span] = field(default_factory=Span) features: Dict[str, Feature] = field(default_factory=dict) previousId: Optional[str] = None context: Optional[str] = None def __post_init__(self): """Initialize after dataclass initialization""" super().__init__() if self.id is None: self.id = f"de:{uuid.uuid4()}" def __iter__(self) -> Iterator[Tuple[str, Any]]: """Convert DialogEvent instance to JSON-compatible dictionary""" yield 'id', self.id yield 'speakerUri', self.speakerUri yield 'span', dict(self.span) yield 'features', {name: dict(feature) for name, feature in self.features.items()} if self.previousId is not None: yield 'previousId', self.previousId if self.context is not None: yield 'context', self.context
[docs] @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'DialogEvent': """Create a DialogEvent instance from a dictionary""" if 'span' in data: data['span'] = Span.from_dict(data['span']) if 'features' in data: data['features'] = {name: Feature.from_dict(feature) for name, feature in data['features'].items()} return cls(**data)
[docs] class DialogHistory(JsonSerializableList): pass