# Source code for stellium.analysis.frames

"""
DataFrame conversion utilities for chart analysis.

Provides functions to convert CalculatedChart objects to pandas DataFrames
in various schemas optimized for different analysis use cases.

Requires pandas: pip install stellium[analysis]
"""

import hashlib
from collections.abc import Sequence
from typing import Any

from stellium.core.models import CalculatedChart, ObjectType

# Optional pandas import
try:
    import pandas as pd

    PANDAS_AVAILABLE = True
except ImportError:
    PANDAS_AVAILABLE = False
    pd = None  # type: ignore


def _require_pandas() -> None:
    """Guard for DataFrame helpers: fail fast when pandas could not be imported.

    Raises:
        ImportError: If the module-level ``PANDAS_AVAILABLE`` flag is false.
    """
    if PANDAS_AVAILABLE:
        return

    raise ImportError(
        "pandas is required for DataFrame operations. "
        "Install with: pip install stellium[analysis]"
    )


def _generate_chart_id(chart: CalculatedChart) -> str:
    """Derive a short, deterministic ID for a chart from its time and place.

    The ID is the first 12 hex characters of the MD5 digest of
    ``"<julian_day>:<latitude>:<longitude>"``. Charts that share all three
    values therefore share an ID.

    Args:
        chart: Chart whose ``datetime.julian_day``, ``location.latitude`` and
            ``location.longitude`` are hashed.

    Returns:
        A 12-character lowercase hexadecimal string.
    """
    key = f"{chart.datetime.julian_day}:{chart.location.latitude}:{chart.location.longitude}"
    # MD5 is used purely as a fingerprint here. Flagging it as non-security
    # use keeps this working on builds that block insecure hashes (e.g. FIPS).
    digest = hashlib.md5(key.encode(), usedforsecurity=False)
    return digest.hexdigest()[:12]


def _get_chart_name(chart: CalculatedChart) -> str:
    """Return the ``"name"`` entry of the chart's metadata, or ``""`` if the key is absent."""
    metadata = chart.metadata
    return metadata.get("name", "")


# Element and modality lookup tables, keyed by zodiac sign name.
# The signs are listed in zodiac order, so the four elements and the three
# modalities each repeat in a fixed cycle along the sequence.
_ZODIAC_SIGNS = (
    "Aries",
    "Taurus",
    "Gemini",
    "Cancer",
    "Leo",
    "Virgo",
    "Libra",
    "Scorpio",
    "Sagittarius",
    "Capricorn",
    "Aquarius",
    "Pisces",
)

# fire -> earth -> air -> water, repeated three times over the twelve signs.
SIGN_ELEMENTS = dict(zip(_ZODIAC_SIGNS, ("fire", "earth", "air", "water") * 3))

# cardinal -> fixed -> mutable, repeated four times over the twelve signs.
SIGN_MODALITIES = dict(zip(_ZODIAC_SIGNS, ("cardinal", "fixed", "mutable") * 4))


def _count_elements(chart: CalculatedChart) -> dict[str, int]:
    """Tally the chart's planets by the element of the sign each one occupies.

    Returns:
        A dict with the keys ``fire``, ``earth``, ``air`` and ``water``.
        Planets whose sign has no entry in ``SIGN_ELEMENTS`` are not counted.
    """
    tally = dict.fromkeys(("fire", "earth", "air", "water"), 0)

    for body in chart.get_planets():
        element = SIGN_ELEMENTS.get(body.sign)
        if not element:
            # Sign is not in the lookup table; leave the tallies untouched.
            continue
        tally[element] += 1

    return tally


def _count_modalities(chart: CalculatedChart) -> dict[str, int]:
    """Tally the chart's planets by the modality of the sign each one occupies.

    Returns:
        A dict with the keys ``cardinal``, ``fixed`` and ``mutable``.
        Planets whose sign has no entry in ``SIGN_MODALITIES`` are not counted.
    """
    tally = dict.fromkeys(("cardinal", "fixed", "mutable"), 0)

    for body in chart.get_planets():
        modality = SIGN_MODALITIES.get(body.sign)
        if not modality:
            # Sign is not in the lookup table; leave the tallies untouched.
            continue
        tally[modality] += 1

    return tally


def _count_retrogrades(chart: CalculatedChart) -> int:
    """Return how many of the chart's planets have a truthy ``is_retrograde``."""
    retrograde_bodies = [body for body in chart.get_planets() if body.is_retrograde]
    return len(retrograde_bodies)


def _has_pattern(chart: CalculatedChart, pattern_name: str) -> bool:
    """Check whether the chart's metadata lists a given aspect pattern.

    Patterns are read from ``chart.metadata["aspect_patterns"]``. Each entry
    may be a dict with a ``"name"`` key or an object with a ``name``
    attribute. Names are compared case-insensitively.

    Entries without a string name are skipped rather than raising, and a
    missing or ``None`` pattern list is treated as "no patterns".

    Args:
        chart: Chart whose metadata is inspected.
        pattern_name: Pattern name to look for, e.g. ``"grand trine"``.

    Returns:
        True if any listed pattern has that name, otherwise False.
    """
    # Lower-case the target once instead of on every comparison.
    wanted = pattern_name.lower()

    for pattern in chart.metadata.get("aspect_patterns") or ():
        if isinstance(pattern, dict):
            name = pattern.get("name")
        else:
            name = getattr(pattern, "name", None)

        if isinstance(name, str) and name.lower() == wanted:
            return True

    return False


def charts_to_dataframe(
    charts: Sequence[CalculatedChart],
    include_patterns: bool = True,
) -> "pd.DataFrame":
    """
    Convert charts to a DataFrame with one row per chart.

    This schema is best for:
    - Comparing charts across a dataset
    - Element/modality distribution analysis
    - Chart-wide pattern matching

    Args:
        charts: Sequence of CalculatedChart objects
        include_patterns: Include pattern detection columns
            (requires patterns in metadata)

    Returns:
        DataFrame with columns:
        - chart_id: Unique identifier
        - name: Chart name (from metadata)
        - datetime_utc: UTC datetime
        - julian_day: Julian day number
        - latitude, longitude: Location coordinates
        - location_name: Location name
        - sun_longitude, sun_sign, sun_sign_degree
        - moon_longitude, moon_sign, moon_sign_degree
        - moon_phase, moon_illumination
        - asc_longitude, asc_sign, mc_longitude, mc_sign
        - fire_count, earth_count, air_count, water_count
        - cardinal_count, fixed_count, mutable_count
        - sect: "day" or "night"
        - retrograde_count: Number of retrograde planets
        - has_grand_trine, has_t_square, has_grand_cross, has_yod,
          has_stellium (if include_patterns)

        An empty ``charts`` sequence yields an empty DataFrame with no columns.

    Raises:
        ImportError: If pandas is not installed.

    Example::

        from stellium.analysis import BatchCalculator, charts_to_dataframe

        charts = BatchCalculator.from_registry(category="artist").calculate_all()
        df = charts_to_dataframe(charts)

        # Filter by sun sign
        aries_suns = df[df['sun_sign'] == 'Aries']
    """
    _require_pandas()

    records = [_chart_to_record(chart, include_patterns) for chart in charts]
    return pd.DataFrame(records)
def _chart_to_record(chart: CalculatedChart, include_patterns: bool) -> dict[str, Any]:
    """Convert a single chart to a flat record dict.

    Keys are inserted in a fixed order, which becomes the DataFrame column
    order. When the Sun, Moon, ASC or MC is missing from the chart, its
    columns are filled with ``None`` so every record has the same keys for a
    given ``include_patterns`` value.

    Args:
        chart: Chart to flatten.
        include_patterns: When true, add the boolean ``has_*`` pattern columns.

    Returns:
        A flat mapping of column name to value.
    """
    record: dict[str, Any] = {}

    # Chart identification
    record["chart_id"] = _generate_chart_id(chart)
    record["name"] = _get_chart_name(chart)

    # DateTime
    record["datetime_utc"] = chart.datetime.utc_datetime
    record["julian_day"] = chart.datetime.julian_day

    # Location
    record["latitude"] = chart.location.latitude
    record["longitude"] = chart.location.longitude
    record["location_name"] = chart.location.name or ""

    # Sun position
    sun = chart.get_object("Sun")
    record["sun_longitude"] = sun.longitude if sun else None
    record["sun_sign"] = sun.sign if sun else None
    record["sun_sign_degree"] = sun.sign_degree if sun else None

    # Moon position
    moon = chart.get_object("Moon")
    record["moon_longitude"] = moon.longitude if moon else None
    record["moon_sign"] = moon.sign if moon else None
    record["moon_sign_degree"] = moon.sign_degree if moon else None

    # Moon phase: may be unavailable even when the Moon itself is present.
    phase = moon.phase if moon else None
    record["moon_phase"] = phase.phase_name if phase else None
    record["moon_illumination"] = phase.illuminated_fraction if phase else None

    # Ascendant and Midheaven
    for prefix, object_name in (("asc", "ASC"), ("mc", "MC")):
        angle = chart.get_object(object_name)
        record[f"{prefix}_longitude"] = angle.longitude if angle else None
        record[f"{prefix}_sign"] = angle.sign if angle else None

    # Element counts
    elements = _count_elements(chart)
    for element in ("fire", "earth", "air", "water"):
        record[f"{element}_count"] = elements[element]

    # Modality counts
    modalities = _count_modalities(chart)
    for modality in ("cardinal", "fixed", "mutable"):
        record[f"{modality}_count"] = modalities[modality]

    # Sect
    record["sect"] = chart.sect()

    # Retrograde count
    record["retrograde_count"] = _count_retrogrades(chart)

    # Pattern detection (if requested)
    if include_patterns:
        pattern_columns = (
            ("has_grand_trine", "grand trine"),
            ("has_t_square", "t-square"),
            ("has_grand_cross", "grand cross"),
            ("has_yod", "yod"),
            ("has_stellium", "stellium"),
        )
        for column, pattern_name in pattern_columns:
            record[column] = _has_pattern(chart, pattern_name)

    return record
def positions_to_dataframe(
    charts: Sequence[CalculatedChart],
    object_types: Sequence[ObjectType] | None = None,
) -> "pd.DataFrame":
    """
    Convert charts to a DataFrame with one row per celestial position.

    This schema is best for:
    - Position distributions across many charts
    - Sign/house analysis
    - Speed and retrograde analysis

    Args:
        charts: Sequence of CalculatedChart objects
        object_types: Filter to specific ObjectTypes (default: all)

    Returns:
        DataFrame with columns:
        - chart_id: Links to chart-level data
        - chart_name: Chart name
        - object_name: "Sun", "Moon", etc.
        - object_type: "planet", "angle", etc.
        - longitude: Ecliptic longitude (0-360)
        - latitude: Ecliptic latitude
        - sign: Zodiac sign
        - sign_degree: Degree within sign (0-30)
        - house: House placement (1-12, if available)
        - speed: Longitude speed (deg/day)
        - is_retrograde: Retrograde flag
        - declination: Declination (nullable)
        - is_out_of_bounds: OOB flag

        The columns are present even when no position matches, so filtering
        an empty result by column name does not raise ``KeyError``.

    Raises:
        ImportError: If pandas is not installed.

    Example::

        from stellium.analysis import BatchCalculator, positions_to_dataframe

        charts = BatchCalculator.from_registry().calculate_all()
        df = positions_to_dataframe(charts)

        # Sun sign distribution
        sun_df = df[df['object_name'] == 'Sun']
        sun_df['sign'].value_counts()
    """
    _require_pandas()

    # Same names and order as the record keys below; passed to the DataFrame
    # so an empty result keeps the schema.
    columns = [
        "chart_id",
        "chart_name",
        "object_name",
        "object_type",
        "longitude",
        "latitude",
        "sign",
        "sign_degree",
        "house",
        "speed",
        "is_retrograde",
        "declination",
        "is_out_of_bounds",
    ]

    records: list[dict[str, Any]] = []

    for chart in charts:
        chart_id = _generate_chart_id(chart)
        chart_name = _get_chart_name(chart)

        # Get default house system for placements. A ValueError here is
        # treated as "no house data" and the house column is left as None.
        try:
            default_system = chart.default_house_system
            house_placements = chart.house_placements.get(default_system, {})
        except ValueError:
            house_placements = {}

        for pos in chart.positions:
            # Filter by object type if specified
            if object_types is not None and pos.object_type not in object_types:
                continue

            records.append(
                {
                    "chart_id": chart_id,
                    "chart_name": chart_name,
                    "object_name": pos.name,
                    "object_type": pos.object_type.value,
                    "longitude": pos.longitude,
                    "latitude": pos.latitude,
                    "sign": pos.sign,
                    "sign_degree": pos.sign_degree,
                    "house": house_placements.get(pos.name),
                    "speed": pos.speed_longitude,
                    "is_retrograde": pos.is_retrograde,
                    "declination": pos.declination,
                    "is_out_of_bounds": pos.is_out_of_bounds,
                }
            )

    return pd.DataFrame(records, columns=columns)
def aspects_to_dataframe(
    charts: Sequence[CalculatedChart],
    include_declination: bool = False,
) -> "pd.DataFrame":
    """
    Convert charts to a DataFrame with one row per aspect.

    This schema is best for:
    - Aspect frequency analysis
    - Aspect pattern research
    - Orb distribution analysis

    Args:
        charts: Sequence of CalculatedChart objects
        include_declination: Include declination aspects (parallel/contraparallel)

    Returns:
        DataFrame with columns:
        - chart_id: Links to chart-level data
        - chart_name: Chart name
        - object1: First object name
        - object2: Second object name
        - aspect_name: "Conjunction", "Square", etc.
        - aspect_degree: 0, 60, 90, 120, 180, etc.
        - orb: Actual orb in degrees
        - is_applying: Applying vs separating
        - aspect_type: "longitude" or "declination"

        The columns are present even when the charts contain no aspects, so
        filtering an empty result by column name does not raise ``KeyError``.

    Raises:
        ImportError: If pandas is not installed.

    Example::

        from stellium.analysis import BatchCalculator, aspects_to_dataframe

        charts = BatchCalculator.from_registry().with_aspects().calculate_all()
        df = aspects_to_dataframe(charts)

        # Most common aspects
        df['aspect_name'].value_counts()

        # Sun-Moon aspects
        sun_moon = df[(df['object1'] == 'Sun') & (df['object2'] == 'Moon')]
    """
    _require_pandas()

    # Same names and order as the record keys below; passed to the DataFrame
    # so an empty result keeps the schema.
    columns = [
        "chart_id",
        "chart_name",
        "object1",
        "object2",
        "aspect_name",
        "aspect_degree",
        "orb",
        "is_applying",
        "aspect_type",
    ]

    records: list[dict[str, Any]] = []

    for chart in charts:
        chart_id = _generate_chart_id(chart)
        chart_name = _get_chart_name(chart)

        # Regular (longitude) aspects come first for each chart. Declination
        # aspects are only read from the chart when the caller asked for them.
        sources = [("longitude", chart.aspects)]
        if include_declination:
            sources.append(("declination", chart.declination_aspects))

        for aspect_type, aspects in sources:
            for aspect in aspects:
                records.append(
                    {
                        "chart_id": chart_id,
                        "chart_name": chart_name,
                        "object1": aspect.object1.name,
                        "object2": aspect.object2.name,
                        "aspect_name": aspect.aspect_name,
                        "aspect_degree": aspect.aspect_degree,
                        "orb": aspect.orb,
                        "is_applying": aspect.is_applying,
                        "aspect_type": aspect_type,
                    }
                )

    return pd.DataFrame(records, columns=columns)
# Convenience aliases: alternative public names bound to the same functions.
to_chart_dataframe = charts_to_dataframe
to_positions_dataframe = positions_to_dataframe
to_aspects_dataframe = aspects_to_dataframe