import logging
import threading
import time
import uuid
from dataclasses import dataclass, field
from enum import IntEnum
from typing import Any, Dict, List, Literal, Optional, Tuple

import yaml
from mpi4py.MPI import COMM_WORLD, Comm

from .._utils import global_mpi_rank, global_mpi_size

# Names exported by a star-import of this module.
# NOTE(review): 'ServerConfig' and 'extract_server_configs' are not defined in
# the part of the file visible here (the closest visible names are
# CtxGenServerConfig and extract_ctx_gen_cfgs) — confirm these entries are not
# stale, because a star-import raises AttributeError for any name listed here
# that the module does not define.
__all__ = [
    'ServerConfig',
    'parse_disagg_config_file',
    'extract_server_configs',
    'split_world_comm',
]


class ServerRole(IntEnum):
    """Role tag attached to a server.

    Being an ``IntEnum``, every member also compares equal to its plain
    integer value.
    """

    CONTEXT = 0
    GENERATION = 1
    MM_ENCODER = 2


@dataclass
class CtxGenServerConfig:
    """Description of a single context ('ctx') or generation ('gen') server."""

    # Kind of server this entry describes.
    type: Literal['ctx', 'gen']

    # Network location of the server; both default to None.
    hostname: Optional[str] = None
    port: Optional[int] = None

    # Number of ranks taken by this instance.
    instance_num_ranks: int = 1

    # Any further settings for the server; a fresh dict per instance.
    other_args: dict = field(default_factory=dict)


@dataclass
class RouterConfig:
    """Router selection and its arguments for one group of servers."""

    # Name of the routing strategy.
    type: str = "round_robin"

    # Extra arguments for the router; a fresh dict per instance.
    args: dict = field(default_factory=dict)

    # Role of the servers behind this router. It defaults to None, so the
    # annotation is Optional to match the default.
    server_role: Optional[ServerRole] = None


@dataclass
class ConditionalDisaggConfig:
    """Settings for conditional disaggregation."""

    # presumably the longest prefill that is still handled locally — TODO confirm
    max_local_prefill_length: int = 0


@dataclass
class OtlpConfig:
    """OpenTelemetry (OTLP) export settings."""

    # Target URL to which OpenTelemetry traces will be sent
    otlp_traces_endpoint: Optional[str] = None


@dataclass
class MinimalInstances:
    """Minimal number of servers of each kind."""

    # the minimal number of context servers
    context_servers: int = 1

    # the minimal number of generation servers
    generation_servers: int = 1


@dataclass
class DisaggClusterConfig:
    """Settings for the cluster storage used by disaggregated workers."""

    # the uri of the cluster storage
    cluster_uri: str

    # the name of the cluster, used like a namespace
    cluster_name: str = ""

    minimal_instances: Optional[MinimalInstances] = None

    # the worker will send heartbeat to the cluster storage every
    # heartbeat_interval_sec seconds
    heartbeat_interval_sec: int = 5

    # the worker will be considered inactive if it doesn't send heartbeat for
    # inactive_timeout_sec seconds
    inactive_timeout_sec: int = 10


@dataclass
class DisaggServerConfig:
    """Aggregate configuration produced from a disaggregated-serving config."""

    # Per-instance context/generation server descriptions.
    server_configs: List[CtxGenServerConfig]

    hostname: str = "localhost"
    port: int = 8000

    # Router settings for the context and generation server groups.
    ctx_router_config: Optional[RouterConfig] = None
    gen_router_config: Optional[RouterConfig] = None

    conditional_disagg_config: Optional[ConditionalDisaggConfig] = None
    otlp_config: Optional[OtlpConfig] = None
    max_retries: int = 1
    perf_metrics_max_requests: int = 0
    disagg_cluster_config: Optional[DisaggClusterConfig] = None

    # Assuming only one disagg-server is running on a machine, the MAC address
    # is reduced modulo 1021, the largest 10-bit prime. The default is computed
    # once, when this class is defined.
    # If this causes collisions, users can set node_id manually within range
    # [0, 1023] in config.
    node_id: int = uuid.getnode() % 1021


@dataclass
class MetadataServerConfig:
    """Connection settings for the metadata server."""

    # Backend kind; 'etcd' is the only value the annotation allows.
    server_type: Literal['etcd']

    hostname: str = "localhost"
    port: int = 2379

    # presumably both values are in seconds — TODO confirm
    health_check_timeout: float = 5.0
    refresh_interval: float = 10.0


def get_ctx_gen_server_addrs(
        server_configs: list[CtxGenServerConfig]
) -> tuple[list[str], list[str]]:
    """Split server configs into context and generation address lists.

    Args:
        server_configs: Server descriptions to partition.

    Returns:
        ``(ctx_addrs, gen_addrs)``, each a list of ``"hostname:port"`` strings
        in input order. Every config whose type is not ``"ctx"`` is placed in
        the generation list.
    """
    ctx_addrs: list[str] = []
    gen_addrs: list[str] = []
    for server in server_configs:
        bucket = ctx_addrs if server.type == "ctx" else gen_addrs
        bucket.append(f"{server.hostname}:{server.port}")
    return ctx_addrs, gen_addrs


def parse_disagg_config_file(yaml_config_file: str):
    """Load a disaggregated-serving YAML file and build its configuration.

    Args:
        yaml_config_file: Path of the YAML file to read.

    Returns:
        Whatever ``extract_disagg_cfg`` produces when the top-level keys of the
        YAML document are passed to it as keyword arguments.
    """
    with open(yaml_config_file, 'r') as stream:
        raw_config = yaml.safe_load(stream)

    return extract_disagg_cfg(**raw_config)


def extract_disagg_cfg(hostname: str = 'localhost',
                       port: int = 8000,
                       max_retries: int = 1,
                       perf_metrics_max_requests: int = 0,
                       context_servers: Optional[dict] = None,
                       generation_servers: Optional[dict] = None,
                       conditional_disagg_config: Optional[dict] = None,
                       otlp_config: Optional[dict] = None,
                       disagg_cluster: Optional[dict] = None,
                       node_id: Optional[int] = None,
                       **kwargs: Any) -> DisaggServerConfig:
    """Build a ``DisaggServerConfig`` from the top-level config entries.

    Args:
        hostname: Hostname stored in the resulting config.
        port: Port stored in the resulting config.
        max_retries: Stored as-is in the resulting config.
        perf_metrics_max_requests: Stored as-is in the resulting config.
        context_servers: Settings of the context servers section. The dict is
            modified in place (inherited keys are added, "router" is removed).
        generation_servers: Same, for the generation servers section.
        conditional_disagg_config: Keyword arguments for
            ``ConditionalDisaggConfig``; no config is created when empty.
        otlp_config: Keyword arguments for ``OtlpConfig``; no config is
            created when empty.
        disagg_cluster: Cluster settings. When given, per-instance server
            configs are not extracted and ``server_configs`` stays empty.
        node_id: Explicit node id for the resulting config. When None, the
            default of ``DisaggServerConfig.node_id`` is kept. Without this
            parameter a top-level ``node_id`` would be swallowed by ``kwargs``
            and copied into both server sections instead of reaching the
            config.
        **kwargs: Any other top-level entries; each is inherited by both
            server sections.

    Returns:
        The assembled ``DisaggServerConfig``.

    Raises:
        ValueError: If an entry appears both at the top level and inside a
            server section with different values.
    """
    context_servers = context_servers or {}
    generation_servers = generation_servers or {}

    # If parameters are specified outside the context_servers and
    # generation_servers sections, make sure they match; otherwise the section
    # inherits the top-level value.
    sections = (("context_servers", context_servers),
                ("generation_servers", generation_servers))
    for key, value in kwargs.items():
        for server_type, servers in sections:
            if key not in servers:
                servers[key] = value
            elif servers[key] != value:
                raise ValueError(
                    f"Parameter {key} is specified both in the top-level and in the {server_type} section, but with different values"
                )

    # The router settings must be extracted first: this removes the "router"
    # entry from each section before the section is expanded into keyword
    # arguments below.
    ctx_router_config = extract_router_config(context_servers)
    gen_router_config = extract_router_config(generation_servers)
    ctx_router_config.server_role = ServerRole.CONTEXT
    gen_router_config.server_role = ServerRole.GENERATION

    server_configs = []
    disagg_cluster_config = None
    if disagg_cluster:
        disagg_cluster_config = extract_disagg_cluster_config(disagg_cluster)
    else:
        server_configs = extract_ctx_gen_cfgs(
            type="ctx", **context_servers) + extract_ctx_gen_cfgs(
                type="gen", **generation_servers)

    conditional_disagg_config = ConditionalDisaggConfig(
        **conditional_disagg_config) if conditional_disagg_config else None

    otlp_config = OtlpConfig(**otlp_config) if otlp_config else None

    # Keyword construction keeps this call correct even if the dataclass
    # fields are ever reordered.
    config_fields = dict(
        server_configs=server_configs,
        hostname=hostname,
        port=port,
        ctx_router_config=ctx_router_config,
        gen_router_config=gen_router_config,
        conditional_disagg_config=conditional_disagg_config,
        otlp_config=otlp_config,
        max_retries=max_retries,
        perf_metrics_max_requests=perf_metrics_max_requests,
        disagg_cluster_config=disagg_cluster_config,
    )
    if node_id is not None:
        config_fields["node_id"] = node_id

    return DisaggServerConfig(**config_fields)


def extract_ctx_gen_cfgs(type: Literal['ctx', 'gen'],
                         num_instances: int = 1,
                         urls: Optional[List[str]] = None,
                         **kwargs: Any) -> List[CtxGenServerConfig]:
    """Expand one server section into per-instance server configs.

    Args:
        type: Kind of server every produced config is tagged with.
        num_instances: Number of instances to produce.
        urls: Optional ``"host:port"`` strings, one per instance. The port is
            taken after the last colon, so hosts that themselves contain
            colons (for example a bracketed IPv6 address) are accepted.
        **kwargs: Remaining section settings. They are stored on every config
            as ``other_args`` and also supply the parallel sizes.

    Returns:
        ``num_instances`` configs. Hostname and port are None when ``urls`` is
        empty or missing.

    Raises:
        ValueError: If a url has no colon, its port is not an integer, or the
            number of urls differs from ``num_instances``.
    """
    if urls:
        hostnames = []
        ports = []
        for url in urls:
            # Split on the last colon only, so colons inside the host part do
            # not break the unpacking.
            hostname, port_str = url.rsplit(':', 1)
            hostnames.append(hostname)
            ports.append(int(port_str))

        # hostnames and ports always have the same length, so one check
        # covers both.
        if len(hostnames) != num_instances:
            raise ValueError(
                f"Number of hostnames ({len(hostnames)}) should be equal to the number of instances ({num_instances})"
            )
    else:
        hostnames = [None] * num_instances
        ports = [None] * num_instances

    # Compute the number of ranks per instance
    instance_num_ranks = kwargs.get('tensor_parallel_size', 1) * kwargs.get(
        'pipeline_parallel_size', 1) * kwargs.get('context_parallel_size', 1)

    # Each config gets its own copy of the settings, so that mutating one
    # instance's other_args cannot silently change the others.
    return [
        CtxGenServerConfig(type=type,
                           hostname=hostname,
                           port=port,
                           instance_num_ranks=instance_num_ranks,
                           other_args=dict(kwargs))
        for hostname, port in zip(hostnames, ports)
    ]


def extract_router_config(server_cfg: dict) -> RouterConfig:
    """Build the ``RouterConfig`` described by a server section.

    The "router" entry is removed from ``server_cfg``, and its "type" key is
    removed from the router dict itself, so both inputs are modified in place.

    Args:
        server_cfg: Settings of one server section.

    Returns:
        A ``RouterConfig`` whose type defaults to "round_robin" and whose args
        are the remaining router entries plus ``max_batch_size`` and
        ``max_num_tokens`` copied from the section when present.
    """
    # A "router" key with an empty (None) value is treated like a missing one
    # instead of failing on None.pop below.
    args = server_cfg.pop("router", None) or {}
    router_type = args.pop("type", "round_robin")

    # add fields that are not specific to router
    for key in ("max_batch_size", "max_num_tokens"):
        if key in server_cfg:
            args[key] = server_cfg[key]

    return RouterConfig(type=router_type, args=args)


def get_server_configs_dict(
        server_configs: List[CtxGenServerConfig]) -> Tuple[int, dict]:
    """Index server configs by ``(hostname, port)`` and count their ranks.

    Args:
        server_configs: Server descriptions to index.

    Returns:
        ``(num_workers, server_dict)``. ``server_dict`` maps each distinct
        ``(hostname, port)`` to the first config seen for it, and
        ``num_workers`` sums ``instance_num_ranks`` over those first configs.

    Raises:
        ValueError: If two configs share an address and either have the same
            type or carry different ``other_args``.
    """
    first_seen = {}
    total_ranks = 0

    for cfg in server_configs:
        url = (cfg.hostname, cfg.port)

        if url not in first_seen:
            first_seen[url] = cfg
            total_ranks += cfg.instance_num_ranks
            continue

        # Same address seen before: only allowed for a mixed ctx/gen server
        # with identical settings.
        earlier = first_seen[url]
        if earlier.type == cfg.type:
            raise ValueError(f"Duplicated {cfg.type} server config for {url}")
        if earlier.other_args != cfg.other_args:
            raise ValueError(
                f"Server config for {url} has different args:\n{earlier.other_args}\n{cfg.other_args}"
            )

    return total_ranks, first_seen


def extract_disagg_cluster_config(
        cluster_config_dict: Dict[str, Any],
        cluster_uri: Optional[str] = None) -> DisaggClusterConfig:
    """
    Build the DisaggClusterConfig from the cluster_config_dict.
    Use the default value of DisaggClusterConfig and MinimalInstances if the corresponding fields are not provided.
    If cluster_uri is provided, it will override the cluster_uri in the cluster_config_dict.

    The input dict is not modified.

    Raises:
        KeyError: If a key does not name a field of the target dataclass, or
            if no cluster_uri is available from either source.
    """

    def update_dataclass(obj, data_dict: Dict[str, Any]):
        # Copy every non-None entry onto obj, rejecting unknown field names.
        for key, value in data_dict.items():
            if key not in obj.__dataclass_fields__:
                raise KeyError(
                    f"Key {key} not found in {obj.__class__.__name__}")
            if value is not None:
                setattr(obj, key, value)
        return obj

    # Work on a shallow copy so the caller's dict keeps its original content.
    settings = dict(cluster_config_dict)

    # A missing or None "minimal_instances" entry falls back to the defaults.
    settings["minimal_instances"] = update_dataclass(
        MinimalInstances(), settings.get("minimal_instances") or {})

    # Put the override into the settings themselves; otherwise the field
    # update below would write the dict's own cluster_uri back over it.
    if cluster_uri:
        settings["cluster_uri"] = cluster_uri

    return update_dataclass(DisaggClusterConfig(settings["cluster_uri"]),
                            settings)


def split_world_comm(
        server_configs: List[CtxGenServerConfig]) -> Tuple[bool, int, Comm]:
    """Split COMM_WORLD into one sub-communicator per server instance.

    Ranks are handed out to instances in the order the configs appear, each
    distinct ``(hostname, port)`` taking ``instance_num_ranks`` consecutive
    global ranks.

    Args:
        server_configs: Server descriptions; their total rank count must match
            the global MPI size.

    Returns:
        ``(is_leader, instance_idx, sub_comm)``: whether this rank is the first
        rank of its instance, the index (into ``server_configs``) of the
        instance this rank belongs to, and the sub-communicator of that
        instance.

    Raises:
        AssertionError: If the global MPI size differs from the number of
            workers computed from the configs.
        ValueError: Propagated from ``get_server_configs_dict`` for
            conflicting configs.
        RuntimeError: If the rank inside the sub-communicator differs from the
            expected one.
    """

    # Check that MPI_COMM_WORLD size is compatible with the number of workers
    global_size = global_mpi_size()
    global_rank = global_mpi_rank()

    [num_workers, server_dict] = get_server_configs_dict(server_configs)
    assert global_size == num_workers, f"global_size ({global_size}) should be equal to the number of distinct workers ({num_workers})"

    # Identify the leader ranks and the instance idx for each rank
    is_leader = False
    offset = 0  # first global rank of the instance currently being examined
    instance_idx = 0
    instance_sub_rank = 0
    for idx, cfg in enumerate(server_configs):
        # An address is removed from server_dict once handled, so a later
        # config with the same (hostname, port) is skipped and gets no ranks.
        if (cfg.hostname, cfg.port) not in server_dict:
            continue
        server_dict.pop((cfg.hostname, cfg.port))
        if global_rank >= offset and global_rank < offset + cfg.instance_num_ranks:
            instance_idx = idx
            instance_sub_rank = global_rank - offset
            # The first rank in each instance is the leader
            if global_rank == offset:
                is_leader = True
        offset += cfg.instance_num_ranks

    # Split MPI_COMM_WORLD into sub-communicators based on rank_instance_idx
    sub_comm = COMM_WORLD.Split(color=instance_idx, key=instance_sub_rank)
    sub_rank = sub_comm.Get_rank()
    # Sanity check: the key passed to Split should have produced this rank.
    if sub_rank != instance_sub_rank:
        raise RuntimeError(
            f"Expected sub_rank {sub_rank} to be equal to instance_sub_rank {instance_sub_rank}"
        )

    sub_comm.Barrier()

    logging.info(
        f"global_rank: {global_rank}, instance_idx: {instance_idx}, sub_rank: {sub_rank}, is_leader: {is_leader}"
    )

    return is_leader, instance_idx, sub_comm


def parse_metadata_server_config_file(
    metadata_server_config_file: Optional[str]
) -> Optional[MetadataServerConfig]:
    """Load the metadata server settings from a YAML file.

    Args:
        metadata_server_config_file: Path of the YAML file, or None.

    Returns:
        None when no path is given; otherwise a ``MetadataServerConfig`` built
        from the top-level keys of the YAML document.
    """
    if metadata_server_config_file is None:
        return None

    with open(metadata_server_config_file, 'r') as stream:
        settings = yaml.safe_load(stream)

    return MetadataServerConfig(**settings)


# Smallest global request id; ids in [0, MIN_GLOBAL_ID) are reserved for local ids.
MIN_GLOBAL_ID = 1 << 42

# Consider GIL being removed in the future, use a lock to protect the counter
_global_disagg_request_id_lock = threading.Lock()
_global_disagg_request_id_counter = 0


def get_global_disagg_request_id(machine_id: int) -> int:
    """
    a snowflake global disagg request id that doesn't guarantee monotonicity
    0: positive integer
    1-41  41 bits: timestamp_ms
    42-51 10 bits: machine_id
    52-63 12 bits: counter

    The composed value is then folded into [MIN_GLOBAL_ID, MAX_INT64), so the
    layout above only holds exactly while the value needs no folding.

    Args:
        machine_id: Id of the calling machine; any 10-bit value, i.e. an
            integer in [0, 1024).

    Returns:
        An integer in [MIN_GLOBAL_ID, 2**63 - 1).

    Raises:
        ValueError: If machine_id does not fit in 10 bits.
    """
    global _global_disagg_request_id_counter

    COUNTER_BITS = 12
    MACHINE_ID_BITS = 10
    COUNTER_MASK = (1 << COUNTER_BITS) - 1
    MAX_INT64 = (1 << 63) - 1
    machine_id_limit = 1 << MACHINE_ID_BITS

    # The full 10-bit range is valid, including 1023 (all ten bits set).
    if machine_id not in range(machine_id_limit):
        raise ValueError(
            f"machine_id must be in range [0, {machine_id_limit})")

    # time.monotonic() is not wall-clock time; its reference point is
    # undefined, so the timestamp bits are only meaningful relative to other
    # ids taken from the same clock.
    timestamp_ms = int(time.monotonic() * 1000)
    with _global_disagg_request_id_lock:
        counter = _global_disagg_request_id_counter & COUNTER_MASK
        _global_disagg_request_id_counter += 1

    # Rotate in [MIN_GLOBAL_ID, MAX_INT64)
    # [0, MIN_GLOBAL_ID) is reserved for local ids
    global_id = (timestamp_ms << (MACHINE_ID_BITS + COUNTER_BITS)) | (
        machine_id << COUNTER_BITS) | counter
    return global_id % (MAX_INT64 - MIN_GLOBAL_ID) + MIN_GLOBAL_ID


def get_local_request_id(last_id: int) -> int:
    """Return the local id after last_id, wrapping around below MIN_GLOBAL_ID.

    The result is ``last_id + 1`` reduced with the bit mask
    ``MIN_GLOBAL_ID - 1``.
    """
    wrap_mask = MIN_GLOBAL_ID - 1
    successor = last_id + 1
    return successor & wrap_mask