import { useMemo } from 'react';
import useSWR, { Key as SWRKey, SWRResponse, useSWRConfig } from 'swr';

import type { DataTableColumn, ValueRow } from '@kusto/client';
import { escapeNameIfNecessary } from '@kusto/utils';

import { useDataExplorationContext } from './context/DataExplorationContext';
import { useKustoClient } from './kustoClientWrapper';
import { getV2PrimaryResult } from './lib';
import type { QueryRunnerV1, QueryRunnerV2 } from './types';

/**
 * A table column enriched with the optional profile data that the hooks in this file collect.
 * Every insight field is optional because each one is loaded by a separate query (or not at all).
 */
export interface ColumnWithInsights extends DataTableColumn {
    /** Result of the Kusto `min()` aggregate over the column, when the min/max query covered it. */
    minValue?: number | string;
    /** Result of the Kusto `max()` aggregate over the column, when the min/max query covered it. */
    maxValue?: number | string;
    /** Distinct-value estimate produced by `dcount()`. */
    cardinality?: number;
    /** Whether the column was reported by the high-cardinality check; undefined while that check is loading. */
    isHighCardinality?: boolean;
    /** Free-text description; not populated by the code in this file. */
    description?: string;
}

/** Relative time range: rows whose ingestion time is newer than `ago(timespanLiteral)`. */
interface TimespanLiteralAgo {
    type: 'timespanLiteralAgo';
    /** Interpolated verbatim into `ago(...)`, so it must be a valid Kusto timespan literal. */
    timespanLiteral: string;
    // Declared as `undefined` so these fields can be read on the union without narrowing first.
    startTime?: undefined;
    endTime?: undefined;
}

/** Absolute time range delimited by an explicit start and end. */
interface CustomDateRange {
    type: 'customDateRange';
    // NOTE(review): presumably datetime strings that Kusto can parse (e.g. ISO 8601) — confirm with callers.
    startTime: string;
    endTime: string;
    // Declared as `undefined` so this field can be read on the union without narrowing first.
    timespanLiteral?: undefined;
}

/** Marker variant: no ingestion-time filter is added to the query. */
interface IngestionTimeFilterDisabled {
    type: 'filterDisabled';
}

/** Time scope for data-profile queries, discriminated by the `type` field. */
export type DataProfileTimeRange = TimespanLiteralAgo | CustomDateRange | IngestionTimeFilterDisabled;

// Marker placed inside SWR array keys so that useRefreshDataProfile can find and revalidate those entries.
const DATA_PROFILE_REFRESH_SCOPE = 'dataProfileRefreshScope';
// Kusto `set` statement prepended to profile queries; it sets query_datascope to "hotcache".
const HOT_CACHE_FILTER = 'set query_datascope="hotcache"; ';

/**
 * Builds the KQL `where` clause that restricts a query to the requested ingestion-time range.
 *
 * @param timeRange - The time scope to apply.
 * @returns A `| where ...` fragment to append to a query, or an empty string when filtering is disabled.
 */
function timeFilter(timeRange: DataProfileTimeRange) {
    switch (timeRange.type) {
        case 'filterDisabled':
            return '';
        case 'timespanLiteralAgo':
            return `| where ingestion_time() > ago(${timeRange.timespanLiteral})`;
        case 'customDateRange':
            // Previously this variant fell through to an empty filter, so a custom range was silently ignored.
            // NOTE(review): assumes startTime/endTime are datetime strings accepted by Kusto's datetime() — confirm.
            return `| where ingestion_time() between (datetime(${timeRange.startTime}) .. datetime(${timeRange.endTime}))`;
        default:
            // Unknown variants behave like a disabled filter, matching the previous fall-through.
            return '';
    }
}

/**
 * Provides a callback that asks SWR to revalidate every cache entry whose key is an array
 * containing the data-profile refresh scope marker.
 *
 * @returns A function that triggers the revalidation and returns SWR's `mutate` result.
 */
export function useRefreshDataProfile() {
    const { mutate: mutateMatchingKeys } = useSWRConfig();

    return () =>
        mutateMatchingKeys((key) => {
            if (!Array.isArray(key)) {
                return false;
            }
            return key.includes(DATA_PROFILE_REFRESH_SCOPE);
        });
}

/** Maximum number of rows (`take`) sampled by the high-cardinality check. */
export const CARDINALITY_CHECK_SAMPLE_SIZE = 1_000_000;
/** A column is reported as high cardinality when its sampled distinct count exceeds this value. */
export const CARDINALITY_CHECK_THRESHOLD = 10_000;
// Row count at or below which the high-cardinality check is skipped.
const SMALL_TABLE_ROW_COUNT = 10_000_000;
// Row count above which every column is reported as high cardinality without running a query.
const HUGE_TABLE_ROW_COUNT = 5_000_000_000;

/**
 * Determines which columns of the query result should be treated as "high cardinality".
 *
 * The outcome depends on the scoped row count (from `useScopedRowCount`):
 * - at most `SMALL_TABLE_ROW_COUNT` rows: resolves to an empty list (no query is run);
 * - more than `HUGE_TABLE_ROW_COUNT` rows: every column is reported, without a `count`, and no query is run;
 * - otherwise: runs `dcount` per sampleable column over up to `CARDINALITY_CHECK_SAMPLE_SIZE` rows and keeps
 *   the columns whose count exceeds `CARDINALITY_CHECK_THRESHOLD`.
 *
 * Nothing is fetched until the row count is known and non-zero and `columns` is non-null.
 *
 * @param dbCacheToken - Cache discriminator for the cluster/database, included in the SWR key.
 * @param queryText - Base query that is being profiled.
 * @param executeV2Query - Runner used to execute the generated KQL.
 * @param columns - Columns of the query result, or null while the schema is unknown.
 * @param timeRange - Time scope applied to the query.
 * @returns SWR response with the high-cardinality columns.
 */
function useHighCardinalityColumns(
    dbCacheToken: string,
    queryText: string,
    executeV2Query: QueryRunnerV2,
    columns: DataTableColumn[] | null,
    timeRange: DataProfileTimeRange
): SWRResponse<{ ColumnName: string; count?: number }[]> {
    // bool and dynamic are filtered out
    const typesToSample = ['real', 'int', 'long', 'decimal', 'string', 'guid', 'datetime', 'timespan'];

    const { data: rowCount } = useScopedRowCount(queryText, timeRange);

    // NOTE(review): ColumnName is embedded in a KQL string literal without escaping quotes — confirm names are safe.
    const queryPartsPerColumn = columns
        ?.filter((col) => typesToSample.includes(col.ColumnType))
        .map((col) => {
            const escapedColumnName = escapeNameIfNecessary(col.ColumnName);
            return `bag_pack('ColumnName','${col.ColumnName}','count', dcount(${escapedColumnName}, 0))`;
        })
        .join(',');

    return useSWR(
        () =>
            !!rowCount &&
            !!columns && [
                DATA_PROFILE_REFRESH_SCOPE,
                'Schema high cardinality columns',
                dbCacheToken,
                queryText,
                JSON.stringify(columns),
                timeRange,
            ],
        async () => {
            // The key above is only truthy when rowCount and columns are set, hence the non-null assertions.
            if (rowCount! <= SMALL_TABLE_ROW_COUNT) {
                return [];
            }

            if (rowCount! > HUGE_TABLE_ROW_COUNT) {
                return columns!.map((col) => ({ ColumnName: col.ColumnName }));
            }

            // With no sampleable column the `summarize` clause would be empty and the query invalid;
            // mirror the guard in useColumnWithInsightsPart and report nothing instead.
            if (!queryPartsPerColumn) {
                return [];
            }

            const executionResult = await executeV2Query(
                `${HOT_CACHE_FILTER}${queryText} ${timeFilter(
                    timeRange
                )}| take ${CARDINALITY_CHECK_SAMPLE_SIZE} | summarize ${queryPartsPerColumn} | evaluate narrow() | project Value = parse_json(Value) | where Value.["count"] > ${CARDINALITY_CHECK_THRESHOLD}`
            );

            return getV2PrimaryResult(executionResult).Rows.flat() as {
                ColumnName: string;
                count: number;
            }[];
        }
    );
}

/**
 * Shared SWR wrapper for per-column insight queries.
 *
 * Runs `<query> <time filter> | summarize <parts> | evaluate narrow() | project Value = parse_json(Value)`
 * against the hot cache and returns the flattened rows.
 *
 * @param queryText - Base query that is being profiled.
 * @param timeRange - Time scope applied to the query.
 * @param queryPartsPerColumn - Comma-separated aggregate expressions; when empty, no query is executed.
 * @param key - SWR key (or key function) controlling caching and whether the fetch runs.
 * @param executeV2Query - Runner used to execute the generated KQL.
 * @returns SWR response with one insight record per row returned.
 */
function useColumnWithInsightsPart(
    queryText: string,
    timeRange: DataProfileTimeRange,
    queryPartsPerColumn: string,
    key: SWRKey,
    executeV2Query: QueryRunnerV2
) {
    const insightsQuery = [
        `${HOT_CACHE_FILTER}${queryText} ${timeFilter(timeRange)}`,
        `summarize ${queryPartsPerColumn}`,
        'evaluate narrow()',
        'project Value = parse_json(Value)',
    ].join(' | ');

    return useSWR(key, async () => {
        // Nothing to aggregate: skip the round trip entirely.
        if (!queryPartsPerColumn) {
            return [];
        }
        const executionResult = await executeV2Query(insightsQuery);
        const rows = getV2PrimaryResult(executionResult).Rows.flat();
        return rows as ColumnWithInsights[];
    });
}

/**
 * Loads the minimum and maximum value of each given column.
 *
 * @param dbCacheToken - Cache discriminator for the cluster/database, included in the SWR key.
 * @param queryText - Base query that is being profiled.
 * @param executeV2Query - Runner used to execute the generated KQL.
 * @param columns - Columns to aggregate, or null to disable the fetch.
 * @param timeRange - Time scope applied to the query.
 * @returns SWR response with a record per column carrying `minValue` and `maxValue`.
 */
function useMinMaxForColumns(
    dbCacheToken: string,
    queryText: string,
    executeV2Query: QueryRunnerV2,
    columns: DataTableColumn[] | null,
    timeRange: DataProfileTimeRange
) {
    const toMinMaxPart = (col: DataTableColumn) => {
        const columnRef = escapeNameIfNecessary(col.ColumnName);
        return `bag_pack('ColumnName','${col.ColumnName}','ColumnType','${col.ColumnType}','minValue', min(${columnRef}),'maxValue', max(${columnRef}))`;
    };

    const queryPartsPerColumn = columns ? columns.map(toMinMaxPart).join(',') : '';

    // A falsy key tells SWR not to fetch while the columns are unknown.
    const swrKey = () =>
        columns
            ? [
                  DATA_PROFILE_REFRESH_SCOPE,
                  'Schema min max columns',
                  dbCacheToken,
                  queryText,
                  queryPartsPerColumn,
                  timeRange,
              ]
            : false;

    return useColumnWithInsightsPart(queryText, timeRange, queryPartsPerColumn, swrKey, executeV2Query);
}

/**
 * Loads the distinct-value estimate (`dcount`) of each given column.
 *
 * @param dbCacheToken - Cache discriminator for the cluster/database, included in the SWR key.
 * @param queryText - Base query that is being profiled.
 * @param executeV2Query - Runner used to execute the generated KQL.
 * @param columns - Columns to aggregate, or null to disable the fetch.
 * @param timeRange - Time scope applied to the query.
 * @param ready - When false, the SWR key is disabled and nothing is fetched.
 * @returns SWR response with a record per column carrying `cardinality`.
 */
function useCardinalityForColumns(
    dbCacheToken: string,
    queryText: string,
    executeV2Query: QueryRunnerV2,
    columns: DataTableColumn[] | null,
    timeRange: DataProfileTimeRange,
    ready: boolean
) {
    const toCardinalityPart = (col: DataTableColumn) => {
        const columnRef = escapeNameIfNecessary(col.ColumnName);
        return `bag_pack('ColumnName','${col.ColumnName}','cardinality', dcount(${columnRef},0))`;
    };

    const queryPartsPerColumn = columns ? columns.map(toCardinalityPart).join(',') : '';

    // A falsy key tells SWR not to fetch until the caller is ready and the columns are known.
    const swrKey = () =>
        ready && columns
            ? [
                  DATA_PROFILE_REFRESH_SCOPE,
                  'Schema cardinality columns',
                  dbCacheToken,
                  queryText,
                  queryPartsPerColumn,
                  timeRange,
              ]
            : false;

    return useColumnWithInsightsPart(queryText, timeRange, queryPartsPerColumn, swrKey, executeV2Query);
}

/**
 * Combines the schema of a query result with per-column insights (high-cardinality flag, cardinality, min/max).
 *
 * Three SWR-backed queries are issued:
 * - a high-cardinality check over all columns;
 * - a cardinality (`dcount`) query for string/guid/long/int columns not reported as high cardinality;
 * - a min/max query for real/int/long/decimal/datetime columns.
 *
 * @param queryText - Base query that is being profiled.
 * @param columns - Columns of the query result, or null while the schema is unknown.
 * @param timeRange - Time scope applied to every insight query.
 * @returns `data` (one entry per column, schema fields taking precedence over insight fields) plus the
 *   combined `isLoading` / `isValidating` state of the three queries.
 */
export function useSchemaWithInsights(
    queryText: string,
    columns: DataTableColumn[] | null,
    timeRange: DataProfileTimeRange
) {
    const { clusterUrl, databaseName, t } = useDataExplorationContext();
    const { executeV2Query } = useKustoClient({ clusterUrl, databaseName, t });

    const cacheToken = `${clusterUrl}/${databaseName}`;

    // High Cardinality
    const highCardinalityColumns = useHighCardinalityColumns(cacheToken, queryText, executeV2Query, columns, timeRange);
    const highCardinalityData = highCardinalityColumns.data;
    const isHighCardinalityLoading = highCardinalityColumns.isLoading;

    // Cardinality
    // NOTE(review): `isValidating` is also false while the high-cardinality key is still disabled (row count
    // pending), so this query may start before the high-cardinality result is known — confirm that is intended.
    const cardinalityForColumns = useCardinalityForColumns(
        cacheToken,
        queryText,
        executeV2Query,
        columns?.filter(
            (col) =>
                ['string', 'guid', 'long', 'int'].includes(col.ColumnType) &&
                !highCardinalityData?.find((highCardCol) => highCardCol.ColumnName === col.ColumnName)
        ) ?? null,
        timeRange,
        !highCardinalityColumns.isValidating
    );
    const cardinalityData = cardinalityForColumns.data;

    // Min/Max
    const minMaxForColumns = useMinMaxForColumns(
        cacheToken,
        queryText,
        executeV2Query,
        columns?.filter((col) => ['real', 'int', 'long', 'decimal', 'datetime'].includes(col.ColumnType)) ?? null,
        timeRange
    );
    const minMaxData = minMaxForColumns.data;

    // Result
    // The memo depends on the individual SWR values rather than the response objects: SWR returns a new
    // response object on every render, which would invalidate the memo each time.
    const data = useMemo<ColumnWithInsights[]>(
        () =>
            columns?.map((col) => {
                const highCardinalityResult = highCardinalityData?.find(
                    (highCardCol) => highCardCol.ColumnName === col.ColumnName
                );

                return {
                    cardinality: highCardinalityResult?.count,
                    isHighCardinality: isHighCardinalityLoading ? undefined : Boolean(highCardinalityResult),
                    ...cardinalityData?.find((cardCol) => cardCol.ColumnName === col.ColumnName),
                    ...minMaxData?.find((mmCol) => mmCol.ColumnName === col.ColumnName),
                    // Spread last so the schema's own fields win over anything returned by the insight queries.
                    ...col,
                };
            }) ?? [],
        [columns, highCardinalityData, isHighCardinalityLoading, cardinalityData, minMaxData]
    );

    return {
        data,
        isLoading: isHighCardinalityLoading || cardinalityForColumns.isLoading || minMaxForColumns.isLoading,
        isValidating:
            highCardinalityColumns.isValidating || cardinalityForColumns.isValidating || minMaxForColumns.isValidating,
    };
}

/**
 * Reports whether the ingestion-time policy is enabled for a table.
 *
 * For App Insights domains the hook resolves to `true` without querying. Otherwise it runs
 * `.show table <tableName> policy ingestiontime` and reads the extracted `isEnabled` value.
 * Any query failure resolves to `false` (best effort).
 *
 * @param dbCacheToken - Cache discriminator for the cluster/database, included in the SWR key.
 * @param tableName - Table whose policy is inspected; interpolated into the command as-is.
 * @param executeV1Query - Runner used to execute the control command.
 * @param isAppInsightsDomain - When true, the policy is assumed to be enabled.
 * @returns SWR response with the enabled flag.
 */
export function useIsIngestionTimePolicyEnabled(
    dbCacheToken: string,
    tableName: string,
    executeV1Query: QueryRunnerV1,
    isAppInsightsDomain: boolean
): SWRResponse<boolean> {
    return useSWR(['ingestionTime policy', dbCacheToken, tableName], () => {
        if (isAppInsightsDomain) {
            return Promise.resolve(true);
        }

        // NOTE(review): the JSONPath is written "$IsEnabled" rather than "$.IsEnabled" — confirm it extracts the field.
        return executeV1Query(
            `.show table ${tableName} policy ingestiontime | extend isEnabled = extract_json("$IsEnabled", Policy, typeof(string)) | project isEnabled`
        )
            .then((res) => {
                const firstRow = res.apiCallResult.Tables?.at(0)?.Rows?.at(0) as undefined | ValueRow;
                const isEnabled: unknown = firstRow?.at(0);
                // The value is extracted as a string, so plain truthiness would treat "false" as enabled.
                return isEnabled === true || String(isEnabled).toLowerCase() === 'true';
            })
            .catch(() => false);
    });
}

/**
 * Loads the total row count of a query, without any time filter or hot-cache restriction.
 *
 * @param queryText - Query whose rows are counted.
 * @returns SWR response with the count, or undefined when the result has no rows.
 */
export function useFullRowCount(queryText: string): SWRResponse<number | undefined> {
    const { clusterUrl, databaseName, t } = useDataExplorationContext();
    const { executeV2Query } = useKustoClient({ clusterUrl, databaseName, t });
    const cacheToken = `${clusterUrl}/${databaseName}`;

    const swrKey = [DATA_PROFILE_REFRESH_SCOPE, 'full row count', cacheToken, queryText];

    return useSWR(swrKey, async () => {
        const executionResult = await executeV2Query(`${queryText} | count`);
        const cells = getV2PrimaryResult(executionResult).Rows.flat() as number[];
        return cells.at(0);
    });
}

/**
 * Loads the row count of a query restricted to the hot cache and the given time range.
 *
 * @param queryText - Query whose rows are counted.
 * @param timeRange - Time scope applied before counting.
 * @returns SWR response with the count, or undefined when the result has no rows.
 */
export function useScopedRowCount(queryText: string, timeRange: DataProfileTimeRange): SWRResponse<number | undefined> {
    const { clusterUrl, databaseName, t } = useDataExplorationContext();
    const { executeV2Query } = useKustoClient({ clusterUrl, databaseName, t });
    const dbCacheToken = `${clusterUrl}/${databaseName}`;

    const swrKey = [DATA_PROFILE_REFRESH_SCOPE, 'scoped row count', dbCacheToken, queryText, timeRange];

    return useSWR(swrKey, async () => {
        const countQuery = `${HOT_CACHE_FILTER}${queryText} ${timeFilter(timeRange)} | count`;
        const executionResult = await executeV2Query(countQuery);
        const cells = getV2PrimaryResult(executionResult).Rows.flat() as number[];
        return cells.at(0);
    });
}

/**
 * Loads the number of rows in which the given column is empty (`isempty`), scoped to the hot cache
 * and the given time range.
 *
 * @param queryText - Base query that is being profiled.
 * @param columnName - Column to inspect; escaped before being placed in the query.
 * @param timeRange - Time scope applied before counting.
 * @returns SWR response with the count, or undefined when the result has no rows.
 */
export function useEmptyCellCountForColumn(
    queryText: string,
    columnName: string,
    timeRange: DataProfileTimeRange
): SWRResponse<number | undefined> {
    const { clusterUrl, databaseName, t } = useDataExplorationContext();
    const { executeV2Query } = useKustoClient({ clusterUrl, databaseName, t });
    const dbCacheToken = `${clusterUrl}/${databaseName}`;
    const columnRef = escapeNameIfNecessary(columnName);

    const swrKey = [DATA_PROFILE_REFRESH_SCOPE, 'empty row count', dbCacheToken, queryText, columnName, timeRange];

    return useSWR(swrKey, async () => {
        const emptyCountQuery = `${HOT_CACHE_FILTER}${queryText} ${timeFilter(
            timeRange
        )} | where isempty(${columnRef}) | count`;
        const executionResult = await executeV2Query(emptyCountQuery);
        const cells = getV2PrimaryResult(executionResult).Rows.flat() as number[];
        return cells.at(0);
    });
}

/**
 * Loads the ten most frequent values of a column together with their occurrence counts,
 * scoped to the hot cache and the given time range.
 *
 * @param queryText - Base query that is being profiled.
 * @param columnName - Column to group by; escaped before being placed in the query.
 * @param timeRange - Time scope applied before grouping.
 * @returns SWR response with `{ title, itemCount }` entries.
 */
export function useColumnDistribution(
    queryText: string,
    columnName: string,
    timeRange: DataProfileTimeRange
): SWRResponse<{ title: unknown; itemCount: number }[]> {
    const { clusterUrl, databaseName, t } = useDataExplorationContext();
    const { executeV2Query } = useKustoClient({ clusterUrl, databaseName, t });
    const dbCacheToken = `${clusterUrl}/${databaseName}`;
    const columnRef = escapeNameIfNecessary(columnName);

    const swrKey = [DATA_PROFILE_REFRESH_SCOPE, 'schema distribution', dbCacheToken, queryText, columnName, timeRange];

    return useSWR(swrKey, async () => {
        const distributionQuery = `${HOT_CACHE_FILTER}${queryText} ${timeFilter(
            timeRange
        )}| summarize count() by ${columnRef} | top 10 by count_ |  project title = ${columnRef}, itemCount = count_ | project item = pack_all()`;
        const executionResult = await executeV2Query(distributionQuery);
        return getV2PrimaryResult(executionResult).Rows.flat() as { title: unknown; itemCount: number }[];
    });
}

/**
 * Loads one pseudo-random non-null value of a column: up to 100 non-null rows are sampled and the
 * one with the smallest `rand()` value is returned.
 *
 * @param queryText - Base query that is being profiled.
 * @param columnName - Column to sample; escaped before being placed in the query.
 * @param timeRange - Time scope applied before sampling.
 * @returns SWR response with the sampled value, or undefined when no row was returned.
 */
export function useSampleValue(
    queryText: string,
    columnName: string,
    timeRange: DataProfileTimeRange
): SWRResponse<unknown> {
    const { clusterUrl, databaseName, t } = useDataExplorationContext();
    const { executeV2Query } = useKustoClient({ clusterUrl, databaseName, t });
    const dbCacheToken = `${clusterUrl}/${databaseName}`;
    const columnRef = escapeNameIfNecessary(columnName);

    const swrKey = [DATA_PROFILE_REFRESH_SCOPE, 'schema sample value', dbCacheToken, queryText, columnName, timeRange];

    return useSWR(swrKey, async () => {
        // NOTE(review): this query is not prefixed with HOT_CACHE_FILTER — confirm that is intentional.
        const sampleQuery = `${queryText} ${timeFilter(
            timeRange
        )}|  where isnotnull(${columnRef})| sample 100 | extend randomValue = rand() | summarize arg_min(randomValue, ${columnRef})| project ${columnRef}`;
        const executionResult = await executeV2Query(sampleQuery);
        return getV2PrimaryResult(executionResult).Rows.flat().at(0);
    });
}

/** A table column whose `ColumnType` is narrowed to the literal `'datetime'`. */
export interface DateTimeColumn extends Omit<DataTableColumn, 'ColumnType'> {
    ColumnType: 'datetime';
}

/** Axis selection for the histogram: either the ingestion time or a named datetime column. */
export type HistogramYColumn = { ColumnType: 'ingestionTime' } | { ColumnType: 'datetime'; ColumnName: string };

/**
 * Loads time-histogram buckets for a query and a datetime axis.
 *
 * @param queryText - The query whose result set is bucketed
 * @param timeRange - The time range applied to the query before bucketing
 * @param selectedColumn - The datetime column (or the ingestion time) used as the histogram axis
 * @param shouldWait - If true, the SWR key is disabled and the histogram is not fetched
 * @param binSize - 'unit' for a variable number of bins of a reasonable unit such as a minute or a day,
 *   'split' for a fixed number of bins over the entire time range
 * @returns SWRResponse with the histogram buckets; bin boundaries are the numbers returned by `Date.parse`
 */
export function useHistogram(
    queryText: string,
    timeRange: DataProfileTimeRange,
    selectedColumn: HistogramYColumn,
    shouldWait: boolean,
    binSize: 'unit' | 'split'
): SWRResponse<{ itemCount: number; binStart: number; binEnd: number }[]> {
    const { clusterUrl, databaseName, t } = useDataExplorationContext();
    const { executeV2Query } = useKustoClient({ clusterUrl, databaseName, t });

    const buildHistogramQuery = binSize === 'split' ? createHistogramQuery : createFixedBinHistogramQuery;

    const swrKey = () => {
        if (shouldWait) {
            return false;
        }
        return [
            DATA_PROFILE_REFRESH_SCOPE,
            'histogram',
            binSize,
            clusterUrl,
            databaseName,
            queryText,
            timeRange,
            selectedColumn,
        ];
    };

    return useSWR(swrKey, async () => {
        const queryResults = await executeV2Query(buildHistogramQuery(queryText, timeRange, selectedColumn));
        const rawBuckets = getV2PrimaryResult(queryResults).Rows.flat() as Array<{
            itemCount: number;
            binStart: string;
            binEnd: string;
        }>;

        // TODO: JavaScript dates aren't as accurate as Kusto Dates so we're
        // losing some accuracy here parsing like this
        return rawBuckets.map(({ itemCount, binStart, binEnd }) => ({
            itemCount,
            binStart: Date.parse(binStart),
            binEnd: Date.parse(binEnd),
        }));
    });
}

/**
 * Builds the KQL for a histogram that splits the observed time range into a fixed number of steps (15).
 * When all values share the same timestamp, a 1s step is used and the range is padded around that instant.
 *
 * @param queryText - Base query whose result set is bucketed.
 * @param timeRange - Time scope applied to the query.
 * @param selectedColumn - Datetime column, or the ingestion time, used as the series axis.
 * @returns The complete query text.
 */
function createHistogramQuery(queryText: string, timeRange: DataProfileTimeRange, selectedColumn: HistogramYColumn) {
    // axisExpression is what the series is computed on; axisColumn is the name the output is read back from.
    let axisExpression: string;
    let axisColumn: string;
    if (selectedColumn.ColumnType === 'ingestionTime') {
        axisExpression = 'ingestion_time()';
        axisColumn = '$IngestionTime';
    } else {
        axisExpression = escapeNameIfNecessary(selectedColumn.ColumnName);
        axisColumn = axisExpression;
    }

    return `${HOT_CACHE_FILTER}
    let ResultSet = ${queryText} ${timeFilter(timeRange)};
    let min_t = toscalar(ResultSet | summarize min(${axisExpression}));
    let max_t = toscalar(ResultSet | summarize max(${axisExpression}));
    let n_steps = 15;
    let dt = (max_t - min_t)/n_steps;
    let defaultTimespan = 1s;
    let isEmptyTimespan = dt == timespan(00:00);
    let normalizedDt = iff(isEmptyTimespan, defaultTimespan, dt);
    let normalizedMin_t = iff(isEmptyTimespan, min_t-normalizedDt*30, min_t);
    let normalizedMax_t = iff(isEmptyTimespan, max_t+normalizedDt*90, max_t);
    ResultSet
    | make-series itemCount=count() on ${axisExpression} from normalizedMin_t to normalizedMax_t - normalizedDt step normalizedDt
    | mv-expand itemCount, ${axisColumn}
    | project itemCount, binStart=${axisColumn}, binEnd=todatetime(${axisColumn} + normalizedDt)
    | project item=pack_all()`;
}

/**
 * Builds the KQL for a histogram whose bin width is a fixed unit (7d, 1d, 1h, 10m, 10s or 1s)
 * chosen from the size of the observed time range; the series start is floored to that unit.
 *
 * @param queryText - Base query whose result set is bucketed.
 * @param timeRange - Time scope applied to the query.
 * @param selectedColumn - Datetime column, or the ingestion time, used as the series axis.
 * @returns The complete query text.
 */
function createFixedBinHistogramQuery(
    queryText: string,
    timeRange: DataProfileTimeRange,
    selectedColumn: HistogramYColumn
) {
    // axisExpression is what the series is computed on; axisColumn is the name the output is read back from.
    let axisExpression: string;
    let axisColumn: string;
    if (selectedColumn.ColumnType === 'ingestionTime') {
        axisExpression = 'ingestion_time()';
        axisColumn = '$IngestionTime';
    } else {
        axisExpression = escapeNameIfNecessary(selectedColumn.ColumnName);
        axisColumn = axisExpression;
    }

    return `${HOT_CACHE_FILTER}
    let ResultSet = ${queryText} ${timeFilter(timeRange)};
    let min_t = toscalar(ResultSet | summarize min(${axisExpression}));
    let max_t = toscalar(ResultSet | summarize max(${axisExpression}));
    let Range = max_t - min_t ;
    let dt = case(Range>= 365d,7d,Range>= 7d,1d,Range >= 1d,1h,Range >=1h,10m,Range > 1m,10s,1s);
    let isEmptyTimespan = max_t - min_t == timespan(00:00);
    let normalizedMin_t = iff(isEmptyTimespan, min_t-dt*30, min_t);
    let normalizedMax_t = iff(isEmptyTimespan, max_t+dt*90, max_t);
    let floor_min_t = bin(normalizedMin_t, dt);
    ResultSet
    | make-series itemCount=count() on ${axisExpression} from floor_min_t to normalizedMax_t - dt step dt
    | mv-expand itemCount, ${axisColumn}
    | project itemCount, binStart=${axisColumn}, binEnd=todatetime(${axisColumn} + dt)
    | project item=pack_all()`;
}
