# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import os
from pathlib import Path
import re
import math
import sqlite3
import json
import traceback # Importējām traceback

# --- 1. CONFIGURATION AND PATHS ---
# All paths are resolved relative to the directory containing this script.
SCRIPT_DIR = Path(__file__).parent.resolve()
# Directory the input CSV files are read from (see load_data).
CSV_DIR = SCRIPT_DIR / 'csv'
# Directory the generated SQLite database is written to (see create_and_save_db).
OUTPUT_DIR = SCRIPT_DIR / 'out' / 'pensionars'

# Valuation multiplier keyed by a two-character code prefix. get_valuation_multiplier
# looks up the first two characters of a NACE code here and falls back to 3.0 for
# prefixes that are not listed.
# NOTE(review): the origin of the individual multiplier values is not visible in
# this file -- confirm the source before changing them.
SECTOR_MULTIPLIERS = {
    '01': 4.0, '02': 4.0, '03': 3.5, '05': 3.5, '06': 3.5, '07': 3.5, '08': 3.5, '09': 4.0,
    '10': 3.5, '11': 3.5, '12': 3.0, '13': 3.0, '14': 3.0, '15': 3.0, '16': 3.5, '17': 3.5, '18': 3.0,
    '19': 4.0, '20': 4.5, '21': 6.0, '22': 3.5, '23': 3.5, '24': 3.5, '25': 3.5, '26': 5.5, '27': 4.5,
    '28': 4.0, '29': 3.5, '30': 3.5, '31': 3.5, '32': 3.5, '33': 3.5, '35': 5.0, '36': 4.5, '37': 4.5,
    '38': 4.5, '39': 4.5, '41': 2.5, '42': 2.5, '43': 2.5, '45': 3.0, '46': 3.5, '47': 3.0, '49': 3.5,
    '50': 3.5, '51': 3.5, '52': 4.0, '53': 3.5, '55': 2.5, '56': 2.5, '58': 5.0, '59': 4.5, '60': 4.5,
    '61': 5.5, '62': 6.5, '63': 6.0, '64': 4.0, '65': 4.0, '66': 4.0, '68': 5.0, '69': 4.0, '70': 4.0,
    '71': 4.0, '72': 5.0, '73': 3.5, '74': 3.5, '75': 4.5, '77': 4.0, '78': 3.5, '79': 3.0, '80': 3.0,
    '81': 3.0, '82': 3.0, '84': 3.0, '85': 3.5, '86': 5.0, '87': 4.0, '88': 3.5, '90': 3.0, '91': 3.0,
    '92': 7.0, '93': 3.5, '94': 3.0, '95': 3.0, '96': 2.5
}

# --- 2. HELPER FUNCTIONS (GLOBAL SCOPE) ---

def get_valuation_multiplier(nace_code):
    """Return the valuation multiplier for a NACE code.

    The code is converted to a string, stripped of surrounding whitespace and
    of dots; its first two characters are then looked up in
    ``SECTOR_MULTIPLIERS``.

    Args:
        nace_code: NACE code in any form accepted by ``str()``.

    Returns:
        The multiplier registered for the two-character prefix, or ``3.0``
        when the input is falsy, shorter than two characters after
        normalisation, or has an unlisted prefix.
    """
    fallback = 3.0
    if not nace_code:
        return fallback

    normalized = str(nace_code).strip().replace('.', '')
    if len(normalized) < 2:
        return fallback

    return SECTOR_MULTIPLIERS.get(normalized[:2], fallback)

def _get_health_symbol(key, val):
    if val is None or not math.isfinite(val): return 'n'
    t = {'current_ratio': (1.0,1.5,3.0),'quick_ratio':(0.8,1.0,2.0),'net_profit_margin': (0,10),
        'roa': (2,5),'roe': (5,15),'debt_to_equity': (2.0,1.5,0),'asset_turnover': (0.5,1.0),
        'roce': (5,15),'interest_coverage': (1.5,3.0),'altman_z_score': (1.23,2.90)}.get(key)
    if not t: return 'n'
    if key == 'debt_to_equity': return 'g' if val < t[1] else 'o' if val <= t[0] else 'r'
    if len(t) == 3: return 'r' if val < t[0] else 'o' if val < t[1] else 'g' if val <= t[2] else 'b'
    return 'r' if val < t[0] else 'o' if val < t[1] else 'g'

def calculate_financial_ratios(row):
    """Compute liquidity, profitability, leverage and Altman-style ratios.

    Args:
        row: Mapping-like object with a ``get`` method (a ``dict`` or a pandas
            ``Series``) holding income-statement and balance-sheet figures.

    Returns:
        dict mapping each ratio name (``current_ratio``, ``quick_ratio``,
        ``net_profit_margin``, ``roa``, ``roe``, ``debt_to_equity``,
        ``asset_turnover``, ``roce``, ``interest_coverage``,
        ``altman_z_score``) to a float, or to ``None`` when the denominator
        is zero (for the Altman score: when any component is unavailable).
    """
    def get_val(key):
        # Missing, unparsable or non-finite inputs count as 0. The previous
        # `value or 0` idiom did not achieve that because NaN is truthy.
        value = pd.to_numeric(row.get(key), errors='coerce')
        try:
            value = float(value)
        except (TypeError, ValueError):
            return 0.0
        return value if math.isfinite(value) else 0.0

    def safe_divide(n, d):
        # None signals "not computable" to callers.
        return (n / d) if d and math.isfinite(d) else None

    income_keys = ['net_income', 'net_turnover', 'interest_expenses', 'income_before_income_taxes']
    balance_keys = ['total_current_assets', 'current_liabilities', 'inventories',
                    'total_assets', 'equity', 'non_current_liabilities']
    i = {k: get_val(k) for k in income_keys}
    b = {k: get_val(k) for k in balance_keys}

    total_liabilities = b['current_liabilities'] + b['non_current_liabilities']
    # EBIT is approximated as pre-tax income plus interest expenses.
    ebit = i['income_before_income_taxes'] + i['interest_expenses']
    # Capital employed = total assets minus current liabilities.
    capital_employed = b['total_assets'] - b['current_liabilities']

    ratios = {}
    ratios['current_ratio'] = safe_divide(b['total_current_assets'], b['current_liabilities'])
    ratios['quick_ratio'] = safe_divide(b['total_current_assets'] - b['inventories'], b['current_liabilities'])
    ratios['net_profit_margin'] = safe_divide(i['net_income'] * 100, i['net_turnover'])
    ratios['roa'] = safe_divide(i['net_income'] * 100, b['total_assets'])
    ratios['roe'] = safe_divide(i['net_income'] * 100, b['equity'])
    ratios['debt_to_equity'] = safe_divide(total_liabilities, b['equity'])
    ratios['asset_turnover'] = safe_divide(i['net_turnover'], b['total_assets'])
    ratios['roce'] = safe_divide(ebit * 100, capital_employed)
    ratios['interest_coverage'] = safe_divide(ebit, i['interest_expenses'])

    # Weighted five-factor score; the component using equity / total assets is
    # kept exactly as in the original implementation.
    components = [
        safe_divide(b['total_current_assets'] - b['current_liabilities'], b['total_assets']),
        safe_divide(b['equity'], b['total_assets']),
        safe_divide(ebit, b['total_assets']),
        safe_divide(b['equity'], total_liabilities),
        safe_divide(i['net_turnover'], b['total_assets']),
    ]
    if any(c is None for c in components):
        ratios['altman_z_score'] = None
    else:
        weights = (0.717, 0.847, 3.107, 0.420, 0.998)
        ratios['altman_z_score'] = sum(w * c for w, c in zip(weights, components))
    return ratios

def process_row_sequential(row, hist_turnover_map, hist_profit_map, hist_employees_map):
    """Convert one merged company record into the dict stored in the database.

    Args:
        row: Mapping with the merged company, owner and financial columns.
        hist_turnover_map: regcode -> JSON string with turnover history.
        hist_profit_map: regcode -> JSON string with profit history.
        hist_employees_map: regcode -> JSON string with employee history.

    Returns:
        dict with one entry per ``companies`` column, or ``None`` when any
        error occurs (the error and its traceback are printed).
    """
    ratio_order = (
        'current_ratio', 'quick_ratio', 'net_profit_margin', 'roa', 'roe',
        'debt_to_equity', 'asset_turnover', 'roce', 'interest_coverage',
        'altman_z_score',
    )
    balance_columns = (
        'total_assets', 'total_non_current_assets', 'total_current_assets',
        'equity', 'provisions', 'non_current_liabilities', 'current_liabilities',
    )
    try:
        reg = row['regcode']
        ratios = calculate_financial_ratios(row)
        # One symbol per ratio, concatenated in a fixed order.
        symbols = [_get_health_symbol(ratio, ratios.get(ratio)) for ratio in ratio_order]
        health_string = "".join(symbols)
        multiplier = get_valuation_multiplier(row.get('nace_code'))

        # Key order is significant: it becomes the column order of the output frame.
        record = {}
        record['regcode'] = reg
        record['name'] = row['name']
        record['location'] = row.get('location', '-')
        record['turnover'] = row.get('net_turnover', 0)
        record['employees'] = row.get('employees', 0)
        record['profit'] = row.get('net_income', 0)
        record['founded_year'] = row['founded_year']
        record['owner_name'] = row['owner_name']
        record['owner_pk'] = row['owner_pk']
        record['nace_description'] = row.get('nace_description', 'Nav norādīts')
        record['financial_health_string'] = health_string
        record['balance_year'] = row.get('year')
        for column in balance_columns:
            record[column] = row.get(column, 0)
        record['valuation_multiplier'] = multiplier
        record['history_turnover'] = hist_turnover_map.get(reg, '[]')
        record['history_profit'] = hist_profit_map.get(reg, '[]')
        record['history_employees'] = hist_employees_map.get(reg, '[]')
        record['tax_rating'] = row.get('tax_rating', '')
        return record
    except Exception as e:
        print(f"Kļūda apstrādājot rindu {row.get('regcode', 'Unknown')}: {e}")
        traceback.print_exc()  # Print the detailed error.
        return None

def format_company_name(row):
    """Build a display name of the form ``TYPE "NAME"`` for a register row.

    Args:
        row: Mapping-like object with a ``get`` method exposing ``type``,
            ``name_in_quotes`` and ``name``.

    Returns:
        ``TYPE "NAME"`` when both parts are present, ``"NAME"`` when only the
        quoted name is present, otherwise the stripped ``name`` column (an
        empty string when that is missing too).
    """
    def clean(value):
        # Empty CSV cells arrive as NaN (or None / pd.NA); str() would turn
        # them into the literal text 'nan' / 'None' / '<NA>'.
        if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
            return ''
        return str(value).strip()

    t = clean(row.get('type', ''))
    n = clean(row.get('name_in_quotes', ''))
    if t and n:
        return f"{t} \"{n}\""
    if n:
        return f'"{n}"'
    return clean(row.get('name', ''))

def load_data():
    """Read every known CSV file from ``CSV_DIR`` into a DataFrame.

    All columns are read as strings and malformed lines are skipped.

    Returns:
        dict mapping a logical dataset key to its DataFrame, or to ``None``
        when the file is missing or could not be parsed.
    """
    print("Sāk ielādēt CSV datus...")
    required_files = {
        'register': 'register.csv',
        'beneficial_owners': 'beneficial_owners.csv',
        'officers': 'officers.csv',
        'tax_data': 'pdb_nm_komersantu_samaksato_nodoklu_kopsumas_odata.csv',
        'financial_statements': 'financial_statements.csv',
        'income_statements': 'income_statements.csv',
        'balance_sheets': 'balance_sheets.csv',
        'stockholders': 'stockholders.csv',
        'members': 'members.csv',
        'nace': 'NACE.csv',
        'rating': 'reitings_uznemumi.csv',
    }
    # These datasets use a comma as separator; all others use a semicolon.
    comma_separated = ('tax_data', 'nace', 'rating')

    dataframes = {}
    for key, filename in required_files.items():
        # Start from "not loaded"; overwritten below on success.
        dataframes[key] = None
        path = CSV_DIR / filename
        if not path.exists():
            print(f"Kļūda: Fails '{filename}' nav atrasts. Tiks izlaists.")
            continue

        separator = ',' if key in comma_separated else ';'
        try:
            dataframes[key] = pd.read_csv(path, delimiter=separator, dtype=str, on_bad_lines='skip')
            print(f"Veiksmīgi ielādēts: {filename}")
        except Exception as e:
            print(f"Kļūda, ielādējot '{filename}': {e}. Tiks izlaists.")
            dataframes[key] = None
    return dataframes

def create_and_save_db():
    """Build ``pensionari.sqlite`` in ``OUTPUT_DIR`` from the CSV exports.

    Steps: load the CSV files, filter the register down to matching
    companies, attach the owner, the latest financial statement, NACE data
    and the optional tax rating, compute per-company records and store them
    in the ``companies`` table.

    A company is kept when it has no stockholder records, at most one member
    record, is not closed/terminated, has exactly one beneficial owner who is
    also listed as an officer and whose masked identity number passes
    ``is_valid_pk_prefix``, was registered before 2016, and has non-zero
    turnover, employees and net income in its latest statement.

    Returns:
        None. Progress and errors are printed; errors raised after the
        database has been opened are caught and printed with a traceback.
    """
    # Load and validate inputs BEFORE touching the output, so that a missing
    # CSV cannot wipe a previously generated database.
    data = load_data()
    # Every dataset dereferenced unconditionally below ('rating' is optional).
    required_keys = (
        'register', 'beneficial_owners', 'officers', 'stockholders', 'members',
        'tax_data', 'nace', 'financial_statements', 'income_statements', 'balance_sheets',
    )
    missing = [key for key in required_keys if not data or data.get(key) is None]
    if missing:
        print("Kļūda: Nevarēja ielādēt nepieciešamos failus.")
        print(f"  Trūkst: {', '.join(missing)}")
        return

    OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
    db_file = OUTPUT_DIR / "pensionari.sqlite"
    if db_file.exists(): db_file.unlink()

    conn = None  # Opened below and closed in the finally block.
    try:
        conn = sqlite3.connect(db_file)
        cursor = conn.cursor()
        cursor.execute('''
            CREATE TABLE companies (
                regcode TEXT PRIMARY KEY, name TEXT, location TEXT, turnover REAL,
                employees INTEGER, profit REAL, founded_year INTEGER,
                owner_name TEXT, owner_pk TEXT, nace_description TEXT,
                financial_health_string TEXT,
                balance_year INTEGER,
                total_assets REAL,
                total_non_current_assets REAL,
                total_current_assets REAL,
                equity REAL,
                provisions REAL,
                non_current_liabilities REAL,
                current_liabilities REAL,
                valuation_multiplier REAL,
                history_turnover TEXT,
                history_profit TEXT,
                history_employees TEXT,
                tax_rating TEXT
            )
        ''')
        conn.commit()

        print("Sāk datu sagatavošanu un filtrēšanu...")

        register_df = data['register'].copy()
        register_df['name'] = register_df.apply(format_company_name, axis=1)

        bo_df = data['beneficial_owners'].copy().rename(columns={'legal_entity_registration_number': 'regcode', 'latvian_identity_number_masked': 'owner_pk', 'forename': 'name_owner', 'surname':'surname_owner'})
        officers_df = data['officers'].copy().rename(columns={'at_legal_entity_registration_number': 'regcode', 'latvian_identity_number_masked': 'owner_pk'})
        stockholders_df = data['stockholders'].copy().rename(columns={'at_legal_entity_registration_number': 'regcode'})
        members_df = data['members'].copy().rename(columns={'at_legal_entity_registration_number': 'regcode'})
        tax_df = data['tax_data'].copy().rename(columns={'Registracijas_kods': 'regcode', 'Pamatdarbibas_NACE_kods': 'nace_code'})

        nace_df = data['nace'].copy().rename(columns={'Kods': 'nace_code', 'Nosaukums': 'nace_description'})
        nace_df['nace_code'] = nace_df['nace_code'].str.replace('.', '', regex=False)

        # Drop every company that appears in the stockholders dataset.
        if not stockholders_df.empty:
            stockholder_regcodes = set(stockholders_df['regcode'].dropna().unique())
            register_df = register_df[~register_df['regcode'].isin(stockholder_regcodes)]
            print(f"Pēc akcionāru filtra paliek {len(register_df)} uzņēmumi.")

        # Drop companies with more than one member record.
        if not members_df.empty:
            member_counts = members_df.groupby('regcode').size()
            multi_member_regcodes = set(member_counts[member_counts > 1].index)
            register_df = register_df[~register_df['regcode'].isin(multi_member_regcodes)]
            print(f"Pēc dalībnieku skaita filtra paliek {len(register_df)} uzņēmumi.")

        # Keep only companies that are neither closed ('L'/'R') nor terminated.
        active_mask = (~register_df['closed'].isin(['L', 'R'])) & (register_df['terminated'].isna() | register_df['terminated'].isin(['', '0000-00-00']))
        final_df = register_df[active_mask].copy()

        bo_counts = bo_df.groupby('regcode').size()
        single_bo_regcodes = set(bo_counts[bo_counts == 1].index)

        def is_valid_pk_prefix(pk):
            """Accept masked IDs whose 6-char prefix has day 01-31 and year digits 30-65."""
            # NOTE(review): the 30..65 window presumably selects birth years
            # 1930-1965 -- confirm against the identity-number format.
            if not isinstance(pk, str) or '-' not in pk: return False
            pk_prefix = pk.split('-')[0]
            if len(pk_prefix) != 6: return False
            try:
                day = int(pk_prefix[0:2])
                year = int(pk_prefix[4:6])
            except ValueError:
                # A non-numeric prefix is simply not a match; previously it
                # raised and aborted the whole run.
                return False
            return (1 <= day <= 31) and (30 <= year <= 65)

        bo_filtered = bo_df[bo_df['owner_pk'].apply(is_valid_pk_prefix)].copy()

        # The beneficial owner must also be listed as an officer of the same company.
        bo_filtered['key'] = bo_filtered['regcode'] + '_' + bo_filtered['owner_pk']
        officers_df['key'] = officers_df['regcode'] + '_' + officers_df['owner_pk']
        final_bo_regcodes = set(bo_filtered[bo_filtered['key'].isin(officers_df['key'])]['regcode'])

        # .copy() so the column assignments below work on an independent frame.
        final_df = final_df[final_df['regcode'].isin(single_bo_regcodes & final_bo_regcodes)].copy()
        print(f"Pēc PLG filtra paliek {len(final_df)} uzņēmumi.")

        final_df['founded_year'] = pd.to_datetime(final_df['registered'], errors='coerce').dt.year
        final_df.dropna(subset=['founded_year'], inplace=True)
        final_df['founded_year'] = final_df['founded_year'].astype(int)
        final_df = final_df[final_df['founded_year'] < 2016]
        print(f"Pēc dibināšanas gada filtra paliek {len(final_df)} uzņēmumi.")

        if final_df.empty:
            print("Netika atrasts neviens uzņēmums.")
            return

        print("Sāk datu apvienošanu...")
        final_df = pd.merge(final_df, bo_filtered[['regcode', 'owner_pk', 'name_owner', 'surname_owner']], on='regcode', how='left')
        final_df['owner_name'] = final_df['name_owner'].str.strip() + ' ' + final_df['surname_owner'].str.strip()

        fs_df = data['financial_statements'].copy().rename(columns={'legal_entity_registration_number': 'regcode', 'id': 'statement_id'})

        # --- FILTER: drop statements whose source_type is 'UKGP' ---
        if 'source_type' in fs_df.columns:
            before_filter = len(fs_df)
            fs_df = fs_df[fs_df['source_type'] != 'UKGP']
            print(f"  Izfiltrēti konsolidētie pārskati: {before_filter - len(fs_df)} ieraksti noņemti.")
        # -----------------------------------------------------------

        income_df = data['income_statements'].copy().rename(columns={'id': 'statement_id'})
        balance_df = data['balance_sheets'].copy().rename(columns={'id': 'statement_id'})

        fs_df = fs_df.merge(income_df, on='statement_id', how='left').merge(balance_df, on='statement_id', how='left')
        fs_df['year'] = pd.to_numeric(fs_df['year'], errors='coerce')

        fin_cols = [
            'net_turnover', 'net_income', 'income_before_income_taxes', 'interest_expenses',
            'total_assets', 'total_non_current_assets', 'total_current_assets', 'inventories',
            'equity', 'provisions', 'non_current_liabilities', 'current_liabilities', 'employees'
        ]
        for col in fin_cols:
            if col not in fs_df.columns: fs_df[col] = 0
            else: fs_df[col] = pd.to_numeric(fs_df[col], errors='coerce').fillna(0)

        print("  Apstrādā vēsturiskos datus...")
        fs_history = fs_df.sort_values(['regcode', 'year'], ascending=[True, False])

        def get_history_json(df, col, cast=float):
            """Serialise the five most recent non-zero values of *col* as JSON."""
            # Rows without a parsable year are skipped: int(NaN) would raise
            # and abort the whole run.
            recent = df[(df[col] != 0) & df['year'].notna()].head(5)
            return json.dumps([{'y': int(y), 'v': cast(v)} for y, v in zip(recent['year'], recent[col])])

        history_turnover_map = fs_history.groupby('regcode')[['year', 'net_turnover']].apply(
            lambda x: get_history_json(x, 'net_turnover'), include_groups=False
        ).to_dict()

        history_profit_map = fs_history.groupby('regcode')[['year', 'net_income']].apply(
            lambda x: get_history_json(x, 'net_income'), include_groups=False
        ).to_dict()

        history_employees_map = fs_history.groupby('regcode')[['year', 'employees']].apply(
            lambda x: get_history_json(x, 'employees', cast=int), include_groups=False
        ).to_dict()

        # Most recent statement per company.
        latest_fs = fs_df.sort_values('year', ascending=False).drop_duplicates('regcode')

        tax_df['Taksacijas_gads'] = pd.to_numeric(tax_df['Taksacijas_gads'], errors='coerce')
        tax_df['nace_code'] = tax_df['nace_code'].str.strip().replace('?', np.nan)
        # Forward-fill within each company so the newest row inherits the last known NACE code.
        tax_df.sort_values(by=['regcode', 'Taksacijas_gads'], ascending=[True, True], inplace=True)
        tax_df['nace_code'] = tax_df.groupby('regcode')['nace_code'].ffill()
        tax_df_latest = tax_df.sort_values('Taksacijas_gads', ascending=False).drop_duplicates('regcode')
        tax_df_latest = pd.merge(tax_df_latest, nace_df, on='nace_code', how='left')

        result_df = pd.merge(final_df, latest_fs, on='regcode', how='left')
        result_df = pd.merge(result_df, tax_df_latest, on='regcode', how='left')
        # Location is the part of the address before the first comma.
        result_df['location'] = result_df['address'].fillna('').str.split(',', n=1, expand=False).str[0].str.strip()

        rating_df = data.get('rating')
        if rating_df is not None:
            rating_df = rating_df.copy().rename(columns={'Registracijas_kods': 'regcode', 'Reitings': 'tax_rating'})
            result_df = pd.merge(result_df, rating_df[['regcode', 'tax_rating']], on='regcode', how='left', suffixes=('_fin', '_tax'))

        result_df['employees'] = pd.to_numeric(result_df['employees'], errors='coerce').fillna(0)
        result_df['net_turnover'] = pd.to_numeric(result_df['net_turnover'], errors='coerce').fillna(0)
        result_df['net_income'] = pd.to_numeric(result_df['net_income'], errors='coerce').fillna(0)

        before_data_filter = len(result_df)
        result_df = result_df[(result_df['net_turnover'] != 0) & (result_df['employees'] != 0) & (result_df['net_income'] != 0)]
        print(f"Pēc datu filtra paliek {len(result_df)} uzņēmumi (noņemti {before_data_filter - len(result_df)}).")

        print("  Sāk aprēķinu (sekvenciāli, lai atrastu kļūdas)...")
        final_data_for_db = []

        # Rows are processed one at a time so per-row errors are visible.
        records = result_df.to_dict('records')
        for rec in records:
            res = process_row_sequential(rec, history_turnover_map, history_profit_map, history_employees_map)
            if res:
                final_data_for_db.append(res)

        print(f"  Pēc apstrādes iegūti {len(final_data_for_db)} ieraksti.")
        db_df = pd.DataFrame(final_data_for_db)
        if not db_df.empty:
            # The left-merges above can multiply rows; regcode is the table's
            # PRIMARY KEY, so keep one row per company.
            db_df = db_df.drop_duplicates(subset='regcode', keep='first')
        print(f"  Pirms saglabāšanas datubāzē: {len(db_df)} ieraksti.")

        if not db_df.empty:
            # 'append' keeps the schema declared above; 'replace' would drop
            # the table and recreate it without the primary key.
            db_df.to_sql('companies', conn, if_exists='append', index=False)
            print(f"Veiksmīgi saglabāti {len(db_df)} ieraksti datubāzē.")
        else:
            print("KĻŪDA: Rezultātu DataFrame ir tukšs!")

    except Exception as e:
        print(f"KRITISKA KĻŪDA APSTRĀDES LAIKĀ: {e}")
        traceback.print_exc()
    finally:
        if conn: conn.close()

# Script entry point: build the database only when this file is run directly.
if __name__ == "__main__":
    create_and_save_db()
