import os as PI_OS
import polars as PI_POLARS


def _FC_ASSERT_UTF8(ZV_ST_FILE_PATH):
    """Stream the whole file through a strict UTF-8 decoder.

    Returns None when every byte decodes. Raises UnicodeDecodeError at the
    first invalid byte sequence, or OSError if the file cannot be opened.
    """
    # newline="" switches off newline translation; only the decoding matters.
    with open(ZV_ST_FILE_PATH, encoding="utf-8", newline="") as ZV_FH:
        # Read in bounded chunks so a large file is never held in memory.
        while ZV_FH.read(1 << 20):
            pass


def _FC_NUMERIC_EXPR(ZV_ST_COL):
    """Build the polars expression that turns text column ZV_ST_COL into Float64."""
    # Clean FIRST (quotes, thousands separators, spaces) so that the
    # trailing-minus test below sees the real last character. Testing the raw
    # value misses cells such as '"12.50-"' or '12.50- ' and zeroes them.
    ZV_EX_CLEAN = (
        PI_POLARS.col(ZV_ST_COL)
        .cast(PI_POLARS.Utf8)
        .str.replace_all(r'[", ]', '')
    )
    return (
        PI_POLARS.when(ZV_EX_CLEAN.str.ends_with('-'))
        .then(
            # Trailing minus: strip the sign, parse, negate.
            ZV_EX_CLEAN.str.replace(r'-$', '').cast(PI_POLARS.Float64, strict=False) * -1
        )
        .otherwise(ZV_EX_CLEAN.cast(PI_POLARS.Float64, strict=False))
        # Blank, null and unparseable values all come out of the non-strict
        # cast as null and are mapped to 0.0 here.
        .fill_null(0.0)
        .alias(ZV_ST_COL)
    )


# Import
def FC_IMPORT_TEXT(
    ZVFCI_ST_SOURCE_FILE,
    ZVFCI_ST_FOLDER,
    ZVFCI_ST_DELIMITER,
    ZVFCI_ST_PREFIX,
    ZVFCI_LI_NUM_COLS
):
    """Read a delimited text file into a polars DataFrame of prefixed columns.

    Every column is read as text, nulls are replaced by '', and every column
    name is prefixed with ZVFCI_ST_PREFIX. The columns listed in
    ZVFCI_LI_NUM_COLS are then cleaned and cast to Float64.

    Parameters:
        ZVFCI_ST_SOURCE_FILE: file name, joined onto ZVFCI_ST_FOLDER.
        ZVFCI_ST_FOLDER: folder that contains the file.
        ZVFCI_ST_DELIMITER: field separator passed to polars.read_csv.
        ZVFCI_ST_PREFIX: string prepended to every column name.
        ZVFCI_LI_NUM_COLS: names of the numeric columns. They must be the
            *prefixed* names (e.g. "REGUH_RBETR") because prefixing happens
            before the numeric cast. Names not present in the file are
            skipped; None is treated as an empty list.

    Returns:
        The DataFrame: text columns as Utf8, numeric columns as Float64
        (blank or unparseable values become 0.0).

    Raises:
        RuntimeError: if the file cannot be read with any candidate encoding.
    """
    ZV_ST_FILE_PATH = PI_OS.path.join(ZVFCI_ST_FOLDER, ZVFCI_ST_SOURCE_FILE)

    # Try encodings in order (utf8 first, then common Windows/SAP exports,
    # then latin1 as last resort). "windows-1252" is a Python alias of
    # "cp1252", so it is not listed separately.
    ZV_LI_ENCODINGS = ["utf8", "utf-16", "cp1252", "latin1"]

    ZV_DF_HEADER = None
    ZV_ST_ENCODING_USED = None
    ZV_LAST_ERR = None

    for ZV_ST_ENC in ZV_LI_ENCODINGS:
        try:
            if ZV_ST_ENC == "utf8":
                # The probe below only parses the header and one data row, so
                # on its own it would accept a file whose later rows are not
                # UTF-8. The full read runs with ignore_errors=True and would
                # not raise on those rows, so validate the whole file here.
                _FC_ASSERT_UTF8(ZV_ST_FILE_PATH)
            ZV_DF_HEADER = PI_POLARS.read_csv(
                ZV_ST_FILE_PATH,
                separator=ZVFCI_ST_DELIMITER,
                encoding=ZV_ST_ENC,
                has_header=True,
                n_rows=1,
                infer_schema_length=0,
                quote_char=None
            )
        except Exception as ZV_E:
            # Deliberately broad: any failure means "try the next encoding".
            # The last one is reported if every candidate fails.
            ZV_LAST_ERR = ZV_E
        else:
            ZV_ST_ENCODING_USED = ZV_ST_ENC
            break

    if ZV_DF_HEADER is None:
        raise RuntimeError(
            f"Could not read file with any encoding. Last error: {ZV_LAST_ERR}"
        ) from ZV_LAST_ERR

    ZV_LI_COLS = ZV_DF_HEADER.columns
    ZV_DI_SCHEMA_OVERRIDES = {ZV_COL: PI_POLARS.Utf8 for ZV_COL in ZV_LI_COLS}

    # Read full file with the detected encoding (as Utf8 for safe cleaning)
    ZV_DF = (
        PI_POLARS.read_csv(
            ZV_ST_FILE_PATH,
            separator=ZVFCI_ST_DELIMITER,
            encoding=ZV_ST_ENCODING_USED,
            ignore_errors=True,
            has_header=True,
            schema_overrides=ZV_DI_SCHEMA_OVERRIDES,
            infer_schema_length=0,
            null_values=[''],
            quote_char=None
        )
        .fill_null('')
    )

    # Prefix all columns in one go (instead of renaming inside a loop)
    ZV_DF = ZV_DF.rename({ZV_COL: f"{ZVFCI_ST_PREFIX}{ZV_COL}" for ZV_COL in ZV_DF.columns})

    # Clean + cast numeric columns (handles quotes, thousands separators,
    # spaces, trailing minus, blanks). dict.fromkeys drops duplicate names
    # while keeping order, so each column is converted exactly once.
    ZV_SE_PRESENT = set(ZV_DF.columns)
    ZV_LI_EXPRS = [
        _FC_NUMERIC_EXPR(ZV_COL)
        for ZV_COL in dict.fromkeys(ZVFCI_LI_NUM_COLS or [])
        if ZV_COL in ZV_SE_PRESENT
    ]
    if ZV_LI_EXPRS:
        # One with_columns call for all numeric columns instead of one per column.
        ZV_DF = ZV_DF.with_columns(ZV_LI_EXPRS)

    return ZV_DF