import React, { useState, useEffect } from 'react';
import { Settings2, Wrench, ArrowRight, Pencil, Trash2, Database } from 'lucide-react';
import type { Dataset, Column, ColumnType, PreprocessingConstants, PreprocessingSteps, PreprocessingStep } from '../../types/dataset';
import { Badge } from "@/components/ui/badge";

/**
 * Props accepted by the PreprocessingConfig component.
 */
interface PreprocessingConfigProps {
  /** Column being configured; the component reads its preprocessing steps, datatype, name and statistics. */
  column: Column;
  /** Dataset that owns `column`; the component maps over `dataset.columns` and matches entries by `name`. */
  dataset: Dataset;
  /** Callback taking a column name and a column type. NOTE(review): not invoked in the visible part of the component; confirm usage. */
  setColumnType: (columnName: string, columnType: string) => void;
  /** Receives a copy of `dataset` with an updated `columns` array (drop_if_null / description edits). */
  setDataset: (dataset: Dataset) => void;
  /** NOTE(review): not referenced in the visible part of the component; presumably option lists for the UI, to be confirmed. */
  constants: PreprocessingConstants;
  /**
   * Called by the component's change handlers with the training step, the
   * inference step and the current `useDistinctInference` flag. When a
   * training-side change is reported while `useDistinctInference` is false,
   * `inference` is passed as `undefined`.
   */
  onUpdate: (
    training: PreprocessingStep,
    inference: PreprocessingStep | undefined,
    useDistinctInference: boolean
  ) => void;
}

/** Returns true when `type` is `'float'` or `'integer'`. */
const isNumericType = (type: ColumnType): boolean =>
  type === 'float' || type === 'integer';

/**
 * Builds a PreprocessingStep from an optional existing step, filling in
 * defaults for anything missing: method `'none'`, `categorical_min` 100,
 * `one_hot` true, `ordinal_encoding` false. `constant` and `clip` are copied
 * through as-is (possibly undefined).
 */
const createPreprocessingStep = (steps?: PreprocessingStep): PreprocessingStep => ({
  // `||` (not `??`): any falsy method, including an empty string, falls back to 'none'.
  method: steps?.method || 'none',
  params: {
    constant: steps?.params?.constant,
    // `??` keeps explicit falsy values (0, false) and only defaults null/undefined.
    categorical_min: steps?.params?.categorical_min ?? 100,
    one_hot: steps?.params?.one_hot ?? true,
    ordinal_encoding: steps?.params?.ordinal_encoding ?? 
false, clip: steps?.params?.clip } }); export function PreprocessingConfig({ column, dataset, setColumnType, setDataset, constants, onUpdate }: PreprocessingConfigProps) { const [useDistinctInference, setUseDistinctInference] = useState( Boolean(column.preprocessing_steps?.inference?.method && column.preprocessing_steps.inference.method !== 'none') ); const selectedType = column.datatype as ColumnType; const [training, setTraining] = useState(() => createPreprocessingStep(column.preprocessing_steps?.training) ); const [inference, setInference] = useState(() => createPreprocessingStep(column.preprocessing_steps?.inference) ); // Update all states when column changes useEffect(() => { setTraining(createPreprocessingStep(column.preprocessing_steps?.training)); setInference(createPreprocessingStep(column.preprocessing_steps?.inference)); }, [column.id]); // Only re-run when column changes const handleStrategyChange = ( type: 'training' | 'inference', method: PreprocessingStep['method'] ) => { let defaultParams: PreprocessingStep['params'] = {}; if (selectedType === 'categorical') { if (method === 'categorical') { defaultParams = { ...defaultParams, categorical_min: 100, one_hot: true }; } else if (method != 'none') { defaultParams = { ...defaultParams, one_hot: true }; } } if (column.is_target) { defaultParams = { ...defaultParams, ordinal_encoding: true }; } const newStrategy: PreprocessingStep = { method, params: defaultParams }; if (type === 'training') { setTraining(newStrategy); onUpdate(newStrategy, useDistinctInference ? inference : undefined, useDistinctInference); } else { setInference(newStrategy); onUpdate(training, newStrategy, useDistinctInference); } }; // Update the categorical params section: const handleCategoricalParamChange = ( type: 'training' | 'inference', updates: Partial ) => { const strategy = type === 'training' ? training : inference; const setStrategy = type === 'training' ? 
setTraining : setInference; const newStrategy: PreprocessingStep = { ...strategy, params: { categorical_min: strategy.params.categorical_min, one_hot: strategy.params.one_hot, ordinal_encoding: strategy.params.ordinal_encoding, ...updates } }; setStrategy(newStrategy); if (type === 'training') { onUpdate(newStrategy, useDistinctInference ? inference : undefined, useDistinctInference); } else { onUpdate(training, newStrategy, useDistinctInference); } }; // Update the numeric clipping section: const handleClipChange = ( type: 'training' | 'inference', clipUpdates: Partial<{ min?: number; max?: number }> ) => { const strategy = type === 'training' ? training : inference; const setStrategy = type === 'training' ? setTraining : setInference; const newStrategy: PreprocessingStep = { ...strategy, params: { ...strategy.params, clip: { ...strategy.params.clip, ...clipUpdates } } }; setStrategy(newStrategy); if (type === 'training') { onUpdate(newStrategy, useDistinctInference ? inference : undefined, useDistinctInference); } else { onUpdate(training, newStrategy, useDistinctInference); } }; const handleConstantValueChange = ( type: 'training' | 'inference', value: string ) => { const strategy = type === 'training' ? training : inference; const setStrategy = type === 'training' ? setTraining : setInference; const newStrategy: PreprocessingStep = { ...strategy, params: { ...strategy.params, constant: value } }; setStrategy(newStrategy); if (type === 'training') { onUpdate(newStrategy, useDistinctInference ? inference : undefined, useDistinctInference); } else { onUpdate(training, newStrategy, useDistinctInference); } }; const renderConstantValueInput = (type: 'training' | 'inference') => { const strategy = type === 'training' ? training : inference; if (strategy.method !== 'constant') return null; return (
{isNumericType(selectedType) ? ( handleConstantValueChange(type, e.target.value)} className="w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500" placeholder="Enter a number..." /> ) : ( handleConstantValueChange(type, e.target.value)} className="w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500" placeholder="Enter a value..." /> )}
); }; const [isEditingDescription, setIsEditingDescription] = useState(false); const onToggleDropIfNull = (e: React.ChangeEvent) => { const updatedColumns = dataset.columns.map(c => ({ ...c, drop_if_null: c.name === column.name ? e.target.checked : c.drop_if_null })); setDataset({ ...dataset, columns: updatedColumns }); }; const handleDescriptionChange = (e: React.ChangeEvent) => { const updatedColumns = dataset.columns.map(c => ({ ...c, description: c.name === column.name ? e.target.value : c.description })); setDataset({ ...dataset, columns: updatedColumns }); }; const handleDescriptionSave = () => { setIsEditingDescription(false); }; const handleDescriptionKeyDown = (e: React.KeyboardEvent) => { if (e.key === 'Enter') { e.preventDefault(); setIsEditingDescription(false); } else if (e.key === 'Escape') { setIsEditingDescription(false); } }; const handleDescriptionClick = () => { setIsEditingDescription(true); }; let nullCount = (column.statistics?.processed.null_count || column.statistics?.raw.null_count) || 0; const nullPercentage = nullCount && column.statistics?.raw.num_rows ? ((nullCount / column.statistics.raw.num_rows) * 100) : 0; const nullPercentageProcessed = column.statistics?.processed?.null_count && column.statistics?.raw.num_rows ? ((column.statistics.processed.null_count / column.statistics.raw.num_rows) * 100) : 0; const totalRows = column.statistics?.raw.num_rows ?? 0; const renderStrategySpecificInfo = (type: 'training' | 'inference') => { const strategy = type === 'training' ? 
training : inference; let content; if (strategy.method === 'most_frequent' && column.statistics?.raw.most_frequent_value) { content = `Most Frequent Value: ${column.statistics.raw.most_frequent_value}` } else if (strategy.method === 'ffill' && column.statistics?.raw.last_value) { content = `Last Value: ${column.statistics.raw.last_value}` } else if (strategy.method === 'median' && column.statistics?.raw?.median) { content = `Median: ${column.statistics.raw.median}` } else if (strategy.method === 'mean' && column.statistics?.raw?.mean) { content = `Mean: ${column.statistics.raw.mean}` } else { return null; } return (
{content}
); }; return (
{/* Column Header Section */}

{column.name}

{isEditingDescription ? (