From d56312a811be35d169417eb0c095719b2c763d9d Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 4 Feb 2025 23:19:27 +0000
Subject: [PATCH] extract indexes and procedures

---
Changes in this revision:
- Restored the line structure of the patch.
- extract_indexes.sh: UNIQUE CLUSTERED indexes are now recognised, indexes
  without the CLUSTERED keyword go to nonclustered/, and unbracketed index
  and table names are parsed.
- extract_procedures.sh: a procedure now runs to the batch separator (GO)
  instead of the first line containing END, keeps its line breaks, and the
  schema is no longer repeated in the file name.
- The index lines below still carry the blob ids of the previous revision.

 .../full_schemas/extract_indexes.sh           | 187 +++++++++++++++++++
 .../full_schemas/extract_procedures.sh        | 166 +++++++++++++++++
 2 files changed, 353 insertions(+)
 create mode 100755 ef-migration/infrastructure/sql-server/full_schemas/extract_indexes.sh
 create mode 100755 ef-migration/infrastructure/sql-server/full_schemas/extract_procedures.sh

diff --git a/ef-migration/infrastructure/sql-server/full_schemas/extract_indexes.sh b/ef-migration/infrastructure/sql-server/full_schemas/extract_indexes.sh
new file mode 100755
index 0000000..89037dc
--- /dev/null
+++ b/ef-migration/infrastructure/sql-server/full_schemas/extract_indexes.sh
@@ -0,0 +1,187 @@
+#!/bin/bash
+#
+# Split the CREATE INDEX statements of a SQL Server schema script into one
+# file per index: sql_objects/indexes/<type>/<table>_<index>.sql
+#
+# Usage: extract_indexes.sh <sql_file>
+
+# Check if input file is provided
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 <sql_file>" >&2
+    exit 1
+fi
+
+SQL_FILE="$1"
+BASE_DIR="sql_objects"
+
+if [ ! -r "$SQL_FILE" ]; then
+    echo "Error: cannot read input file: $SQL_FILE" >&2
+    exit 1
+fi
+
+# T-SQL keywords are case-insensitive, so every [[ ]] match below must be too
+shopt -s nocasematch
+
+# One identifier: [bracketed], or a bare word that stops at whitespace . ( ; [ ]
+IDENT='(\[[^]]+\]|[^][:space:].(;[]+)'
+# CREATE [UNIQUE] [CLUSTERED|NONCLUSTERED] INDEX <index> ON <table>
+# Groups: 1 = UNIQUE, 3 = CLUSTERED|NONCLUSTERED, 4 = index name, 5 = table name
+INDEX_RE="CREATE[[:space:]]+(UNIQUE[[:space:]]+)?((CLUSTERED|NONCLUSTERED)[[:space:]]+)?"
+INDEX_RE+="INDEX[[:space:]]+${IDENT}[[:space:]]+ON[[:space:]]+(${IDENT}(\.${IDENT})*)"
+# GO alone on a line, optionally followed by a repeat count, ends the batch
+GO_LINE_RE='^[[:space:]]*GO([[:space:]]+[0-9]+)?[[:space:]]*$'
+# GO at the end of a line that holds other text; group 1 is that other text
+INLINE_GO_RE='^(.*[[:space:];)])GO[[:space:]]*$'
+# Statement terminator at the end of the text
+SEMICOLON_END_RE=';[[:space:]]*$'
+
+# Create directory structure
+mkdir -p "$BASE_DIR/indexes/clustered" \
+    "$BASE_DIR/indexes/nonclustered" \
+    "$BASE_DIR/indexes/unique_clustered" || exit 1
+
+# Function to clean filename
+clean_filename() {
+    # Remove brackets and parentheses, convert dots, slashes and spaces to
+    # underscores, squeeze repeated underscores, drop a trailing underscore
+    printf '%s\n' "$1" | tr -d '[]()' | tr './ ' '___' | tr -s '_' | sed 's/_$//'
+}
+
+# Function to determine if line is start of an index creation statement
+is_index_start() {
+    [[ "$1" =~ $INDEX_RE ]]
+}
+
+# Function to extract table name (schema-qualified if written so, no brackets)
+extract_table_name() {
+    if [[ "$1" =~ $INDEX_RE ]]; then
+        printf '%s\n' "${BASH_REMATCH[5]}" | tr -d '[]'
+    fi
+}
+
+# Function to extract index name (no brackets)
+extract_index_name() {
+    if [[ "$1" =~ $INDEX_RE ]]; then
+        printf '%s\n' "${BASH_REMATCH[4]}" | tr -d '[]'
+    fi
+}
+
+# Function to determine index type, which names the output sub-directory
+get_index_type() {
+    [[ "$1" =~ $INDEX_RE ]] || return 1
+
+    # UNIQUE may precede either keyword, and SQL Server creates a nonclustered
+    # index when neither keyword is given, so only an explicit CLUSTERED counts
+    if [[ "${BASH_REMATCH[3]}" != "CLUSTERED" ]]; then
+        echo "nonclustered"
+    elif [[ -n "${BASH_REMATCH[1]}" ]]; then
+        echo "unique_clustered"
+    else
+        echo "clustered"
+    fi
+}
+
+# Function to process each index statement: writes it to its own .sql file
+process_index() {
+    local statement="$1"
+    local table_name index_name index_type clean_name output_file
+
+    table_name=$(extract_table_name "$statement")
+    index_name=$(extract_index_name "$statement")
+    index_type=$(get_index_type "$statement")
+
+    # Skip if we couldn't extract table or index name
+    if [ -z "$table_name" ] || [ -z "$index_name" ] || [ -z "$index_type" ]; then
+        echo "Warning: Could not extract table or index name from:" >&2
+        printf '%s\n' "$statement" >&2
+        return
+    fi
+
+    # Clean filename and save
+    clean_name=$(clean_filename "${table_name}_${index_name}")
+    output_file="$BASE_DIR/indexes/$index_type/${clean_name}.sql"
+
+    # Trim whitespace and ensure semicolon at end
+    statement="${statement#"${statement%%[![:space:]]*}"}"
+    statement="${statement%"${statement##*[![:space:]]}"}"
+    if [[ "$statement" != *\; ]]; then
+        statement="${statement};"
+    fi
+
+    printf '%s\n' "$statement" > "$output_file"
+    echo "Created: $output_file"
+}
+
+# Main processing function: reads SQL text on stdin
+extract_indexes() {
+    local line
+    local current_statement=""
+    local capturing_index=false
+
+    # Read line by line; the last line may lack a trailing newline
+    while IFS= read -r line || [ -n "$line" ]; do
+        # Tolerate CRLF line endings
+        line=${line%$'\r'}
+
+        # Skip blank lines (comment-only lines are blank by now)
+        if [[ -z "${line//[[:space:]]/}" ]]; then
+            continue
+        fi
+
+        if is_index_start "$line"; then
+            # A new CREATE INDEX implicitly ends the one being captured
+            if $capturing_index; then
+                process_index "$current_statement"
+            fi
+            current_statement="$line"
+            capturing_index=true
+        elif ! $capturing_index; then
+            continue
+        elif [[ "$line" =~ $GO_LINE_RE ]]; then
+            process_index "$current_statement"
+            current_statement=""
+            capturing_index=false
+            continue
+        else
+            current_statement+=" $line"
+        fi
+
+        # The statement is complete once it ends with GO or a semicolon
+        if [[ "$current_statement" =~ $INLINE_GO_RE ]]; then
+            current_statement="${BASH_REMATCH[1]}"
+        elif [[ ! "$current_statement" =~ $SEMICOLON_END_RE ]]; then
+            continue
+        fi
+        process_index "$current_statement"
+        current_statement=""
+        capturing_index=false
+    done
+
+    # Process any remaining index statement
+    if $capturing_index; then
+        process_index "$current_statement"
+    fi
+}
+
+# Remove comments and process
+echo "Extracting indexes..."
+sed -e 's/--.*$//' -- "$SQL_FILE" | extract_indexes
+
+echo "Indexes have been saved in $BASE_DIR/indexes"
+
+# Count and list files; nullglob makes an empty directory count as zero
+shopt -s nullglob
+echo -e "\nFile counts:"
+for index_type in clustered nonclustered unique_clustered; do
+    files=("$BASE_DIR/indexes/$index_type"/*.sql)
+    count=${#files[@]}
+    echo "$index_type indexes: $count files"
+
+    if [ "$count" -gt 0 ]; then
+        echo -e "\nExtracted $index_type indexes:"
+        for file in "${files[@]}"; do
+            echo "- ${file##*/}"
+        done
+        echo
+    fi
+done
diff --git a/ef-migration/infrastructure/sql-server/full_schemas/extract_procedures.sh b/ef-migration/infrastructure/sql-server/full_schemas/extract_procedures.sh
new file mode 100755
index 0000000..674bea6
--- /dev/null
+++ b/ef-migration/infrastructure/sql-server/full_schemas/extract_procedures.sh
@@ -0,0 +1,166 @@
+#!/bin/bash
+#
+# Split the stored procedures of a SQL Server schema script into one file per
+# procedure: sql_objects/procedures/<schema>_<procedure>.sql
+#
+# Usage: extract_procedures.sh <sql_file>
+
+# Check if input file is provided
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 <sql_file>" >&2
+    exit 1
+fi
+
+SQL_FILE="$1"
+BASE_DIR="sql_objects"
+
+if [ ! -r "$SQL_FILE" ]; then
+    echo "Error: cannot read input file: $SQL_FILE" >&2
+    exit 1
+fi
+
+# T-SQL keywords are case-insensitive, so every [[ ]] match below must be too
+shopt -s nocasematch
+
+# One identifier: [bracketed], or a bare word that stops at whitespace . ( ; [ ]
+IDENT='(\[[^]]+\]|[^][:space:].(;[]+)'
+# CREATE [OR ALTER] PROC[EDURE] [<schema>.]<name>
+# Groups: 4 = first identifier, 6 = second identifier (only when qualified)
+PROC_RE="CREATE[[:space:]]+(OR[[:space:]]+ALTER[[:space:]]+)?PROC(EDURE)?[[:space:]]+"
+PROC_RE+="(${IDENT}(\.${IDENT})?)"
+# Start of a definition. Anchored to the line start so that CREATE PROCEDURE
+# text further along a line of a procedure body does not begin a new capture
+PROC_START_RE='^[[:space:]]*CREATE[[:space:]]+(OR[[:space:]]+ALTER[[:space:]]+)?PROC(EDURE)?([[:space:]]|$)'
+# GO alone on a line, optionally followed by a repeat count, ends the batch
+GO_LINE_RE='^[[:space:]]*GO([[:space:]]+[0-9]+)?[[:space:]]*$'
+# GO at the end of a line that holds other text; group 1 is that other text
+INLINE_GO_RE='^(.*[[:space:];)])GO[[:space:]]*$'
+
+# Create directory structure
+mkdir -p "$BASE_DIR/procedures" || exit 1
+
+# Function to clean filename
+clean_filename() {
+    # Remove brackets and parentheses, convert dots, slashes and spaces to
+    # underscores, squeeze repeated underscores, drop a trailing underscore
+    printf '%s\n' "$1" | tr -d '[]()' | tr './ ' '___' | tr -s '_' | sed 's/_$//'
+}
+
+# Function to extract procedure name (without schema, no brackets)
+extract_procedure_name() {
+    if [[ "$1" =~ $PROC_RE ]]; then
+        # In a qualified name the procedure is the second identifier
+        printf '%s\n' "${BASH_REMATCH[6]:-${BASH_REMATCH[4]}}" | tr -d '[]'
+    fi
+}
+
+# Function to extract schema name (if present)
+extract_schema_name() {
+    if [[ "$1" =~ $PROC_RE ]] && [[ -n "${BASH_REMATCH[6]}" ]]; then
+        printf '%s\n' "${BASH_REMATCH[4]}" | tr -d '[]'
+    else
+        echo "dbo" # Default schema if not specified
+    fi
+}
+
+# Function to process each procedure statement: writes it to its own .sql file
+process_procedure() {
+    local procedure_statement="$1"
+    local procedure_name schema_name clean_name output_file
+
+    procedure_name=$(extract_procedure_name "$procedure_statement")
+    schema_name=$(extract_schema_name "$procedure_statement")
+
+    # Skip if we couldn't extract procedure name
+    if [ -z "$procedure_name" ]; then
+        echo "Warning: Could not extract procedure name from:" >&2
+        printf '%s\n' "$procedure_statement" >&2
+        return
+    fi
+
+    # Clean filename and save
+    clean_name=$(clean_filename "${schema_name}_${procedure_name}")
+    output_file="$BASE_DIR/procedures/${clean_name}.sql"
+
+    # Trim whitespace and ensure semicolon at end if not already present
+    procedure_statement="${procedure_statement#"${procedure_statement%%[![:space:]]*}"}"
+    procedure_statement="${procedure_statement%"${procedure_statement##*[![:space:]]}"}"
+    if [[ "$procedure_statement" != *\; ]]; then
+        procedure_statement="${procedure_statement};"
+    fi
+
+    # Save the procedure
+    printf '%s\n' "$procedure_statement" > "$output_file"
+    echo "Created: $output_file"
+}
+
+# Main processing function: reads SQL text on stdin
+extract_procedures() {
+    local line
+    local current_statement=""
+    local capturing_procedure=false
+
+    # Read line by line; the last line may lack a trailing newline
+    while IFS= read -r line || [ -n "$line" ]; do
+        # Tolerate CRLF line endings
+        line=${line%$'\r'}
+
+        # Skip blank lines (comment-only lines are blank by now)
+        if [[ -z "${line//[[:space:]]/}" ]]; then
+            continue
+        fi
+
+        if [[ "$line" =~ $PROC_START_RE ]]; then
+            # A new CREATE PROCEDURE implicitly ends the one being captured
+            if $capturing_procedure; then
+                process_procedure "$current_statement"
+            fi
+
+            if [[ "$line" =~ $INLINE_GO_RE ]]; then
+                # GO on the same line: the statement is already complete
+                process_procedure "${BASH_REMATCH[1]}"
+                current_statement=""
+                capturing_procedure=false
+            else
+                current_statement="$line"
+                capturing_procedure=true
+            fi
+        elif $capturing_procedure; then
+            # The body runs to the end of the batch. Counting BEGIN/END is not
+            # reliable: CASE ... END, END TRY and names such as @EndDate all
+            # contain END, and a body needs no BEGIN ... END block at all.
+            if [[ "$line" =~ $GO_LINE_RE ]]; then
+                process_procedure "$current_statement"
+                current_statement=""
+                capturing_procedure=false
+            else
+                # Keep the line structure of the body
+                current_statement+=$'\n'"$line"
+            fi
+        fi
+    done
+
+    # Process any remaining procedure statement
+    if $capturing_procedure; then
+        process_procedure "$current_statement"
+    fi
+}
+
+# Remove comments and process
+echo "Extracting stored procedures..."
+sed -e 's/--.*$//' -- "$SQL_FILE" | extract_procedures
+
+echo "Stored procedures have been saved in $BASE_DIR/procedures"
+
+# Count and list files; nullglob makes an empty directory count as zero
+shopt -s nullglob
+files=("$BASE_DIR/procedures"/*.sql)
+count=${#files[@]}
+echo -e "\nTotal stored procedures extracted: $count"
+
+if [ "$count" -gt 0 ]; then
+    echo -e "\nExtracted stored procedures:"
+    for file in "${files[@]}"; do
+        echo "- ${file##*/}"
+    done
+fi