Scripts Home

Reference Constraint Graph for Oracle, Informix

When reference constraints are in place it is handy to have a loading order of tables, especially when you lack the permissions to drop and apply the referential constraints or cannot remove the constraints because of others using the data. These scripts give a sorted order of the tables so you can load data into the tables without hitting the constraints.

It was written first for Informix, but there is Oracle SQL included, it can be easily adapted to any RDBMS that can get the referential dependency pairs from the database.

#!/bin/sh
# Find an order of loading tables with referential dependencies
# Uses dbaccess, awk, sh.
# Note: name of awk may have to be adjusted for your system (gawk,nawk).
# Algorithm of depth first sort of directed graph
# from "The AWK Programming Language" by Aho, Weinberger, Kernighan
# can be referenced at: http://cm.bell-labs.com/cm/cs/who/bwk/awkcode.txt
#
# Can be used for Oracle, check notes in procedure ora_ref_sql below.
#

# Configure awk, the name of awk may be different (nawk, gawk).
AWK=gawk; export AWK

# Use like: usage $0 .  This puts the name of the script in the
# usage message.
usage () {
    echo
    echo 'Usage: '$1 '[order|child|parent] database [table]'
    echo 'Example: '$1 'order project2'
    echo 'Example: '$1 'child project2 organization'
    echo 'Example: '$1 'parent project2 organization'
    echo '"order" lists a loading order of tables to comply with constraints.'
    echo '"child" lists children of a table in a loadable order.'
    echo '"parent" lists parents of a table in a loadable order.'
    echo
    exit 1
}

# Query for finding referential dependency pairs.
# Usage: $0 database_name
ref_sql () {
DB=$1

# remove blanks, duplicates
$INFORMIXDIR/bin/dbaccess << EOF | sed '/^$/d'
    database $DB;
    output to pipe "sort -u" without headings
    select a.tabname, b.tabname
    from "informix".sysreferences r, "informix".systables a,
    "informix".systables b, "informix".sysconstraints c
    where r.ptabid = a.tabid
    and c.constrtype in ("P","R")
    and c.tabid = b.tabid
    and c.constrid = r.constrid;
EOF
}

# Oracle referential dependency pairs.
# NOTE: change calls from ref_sql to ora_ref_sql to use Oracle 
#       instead of Informix.
#
# NOTE: bug is that any tablename with $ in it blows chunks because
#       it is being expanded by the shell.
ora_ref_sql () {
$ORACLE_HOME/bin/sqlplus -s << EOF | sed '/^$/d'
system/manager
  set heading off;
  set feedback off;
  set echo off;
  select p.table_name parent, f.table_name child
  from dba_constraints p, dba_constraints f
  where f.r_constraint_name = p.constraint_name
  and f.constraint_type = 'R';
EOF
}

# Depth first sort of directed graph.
# input: pairs of names representing dependency:  name  name
# use in pipeline: ref_sql| ref_parent NAME| rtsort| reverse
# output in reversed order.
rtsort () {
$AWK '
{ if (!($1 in pcnt))
     pcnt[$1] = 0
  pcnt[$2]++
  slist[$1, ++scnt[$1]] = $2
}

END { for (node in pcnt) {
          nodecnt++
          if (pcnt[node] == 0)
              rtsort(node)
      }
      if (pncnt != nodecnt) {
          print "error: input contains a cycle"
          print "node: ",node
      }
      printf("\n")
    }

function rtsort(node, i, s) {
    visited[node] = 1
    for ( i = 1; i <= scnt[node]; i++)
        if (visited[s = slist[node, i]] == 0)
            rtsort(s)
        else if (visited[s] == 1)
            printf("error: nodes %s and %s are in a cycle\n",s,node)
    visited[node] = 2
    printf("%s\n", node)
    pncnt++
}'
}

# Reverse order of lines input.
reverse () {
    $AWK '{ x[NR] = $0}
          END { for (i = NR; i > 0; i--) print x[i] }'
}

# input list of referential dependency pairs
# usage: ref_parent 
# output: tree of parent dependency pairs on name
# use in pipeline: ref_sql | ref_parent NAME | rtsort | reverse
# Note: recursive function tree_parent, will explode
# harmlessly with circular graph.
# use rtsort to depth first search the dependency tree, sort
# and find cycles.
# Usage: ref_parent '
ref_parent () {
    # In BEGIN is trick to set variables.
    $AWK 'BEGIN { start_node=ARGV[2]; ARGC = ARGC-1;}
    { nlist[++cnt] = $0; }
    END { tree_parent(start_node) }

    function tree_parent(newnode) {
        for ( node in nlist) {
              split(nlist[node],refs);
              if (newnode == refs[2]){
                  print nlist[node];
                  tree_parent(refs[1]);
              }
        }
    }' - $1
}

# input list of referential dependency pairs
# usage: ref_child 
# output: tree of child dependency on name
# use in pipeline:   ref_sql | ref_child NAME | rtsort | reverse
# Note: recursive function tree_child, will explode
# harmlessly with circular graph.
# use rtsort to depth first search the dependency tree, sort
# and find cycles.
# Usage: ref_child '
ref_child () {
    test $# -ne 1 && usage $0
    # In BEGIN is trick to set variables.
    $AWK 'BEGIN { start_node=ARGV[2]; ARGC = ARGC-1;}
      {nlist[++cnt] = $0;}
    END { tree_child(start_node) }

    function tree_child(newnode) {
        for ( node in nlist) {
              split(nlist[node],refs);
              if (newnode == refs[1]){
                  print nlist[node];
                  tree_child(refs[2]);
              }
        }
    }' - $1
}

order () {
    ref_sql $1 | rtsort | reverse
}

child () {
    ref_sql $1 | ref_child $2 | rtsort | reverse
}

parent () {
    ref_sql $1 | ref_parent $2 | rtsort | reverse
}

##############################################
# Main

    case $# in
        2|3 ) $1 $2 $3 ;;
         * ) usage $0;;
    esac

################ cut here #####################################