sp_GraphNELGene


USE [tempdb]
GO
/*********************************************************************************************
Graph NEL R Script Generator V4.06
(C) 2018, Paul Brewer
                          
Feedback: paulbrewer@yahoo.co.uk
Updates: https://paulbrewer.wordpress.com/sp_graphnelgene/
User Guide: https://paulbrewer.wordpress.com/2018/07/21/using-sql-server-hierarchies-with-the-bioconductor-network-diagramming-r-packages/
                     
Parameters:
  @Node GraphNELNodeTableType - A set of Nodes and Node Labels
  @NodeReferedTo GraphNELNodeRefersToTableType - Defines relationships between nodes, Node Name refers to Node Name
  @AnchorNodes GraphNELAnchorNodeTableType - A set of Node Names to anchor the graphs
  @IncludeLabels BIT = 0 - By default the node name is used as the label
  @IncludeRefersTo BIT - Include parent nodes in graph
  @IncludeReferredToBy BIT - Include child nodes in the graphs
  @PDFFile - Display when null else save the graph to this PDF file name
    
CHANGE LOG:
V1.0     - 23rd July, 2018 - First draft 
V1.01    - 26th July, 2018 - Change Anchor Node parameter to Anchor Nodes
V2.00    - 27th July, 2018 - Working prototype
V3.00    - 28th July, 2018 - Remove hierarchy data type and recursive queries, replace them with a dynamic cursor
V4.00    - 31st July, 2018 - Add PDF file parameter and additional node attribute settings
V4.01    - 6th Aug, 2018   - Replace TRIM with RTRIM for backward compatibility with SQL Server versions
V4.02    - 7th Aug, 2018   - Anchor Nodes not showing when unreferenced fix
V4.03    - 8th Aug, 2018   - Set global attributes (node colors and shapes) and change diagram orientation from TB to LR
V4.04    - 8th Aug, 2018   - Hide articial edges 
V4.05    - 9th Aug, 2018   - Code Refactor
V4.06    - 11th Aug,2018   - Fix endless loop problem with recursive, circular hierarchies.
    
********************************************************************************************/ 
   
-- Drop Procedure
USE [tempdb] 
GO 
BEGIN TRY
  DROP PROCEDURE dbo.sp_GraphNELGene;
END TRY
BEGIN CATCH
  PRINT 'First time setup of procedure sp_GraphNELGene.';
END CATCH
GO 
  
-- Create Anchor Nodes      
BEGIN TRY
  DROP TYPE dbo.GraphNELAnchorNodeTableType;
END TRY
BEGIN CATCH
  PRINT'First time setup of Table Type GraphNELAnchorNodeTableType.';
END CATCH
GO
  
CREATE TYPE dbo.GraphNELAnchorNodeTableType AS TABLE
( 
  NodeName varchar(250)
);  
      
-- Create Node Table Type
BEGIN TRY
  DROP TYPE dbo.GraphNELNodeTableType;
END TRY
BEGIN CATCH
  PRINT'First time setup of Table Type GraphNELNodeTableType.';
END CATCH
GO
  
CREATE TYPE dbo.GraphNELNodeTableType AS TABLE
( 
  NodeName varchar(250),
  NodeLabel varchar(250)  
);  
    
-- Create NodeRefersTo Table Type
BEGIN TRY
  DROP TYPE  GraphNELNodeRefersToTableType;
END TRY
BEGIN CATCH
  PRINT'First time setup of Table Type GraphNELNodeRefersToTableType.';
END CATCH
GO
    
CREATE TYPE GraphNELNodeRefersToTableType AS TABLE
( 
  NodeName varchar(250),
  NodeReferedTo varchar(250)  
);  
GO 
   
-- Create Procedure
CREATE PROCEDURE [dbo].[sp_GraphNELGene]
(
  @Node GraphNELNodeTableType READONLY,
  @NodeReferedTo GraphNELNodeRefersToTableType READONLY,
  @AnchorNodes GraphNELAnchorNodeTableType READONLY,
  @IncludeLabels BIT = 0,
  @IncludeRefersTo BIT = 1,
  @IncludeReferredToBy BIT = 1,
  @PDFFile VARCHAR(1000) = ''
)
AS
    
BEGIN
    
-------------------------------------------------------
-- Create temporary tables
IF OBJECT_ID('tempdb..#FilteredNodes') IS NOT NULL
  DROP TABLE #FilteredNodes;
CREATE TABLE #FilteredNodes (NodeName varchar(250),NodeLabel varchar(250));
    
IF OBJECT_ID('tempdb..#FilteredEdges') IS NOT NULL
  DROP TABLE #FilteredEdges;
CREATE TABLE #FilteredEdges (NodeName varchar(250),NodeReferedTo varchar(250), isArtificial bit NULL DEFAULT(0));
    
IF OBJECT_ID('tempdb..#Nodes_TVP') IS NOT NULL
  DROP TABLE #Nodes_TVP;
CREATE TABLE #Nodes_TVP (NodeName varchar(250),NodeLabel varchar(250));
    
IF OBJECT_ID('tempdb..#Edges_TVP') IS NOT NULL
  DROP TABLE #Edges_TVP;
CREATE TABLE #Edges_TVP (NodeName varchar(250),NodeReferedTo varchar(250));
    
IF OBJECT_ID('tempdb..#AnchorNodes_TVP') IS NOT NULL
  DROP TABLE #AnchorNodes_TVP;
CREATE TABLE #AnchorNodes_TVP (NodeName varchar(250), HierarchyLevel INT, ProcessingComplete bit DEFAULT(0));
    
IF OBJECT_ID('tempdb..#RScript') IS NOT NULL
  DROP TABLE #RScript;
CREATE TABLE #RScript (CMDSEQ int, RScript varchar(250));
    
-------------------------------------------------------
-- Populate TVP
INSERT INTO #Nodes_TVP SELECT * FROM @Node;
INSERT INTO #Edges_TVP SELECT * FROM @NodeReferedTo;
    
CREATE INDEX IDX_Nodes_TVP1 ON #Nodes_TVP (NodeName,NodeLabel);
CREATE INDEX IDX_Edges_TVP1 ON #Edges_TVP (NodeName,NodeReferedTo);
CREATE INDEX IDX_Edges_TVP2 ON #Edges_TVP (NodeReferedTo,NodeName);
    
-------------------------------------------------------
-- Hierarchy Selection Version 3 - Remove recusive queries and hierarchy data types to avoid n complete scaling issues
INSERT INTO #AnchorNodes_TVP (NodeName, HierarchyLevel, ProcessingComplete) SELECT NodeName,0,0 FROM @AnchorNodes;
IF @@ROWCOUNT > 0
BEGIN
  DECLARE @AnchorNode varchar(250), @HierarchyLevel INT = 0;
    
  -- Referred To By
  IF @IncludeReferredToBy = 1
  BEGIN
    
    DECLARE AnchorNodes CURSOR DYNAMIC FOR
    SELECT NodeName
    FROM #AnchorNodes_TVP
    WHERE ProcessingComplete = 0;
    
    OPEN AnchorNodes;
    FETCH NEXT FROM AnchorNodes INTO @AnchorNode;
    WHILE @@FETCH_STATUS = 0
    BEGIN
    
	  WITH CTE AS 
	  (
        SELECT NodeName 
		FROM #AnchorNodes_TVP
		WHERE ProcessingComplete = 1
	  )
      INSERT INTO #AnchorNodes_TVP (NodeName, HierarchyLevel, ProcessingComplete)
      SELECT NodeReferedTo, @HierarchyLevel, 0
      FROM #Edges_TVP e
      WHERE e.NodeName = @AnchorNode
	  AND e.NodeName NOT IN (SELECT NodeName FROM CTE);
    
      UPDATE #AnchorNodes_TVP SET ProcessingComplete = 1 WHERE NodeName = @AnchorNode;
    
      FETCH NEXT FROM AnchorNodes INTO @AnchorNode;
    
    END
    
    CLOSE AnchorNodes;
    DEALLOCATE AnchorNodes;
    
    WITH FilteredEdges AS
    ( 
      SELECT NodeName, NodeReferedTo 
      FROM #Edges_TVP
      WHERE NodeName IN (SELECT NodeName FROM #AnchorNodes_TVP GROUP BY NodeName)
      GROUP BY NodeName, NodeReferedTo
    )
    INSERT INTO #FilteredEdges (NodeName, NodeReferedTo) 
    SELECT NodeName, NodeReferedTo
    FROM FilteredEdges
    GROUP BY NodeName, NodeReferedTo;
    
    WITH FilteredNodes AS
    ( 
      SELECT NodeName, NodeLabel 
      FROM #Nodes_TVP
      WHERE NodeName IN (SELECT NodeName FROM #FilteredEdges)
      UNION
      SELECT NodeName, NodeLabel 
      FROM #Nodes_TVP
      WHERE NodeName IN (SELECT NodeReferedTo FROM #FilteredEdges) 
    )
    INSERT INTO #FilteredNodes 
    SELECT NodeName, NodeLabel
    FROM FilteredNodes
    GROUP BY NodeName, NodeLabel;
    
    
  END
    
  -- Referred To By
  IF @IncludeRefersTo = 1
  BEGIN
    TRUNCATE TABLE #AnchorNodes_TVP;
    INSERT INTO #AnchorNodes_TVP (NodeName, HierarchyLevel, ProcessingComplete) SELECT NodeName,0,0 FROM @AnchorNodes;
    
    DECLARE AnchorNodes CURSOR DYNAMIC FOR
    SELECT NodeName
    FROM #AnchorNodes_TVP
    WHERE ProcessingComplete = 0;
    
    OPEN AnchorNodes;
    FETCH NEXT FROM AnchorNodes INTO @AnchorNode;
    WHILE @@FETCH_STATUS = 0
    BEGIN
    
	  WITH CTE AS 
	  (
        SELECT NodeName 
		FROM #AnchorNodes_TVP
		WHERE ProcessingComplete = 1
	  )
      INSERT INTO #AnchorNodes_TVP (NodeName, HierarchyLevel, ProcessingComplete)
      SELECT NodeName, @HierarchyLevel, 0
      FROM #Edges_TVP e
      WHERE NodeReferedTo = @AnchorNode
	  AND e.NodeName NOT IN (SELECT NodeName FROM CTE);
    
      UPDATE #AnchorNodes_TVP SET ProcessingComplete = 1 WHERE NodeName = @AnchorNode;
    
      FETCH NEXT FROM AnchorNodes INTO @AnchorNode;
    
    END
    
    CLOSE AnchorNodes;
    DEALLOCATE AnchorNodes;
    
    WITH FilteredEdges AS
    ( 
      SELECT e.NodeName, e.NodeReferedTo 
      FROM #Edges_TVP e
      LEFT OUTER JOIN #FilteredEdges fe
        ON fe.NodeName = e.NodeName
        AND fe.NodeReferedTo = e.NodeReferedTo
      WHERE e.NodeReferedTo IN (SELECT NodeName FROM #AnchorNodes_TVP GROUP BY NodeName)
      AND fe.NodeName IS NULL
      GROUP BY e.NodeName, e.NodeReferedTo 
    )
    INSERT INTO #FilteredEdges (NodeName, NodeReferedTo) 
    SELECT NodeName, NodeReferedTo
    FROM FilteredEdges
    GROUP BY NodeName, NodeReferedTo;
    
    WITH FilteredNodes AS
    ( 
      SELECT NodeName, NodeLabel 
      FROM #Nodes_TVP
      WHERE NodeName IN (SELECT NodeName FROM #FilteredEdges)
      AND NodeName NOT IN (SELECT NodeName FROM #FilteredNodes)
      UNION
      SELECT NodeName, NodeLabel 
      FROM #Nodes_TVP
      WHERE NodeName IN (SELECT NodeReferedTo FROM #FilteredEdges) 
      AND NodeName NOT IN (SELECT NodeName FROM #FilteredNodes)
    )
    INSERT INTO #FilteredNodes 
    SELECT NodeName, NodeLabel
    FROM FilteredNodes
    GROUP BY NodeName, NodeLabel;
 
  END
    
END
ELSE
    
BEGIN
  -- Graph all nodes, no Anchor Nodes supplied
  INSERT INTO #FilteredNodes SELECT * FROM @Node;
  INSERT INTO #FilteredEdges (NodeName, NodeReferedTo) SELECT NodeName, NodeReferedTo FROM @NodeReferedTo;
END
   
-- Add articial edges for unreferenced anchor nodes
INSERT INTO #FilteredEdges (NodeName, NodeReferedTo, isArtificial)
SELECT ntvp.NodeName, ntvp.NodeName,1 
FROM #AnchorNodes_TVP atvp
JOIN #Nodes_TVP ntvp
  ON ntvp.NodeName = atvp.NodeName
WHERE atvp.NodeName NOT IN (SELECT NodeName FROM #FilteredEdges UNION SELECT NodeReferedTo FROM #FilteredEdges);
   
-- Create other artificial edges to keep Bioconductor happy 
INSERT INTO #FilteredEdges (NodeName, NodeReferedTo, isArtificial)
SELECT fn.NodeName, fn.NodeName,1 
FROM #FilteredNodes fn
WHERE fn.NodeName NOT IN (SELECT NodeName FROM #FilteredEdges);
   
-------------------------------------------------------
-- Script Header
INSERT INTO #RScript (CMDSEQ,RScript) 
SELECT 0, '#SetRepositories()'
UNION
SELECT 1, '#source("https://bioconductor.org/biocLite.R")'
UNION
SELECT 2, '#biocLite()'
UNION
SELECT 3, '#install.packages("graph")'
UNION
SELECT 4, '#install.packages("Rgraphviz")'
UNION
SELECT 4, '#install.packages("graphNEL")'
UNION
SELECT 6, 'rm(list = ls())'
UNION
SELECT 7, 'library(graph)'
UNION
SELECT 8, 'library(Rgraphviz)'
UNION
SELECT 9,'NodeName <- c(';
    
-------------------------------------------------------
-- Nodes
DECLARE @NotesCount INT;
SELECT
  @NotesCount = COUNT(*)
FROM #FilteredNodes n;
    
DECLARE @AnchorNotesCount INT;
SELECT
  @AnchorNotesCount = COUNT(*)
FROM @AnchorNodes n;
    
;WITH Nodes AS
(
    SELECT NodeName AS NodeName, ROW_NUMBER() OVER(ORDER BY NodeName) AS NodeID
    FROM #FilteredNodes n
)
INSERT INTO #RScript (CMDSEQ,RScript)
SELECT
  NodeID + 10, '"' + 'n' + CAST(RTRIM(Nodes.NodeName) AS varchar(50)) + '"' +
  CASE WHEN Nodes.NodeID = @NotesCount THEN '' ELSE ',' END AS Nodes
FROM Nodes
ORDER BY Nodes.NodeID;
    
-------------------------------------------------------
-- Edges
INSERT INTO #RScript(CMDSEQ,RScript)
SELECT 10000, ')'
UNION ALL
SELECT 100001,'EdgeName = list(';
    
DECLARE @EdgeCount INT;
SELECT @EdgeCount = COUNT(*) 
FROM #FilteredEdges;
    
WITH Nodes AS
(
    SELECT NodeName, 
    ROW_NUMBER() OVER(ORDER BY NodeName) AS NodeID
    FROM #FilteredNodes n
)
,Edges AS
(
    SELECT
      NodeName, 
      NodeReferedTo, 
      ROW_NUMBER() OVER(ORDER BY NodeName, NodeReferedTo) AS EdgeID,
      ROW_NUMBER() OVER(PARTITION BY NodeName ORDER BY NodeReferedTo) AS NodeEdgeSeq
    FROM #FilteredEdges 
)
INSERT INTO #RScript (CMDSEQ,RScript) 
SELECT ISNULL(e.EdgeID,0) + 200000,
  CASE
    WHEN ISNULL(e.NodeEdgeSeq,1) = 1 THEN
    'n' + n.NodeName + ' = list(edges = c(' + CAST(ISNULL(nr.NodeID,n.NodeID) AS varchar(5)) 
    ELSE ''
  END
  +
  CASE WHEN ISNULL(e.NodeEdgeSeq,1) > 1 THEN
    CAST(nr.NodeID AS varchar(5))
    ELSE ''
  END
  +
  CASE WHEN ISNULL(e.NodeEdgeSeq,1) = ISNULL(LastNodeEdge.LastNodeEdgeSeq,1) THEN
    '))'
    ELSE ''
  END
  +
  CASE WHEN ISNULL(e.EdgeID,0) = LastEdgeID.MaxEdgeIDSeq THEN '' ELSE CASE WHEN ISNULL(e.EdgeID,0) = @EdgeCount THEN '' ELSE ',' END END AS Edges     
      
FROM Nodes n
    
LEFT OUTER JOIN Edges e
  ON n.NodeName = e.NodeName
    
LEFT OUTER JOIN Nodes nr
 ON e.NodeReferedTo = nr.NodeName
    
LEFT OUTER JOIN
(
  SELECT NodeName, MAX(NodeEdgeSeq) AS LastNodeEdgeSeq
  FROM Edges
  GROUP BY NodeName 
) LastNodeEdge
  ON LastNodeEdge.NodeName = n.NodeName
    
OUTER APPLY
(
  SELECT MAX(EdgeID) AS MaxEdgeIDSeq
  FROM Edges
) LastEdgeID
ORDER BY e.EdgeID;
    
INSERT INTO #RScript (CMDSEQ,RScript)
SELECT 300000, ')'
    
-------------------------------------------------------
-- Node Labels
IF @IncludeLabels = 1
BEGIN
  INSERT INTO #RScript (CMDSEQ,RScript)
  SELECT 300001, 'NodeLabel <- c(';
    
  WITH Nodes AS
  (
    SELECT NodeName, NodeLabel, 
    ROW_NUMBER() OVER(ORDER BY NodeName) AS NodeID
    FROM #FilteredNodes n
  )
  INSERT INTO #RScript (CMDSEQ, RScript)
  SELECT n.NodeID + 400000, '"' + n.NodeLabel + '"' + CASE WHEN NodeID = @NotesCount THEN '' ELSE ',' END AS NodeLabels
  FROM Nodes n
  ORDER BY NodeID;
    
  INSERT INTO #RScript (CMDSEQ,RScript)
  SELECT 500000, ')';
END
ELSE
BEGIN
  INSERT INTO #RScript (CMDSEQ, RScript) SELECT 400000,'';
END;
    
-------------------------------------------------------
---- Footer
INSERT INTO #RScript (CMDSEQ,RScript)
SELECT 500001,'gR <- graphNEL(nodes = NodeName, edgeL = EdgeName, edgemode = "directed")' AS RScript
UNION
SELECT 500002,'nAttrs <- list()' AS RScript
UNION
SELECT 500003, CASE WHEN @IncludeLabels = 1 THEN 'NodeLabel <- NodeLabel[1:numNodes(gR)]' ELSE '' END AS RScript
UNION
SELECT 500004, CASE WHEN @IncludeLabels = 1 THEN 'names(NodeLabel) = nodes(gR)' ELSE '' END AS RScript
UNION
SELECT 500005, CASE WHEN @IncludeLabels = 1 THEN 'nAttrs$label <- NodeLabel' ELSE '' END AS RScript;
 
-------------------------------------------------------   
-- Flag Anchor Nodes
WITH CTE AS
(
  SELECT NodeName, ROW_NUMBER() OVER(ORDER BY NodeName) AS RowNum
  FROM @AnchorNodes
) 
INSERT INTO #RScript (CMDSEQ,RScript)
SELECT
  500006, 
  CASE ISNULL(NodeName,'') WHEN '' THEN '' ELSE CASE WHEN RowNum = 1 THEN 'nAttrs$fillcolor <- c(' ELSE '' END + 'n' + CAST(NodeName AS varchar(250)) + ' = "grey"' + CASE WHEN RowNum = @AnchorNotesCount THEN ')' ELSE ',' END END AS RScript
FROM CTE
ORDER BY RowNum; 
   
-- Hide artifical edges
INSERT INTO #RScript (CMDSEQ,RScript)
SELECT 500010,'eAttrs  <- list()'; 
   
DECLARE @ArtificalEdgeCount INT;
SELECT @ArtificalEdgeCount = COUNT(*) FROM #FilteredEdges WHERE isArtificial = 1;
   
WITH Nodes AS
(
    SELECT NodeName, 
    ROW_NUMBER() OVER(ORDER BY NodeName) AS NodeID
    FROM #FilteredNodes n
)
,Edges AS
(
    SELECT
      NodeName, 
      NodeReferedTo, 
      isArtificial,
      ROW_NUMBER() OVER(ORDER BY NodeName, NodeReferedTo) AS EdgeID,
      ROW_NUMBER() OVER(PARTITION BY NodeName ORDER BY NodeReferedTo) AS NodeEdgeSeq
    FROM #FilteredEdges 
    WHERE isArtificial = 1
)
INSERT INTO #RScript (CMDSEQ,RScript)
SELECT
  500010 + EdgeID,
  CASE WHEN EdgeID = 1 THEN 'eAttrs$color <- c("' + 'n' + NodeName + '~' + 'n' + NodeName + '"' + '="white"' ELSE '' END +
  CASE WHEN EdgeID > 1 THEN ','+ '"n' + NodeName + '~' + 'n' + NodeName + '"' + '="white"' ELSE '' END +
  CASE WHEN EdgeID = @ArtificalEdgeCount THEN ')' ELSE '' END
FROM Edges;
   
-------------------------------------------------------   
-- PDF File
IF ISNULL(@PDFFile,'') <> ''
BEGIN
  INSERT INTO #RScript (CMDSEQ,RScript)
  SELECT 600000, 'pdf("' + REPLACE(@PDFFile,'\','/') + '")'
  UNION
  SELECT 600001, 'attrs <- list(graph=list(rankdir="LR"),node=list(shape="ellipse", fixedsize=FALSE, color="grey"),edge=list(color="grey"))'
  UNION
  SELECT 600002, 'plot(gR, nodeAttrs = nAttrs, edgeAttrs=eAttrs, attrs=attrs)'
  UNION
  SELECT 600003, 'dev.off()';
END 
ELSE
BEGIN
  INSERT INTO #RScript (CMDSEQ,RScript)
  SELECT 600001, 'attrs <- list(graph=list(rankdir="LR"),node=list(shape="ellipse", fixedsize=FALSE, color="grey"),edge=list(color="grey"))'
  UNION
  SELECT 600002, 'plot(gR, nodeAttrs = nAttrs, edgeAttrs=eAttrs, attrs=attrs)';
END
    
-------------------------------------------------------
---- Return R Script
    
SELECT RScript FROM #RScript ORDER BY CMDSEQ;
    
END

%d bloggers like this: