Wednesday, January 23, 2013

Google Analytics Business Intelligence Project with SSIS

Schema to create DateControlMonth table

CREATE TABLE [web].[DateControlMonth](
[FiscalYear] [smallint] NULL,
[FiscalQuarter] [tinyint] NULL,
[FiscalMonth] [tinyint] NULL,
[FiscalYearStartDate] [date] NULL,
[FiscalYearEndDate] [date] NULL,
[FiscalMonthStartDate] [date] NULL,
[FiscalMonthEndDate] [date] NULL
)

Script to populate value in DateControlMonth table

SELECT dd.[FiscalYear]
,dd.FiscalQuarter
,dd.[FiscalMonth]
,(SELECT MIN(DD2.DateValue)
FROM [EDW20].[dbo].DimDate dd2
WHERE dd2.FiscalYear = DD.FiscalYear) AS [FiscalYearStartDate]
,(SELECT MAX(DD2.DateValue)
FROM [EDW20].[dbo].DimDate dd2
WHERE dd2.FiscalYear = DD.FiscalYear) AS [FiscalYearEndDate]
,MIN(dd.dateValue) [FiscalMonthStartDate]
,MAX(dd.dateValue) [FiscalMonthEndDate]
FROM [EDW20].[dbo].DimDate dd
WHERE FiscalYear > 2005 //provide year to get data from
GROUP BY dd.[FiscalYear]
,dd.FiscalQuarter
,dd.[FiscalMonth]
ORDER BY dd.[FiscalYear]
,dd.FiscalQuarter
,dd.[FiscalMonth]


Schema to create DateControlQuarter

CREATE TABLE [web].[DateControlQuarter](
[FiscalYear] [smallint] NULL,
[FiscalQuarter] [tinyint] NULL,
[FiscalQuarterStartDate] [date] NULL,
[FiscalQuarterEndDate] [date] NULL
)

Script to populate value in DateControlQuarter table

SELECT dd.[FiscalYear]
,dd.[FiscalQuarter]
,MIN(dd.dateValue) [FiscalQuarterStartDate]
,MAX(dd.dateValue) [FiscalQuarterEndDate]
FROM [EDW20].[dbo].DimDate dd
WHERE FiscalYear > 2005 //provide year to get data from
GROUP BY dd.[FiscalYear]
,dd.[FiscalQuarter]
ORDER BY dd.[FiscalYear]
,dd.[FiscalQuarter]

Schema to create SiteprofileHistory_Date

CREATE TABLE [web].[SiteProfileHistory_Date](
[UniqueId] [int] IDENTITY(1,1) NOT NULL,
[ProfileId] [varchar](24) NULL,
[FiscalYear] [smallint] NULL,
[FiscalMonth] [tinyint] NULL,
[FiscalQuarter] [tinyint] NULL,
[MonthStartDate] [date] NULL,
[MonthEndDate] [date] NULL,
[QuarterStartDate] [date] NULL,
[QuarterEndDate] [date] NULL,
[YearStartDate] [date] NULL,
[YearEndDate] [date] NULL
)

SSIS-ETL Steps to populate values in  SiteprofileHistory_Date








Schema to create SiteprofileHistoryDate_Quarter

CREATE TABLE [web].[SiteProfileHistoryDate_Quarter](
[UniqueId] [int] IDENTITY(1,1) NOT NULL,
[ProfileId] [varchar](24) NULL,
[FiscalQuarterStartDate] [date] NULL,
[FiscalQuarterEndDate] [date] NULL,
[FiscalYear] [smallint] NULL,
[FiscalQuarter] [tinyint] NULL
)

Script to populate value in SiteprofileHistoryDate_Quarter

SELECT DISTINCT [ProfileId]
,[FiscalYear]
,[FiscalQuarter]
,[QuarterStartDate]
,[QuarterEndDate]
FROM [web].[SiteProfileHistory_Date]

Schema to create SiteprofileHistoryDate_FYTD

CREATE TABLE [web].[SiteProfileHistoryDate_FYTD](
[UniqueId] [int] IDENTITY(1,1) NOT NULL,
[ProfileId] [varchar](24) NULL,
[FYTDStartDate] [date] NULL,
[FYTDEndDate] [date] NULL,
[FiscalYear] [smallint] NULL,
[FiscalMonth] [tinyint] NULL,
[FiscalQuarter] [tinyint] NULL
)

Script to populate value in SiteprofileHistoryDate_FYTD
SELECT DISTINCT [ProfileId]
,[FiscalYear]
,[FiscalMonth]
,[FiscalQuarter]
,[YearStartDate] AS FYTDStarteDate
,[MonthEndDate] AS FYTDEndDate
FROM [web].[SiteProfileHistory_Date]



Example table and package to retrieve value from Google Analytics for the last completed single month


Once the date related tasks have done, we can start with create table for PageDetailByURL and how to populate value to this table. The following schema is to create table for PageDetailByURL_SingleMonth which will load data for the last completed month.

We can also pull the data for the last completed quarter and FYTD.

CREATE TABLE [web].[PagesDetailByURL_SingleMonth](
[ProfileID] [int] NOT NULL,
[Interval] [varchar](20) NOT NULL,
[FiscalYear] [smallint] NULL,
[FiscalMonth] [tinyint] NULL,
[FiscalQuarter] [tinyint] NULL,
[PageURL] [nvarchar](max) NULL,
[PageViewCount] [int] NULL,
[UniquePageViewCount] [int] NULL,
[AvgTimeOnPage] [numeric](18, 2) NULL,
[EntranceCount] [int] NULL,
[BounceRate] [numeric](18, 2) NULL,
[ExitPercent] [numeric](18, 2) NULL,
[TotalValue] [numeric](18, 2) NULL,
[RecordExists] [bit] NULL,
[AuditTaskId] [int] NULL
)


Below is the example figures for ETL to load data in a table 

Created variables:

ControlFlow example:
















Below is the script to get FromDate, ToDate, FiscalMonth, FiscalQuarter and FiscalYear those will be passed as parameter in Google Analytics to retrieve data for the particular period. (Last completed month data)



SELECT ? = DATEADD(mm, DATEDIFF(m,0,getdate())-1,0)

SELECT ? = DATEADD(s,-1,DATEADD(mm, DATEDIFF(m,0,getdate()),0))


DECLARE @DATE DATETIME, @Quarter INT
SET @DATE = (convert(varchar(20),(SELECT DATEADD(mm, DATEDIFF(m,0,getdate())-1,0)),111))
SET @Quarter = Datepart(QQ,DATEADD(mm, DATEDIFF(m,0,getdate())-1,0))

SELECT ? = CASE WHEN (MONTH(@Date) > 9) THEN YEAR(@Date) + 1
ELSE YEAR(@Date) END -- AS FiscalYear

SELECT ?= CASE WHEN (MONTH(@Date) > 9) THEN MONTH(@Date) - 9
ELSE MONTH(@Date) + 3 END -- AS FiscalMonth

SELECT ?= (SELECT DISTINCT FiscalQuarter FROM DimDate WHERE QuarterNumber = @Quarter)

Note: we need DimDate table to bring fiscal quarter



DataFlowTask steps:

We need to select variables to pass in the script component coding area.






Once the output columns created in SCR task, Click on EditScript to go the VisaProjects. In the Solution Explorer screen, Right click on – References – click Add References – Select the DLL file as given in the below figure.



In the main.cs file, write the below coding.

#region Help: Introduction to the Script Component
/* The Script Component allows you to perform virtually any operation that can be accomplished in
* a .Net application within the context of an Integration Services data flow.
*
* Expand the other regions which have "Help" prefixes for examples of specific ways to use
* Integration Services features within this script component. */
#endregion

#region Namespaces
using System;
using System.Data;
using Microsoft.SqlServer.Dts.Pipeline.Wrapper;
using Microsoft.SqlServer.Dts.Runtime.Wrapper;
using System.Windows.Forms;
using GoogleAnalyticsSupport;
#endregion
.................
.................etc,.

public override void CreateNewOutputRows()
{
ReportRequestorWithSorting RR = new ReportRequestorWithSorting();
RR.Email = Variables.UserName; //"test_googleanalytics@mycompany.org";
RR.Password = Variables.Password; // "abcdefgh";
DateTime value =Variables.FromDate; //new DateTime(2010, 9, 1);
DateTime value1 = Variables.ToDate; //new DateTime(2012, 9, 26);
AnalyticsAccountInfo AAI = new AnalyticsAccountInfo();
AAI.AccountName = Variables.AccountName; //"myaccount";
AAI.WebPropertyID = Variables.WebPropertyId; //"http://myaccount.abcd.com";
AAI.ProfileID = Convert.ToString(Variables.ProfileID); // "123456";
AAI.ID = Variables.ID; // "ga:123456";
AAI.Title = Variables.Title; // "myaccount.abcd.com";

var PagesDetailByURL = RR.ReportRequestWF(AAI, new Dimension[] { Dimension.pagePath }
,new Metric[7]{Metric.pageviews,Metric.uniquePageviews
,Metric.avgTimeOnPage,Metric.entrances,Metric.visitBounceRate,Metric.exitRate
,Metric.totalValue},new Sort[]{Sort.pageviews},value, value1);
foreach (var p in PagesDetailByURL)
{
Output0Buffer.AddRow();
Output0Buffer.PageURL = p.Dimensions[0].Value.ToString();
Output0Buffer.PageViewCount = p.Metrics[0].Value.ToString();
Output0Buffer.UniquePageViewCount = p.Metrics[1].Value.ToString();
Output0Buffer.AvgTimeOnPage = p.Metrics[2].Value.ToString();
Output0Buffer.EntranceCount = p.Metrics[3].Value.ToString();
Output0Buffer.BounceRate = p.Metrics[4].Value.ToString();
Output0Buffer.ExitPercent = p.Metrics[5].Value.ToString();
Output0Buffer.TotalValue = p.Metrics[6].Value.ToString();
Output0Buffer.SiteProfileID = AAI.ProfileID.ToString();
}
}
}

Example table and package to retrieve data from the inception date of Google Analytics

The following schema is to create table for PageDetailByURLHistory_SingleMonth which will load data for the from the inception date to the last completed month.

We can also pull the data for the quarter and FYTD.


CREATE TABLE [web].[PagesDetailByURLHistory_SingleMonth](
[ProfileID] [int] NOT NULL,
[Interval] [varchar](20) NULL,
[FiscalYear] [smallint] NULL,
[FiscalMonth] [tinyint] NULL,
[FiscalQuarter] [tinyint] NULL,
[PageURL] [nvarchar](max) NULL,
[PageViewCount] [int] NULL,
[UniquePageViewCount] [int] NULL,
[AvgTimeOnPage] [numeric](18, 2) NULL,
[EntranceCount] [int] NULL,
[BounceRate] [numeric](18, 2) NULL,
[ExitPercent] [numeric](18, 2) NULL,
[TotalValue] [numeric](18, 2) NULL,
[RecordExists] [bit] NULL,
[AuditTaskId] [int] NULL
)


Below is the example figures for ETL to load data in a table 

Created variables:



ControlFlow Steps
















DataFlowTask steps:

We need to select variables to pass in the script component coding area.




Once the output columns created in SCR task, Click on EditScript to go the VisaProjects. In the Solution Explorer screen, Right click on – References – click Add References – Select the DLL file as given in the below figure.




In the main.cs file, write the below coding.

#region Help: Introduction to the Script Component
/* The Script Component allows you to perform virtually any operation that can be accomplished in
* a .Net application within the context of an Integration Services data flow.
*
* Expand the other regions which have "Help" prefixes for examples of specific ways to use
* Integration Services features within this script component. */
#endregion

#region Namespaces
using System;
using System.Data;
using Microsoft.SqlServer.Dts.Pipeline.Wrapper;
using Microsoft.SqlServer.Dts.Runtime.Wrapper;
using System.Windows.Forms;
using GoogleAnalyticsSupport;
#endregion

.............................
....................etc,.

public override void CreateNewOutputRows()
{
ReportRequestorWithSorting RR = new ReportRequestorWithSorting();
RR.Email = Variables.UserName; //"test_googleanalytics@mycompany.org";
RR.Password = Variables.Password; // "abcdef";
DateTime value =Variables.FromDate; //new DateTime(2010, 9, 1);
DateTime value1 = Variables.ToDate; //new DateTime(2012, 9, 26);
AnalyticsAccountInfo AAI = new AnalyticsAccountInfo();
AAI.AccountName = Variables.AccountName; //"myaccount";
AAI.WebPropertyID = Variables.WebPropertyId; //"http://myaccount.abcd.com";
AAI.ProfileID = Convert.ToString(Variables.ProfileID); // "12345";
AAI.ID = Variables.ID; // "ga:12345";
AAI.Title = Variables.Title; // "myaccount.abcd.com";
int fiscalYear = Variables.FiscalYear;
int fiscalMonth = Variables.FiscalMonth;
int fiscalQuarter = Variables.FiscalQuarter;

var PagesDetailByURL = RR.ReportRequestWF(AAI, new Dimension[] { Dimension.pagePath }
,new Metric[7]{Metric.pageviews,Metric.uniquePageviews
,Metric.avgTimeOnPage,Metric.entrances,Metric.visitBounceRate,Metric.exitRate
,Metric.totalValue},new Sort[]{Sort.pageviews},value, value1);
foreach (var p in PagesDetailByURL)
{
Output0Buffer.AddRow();
Output0Buffer.PageURL = p.Dimensions[0].Value.ToString();
Output0Buffer.PageViewCount = p.Metrics[0].Value.ToString();
Output0Buffer.UniquePageViewCount = p.Metrics[1].Value.ToString();
Output0Buffer.AvgTimeOnPage = p.Metrics[2].Value.ToString();
Output0Buffer.EntranceCount = p.Metrics[3].Value.ToString();
Output0Buffer.BounceRate = p.Metrics[4].Value.ToString();
Output0Buffer.ExitPercent = p.Metrics[5].Value.ToString();
Output0Buffer.TotalValue = p.Metrics[6].Value.ToString();
Output0Buffer.SiteProfileID = AAI.ProfileID.ToString();
Output0Buffer.FiscalMonth = fiscalMonth;
Output0Buffer.FiscalYear = fiscalYear;
Output0Buffer.FiscalQuarter = fiscalQuarter;
}
}
}

Creating Delay function using script component task where there is an issue in network. Can set the sleep timing according to the requirement.

public void Main()
{
// TODO: Add your code here
System.Threading.Thread.Sleep(9000);
Dts.TaskResult = (int)ScriptResults.Success;
}




No comments:

Post a Comment

Note: Only a member of this blog may post a comment.