NPPEXEC Plugin
C++ Compiler template
NPP_SAVECD $(CURRENT_DIRECTORY)
g++ -std=c++11 $(FILE_NAME)
public class Encryption { public static string EncryptString(string valueToEncrypt) { return EncryptString(valueToEncrypt, "My password key"); } public static string DecryptString(string valueToDecrypt) { return DecryptString(valueToDecrypt, "My password key"); } public static string EncryptString(string valueToEncrypt, string secretPhrase) { CryptoStream encryptStream = null; // Stream used to encrypt RijndaelManaged rijndael = null; // Rijndael provider ICryptoTransform rijndaelEncrypt = null; // Encrypting object MemoryStream memStream = new MemoryStream(); // Stream to contain data byte[] key; byte[] IV; GenerateKey(secretPhrase, out key, out IV); try { if (valueToEncrypt.Length > 0) { // Create the crypto objects rijndael = new RijndaelManaged(); rijndael.Key = key; rijndael.IV = IV; rijndaelEncrypt = rijndael.CreateEncryptor(); encryptStream = new CryptoStream( memStream, rijndaelEncrypt, CryptoStreamMode.Write); // Write the encrypted value into memory byte[] input = Encoding.UTF8.GetBytes(valueToEncrypt); encryptStream.Write(input, 0, input.Length); encryptStream.FlushFinalBlock(); // Retrieve the encrypted value and return it return Convert.ToBase64String(memStream.ToArray()); } else { return ""; } } finally { if (rijndael != null) rijndael.Clear(); if (rijndaelEncrypt != null) rijndaelEncrypt.Dispose(); if (memStream != null) memStream.Close(); } } public static string DecryptString(string valueToDecrypt, string secretPhrase) { CryptoStream decryptStream = null; // Stream used to decrypt RijndaelManaged rijndael = null; // Rijndael provider ICryptoTransform rijndaelDecrypt = null; // decrypting object MemoryStream memStream = null; // Stream to contain data byte[] key; byte[] IV; GenerateKey(secretPhrase, out key, out IV); try { if (valueToDecrypt.Length > 0) { // Create the crypto objects rijndael = new RijndaelManaged(); rijndael.Key = key; rijndael.IV = IV; //Now decrypt the previously encrypted message using the decryptor // obtained in the above step. // Write the encrypted value into memory byte[] encrypted = Convert.FromBase64String(valueToDecrypt); memStream = new MemoryStream(encrypted); rijndaelDecrypt = rijndael.CreateDecryptor(); decryptStream = new CryptoStream(memStream, rijndaelDecrypt, CryptoStreamMode.Read); byte[] fromEncrypt = new byte[encrypted.Length]; //Read the data out of the crypto stream. decryptStream.Read(fromEncrypt, 0, fromEncrypt.Length); // Retrieve the encrypted value and return it string decryptedString = new string(Encoding.UTF8.GetChars(fromEncrypt)); return decryptedString.TrimEnd(new char[] { '\0' }); } else { return ""; } } finally { if (rijndael != null) rijndael.Clear(); if (rijndaelDecrypt != null) rijndaelDecrypt.Dispose(); if (memStream != null) memStream.Close(); } } /// Generates an encryption key based on the given phrase. The /// phrase is hashed to create a unique 32 character (256-bit) /// value, of which 24 characters (192 bit) are used for the /// key and the remaining 8 are used for the initialization vector (IV). private static void GenerateKey(string secretPhrase, out byte[] key, out byte[] IV) { // Initialize internal values key = new byte[24]; IV = new byte[16]; // Perform a hash operation using the phrase. This will // generate a unique 32 character value to be used as the key. byte[] bytePhrase = Encoding.ASCII.GetBytes(secretPhrase); SHA384Managed sha384 = new SHA384Managed(); sha384.ComputeHash(bytePhrase); byte[] result = sha384.Hash; // Transfer the first 24 characters of the hashed value to the key // and the remaining 8 characters to the intialization vector. for (int index = 0; index < 24; index++) key[index] = result[index]; for (int index = 24; index < 40; index++) IV[index - 24] = result[index]; } }
public static class Encryption { public static void EncryptConnectionStrings() { Configuration configuration = ConfigurationManager.OpenExeConfiguration(ConfigurationUserLevel.None); ConfigurationSection section = configuration.GetSection("connectionStrings"); if (!section.SectionInformation.IsProtected) { section.SectionInformation.ProtectSection("DataProtectionConfigurationProvider"); section.SectionInformation.ForceSave = true; configuration.Save(ConfigurationSaveMode.Modified); } } public static void DecryptConnectionStrings() { Configuration configuration = ConfigurationManager.OpenExeConfiguration(ConfigurationUserLevel.None); ConfigurationSection section = configuration.GetSection("connectionStrings"); if (section.SectionInformation.IsProtected) { section.SectionInformation.UnprotectSection(); section.SectionInformation.ForceSave = true; configuration.Save(ConfigurationSaveMode.Modified); } } }
Consider on this query:
SELECT * FROM Student WHERE Age = 18
Our data has sorted on column RollNo not on Age :( How to search the students in the table tblSudent (clustered index) for a give age efficiently? We got same problem as we had got for RollNo in the Heap. If problem is same then solution should be same that is store the data of the table tblSudent in the sorted order of column Age. But this time problem is little complex. If we will try to store data in the sorted order of column Age then sorting on column RollNo will disturb.
We need data to be stored on the basis of column RollNo for query:
SELECT * FROM Student WHERE RollNo = 111
We also need data to be stored on the basis of column Age for query:
SELECT * FROM Student WHERE Age = 18
We want both!! How is it possible? Any object can have only one physical organization. Then what is solution? I'm telling what sql server does. It creates copy of table and stored data in a sorted order of column Age in the copied table and it doesn't change anything in tblSudent(clustered index). This new copied table is called non-clustered index. Let's assume name of copied table is tblStdentNCI_1 then copied or subset table will look like something:
tblStudent (Clustered index): Sorted on RollNo
tblStdentNCI_1 (Non-clustered index): Sorted on Age
FAQ about no-clustered index:
Q. What is non-clustered index?
Answer: It is a copy of clustered index table sorted on some columns. It is subset of clustered since we can add some or all columns of clustered index table in the non-clustered index tables.
Q. A table can have how many non-clustered indexes?
Answer: We can create any number of copies of a table. So a table (more accurately clustered index) can have any number of non-clustered indexes. But there is some max limitation. For example in sql server 2012, 999 non-clustered indexes can be created per table.
Q. Does all non-clustered indexes occupy disk spaces?
Answer: Yes
Q. What will happen if data is modified in base table (clustered index)?
Answer:If data is inserted, deleted or updated in clustered index same data is also inserted, deleted or updated in all non-clustered indexes. There is no need to worry about data synchronization between clustered index and non-clustered indexes since sql server take this responsibility.
Q. Does non-clustered indexes are good or bad?
Answer: It may good for select statements or it may bad for insert, delete and update statement
Non-clustered index in depth:
It is logical organization of data in a table. A table can have 1023 non-clustered index. A non-clustered index can be of two types as:
1. Heap
2. Based on clustered index.
If table has clustered index then leaf node of non-clustered index keeps the key columns of clustered index. If the table has no clustered index then leaf node of non-clustered index keeps RID which is unique to each row of table. Such non-clustered index is called heap.
Non-clustered index based on clustered index:
Description of above image:
It is the B-tree representation of non-clustered index NCI_Student_Age where lower section of dotted line is also B-tree of clustered index CL_Student_Roll. Top node of non-clustered index is called root node and bottom nodes are called leaf node. All the nodes in between root node and leaf nodes are called intermediate nodes. A B-tree can have many intermediate nodes in case table has too many records. In this example there is no intermediate node and we have assumed size of each node is four i.e. each node can keep maximum five Age.
Important points about non-clustered index based on clustered index:
1. All the nodes keep key columns of non-clustered index and key column of unique clustered index.
2. If clustered index is not unique then instead of key column of clustered index it keeps a unique row identifier of the actual data rows of table.
3. Apart from points above, a leaf node also keeps the included columns of non-clustered index. (We will discuss about included column later)
Now if we will execute following sql query:
SELECT * FROM Student WHERE RollNo = 111
Since Student table has clustered index, now it will go for index scan instead of table scan to search the RollNo = 111. Scan will start from root node in the clustered index CL_Student_Roll.
SELECT Country
FROM Student WITH (INDEX (NCI_Student_Age))
WHERE Age = 31
Note: Here we have added WITH clause only because sql server uses the non-clustered index NCI_Student_Age, otherwise it will not use non-clustered index or will use if table has too many records. We will discuss why is it so later.
Execution plan of above query will look like this:
Meaning of above execution plan:
Sql server will first use non-clustered index NCI_Student_Age and find out the corresponding value of key column i.e. RollNumber of clustered index (It is called key look up) using nested loop and find out the value of column Country.
Scan 1: 22 < 31 so it will go to next element of root node.
Scan 2: 24 < 31 so it will go to the next element of root node. Since there is not any other element in root node so, it will follow right most link or path.
Scan 3: In leaf node, 24 < 31 o it will go to next element of leaf node.
Scan 4: In leaf node, 24 < 31 o it will go to next element of leaf node.
Scan 5: In leaf node, 27 < 31 o it will go to next element of leaf node.
Scan 6: In leaf node, 27 < 31 o it will go to next element of leaf node.
Scan 7: In leaf node, 31 = 31. It will get value key column of non clustered index i.e. RollNo which it equal to 109.
Scan 9: From here it will use clustered index CL_Student_Roll to get the values of column Country name of RollNo = 31. In root node of clustered index 107 < 109 so, it will go to next element of root node.
Scan 10: 110 > 109 so it will go through this link.
Scan 11: In leaf node, 108 < 109 so, it will go to next element of leaf node.
Scan 12: In leaf node, 107 < 109 so, it will go to next element of leaf node.
Scan 13: In leaf node, 109 = 109 so it will follow this path and get the actual data that is value of column Country which is USA.
Simple syntax of creating a non-clustered index
CREATE [UNIQUE] NONCLUSTERED INDEX <NCI_IndexName>
ON <ObjectName>(<ColumnName_Key>[ASC|DESC] [ ,...n ])
[INCLUDE (<ColumnName_Include> [,...n])]
<IndexName>: It is any valid name of index.
<ObjectName>: It is name of the table or view on which we want to create the non-clustered index.
<ColumnName_Key>: It is name of the key columns for clustered index separated by comma. Values of these columns are present in each node of a clustered index. It has following restrictions:
1. We can specify maximum 16 column names.
2. Sum of size of the columns cannot be more than 900 bytes.
3. All columns must belong to same table.
4. Data type of columns cannot be ntext, text, varchar (max), nvarchar (max), varbinary (max), xml, or image
5. It cannot be non-deterministic computed column.
<ColumnName_Include>: Due to above restrictions on <ColumnName_Key> columns, we can include more columns of table or view in the INCLUDE section. Date of include columns are only present in the leaf node of clustered index. It has also some restrictions:
1. Data type of columns cannot be text, ntext, and image.
2. A column cannot be present in key columns as well as can not include column at the same time. Also repetition of columns are not allowed.
Good practice: If possible we should try to add all columns of WHERE clause in key columns list and all columns of SELECT clause in the INCLUDE columns of a non-clustered index.
Covering of queries:
If we include all the the columns of a SELECT statement either as key columns or INCLUDE columns in non-clustered index such queries are called covered query. For example consider on query:
Non-clustered index 1:
CREATE NONCLUSTERED INDEX NCI_1
ON Student (RollNo,Age)
This index doesn't cover the above SELECT statement.
Non-clustered index 2:
CREATE NONCLUSTERED INDEX NCI_2
ON Student (RollNo,Age,Name)
This index doesn't cover the above SELECT statement.
Non-clustered index 3:
CREATE NONCLUSTERED INDEX NCI_3
ON Student (RollNo,Age,Name,Country)
This index COVERS the above SELECT statement.
Non-clustered index 4:
CREATE NONCLUSTERED INDEX NCI_4
ON Student (RollNo,Age)
INCLUDE (Name,Country)
This index covers the above SELECT statement.
Now consider on last index we have created that is
CREATE NONCLUSTERED INDEX NCI_Student_Age
ON Student (Age)
This index doesn't cover the SQL query:
SELECT Country
FROM Student
WHERE Age = 31
Due to which SQL server doesn't use the index NCI_Student_Age. Execution plan of this query will look like:
SQL server follows Index scan which is same as table scan and it is a slow process.
If we will use forcefully non-clustered index NCI_Student_Age as
SELECT Country
FROM Student WITH (INDEX (NCI_Student_Age))
WHERE Age = 31
Its execution plan will look like:
Here problem is it is partiality index seek process that is, to get value of column Country it has to go through (key look up) the clustered index CL_Student_Roll since non-clustered index doesn't keep the value of column Country. We can optimize this query by creating a new index which covers the SELECT statement.
CREATE NONCLUSTERED INDEX NCI_Student_Country
ON Student (Age)
INCLUDE (Country)
After creating this index, execution plan will look like:
Now there is no key look up and it is fully index seek. Structures of index NCI_Student_Country will look like this:
We can observe key columns of clustered index keep all the nodes while INCLUDE columns are only kept by leaf node. To perform this query:
SELECT Country
FROM Student
WHERE Age = 31
SQL server will not use clustered index since data of Country columns are present in non-clustered index NCI_Student_Country. So, there is no need of key look up.
Non-clustered index based on heap:
A table without any clustered index is called heap. It is possible to create a non-clustered index on such tables which do not have any clustered index. Main difference between non-clustered index on clustered index and non-clustered index on heap is, it keeps the RID (i.e. Row ID) in the leaf node of non-clustered index instead of key column of clustered index which is pointer to the actual data rows. Suppose we have a Student table which do not have any clustered index. Now we want to create a non-clustered index:
CREATE INDEX NCI_Heap
ON Student (Age)
If we will check the execution plan of this query:
SELECT Country
FROM Student WITH (INDEX (NCI_Heap))
WHERE Age = 31
It will look something like this:
From execution plan, we can observe that it is using RID look up instead of key look up.
Note: RID of each row of a table is generated by SQL server using file identifier, page number and number of the row on the page etc.
Organizations of non-clustered index on heap will look something like this:
We can remove the RID look up in the execution plan by covering the query in non-clustered index on heap that is:
CREATE INDEX NCI_Heap_Country
ON Student (Age)
INCLUDE (Country)
Now execution plan of query:
SELECT Country FROM Student WHERE Age = 31
Will look like:
URL: URL: http://www.exacthelp.com/2012/01/what-is-non-clustered-index-in-sql.html
Recently I was tasked with the creation of a large database system that consisted of a database table with 5 date columns, and a varchar primary key. This table was to hold upwards of 3.5 million records, and I needed to know exactly how much disk space was going to be needed to store not only the database, but also the index required to facilitate the search requirements. After looking for a number of different ways to do this, and many free third party tools I decided that the most simple way to do this with the tools that I had available was to generate my own method to populate a test database.
To get started with this I took the specific requirements of my database table and identified a routine to load data that would mimic the actual data. The reason for this was that I was two months out from being able to obtain actual data. My table as I mentioned consisted of a varchar primary key column that was 10 characters in length. This obviously had to be generated in a unique manner. I then had a total of 5 date columns each date entry for each record had to be different, but the dates would all be within the last 2 years. With this in mind I had a population scheme that I could work with.
· Must populate primary key value with 10 character unique identifier
· All date values should be dates between today and 2 years before today
· All date values in individual records should be different
· An index is needed to cover the primary key and ALL date columns due to specific business needs
· The following sections will demonstrate how I loaded the data, including randomization to ensure a varied collection of date values were included.
Obviously the first step of this process was to create my test table, for the sake of this exercise I am going to leave out the extra columns that were included in my testing as this article is really talking about how to create test datasets, and using TSQL loops and random number generators. The script below creates my minimal test table.
CREATE TABLE dbo.TestTableSize
(
MyKeyField VARCHAR(10) NOT NULL,
MyDate1 DATETIME NOT NULL,
MyDate2 DATETIME NOT NULL,
MyDate3 DATETIME NOT NULL,
MyDate4 DATETIME NOT NULL,
MyDate5 DATETIME NOT NULL
)
Now that I have the test table to load data into, I can start to actually create the TSQL that will fill my table. This code will be discussed in a few portions: Needed variables, one time value setting, and the population loop, in the end it will create the entire process needed to fill a table with three million records.
To start out process we will need a few variables; one to count the rows, one for a string representation of the row, one to hold the random number that is used to calculate our date, two to hold the upper/lower limits for the random number, and lastly one to hold our insert date. Below is the script needed to create the various variables.
DECLARE @RowCount INT
DECLARE @RowString VARCHAR(10)
DECLARE @Random INT
DECLARE @Upper INT
DECLARE @Lower INT
DECLARE @InsertDate DATETIME
After we have defined our variables it is necessary to configure the items that are set once per run. This involves setting the row count value to 0, setting the lower to -730 as valid dates can be 2 years prior to today, thus a negative 365 * 2. And lastly to set the upper limit to -1 to only allow yesterday to be selected.
SET @Lower = -730
SET @Upper = -1
SET @RowCount = 0
This is the most complicated piece of code, therefore it will be presented in sections, when then in the end we will re-present the entire code needed to populate the table. First we start a while loop that runs while the row count is less than three million.
WHILE @RowCount < 3000000
BEGIN
Now that we have this completed we perform the calculations needed to ready the inserts. First we will conver the row count into a string value, then we will get a random number between the upper and lower bounds set by our variables. This will get the day offset that we need. Lastly we will calculate the insert date using the random offset established previously. This code allows us to properly work with our insert later.
SET @RowString = CAST(@RowCount AS VARCHAR(10))
SELECT @Random = ROUND(((@Upper - @Lower -1) * RAND() + @Lower), 0)
SET @InsertDate = DATEADD(dd, @Random, GETDATE())
Now that we have our values, we can start our insert. As noted in our requirements none of the 5 dates in a single row can be the same, so to get around the issue we will simply add a day to the MyDate2 column, two days to the MyDate3 column, etc until we have all five dates. At the end of this we will increment the counter and end the block of code.
INSERT INTO TestTableSize
(MyKeyField
,MyDate1
,MyDate2
,MyDate3
,MyDate4
,MyDate5)
VALUES
(REPLICATE('0', 10 - DATALENGTH(@RowString)) + @RowString
, @InsertDate
,DATEADD(dd, 1, @InsertDate)
,DATEADD(dd, 2, @InsertDate)
,DATEADD(dd, 3, @InsertDate)
,DATEADD(dd, 4, @InsertDate))
SET @RowCount = @RowCount + 1
END
In the end we are presented the following script that will load random content into the table for testing. Simply execute this script and wait for the load to complete.
DECLARE @RowCount INT
DECLARE @RowString VARCHAR(10)
DECLARE @Random INT
DECLARE @Upper INT
DECLARE @Lower INT
DECLARE @InsertDate DATETIME
SET @Lower = -730
SET @Upper = -1
SET @RowCount = 0
WHILE @RowCount < 3000000
BEGIN
SET @RowString = CAST(@RowCount AS VARCHAR(10))
SELECT @Random = ROUND(((@Upper - @Lower -1) * RAND() + @Lower), 0)
SET @InsertDate = DATEADD(dd, @Random, GETDATE())
INSERT INTO TestTableSize
(MyKeyField
,MyDate1
,MyDate2
,MyDate3
,MyDate4
,MyDate5)
VALUES
(REPLICATE('0', 10 - DATALENGTH(@RowString)) + @RowString
, @InsertDate
,DATEADD(dd, 1, @InsertDate)
,DATEADD(dd, 2, @InsertDate)
,DATEADD(dd, 3, @InsertDate)
,DATEADD(dd, 4, @InsertDate))
SET @RowCount = @RowCount + 1
END
This article presented a quick overview of a method to insert random test data into a database table, additionally presented is the method of calculating a random number as well as randomizing data input based on a random offset. This may not be the best way to create a random data set, but it does work very well if you do not have any of the popular third party tools!
IF OBJECT_ID('dbo.TestTableSize', 'U') IS NOT NULL
DROP TABLE Dbo.TestTableSize;
GO
--===== Environment presets
-- Suppress the auto-display of Rowcounts TO Increase Performance.
SET NOCOUNT ON;
--===== Create the test table.
CREATE TABLE Dbo.TestTableSize
(MyKeyField VARCHAR(10) NOT NULL,
MyDate1 DATETIME NOT NULL,
MyDate2 DATETIME NOT NULL,
MyDate3 DATETIME NOT NULL,
MyDate4 DATETIME NOT NULL,
MyDate5 DATETIME NOT NULL
);
--===== Local variables
DECLARE @rowCount INT, @rowString VARCHAR(10), @random INT, @upper INT, @lower INT, @insertDate DATETIME;
--===== Presets
SELECT @lower = -730,
@upper = -1,
@rowCount = 0;
--===== Populate the table using a transaction to substantially
-- increase performance of the WHILE Loop.
BEGIN TRANSACTION;
WHILE @rowCount < 3000000
BEGIN --===== Single SELECT Instead OF Multiple SETs TO Increase Performance
SELECT @rowString = CAST(@rowCount AS VARCHAR(10)),
@random = ROUND(((@upper - @lower - 1) * RAND() + @lower), 0),
@insertDate = DATEADD(Dd, @random, GETDATE());
INSERT INTO Dbo.TestTableSize
(MyKeyField,
MyDate1,
MyDate2,
MyDate3,
MyDate4,
MyDate5
)
SELECT --Used SELECT instead of values to add clarity to which formula is which column
MyKeyField = REPLICATE('0', 10-DATALENGTH(@rowString))+@rowString,
MyDate1 = @insertDate,
MyDate2 = DATEADD(Dd, 1, @insertDate),
MyDate3 = DATEADD(Dd, 2, @insertDate),
MyDate4 = DATEADD(Dd, 3, @insertDate),
MyDate5 = DATEADD(Dd, 4, @insertDate);
SELECT @rowCount = @rowCount + 1;
END;
COMMIT;