Saturday, September 15, 2012

IQueryable to SQL

I needed a quick way to convert simple LINQ queries to SQL for testing execution plans, Someone (Thank you Yaniv Lorencez!) suggested using ToTraceString, but it doesn't provide a query you can execute and doesn't show the parameters, so first I've added the parameters but its useless for executing on SQL.


/// <summary>
/// Retrieves the sql text command + simple representation of values
/// </summary>
public static string ToTraceString<T>(this IQueryable<T> query)
{
    if (query != null)
    {
        ObjectQuery<T> objectQuery = query as ObjectQuery<T>;

        StringBuilder sb = new StringBuilder();
        sb.Append(objectQuery.ToTraceString());
        foreach (var p in objectQuery.Parameters)
            sb.AppendFormat("\r\n{0} ({2}): {1}", p.Name, p.Value, p.ParameterType);
        return sb.ToString();
    }
    return "No Trace string to return";
}


So, I've added a bit of code and we now have something that generates a valid sp_executesql:


/// <summary>
/// Retrieves SQL text command in sp_executesql ready for execution.
/// </summary>
public static string ToSqlString<T>(this IQueryable<T> query)
{
    if (query == null)
        return string.Empty;

    //get query
    ObjectQuery<T> objectQuery = query as ObjectQuery<T>;
    var sqlquery = objectQuery.ToTraceString();


    //if there are parameters, first add parameter types, then the parameter values.
    if (objectQuery.Parameters.Count() > 0)
    {
        //create a list of parameters with sql type
        StringBuilder retval = new StringBuilder();

        retval.Append("sp_executesql ");
        retval.AppendFormat("@statement = N'{0}'", sqlquery);

        //create sql parameter for each parameter
        List<string> parameters = new List<string>();
        List<string> values = new List<string>();
        foreach (var p in objectQuery.Parameters)
        {
            //get sqldbtype
            parameters.Add(string.Format("@{0} {1}", p.Name, GetDbType(p.Value)));
            //get sql values for each parameter
            values.Add(string.Format("@{0} = {1}", p.Name, GetDbValue(p.Value))); 
        }

        retval.AppendFormat(", @parameters = N'{0}'", string.Join(", ", parameters.ToArray()));
        retval.Append(", ");
        retval.Append(string.Join(", ", values.ToArray()));

        return retval.ToString();
    }
    else
    {
        return sqlquery;
    }
}

/// <summary>
/// Retrieve sql type from object value
/// </summary>
private static string GetDbType(object value)
{
    var p1 = new System.Data.SqlClient.SqlParameter();
    p1.Value = value;

    if (p1.SqlDbType == SqlDbType.NVarChar)
        return "nvarchar(max)";
    if ((p1.SqlDbType == SqlDbType.Binary) || (p1.SqlDbType == SqlDbType.VarBinary))
        return "varbinary(max)";

    return p1.SqlDbType.ToString().ToLower();
}

/// <summary>
/// Retrieve escaped sql value
/// </summary>
private static string GetDbValue(object obj)
{
    if (obj == null)
        return "null";

    if (obj is string)
        return "'" + obj.ToString().Replace("'", "''") + "'";

    if (obj is Guid)
        return "'" + obj.ToString() + "'";

    if (obj is DateTime)
        return "N'" + ((DateTime)obj).ToString("yyyy-MM-dd HH:mm:ss.ff") + "'";

    if (obj is bool)
        return (((bool)obj) == true) ? "1" : "0";

    if (obj is byte[])
        return "0x" + BitConverter.ToString((byte[])obj).Replace("-", "") + "";

    if (obj is int)
        return ((int)obj).ToString();

    if (obj is decimal)
        return ((decimal)obj).ToString();

    if (obj is long)
        return ((long)obj).ToString();

    if (obj is double)
        return ((double)obj).ToString();

    if (obj is Single)
        return ((Single)obj).ToString();

    throw new NotImplementedException(obj.GetType().Name + " not implemented");
}


You can find the project here:https://github.com/drorgl/ForBlog/tree/master/ToTraceString






Friday, September 14, 2012

EXEC vs sp_executesql

So you need dynamic SQL, not the best thing to do, but sometimes unavoidable.

What's the best way of writing it?

Can it execute faster? safer?

SQL provides two methods for executing dynamic SQL, the first and oldest is EXECUTE (or exec) and the second is sp_executesql, which came not much later.

If you have a query without changing or user parameters, exec is enough.

For example:


exec ('select * from Department');


Or you can do it with a variable:


declare @sql nvarchar(max);
set @sql = 'select * from Department';
exec (@sql)


But what happens when you need to put variables into the query, surely you can do this:


set @sql = 'select * from person where Username = ''' + @username + ''' and Password = ''' + @password + '''';
exec (@sql)


Sure, you can do it, but lets see what happens when you try this:


set @username = 'Peggy''';
set @password = ' or ''1''= ''1';
set @sql = 'select * from person where Username = ''' + @username + ''' and Password = ''' + @password + '''';
exec (@sql)


Opps.. where's did my security go?

SQL injection is still one of the more active attacks, its been known for years and still some people are not aware of it, lets see how we can solve it:


declare @username nvarchar(20);
declare @password nvarchar(20);
set @username = 'Peggy';
set @password = 'goodpassword';
set @username = REPLACE(@username,'''','''''');
set @password = REPLACE(@password,'''','''''');
declare @sql nvarchar(max)
set @sql = 'select * from person where Username = ''' + @username + ''' and Password = ''' + @password + '''';

exec (@sql)


Cool! so now no one can put a single quote to hack my application! WRONG!

Lets take a look at this:


declare @username nvarchar(20);
declare @password nvarchar(20);
set @username = 'Peggy''''''''''''''''''';
set @password = ' or (1= 1) or ''=''''''''';

set @username = REPLACE(@username,'''','''''');
set @password = REPLACE(@password,'''','''''');

set @sql = 'select * from person where Username = ''' + @username + ''' and Password = ''' + @password + '''';

exec (@sql)


Opps.... where did my security go again??

What's happening is the variables for username and password are too short to hold the last single quote, so its open, the same as with the previous attack, we can solve it, but then we'll find some other hole, so lets try to plug it once and for all.

sp_executesql to the rescue!

sp_executesql works in a similar way to the exec command but it allows you to define and pass parameters and by that eliminating the threat we've seen before.

So how would we solve the previous problem? Simple.


declare @username nvarchar(20);
declare @password nvarchar(20);

set @username = 'Peggy';
set @password = 'goodpassword'

exec sp_executesql N'select * from person where Username =@username and Password = @password', 
                    N'@username nvarchar(20), @password nvarchar(20)', @username,@password;


So what else can sp_executesql do for us?

It can speed things up by allowing execution plans to be cached.
Execution plans are the commands the sql server needs to execute so it can return the results you're asking for, each query is compiled into an execution plan and then saved in memory to save time when the query is executed again, but because its a hash, every little change will cause the server to recompile the query into another execution plan, by taking out the parameters the query stays the same for different inputs, increasing efficiency of your queries.

What else can these methods do for me?

You can insert into a table query results from other queries

With exec:


declare  @serverinfo table
(
    attribute_id int,
    attribute_name nvarchar(255),
    attribute_value nvarchar(255)
)
insert @serverinfo
exec sp_server_info

select *,'more..' from @serverinfo


with sp_executesql:


declare  @serverinfo table
(
    attribute_id int,
    attribute_name nvarchar(255),
    attribute_value nvarchar(255)
)
insert @serverinfo
exec sp_executesql N'sp_server_info'

select *,'more..' from @serverinfo



By far the best and oldest article on dynamic queries is http://www.sommarskog.se/dynamic_sql.html.

And if you're interested in further reading about execution plans:
http://www.simple-talk.com/sql/performance/execution-plan-basics/

Many Files Concurrent Read/Write

As part of an optimization I needed to do to a heavily loaded application I've searched for a way to read and write a lot of small to medium sized files, it was one more serious bottleneck which good caching alleviated but did not eliminate due to the amount of the files going out of cache.

So I've thought about 3 ways of accessing files and went ahead to see which one is faster, the first one will attempt to read/write with exception handling, the second one will use a queue with worker threads and the third one will avoid trying to access the files if they exist in a read/write dictionary (multiple reads are allowed but only one write).

At a later stage I've added the dictionary check to the queue, to make it more efficient, if a specific file is writing, the 2nd attempt will be re-queued so other files can be cleared out of the queue.

The results are files per second, for 50k files, 1000 random iterations on 100  files range averaged on 10 executions



ActionThreadsOS LockingQueued 1Queued 2Avoid
Read19775 1000 990 10193
Read531887 15019 14560 31585
Read1038639 15669 1066036886
Write13017 1022 993 2791
Write55532 4857 4766 5413
Write105378 4866 4625 5339
Read/Write14504 1050 995 5173
Read/Write510477 6934 7711 10324
Read/Write109742 7867 7824 10773

The results are pretty straight forward, OS Locking uses exceptions for handling collisions, it makes it slower in one thread, but it is overall faster because there are no overheads from other elements.


I've written two queued classes, one of them is invoking a method on each queue item, the other is in a continuous loop, the continuous loop is faster.

Overall it seems that avoiding an OS file access is the same as the exception for attempting to access the same file except when the file operation takes a long time, this is probably due to the fact that the method will keep trying every 1 ms to access the file in case of "file in use" exception, wasting CPU instead of waiting peacefully for the file to be unlocked.

Its been an interesting test for me, I've thought that the queues will do a much faster job as the OS will need only to handle writing and reading files sequentially, I was wrong and it was good to find out.

I'm including all the source code for the tests, I have a feeling the DictionaryLock and the QueuedExecution will find a better use in the future.

The DictionaryLock uses ConcurrentDictionary and a SpinLock to perform a Read/Write lock on keys, the spinlock is there so only one thread will be able to insert new locks into the dictionary.


/// <summary>
/// Dictionary of locks on TKey
/// </summary>
/// <typeparam name="TKey">Type of key</typeparam>
public class DictionaryLock<TKey>
{
    /// <summary>
    /// Dictionary of locks container
    /// </summary>
    private ConcurrentDictionary<TKey, ReaderWriterLockSlim> _locks = new ConcurrentDictionary<TKey, ReaderWriterLockSlim>();

    /// <summary>
    /// _locks updating lock
    /// </summary>
    private SpinLock _operationlock = new SpinLock();

    /// <summary>
    /// Retrieves the ReaderWriterLock for a specific key
    /// </summary>
    private ReaderWriterLockSlim GetLock(TKey key)
    {
        //check if lock exist
        ReaderWriterLockSlim localock;
        if (_locks.TryGetValue(key, out localock))
        {
            return localock;
        }

        //it doesn't exist, lets create it

        bool lockTaken = false;
        _operationlock.Enter(ref lockTaken);

        //after acquired write lock, recheck its not in the dictionary if two writes were attempted for the same key
        if (!_locks.TryGetValue(key, out localock))
        {
            localock = new ReaderWriterLockSlim();
            _locks[key] = localock;
        }
        _operationlock.Exit();

        return localock;
    }


    /// <summary>
    /// Enter Reader lock on key
    /// </summary>
    public void EnterReader(TKey key)
    {
        var localock = GetLock(key);

        localock.EnterReadLock();
    }

    /// <summary>
    /// Enter Writer lock on key
    /// </summary>
    public void EnterWriter(TKey key)
    {
        var localock = GetLock(key);

        localock.EnterWriteLock();
    }

    /// <summary>
    /// Check Reader locked on key
    /// </summary>
    public bool IsReaderLocked(TKey key)
    {
        ReaderWriterLockSlim localock;
        if (_locks.TryGetValue(key, out localock))
            return localock.IsReadLockHeld;
        return false;
    }

    /// <summary>
    /// Check Writer locked on key
    /// </summary>
    public bool IsWriterLocked(TKey key)
    {
        ReaderWriterLockSlim localock;
        if (_locks.TryGetValue(key, out localock))
            return localock.IsWriteLockHeld;
        return false;
    }

    /// <summary>
    /// Exit Reader lock on key
    /// </summary>
    public void ExitReader(TKey key)
    {
        ReaderWriterLockSlim localock;
        if (_locks.TryGetValue(key, out localock))
            localock.ExitReadLock();
    }

    /// <summary>
    /// Exit Writer lock on key
    /// </summary>
    public void ExitWriter(TKey key)
    {
        ReaderWriterLockSlim localock;
        if (_locks.TryGetValue(key, out localock))
            localock.ExitWriteLock();
    }
}


The QueuedExecution is an abstract class providing an easy way to implement queued object handling, it uses the ManualResetEventSlim to notify the caller its done processing the request, it could use a better exception handling, I've done the minimum for this test project.


/// <summary>
/// Abstract Queued Execution
/// <para>Provides infrastructure for executing IItems with ProcessQueue override
/// in a number of threads in defined in the constructor</para>
/// </summary>
public abstract class QueuedExecution : IDisposable
{
    /// <summary>
    /// Process Result, returned by ProcessQueue
    /// </summary>
    protected enum ProcessResult
    {
        Success,
        FailThrow,
        FailRequeue
    }

    /// <summary>
    /// Item interface
    /// </summary>
    protected interface IItem {}

    /// <summary>
    /// Queue Item container
    /// </summary>
    private class QueueItem
    {
        /// <summary>
        /// IItem
        /// </summary>
        public IItem Item { get; set; }

        /// <summary>
        /// Result of ProcessQueue
        /// </summary>
        public ProcessResult ProcessResult { get; set; }

        /// <summary>
        /// ManualResetEvent for pinging back the waiting call
        /// </summary>
        public ManualResetEventSlim resetEvent { get; set; } 
    }

    /// <summary>
    /// Queue containing all the items for execution
    /// </summary>
    private ConcurrentQueue<QueueItem> _queue = new ConcurrentQueue<QueueItem>();

    /// <summary>
    /// Process Queue method, should be overriden in inheriting class
    /// </summary>
    /// <param name="item">item to be executed against</param>
    /// <returns>success/fail/requeue</returns>
    protected abstract ProcessResult ProcessQueue(IItem item);

    /// <summary>
    /// Number of threads to process queue
    /// </summary>
    private int _threadcount = 1;

    /// <summary>
    /// Threads array
    /// </summary>
    private Thread[] _threads;

    /// <summary>
    /// flag, should abort all executing threads
    /// </summary>
    private bool _threadaborted = false;

    /// <summary>
    /// Initializes the threads for execution
    /// </summary>
    private void Initialize()
    {
        _threads = new Thread[_threadcount];
        for (var i = 0; i < _threadcount; i++)
            _threads[i] = new Thread(new ThreadStart(() =>
                {
                    do
                    {
                        QueueItem item;
                        if (_queue.TryDequeue(out item))
                        {
                            item.ProcessResult = ProcessQueue(item.Item);

                            if (item.ProcessResult == ProcessResult.FailRequeue)
                            {
                                _queue.Enqueue(item);
                                continue;
                            }

                            item.resetEvent.Set();
                        }
                        else
                        {
                            Thread.Sleep(1);
                        }
                    } while (!_threadaborted);
                }));
        for (var i = 0; i < _threadcount; i++)
            _threads[i].Start();
    }

    protected QueuedExecution(int threads)
    {
        _threadcount = threads;
        Initialize();
    }

    /// <summary>
    /// Execute call in queue, block until processed
    /// </summary>
    /// <param name="item"></param>
    protected void Execute(IItem item)
    {
        var resetevent = new ManualResetEventSlim();
        var qi = new QueueItem
            {
                Item = item,
                resetEvent = resetevent
            };
        _queue.Enqueue(qi);
        resetevent.Wait();

        if (qi.ProcessResult == ProcessResult.FailThrow)
            throw new Exception("execution failed");
    }

    #region IDisposable Members

    /// <summary>
    /// cleanup
    /// </summary>
    /// <param name="waitForFinish">should wait for process to finish 
    /// currently executing request or abort immediately</param>
    /// <param name="wait">time to wait for abort to finish</param>
    public void Dispose(bool waitForFinish,TimeSpan wait)
    {
        _threadaborted = true;
         bool allaborted = true;
        if (waitForFinish)
        {
            //wait for timeout, check if threads aborted gracefully in that time
            while ((DateTime.Now + wait) > DateTime.Now)
            {
                allaborted = true;
                foreach (var t in _threads)
                {
                    if (t.IsAlive == true)
                    {
                        allaborted = false;
                        break;
                    }
                }
                if (allaborted == true)
                    break;

                Thread.Sleep(1);
            }
        }

        //if not all threads were aborted, abort them
        if (allaborted == false)
        {
            foreach (var t in _threads)
                if (t.IsAlive)
                    t.Abort();
        }
    }

    public void Dispose()
    {
        Dispose(false,TimeSpan.MinValue);
    }

    #endregion
}


You can find the source code here:
https://github.com/drorgl/ForBlog/tree/master/FileCollisionTests