用GPU通用并行计算绘制曼德勃罗特集图形 下篇

时间:2009-12-13   来源:博客园   网友评论:0   人气: 452 作者: 装配中的脑袋

上篇中我们用DirectX Compute Shader在显卡上编写了一个并行算法来计算好看的曼德勃罗特集迭代数图形。那么使用显卡进行通用计算到底有多少优势呢?我们本次就来比较一番。首先我们在CPU上也实现该算法。为了方便起见我们设计了一个类:

class CPUCalc
{
private:
    int m_stride;
    int m_width;
    int m_height;
    float m_realMin;
    float m_imagMin;
    float m_scaleReal;
    float m_scaleImag;
    unsigned char* m_pData;

    void CalculatePoint(unsigned int x, unsigned int y);

public:
    CPUCalc(int stride, int width, int height, float rmin, float rmax, float imin, float imax, unsigned char* pData) 
        : m_stride(stride), m_width(width), m_height(height), m_realMin(rmin), m_imagMin(imin), m_scaleReal(0), m_scaleImag(0), m_pData(pData) 
    {
        m_scaleReal = (rmax - rmin) / width;
        m_scaleImag = (imax - imin) / height;
    }

    void Calculate();

};

在HLSL代码中放在Constant Buffer中的数据,现在放在了类的成员处。注意我们这个类可以计算自定义复平面区间的曼德勃罗特集。rmin和rmax表示复平面的实数轴范围而imin、imax则表示复平面的虚数轴范围。这些参数的意义和上次HLSL中用的参数是一样的,如果想自己实现该程序可以参考一下。

下面则是类的实现。我们用几乎和HLSL一模一样的做法来实现。其中某些HLSL的内置方法采用类似的C++实现代替:

#include <algorithm>
#include <math.h>
#include "CPUCalc.h"

using std::max;

typedef unsigned int uint;
const uint MAX_ITER = 4096;

struct float2
{
    float x;
    float y;
};

inline float smoothstep(const float minv, const float maxv, const float v)
{
    if (v < minv) 
        return 0.0f;
    else if (v > maxv)
        return 1.0f;
    else
        return (v - minv) / (maxv - minv);
}

inline uint ComposeColor(uint index)
{    
    if (index == MAX_ITER) return 0xff000000;

    uint red, green, blue;
    
    float phase = index * 3.0f / MAX_ITER;
    red = (uint)(max(0.0f, phase - 2.0f) * 255.0f);
    green = (uint)(smoothstep(0.0f, 1.0f, phase - 1.3f) * 255.0f);
    blue = (uint)(max(0.0f, 1.0f - abs(phase - 1.0f)) * 255.0f);
    
    return 0xff000000 | (red << 16) | (green << 8) | blue;
}

void CPUCalc::CalculatePoint(uint x, uint y)
{
    float2 c;
    c.x = m_realMin + (x * m_scaleReal);
    c.y = m_imagMin + ((m_width - y) * m_scaleImag);
    
    float2 z;
    z.x = 0.0f;
    z.y = 0.0f;
    
    float temp, lengthSqr;
    uint count = 0;
    
    do
    {
        temp = z.x * z.x - z.y * z.y + c.x;
        z.y = 2 * z.x * z.y + c.y;
        z.x = temp;
        
        lengthSqr = z.x * z.x + z.y * z.y;
        count++;
    }
    while ((lengthSqr < 4.0f) && (count < MAX_ITER));    
    
    //write to result
    uint currentIndex = x * 4 + y * m_stride;
    uint& pPoint = *reinterpret_cast<uint*>(m_pData + currentIndex);
    
    pPoint = ComposeColor(static_cast<uint>(log((float)count) / log((float)MAX_ITER) * MAX_ITER));
}

void CPUCalc::Calculate()
{
    #pragma omp parallel for
    for (int y = 0; y < m_height; y++)
        for (int x = 0; x < m_width; x++)
        {
            CalculatePoint(x, y);
        }
}


 

文章评论