注册 登录  
 加关注
   显示下一条  |  关闭
温馨提示!由于新浪微博认证机制调整,您的新浪微博帐号绑定已过期,请重新绑定!立即重新绑定新浪微博》  |  关闭

Koala++'s blog

计算广告学 RTB

 
 
 

日志

 
 

Weka开发[27]——SMO源代码分析[4]   

2009-09-19 22:29:12|  分类: 机器学习 |  标签: |举报 |字号 订阅

  下载LOFTER 我的照片书  |

    下面是distridutionForInstance的代码:

// Filter instance

if (!m_checksTurnedOff) {

    m_Missing.input(inst);

    m_Missing.batchFinished();

    inst = m_Missing.output();

}

 

if (!m_onlyNumeric) {

    m_NominalToBinary.input(inst);

    m_NominalToBinary.batchFinished();

    inst = m_NominalToBinary.output();

}

 

if (m_Filter != null) {

    m_Filter.input(inst);

    m_Filter.batchFinished();

    inst = m_Filter.output();

}

       buildClassifier中已经看过,略过。这里只看m_fitLogisticModels的代码:

if (!m_fitLogisticModels) {

    double[] result = new double[inst.numClasses()];

    for (int i = 0; i < inst.numClasses(); i++) {

       for (int j = i + 1; j < inst.numClasses(); j++) {

           if ((m_classifiers[i][j].m_alpha != null)

                  || (m_classifiers[i][j].m_sparseWeights != null)) {

              double output = m_classifiers[i][j].SVMOutput(-1, inst);

              if (output > 0) {

                  result[j] += 1;

              } else {

                  result[i] += 1;

              }

           }

       }

    }

    Utils.normalize(result);

    return result;

}

       m_fitLogisticModelsfalse的情况下SVMOutput第一个参数没有意义,这里是一个投票的过程,如果这次ij类别分类时,分到了jresult[j]++;,否则i++;

/**

 * The polynomial kernel : K(x, y) = <x, y>^p or K(x, y) = ( <x, y>+1)^p

*/

public class PolyKernel extends CachedKernel {

 

    /** Use lower-order terms? */

    private boolean m_lowerOrder = false;

 

    /** The exponent for the polynomial kernel. */

    private double m_exponent = 1.0;

 

    /**

     * Creates a new <code>PolyKernel</code> instance.

     *

     * @param dataset

     *            the training dataset used.

     * @param cacheSize

     *            the size of the cache (a prime number)

     */

    public PolyKernel(Instances dataset, int cacheSize, double exponent,

           boolean lowerOrder) {

 

       super(dataset, cacheSize);

 

       m_exponent = exponent;

       m_lowerOrder = lowerOrder;

       m_data = dataset;

    }

 

    protected double evaluate(int id1, int id2, Instance inst1)

           throws Exception {

 

       double result;

       if (id1 == id2) {

           result = dotProd(inst1, inst1);

       } else {

           result = dotProd(inst1, m_data.instance(id2));

       }

       // Use lower order terms?

       if (m_lowerOrder) {

           result += 1.0;

       }

       if (m_exponent != 1.0) {

           result = Math.pow(result, m_exponent);

       }

       return result;

    }

}

       m_lowerOrder是指是K(x,y)=<x,y>^p还是K(x,y)=(<x,y>+1)^pm_exponent不用说当然是指数p的值。

protected final double dotProd(Instance inst1, Instance inst2)

       throws Exception {

 

    double result = 0;

 

    // we can do a fast dot product

    int n1 = inst1.numValues();

    int n2 = inst2.numValues();

    int classIndex = m_data.classIndex();

    for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2;) {

       int ind1 = inst1.index(p1);

       int ind2 = inst2.index(p2);

       if (ind1 == ind2) {

           if (ind1 != classIndex) {

              result += inst1.valueSparse(p1) * inst2.valueSparse(p2);

           }

           p1++;

           p2++;

       } else if (ind1 > ind2) {

           p2++;

       } else {

           p1++;

       }

    }

    return (result);

}

       这个内积的计算,就是Platt论文50页中的伪代码。

// we can only cache if we know the indexes

if (id1 >= 0) {

 

    // Use full cache?

    if (m_cacheSize == 0) {

       if (m_kernelMatrix == null) {

           m_kernelMatrix = new double[m_data.numInstances()][];

           for (int i = 0; i < m_data.numInstances(); i++) {

              m_kernelMatrix[i] = new double[i + 1];

              for (int j = 0; j <= i; j++) {

                  m_kernelEvals++;

                  m_kernelMatrix[i][j] = evaluate(i, j, m_data

                         .instance(i));

              }

           }

       }

       m_cacheHits++;

       result = (id1 > id2) ? m_kernelMatrix[id1][id2]

              : m_kernelMatrix[id2][id1];

       return result;

    }

       注释第一行,我们只缓存我们知道的indexes,下面是不是全部缓存,如果还没有缓存过,那么分配空间,m_kernelMatrix是一个三角矩阵,计算就是用刚才看过的evaluate函数,因为m_kernelMatrix[id1][id2]=m_kernelMatrix[id2][id1]没有必要记录两次,只需在用的时候判断一下大小就可以了。

// Use LRU cache

    if (id1 > id2) {

       key = (id1 + ((long) id2 * m_numInsts));

    } else {

       key = (id2 + ((long) id1 * m_numInsts));

    }

    location = (int) (key % m_cacheSize) * m_cacheSlots;

    int loc = location;

    for (int i = 0; i < m_cacheSlots; i++) {

       long thiskey = m_keys[loc];

       if (thiskey == 0)

           break; // empty slot, so break out of loop early

       if (thiskey == (key + 1)) {

           m_cacheHits++;

           // move entry to front of cache (LRU) by swapping

           // only if it's not already at the front of cache

           if (i > 0) {

              double tmps = m_storage[loc];

              m_storage[loc] = m_storage[location];

              m_keys[loc] = m_keys[location];

              m_storage[location] = tmps;

              m_keys[location] = thiskey;

              return tmps;

           } else

              return m_storage[loc];

       }

       loc++;

    }

}

 

result = evaluate(id1, id2, inst1);

 

m_kernelEvals++;

 

// store result in cache

if (key != -1) {

    // move all cache slots forward one array index

    // to make room for the new entry

    System.arraycopy(m_keys, location, m_keys, location + 1,

           m_cacheSlots - 1);

    System.arraycopy(m_storage, location, m_storage, location + 1,

           m_cacheSlots - 1);

    m_storage[location] = result;

    m_keys[location] = (key + 1);

}

return result;

       注释提到的LRU算法全称是Least Recently Used。这里先看一下它的构造函数:

/**

 * Initializes the kernel cache. The actual size of the cache in bytes

 * is (64 * cacheSize).

 */

protected CachedKernel(Instances data, int cacheSize) {

    m_data = data;

    m_cacheSize = cacheSize;

    if (cacheSize > 0) {

 

       // Use LRU cache

       m_storage = new double[m_cacheSize * m_cacheSlots];

       m_keys = new long[m_cacheSize * m_cacheSlots];

    }

 

    m_numInsts = m_data.numInstances();

}

       m_storeagem_keys的大小都是m_cacheSize*m_cacheSlots。回到evalkey的计算也是很常规的,这就相当于是把id1看成了一个个数位,而id2是个m_numInsts位数。接下来计算location,如果m_keys[loc]==0表示这个slot是空的,跳出循环。如果为key+1,那么按LRU算法就要交换loclocation两个下标的m_storagem_keys的值。这个和操作系统中讲的差不多,一个slot的大小是m_cacheSlots,那么也就每m_cacheSlots个后是另一个keyslot了。Key!=-1下面的两个arraycopy是把最前面的那个位置移出来给最新的resultkey

  评论这张
 
阅读(1182)| 评论(0)
推荐 转载

历史上的今天

评论

<#--最新日志,群博日志--> <#--推荐日志--> <#--引用记录--> <#--博主推荐--> <#--随机阅读--> <#--首页推荐--> <#--历史上的今天--> <#--被推荐日志--> <#--上一篇,下一篇--> <#-- 热度 --> <#-- 网易新闻广告 --> <#--右边模块结构--> <#--评论模块结构--> <#--引用模块结构--> <#--博主发起的投票-->
 
 
 
 
 
 
 
 
 
 
 
 
 
 

页脚

网易公司版权所有 ©1997-2017