从今天开始要多做一些机器学习方面的竞赛题目,题目来源主要是Hackerrank和Kaggle,链接如下:
Hackerrank:https://www.hackerrank.com/
Kaggle:https://www.kaggle.com/
Hackerrank要求提交源代码,很多库不能直接使用,相关功能需要自己实现,限制比较多;而Kaggle只需要提交结果数据,所以用什么工具和方法都可以。现在来讲第一道题——房价预测,这是Andrew Ng课程里比较经典的例子。题目描述如下:
题目:https://www.hackerrank.com/challenges/predicting-house-prices
分析:比较简单,这是一个多元线性回归问题,用梯度下降法最小化平方误差即可。
代码:
# coding: utf-8
#
# Multivariate linear regression fitted by iterative descent on the sum of
# squared errors. Input is read from stdin, predictions go to stdout.
#
# Input format, as consumed by main():
#   line 1        : "F N"  (number of features, number of training rows)
#   next N lines  : F feature values followed by the target value
#   next line     : T      (number of query rows)
#   next T lines  : F feature values
# The sequence may repeat until end of input.
#
# The code runs unchanged on Python 2 and Python 3.
import sys


class Data:
    """One sample: feature vector ``x`` (bias term first) and target ``y``."""

    def __init__(self, x=None, y=0.0):
        # A fresh list per instance; never share a mutable default.
        self.x = [] if x is None else x
        self.y = y


def WX(d, w):
    """Return the linear prediction: the dot product of ``w`` and ``d.x``.

    Pairs are formed positionally; surplus entries in the longer of the two
    sequences are ignored.
    """
    return sum((wi * xi for wi, xi in zip(w, d.x)), 0.0)


def Gradient(d, w, alpha):
    """Perform one in-place descent sweep over all weights.

    ``d`` is a list of ``Data`` samples, ``w`` the weight list (mutated),
    ``alpha`` the step size.

    Weights are updated one coordinate at a time, so the update of ``w[i]``
    already sees the new values of ``w[0..i-1]``. That is why the residual
    ``WX(s, w) - s.y`` is recomputed for every coordinate instead of being
    cached once per sweep.
    """
    for i in range(len(w)):
        slope = sum((s.x[i] * (WX(s, w) - s.y) for s in d), 0.0)
        w[i] -= alpha * slope


def getValues(d, w):
    """Return the sum of squared errors of weights ``w`` over samples ``d``."""
    total = 0.0
    for s in d:
        err = s.y - WX(s, w)
        # Plain multiplication on purpose: float ``**`` raises OverflowError,
        # while ``*`` yields inf, which lets Iterator() stop on its own if
        # the iteration diverges.
        total += err * err
    return total


def Iterator(d, w, alpha=0.005, delta=0.000001):
    """Repeat descent sweeps until the squared error stops changing.

    ``w`` is modified in place. Iteration ends as soon as the absolute change
    of the squared error between two consecutive sweeps is no longer greater
    than ``delta``. At least one sweep is always performed.
    """
    previous = getValues(d, w)
    while True:
        Gradient(d, w, alpha)
        current = getValues(d, w)
        # Written as "not (... > delta)" so that a NaN difference (diverged
        # run) also terminates the loop.
        if not abs(previous - current) > delta:
            break
        previous = current


def _read_tokens():
    """Return the whitespace-split tokens of the next non-blank stdin line.

    Raises EOFError when the input is exhausted.
    """
    while True:
        line = sys.stdin.readline()
        if not line:
            raise EOFError
        tokens = line.split()
        if tokens:
            return tokens


def _read_row():
    """Read one row of floats and prepend the constant bias feature 1.0."""
    return [1.0] + [float(tok) for tok in _read_tokens()]


def main():
    """Fit and predict for every data set found on stdin until end of input."""
    while True:
        try:
            F, N = map(int, _read_tokens())
            d = []
            for _ in range(N):
                row = _read_row()
                target = row.pop()  # the last column of a training row is y
                d.append(Data(row, target))
            w = [0.0] * (F + 1)  # one weight per feature plus the bias
            Iterator(d, w)
            queries = int(_read_tokens()[0])
            for _ in range(queries):
                print('%.2f' % WX(Data(_read_row()), w))
        except EOFError:
            break


if __name__ == '__main__':
    main()