network.py

import re
import random
import numpy as np
from pprint import pprint
from docopt import docopt

help = """Perceptron

Usage:
    network.py [--layout=<layout>] [--save_file=<save_file>]

Options:
    -h --help                  Display this help.
    --layout=<layout>          Provide a layout to the network [default: 1_1].
    --save_file=<save_file>    Pickle file to save the network state.

Creates a neural network composed of a bunch of sigmoids :)
"""


class LayeredNetwork(object):
    """
    Make a network where each neuron of one layer is linked to every neuron
    of the previous layer
    """
    def __init__(self, layout, eta=0.001):
        """
        The layout parameter is a string describing the network architecture.
        For example, 784_12_15_10 creates a layered network with 784 input
        neurons, 2 hidden layers composed respectively of 12 and 15 neurons,
        and a pool of 10 output neurons. Hidden layers aren't mandatory, so
        you can write 784_10 if you want and you will get 784 inputs linked
        directly to 10 outputs.
        eta is the learning rate used during training.
        """
        pattern = re.compile(r"^\d+(?:_\d+)*$")
        matched = pattern.match(layout)
        if not matched:
            raise ValueError("Incorrect layout provided")
        sizes = [ int(x) for x in layout.split("_") ]
        self.eta = eta
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.biases = [ np.random.randn(y, 1) for y in sizes[1:] ]
        self.weights = [ np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:]) ]
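        # Illustrative sketch (not in the original): for layout "784_12_15_10"
        # the shapes produced above are
        #   biases:  [(12, 1), (15, 1), (10, 1)]
        #   weights: [(12, 784), (15, 12), (10, 15)]
        # so weights[l][j][k] links neuron k of layer l to neuron j of layer l+1.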

    def stochastic_gradient_descent(self, batches, nb_trainings, batch_size):
        """
        Trains the network with mini-batch stochastic gradient descent.
        batches is a list of (x, y) pairs, where x is the input vector and
        y is the expected output vector. nb_trainings specifies how many
        training passes will be performed and batch_size how many pairs
        each mini-batch contains.
        """
        print "Starting training"
        n = len(batches)
        for training_num in range(nb_trainings):
            random.shuffle(batches)
            training_data = [ batches[i:i+batch_size] for i in xrange(0, n, batch_size) ]
            for batch in training_data:
                self.training(batch)
        print "End of training"
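    # Worked example of the slicing above (illustrative): with n = 5 batches
    # and batch_size = 2, xrange(0, 5, 2) yields 0, 2, 4, so the mini-batches
    # are batches[0:2], batches[2:4] and batches[4:6] (the last one holds a
    # single pair).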

    def feedforward(self, a):
        """
        Return the output of the network if "a" is the input
        """
        for b, w in zip(self.biases, self.weights):
            a = sigmoid(np.dot(w, a) + b)
        return a # output vector of the last layer
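    # Each iteration above applies a = sigmoid(w . a + b): with w of shape
    # (y, x), a of shape (x, 1) and b of shape (y, 1), the result has shape
    # (y, 1) and becomes the input of the next layer.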

    def training(self, batch):
        # initiate the nabla lists
        nabla_b = [ np.zeros(b.shape) for b in self.biases ]
        nabla_w = [ np.zeros(w.shape) for w in self.weights ]
        # run the different training inputs x and accumulate the gradients
        # computed against the expected values y
        for x, y in batch:
            delta_nabla_b, delta_nabla_w = self.backpropagation(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # modify the network weights and biases following the accumulated gradients
        self.weights = [w - (self.eta/len(batch)) * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (self.eta/len(batch)) * nb for b, nb in zip(self.biases, nabla_b)]
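    # The update rule implemented above is plain gradient descent averaged
    # over the mini-batch:
    #   w <- w - (eta / len(batch)) * sum_x dC/dw
    #   b <- b - (eta / len(batch)) * sum_x dC/db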

    def backpropagation(self, x, y):
        """
        x is the input vector and y the expected output vector.
        Returns (delta_nabla_b, delta_nabla_w), the gradients of the cost with
        respect to the biases and weights for this single (x, y) pair, which
        training() accumulates to adjust the network toward the expected
        result as fast as possible.
        """
        # initiate the nabla lists
        delta_nabla_b = [ np.zeros(b.shape) for b in self.biases ]
        delta_nabla_w = [ np.zeros(w.shape) for w in self.weights ]
        # initiate the feed forwarding
        a = x
        activations = [x]
        zs = []
        # propagate the inputs toward the outputs
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, a) + b
            zs.append(z)
            a = sigmoid(z)
            activations.append(a)
        # output error
        delta = (activations[-1] - y) * sigmoid_prime(zs[-1])
        # backpropagation initialization
        delta_nabla_b[-1] = delta
        delta_nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # backpropagation
        for l in xrange(2, self.num_layers):
            z = zs[-l]
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sigmoid_prime(z)
            delta_nabla_b[-l] = delta
            delta_nabla_w[-l] = np.dot(delta, activations[-l-1].transpose())
        return (delta_nabla_b, delta_nabla_w)
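    # The method above implements the standard backpropagation equations for
    # the quadratic cost C = 1/2 * ||a_L - y||^2 implied by the
    # (activations[-1] - y) term:
    #   delta_L = (a_L - y) * sigmoid'(z_L)                 (output layer)
    #   delta_l = (W_{l+1}^T . delta_{l+1}) * sigmoid'(z_l) (hidden layers)
    #   dC/db_l = delta_l
    #   dC/dw_l = delta_l . a_{l-1}^T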


def sigmoid(z):
    """
    An elementwise activation function
    z is a vector
    """
    return 1.0 / (1.0 + np.exp(-z))


def sigmoid_prime(z):
    """
    The elementwise derivative of the activation function
    z is a vector
    """
    return sigmoid(z) * (1.0 - sigmoid(z))


if __name__ == '__main__':
    arguments = docopt(help)
    network = LayeredNetwork(layout=arguments["--layout"])
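    # Hedged usage sketch (not part of the original script): exercise the
    # training API on random vectors shaped to match the chosen layout; real
    # data loading and the --save_file option are left out.
    # in_size, out_size = network.sizes[0], network.sizes[-1]
    # batches = [ (np.random.randn(in_size, 1), np.random.randn(out_size, 1))
    #             for _ in range(100) ]
    # network.stochastic_gradient_descent(batches, nb_trainings=5, batch_size=10)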