diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..10b472b
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Datasource local storage ignored files
+/../../../../:\pythonproject\gogames\.idea/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/.idea/gogames.iml b/.idea/gogames.iml
new file mode 100644
index 0000000..8dc09e5
--- /dev/null
+++ b/.idea/gogames.iml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..03d9549
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,6 @@
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..999906c
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..1467427
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/CnnCode.py b/CnnCode.py
new file mode 100644
index 0000000..acd3e97
--- /dev/null
+++ b/CnnCode.py
@@ -0,0 +1,141 @@
+from mxnet import autograd, init, nd
+from mxnet.gluon import nn
+
+
+class Inception(nn.Block):
+    def __init__(self, c1, c2, c3, c4, **kwargs):
+        super(Inception, self).__init__(**kwargs)
+        # Path 1: a single 1 x 1 convolution
+        self.p1_1 = nn.Conv2D(c1, kernel_size=1, activation='relu')
+        # Path 2: 1 x 1 convolution followed by a 3 x 3 convolution
+        self.p2_1 = nn.Conv2D(c2[0], kernel_size=1, activation='relu')
+        self.p2_2 = nn.Conv2D(c2[1], kernel_size=3, padding=1,
+                              activation='relu')
+        # Path 3: 1 x 1 convolution followed by a 5 x 5 convolution
+        self.p3_1 = nn.Conv2D(c3[0], kernel_size=1, activation='relu')
+        self.p3_2 = nn.Conv2D(c3[1], kernel_size=5, padding=2,
+                              activation='relu')
+        # Path 4: 3 x 3 max pooling followed by a 1 x 1 convolution
+        self.p4_1 = nn.MaxPool2D(pool_size=3, strides=1, padding=1)
+        self.p4_2 = nn.Conv2D(c4, kernel_size=1, activation='relu')
+
+    def forward(self, x):
+        p1 = self.p1_1(x)
+        p2 = self.p2_2(self.p2_1(x))
+        p3 = self.p3_2(self.p3_1(x))
+        p4 = self.p4_2(self.p4_1(x))
+        # Concatenate the four parallel paths along the channel dimension
+        return nd.concat(p1, p2, p3, p4, dim=1)
+
+
+def batch_norm(X, gamma, beta, moving_mean, moving_var, eps, momentum):
+    # Use autograd to tell whether we are in training or prediction mode
+    if not autograd.is_training():
+        # Prediction mode: normalize with the moving statistics
+        X_hat = (X - moving_mean) / nd.sqrt(moving_var + eps)
+    else:
+        assert len(X.shape) in (2, 4)
+        if len(X.shape) == 2:
+            # Fully connected layer: statistics over the batch axis
+            mean = X.mean(axis=0)
+            var = ((X - mean) ** 2).mean(axis=0)
+        else:
+            # Convolutional layer: per-channel statistics
+            mean = X.mean(axis=(0, 2, 3), keepdims=True)
+            var = ((X - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
+        X_hat = (X - mean) / nd.sqrt(var + eps)
+        # Update the moving statistics by exponential averaging
+        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
+        moving_var = momentum * moving_var + (1.0 - momentum) * var
+    Y = gamma * X_hat + beta
+    return Y, moving_mean, moving_var
+
+
+class BatchNorm(nn.Block):
+    def __init__(self, num_features, num_dims, **kwargs):
+        super(BatchNorm, self).__init__(**kwargs)
+        if num_dims == 2:
+            shape = (1, num_features)
+        else:
+            shape = (1, num_features, 1, 1)
+        # Learned scale and shift parameters
+        self.gamma = self.params.get('gamma', shape=shape, init=init.One())
+        self.beta = self.params.get('beta', shape=shape, init=init.Zero())
+        # Moving statistics, maintained outside of autograd
+        self.moving_mean = nd.zeros(shape)
+        self.moving_var = nd.zeros(shape)
+
+    def forward(self, X):
+        # Copy the moving statistics to X's device if necessary
+        if self.moving_mean.context != X.context:
+            self.moving_mean = self.moving_mean.copyto(X.context)
+            self.moving_var = self.moving_var.copyto(X.context)
+        Y, self.moving_mean, self.moving_var = batch_norm(
+            X, self.gamma.data(), self.beta.data(), self.moving_mean,
+            self.moving_var, eps=1e-5, momentum=0.9)
+        return Y
+
+
+# GoogLeNet-style body: five stages followed by a 19 * 19 = 361-way head
+b1 = nn.Sequential()
+b1.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3, activation='relu'),
+       nn.MaxPool2D(pool_size=3, strides=2, padding=1))
+
+b2 = nn.Sequential()
+b2.add(nn.Conv2D(64, kernel_size=1, activation='relu'),
+       nn.Conv2D(192, kernel_size=3, padding=1, activation='relu'),
+       nn.MaxPool2D(pool_size=3, strides=2, padding=1))
+
+b3 = nn.Sequential()
+b3.add(Inception(64, (96, 128), (16, 32), 32),
+       Inception(128, (128, 192), (32, 96), 64),
+       nn.MaxPool2D(pool_size=3, strides=2, padding=1))
+
+b4 = nn.Sequential()
+b4.add(Inception(192, (96, 208), (16, 48), 64),
+       Inception(160, (112, 224), (24, 64), 64),
+       Inception(128, (128, 256), (24, 64), 64),
+       Inception(112, (144, 288), (32, 64), 64),
+       Inception(256, (160, 320), (32, 128), 128),
+       nn.MaxPool2D(pool_size=3, strides=2, padding=1))
+
+b5 = nn.Sequential()
+b5.add(Inception(256, (160, 320), (32, 128), 128),
+       Inception(384, (192, 384), (48, 128), 128),
+       nn.GlobalAvgPool2D())
+
+net = nn.Sequential()
+X = nd.random.uniform(shape=(1, 1, 96, 96))
+net.add(b1, b2, b3, b4, b5, nn.Dense(19 * 19))
+
+# for layer in net:
+#     X = layer(X)
+#     print(layer.name, 'output shape:\t', X.shape)
+
+# if __name__ == '__main__':
+#     X = nd.random.uniform(shape=(1, 1, 19, 19))
+#     res = net(X)
+#     print(res)
\ No newline at end of file
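A quick way to sanity-check CnnCode.py is to push a dummy board through the stack block by block, as the commented-out loop above suggests. The following is a minimal sketch, assuming MXNet 1.x; the 1 x 1 x 19 x 19 input stands in for a single-channel Go board, and the final Dense layer emits 19 * 19 = 361 logits, one per intersection:

    from mxnet import nd
    from CnnCode import net

    net.initialize()
    X = nd.random.uniform(shape=(1, 1, 19, 19))
    for layer in net:
        X = layer(X)
        print(layer.name, 'output shape:\t', X.shape)  # ends at (1, 361)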
diff --git a/MultHeadAttention.py b/MultHeadAttention.py
new file mode 100644
index 0000000..01f9a52
--- /dev/null
+++ b/MultHeadAttention.py
@@ -0,0 +1,68 @@
+from mxnet import nd
+from mxnet.gluon import nn
+
+
+def softmax_attention(x):
+    # Numerically stable softmax: subtract the row-wise max before exp()
+    e = nd.exp(x - nd.max(x, axis=1, keepdims=True))
+    s = nd.sum(e, axis=1, keepdims=True)
+    return e / s
+
+
+class Context_Attention(nn.Block):
+    def __init__(self, **kwargs):
+        super(Context_Attention, self).__init__(**kwargs)
+        self.dense1_layer = nn.Dense(units=19, activation="tanh")
+
+    def forward(self, maxlen_input, h, st_1):
+        # Drop the leading layer axis of the previous decoder state
+        st = nd.reshape(st_1, shape=(st_1.shape[1], st_1.shape[2]))
+        # Tile the state so it can be concatenated with the encoder output
+        st1 = nd.repeat(st, repeats=maxlen_input, axis=1)
+        x = nd.concat(h, st1, dim=1)
+        x = self.dense1_layer(x)
+        # Attention weights over the encoder output
+        alphas = softmax_attention(x)
+        context = nd.dot(alphas, h)
+        context = context.reshape(1, context.shape[0], context.shape[1])
+        return context
+
+
+# def scale_dot_product_attention(query, key, value, mask):
+#     depth = key.shape[-1]
+#     values = nd.dot(query, key, transpose_b=True)
+#     # values = values / nd.sqrt(depth)
+#     values = values / nd.array([depth], dtype="float32")
+#     if mask is not None:
+#         values += mask * -1e9
+#     attention_weights = nd.softmax(values, axis=-1)
+#     output = nd.dot(attention_weights, value)
+#     return output
+
+
+# class MultHeadAttention(nn.Block):
+#     def __init__(self, num_hiddens, num_heads, **kwargs):
+#         super(MultHeadAttention, self).__init__(**kwargs)
+#         self.query_dense = nn.Dense(num_hiddens, activation="sigmoid")
+#         self.num_heads = num_heads
+#         self.key_dense = nn.Dense(num_hiddens, activation="sigmoid")
+#         self.value_dense = nn.Dense(num_hiddens, activation="sigmoid")
+#         self.output_dens = nn.Dense(num_hiddens, activation="sigmoid")
+#
+#     def transpose(self, X, batch_size):
+#         # Split the hidden dimension into (num_heads, depth) and reorder
+#         X = X.reshape(batch_size, -1, self.num_heads, 10)
+#         X = nd.transpose(X, axes=(0, 2, 1, 3))
+#         return X
+#
+#     def transpose_output(self, X, num_heads):
+#         # Undo transpose(): merge the heads back into one hidden dimension
+#         X = X.reshape(-1, num_heads, X.shape[1], X.shape[2])
+#         X = nd.transpose(X, axes=(0, 2, 1, 3))
+#         return X.reshape(X.shape[0], X.shape[1], -1)
+#
+#     def forward(self, queries, keys, values, valid_lens):
+#         batch_size = queries.shape[0]
+#         queries = self.transpose(self.query_dense(queries), batch_size)
+#         keys = self.transpose(self.key_dense(keys), batch_size)
+#         values = self.transpose(self.value_dense(values), batch_size)
+#         output = scale_dot_product_attention(queries, keys, values, valid_lens)
+#         output_concat = self.transpose_output(output, self.num_heads)
+#         return self.output_dens(output_concat)
+
+
+# if __name__ == '__main__':
+#     num_hiddens, num_heads = 100, 5
+#     attention = MultHeadAttention(num_hiddens, num_heads)
+#     attention.initialize()
+#     batch_size, num_queries, num_kvpairs, valid_lens = 2, 4, 6, nd.array([3, 2])
+#     queries = nd.random.uniform(shape=(batch_size, num_queries, num_hiddens))
+#     values = nd.random.uniform(shape=(batch_size, num_kvpairs, num_hiddens))
+#     output = attention(queries, values, values, valid_lens)
+#     print(output)
\ No newline at end of file
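softmax_attention above is the standard numerically stable softmax: subtracting the row-wise maximum before exponentiating changes nothing mathematically but prevents overflow in exp(). A small standalone check, assuming MXNet 1.x:

    from mxnet import nd
    from MultHeadAttention import softmax_attention

    x = nd.array([[1.0, 2.0, 3.0], [1000.0, 1000.0, 1000.0]])
    print(softmax_attention(x))   # rows sum to 1; the second row is uniform
    print(nd.softmax(x, axis=1))  # the built-in op agrees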
diff --git a/OutPut.py b/OutPut.py
new file mode 100644
index 0000000..e69de29
diff --git a/RnnCode.py b/RnnCode.py
new file mode 100644
index 0000000..5cbda2b
--- /dev/null
+++ b/RnnCode.py
@@ -0,0 +1,92 @@
+import mxnet as mx
+from mxnet import autograd, gluon, nd
+from mxnet.gluon import loss as gloss, nn, rnn
+
+from MultHeadAttention import Context_Attention
+
+
+def try_gpu():
+    # This helper is also saved in the d2lzh package for later use.
+    # Use the GPU if one is available, otherwise fall back to the CPU.
+    try:
+        ctx = mx.gpu()
+        _ = nd.zeros((1,), ctx=ctx)
+    except mx.base.MXNetError:
+        ctx = mx.cpu()
+    return ctx
+
+
+class RnnModel(nn.Block):
+    def __init__(self, EMBEDDING_DIM, INPUT_DIM, LATENT_DIM, **kwargs):
+        super(RnnModel, self).__init__(**kwargs)
+        self.Embedding_dim = EMBEDDING_DIM
+        self.Input_DIM = INPUT_DIM
+        self.Latent_dim = LATENT_DIM
+        self.decoder_lstm = rnn.LSTM(LATENT_DIM)
+        self.context_attention = Context_Attention()
+        self.embedding = nd.Embedding
+
+    def begin_state(self, *args, **kwargs):
+        return self.decoder_lstm.begin_state(*args, **kwargs)
+
+    def forward(self, maxlen_output, maxlen_input, decoder_inputs, weight,
+                encoder_output, s, c):
+        # Embed the index sequence with the supplied embedding table
+        decoder_input = self.embedding(weight, decoder_inputs,
+                                       self.Input_DIM, self.Embedding_dim)
+        outputs = []
+        for i in range(maxlen_output):
+            # Attention context from the encoder output and previous state
+            context = self.context_attention(maxlen_input, encoder_output, s)
+            # Pick the current embedded input step (or the last one if the
+            # input is shorter than maxlen_output)
+            if i >= decoder_input.shape[0]:
+                selector = nn.Lambda(lambda x: x[[-1], :, :])
+            else:
+                selector = nn.Lambda(lambda x: x[[i], :, :])
+            x_t = selector(decoder_input)
+            # Stack context and input along the sequence axis for the LSTM
+            decoder_lstm_input = nd.Concat(context, x_t, dim=0)
+            output, state = self.decoder_lstm(decoder_lstm_input, (s, c))
+            s, c = state
+            output = output.reshape((output.shape[0],
+                                     output.shape[1] * output.shape[2]))
+            decoder_outputs = nd.softmax(output, axis=1)
+            decoder_outputs = nd.max(decoder_outputs, axis=0)
+            if i == 0:
+                outputs = nd.max(decoder_outputs)
+            else:
+                outputs = nd.concat(outputs, nd.max(decoder_outputs), dim=0)
+        return outputs
+
+
+# if __name__ == '__main__':
+#     ctx = try_gpu()
+#     array = nd.random.normal(shape=(19, 19))
+#     decoder_inputs = nd.array([[i for i in range(j * 19, j * 19 + 19)]
+#                                for j in range(19 * 19)], dtype="float32")
+#     weight = nd.array([[0, 3, 4, 3, 3, 2, 3, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13]])
+#
+#     r = RnnModel(EMBEDDING_DIM=19, INPUT_DIM=19 * 19, LATENT_DIM=19)
+#     s, c = r.begin_state(batch_size=19, ctx=ctx)
+#     r.initialize()
+#     output = r(19 * 19, 200, decoder_inputs, weight, array, s, c)
+#     print(output.shape)
+#     poss = nd.zeros(shape=(361,), dtype="float32")
+#     poss[0] = 0.0065
+#     poss[1] = 0.0025
+#     poss[25] = 0.00013
+#     poss[34] = 0.0012
+#     poss[44] = 0.0011
+#     poss[90] = 0.001023
+#     poss[100] = 0.00023
+#
+#     s.detach()
+#     c.detach()
+#     loss = gloss.L2Loss()
+#     trainer = gluon.Trainer(r.collect_params(), 'sgd',
+#                             {'learning_rate': 1e2, 'momentum': 0, 'wd': 0})
+#     print(r.collect_params())
+#     with autograd.record():
+#         l = loss(output, poss).sum()
+#     l.backward()
+#     trainer.step(1)
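Each step of RnnModel.forward first builds an attention context from the encoder output and the previous LSTM state, then stacks that context with the current embedded input along the sequence axis before running the LSTM. A minimal shape walk for the attention half, assuming MXNet 1.x and the 19 x 19 sizes used in the commented-out test above:

    from mxnet import nd
    from MultHeadAttention import Context_Attention

    att = Context_Attention()
    att.initialize()
    h = nd.random.normal(shape=(19, 19))   # encoder output on the board grid
    s = nd.zeros(shape=(1, 19, 19))        # previous LSTM hidden state
    context = att(200, h, s)               # maxlen_input = 200, as in the test
    print(context.shape)                   # (1, 19, 19)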
diff --git a/Sequential.py b/Sequential.py
new file mode 100644
index 0000000..75335fa
--- /dev/null
+++ b/Sequential.py
@@ -0,0 +1,38 @@
+import copy
+import random
+
+from mxnet import init, nd
+from mxnet.gluon import nn
+
+from CnnCode import net
+from RnnCode import RnnModel, try_gpu
+
+
+class Sequential(nn.Block):
+    def __init__(self, EMBEDDING_DIM, INPUT_DIM, LATENT_DIM, **kwargs):
+        super(Sequential, self).__init__(**kwargs)
+        # CNN encoder: the GoogLeNet-style net from CnnCode
+        self.convolution = net
+        # Attention LSTM decoder
+        self.sequence = RnnModel(EMBEDDING_DIM, INPUT_DIM, LATENT_DIM)
+        self.begin_state = self.sequence.begin_state
+
+    def forward(self, chessboard, maxlen_output, maxlen_input, decoder_inputs,
+                weight, s, c):
+        # Add batch and channel axes: (19, 19) -> (1, 1, 19, 19)
+        chessboard = chessboard.reshape(1, 1, chessboard.shape[0],
+                                        chessboard.shape[1])
+        encoder_sequence = self.convolution(chessboard)
+        # Lay the 361 logits back out on the 19 x 19 board
+        encoder_output = encoder_sequence.reshape(19, 19)
+        output = self.sequence(maxlen_output, maxlen_input, decoder_inputs,
+                               weight, encoder_output, s, c)
+        return output
+
+
+# if __name__ == '__main__':
+#     initlist = [0, 0, 0, 0, 135, 255, 0, 0, 0, 135, 0, 255, 135, 0, 0, 0, 0, 0, 0]
+#
+#     def shuf(seq):
+#         random.seed(10)
+#         s = copy.deepcopy(seq)
+#         random.shuffle(s)
+#         return s
+#
+#     chess_board = nd.array(list(map(lambda index: shuf(initlist), range(19))))
+#     print(chess_board.shape)
+#     seq = Sequential(EMBEDDING_DIM=19, INPUT_DIM=19 * 19, LATENT_DIM=19)
+#     decoder_inputs = nd.array([[i for i in range(j * 19, j * 19 + 19)]
+#                                for j in range(19 * 19)], dtype="float32")
+#     weight = nd.array([[0, 3, 4, 3, 3, 2, 3, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13],
+#                        [1, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14, 55, 66, 47, 28, 120, 121, 112, 213]])
+#     ctx = try_gpu()
+#     s, c = seq.begin_state(batch_size=19, ctx=ctx)
+#     seq.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
+#     seq(chess_board, 9 * 19, 200, decoder_inputs, weight, s, c)
diff --git a/__pycache__/CnnCode.cpython-39.pyc b/__pycache__/CnnCode.cpython-39.pyc
new file mode 100644
index 0000000..1317d4f
Binary files /dev/null and b/__pycache__/CnnCode.cpython-39.pyc differ
diff --git a/__pycache__/MultHeadAttention.cpython-39.pyc b/__pycache__/MultHeadAttention.cpython-39.pyc
new file mode 100644
index 0000000..821bf40
Binary files /dev/null and b/__pycache__/MultHeadAttention.cpython-39.pyc differ
diff --git a/__pycache__/RnnCode.cpython-39.pyc b/__pycache__/RnnCode.cpython-39.pyc
new file mode 100644
index 0000000..66a6309
Binary files /dev/null and b/__pycache__/RnnCode.cpython-39.pyc differ
diff --git a/__pycache__/Sequential.cpython-39.pyc b/__pycache__/Sequential.cpython-39.pyc
new file mode 100644
index 0000000..5bb3e72
Binary files /dev/null and b/__pycache__/Sequential.cpython-39.pyc differ
diff --git a/bugscreenshot.JPG b/bugscreenshot.JPG
new file mode 100644
index 0000000..8861013
Binary files /dev/null and b/bugscreenshot.JPG differ
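Putting the pieces together: Sequential runs the board through the CNN encoder, reshapes the 361 logits back onto the 19 x 19 grid, and drives the attention decoder with that grid. A hedged end-to-end sketch, adapted from the commented-out test in Sequential.py (assuming MXNet 1.x; the board and index values are dummies):

    from mxnet import init, nd
    from RnnCode import try_gpu
    from Sequential import Sequential

    ctx = try_gpu()
    seq = Sequential(EMBEDDING_DIM=19, INPUT_DIM=19 * 19, LATENT_DIM=19)
    seq.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
    s, c = seq.begin_state(batch_size=19, ctx=ctx)
    chess_board = nd.random.uniform(shape=(19, 19), ctx=ctx)
    decoder_inputs = nd.array([[i for i in range(j * 19, j * 19 + 19)]
                               for j in range(19 * 19)], ctx=ctx)
    weight = nd.array([[0, 3, 4, 3, 3, 2, 3, 1, 2, 3,
                        4, 5, 6, 7, 8, 10, 11, 12, 13]], ctx=ctx)
    out = seq(chess_board, 19 * 19, 200, decoder_inputs, weight, s, c)
    print(out.shape)  # (361,): one score per board intersection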
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..0f8a58f
--- /dev/null
+++ b/main.py
@@ -0,0 +1,82 @@
+# This is a sample Python script.
+# Press Shift+F10 to execute it or replace it with your code.
+# Press Double Shift to search everywhere for classes, files, tool windows,
+# actions, and settings.
+import copy
+import random
+
+from mxnet import autograd, gluon, init, nd
+from mxnet.gluon import loss as gloss
+
+from RnnCode import try_gpu
+from Sequential import Sequential
+
+
+def grad_clipping(params, theta, ctx):
+    # Clip gradients by their global L2 norm: when the combined norm
+    # exceeds theta, scale every gradient by theta / norm
+    norm = nd.array([0], ctx)
+    for param in params:
+        norm += (param.grad ** 2).sum()
+    norm = norm.sqrt().asscalar()
+    if norm > theta:
+        for param in params:
+            param.grad[:] *= theta / norm
+
+
+def TrainingModel(chess_board, weights, possibilities):
+    num_epochs = 8
+    ctx = try_gpu()
+    model = Sequential(EMBEDDING_DIM=19, INPUT_DIM=19 * 19, LATENT_DIM=19)
+    decoder_inputs = nd.array([[i for i in range(j * 19, j * 19 + 19)]
+                               for j in range(19 * 19)], dtype="float32")
+    model.initialize(force_reinit=True, ctx=ctx, init=init.Xavier())
+    trainer = gluon.Trainer(model.collect_params(), 'sgd',
+                            {'learning_rate': 1e2, 'momentum': 0, 'wd': 0})
+    clipping_theta = 1e-2
+    loss = gloss.L2Loss()
+
+    for epoch in range(num_epochs):
+        for X, y, weight in zip(chess_board, possibilities, weights):
+            # Fresh LSTM state per sample, detached from any earlier graph
+            s, c = model.begin_state(batch_size=19, ctx=ctx)
+            s.detach()
+            c.detach()
+            with autograd.record():
+                # for layer in model:
+                #     X = layer(X, 19 * 19, 200, decoder_inputs, weight, s, c)
+                #     print(layer.name, 'output shape:\t', X.shape)
+                output = model(X, 19 * 19, 200, decoder_inputs, weight, s, c)
+                l = loss(output, y).sum()
+            l.backward()
+            params = [p.data() for p in model.collect_params().values()]
+            grad_clipping(params, clipping_theta, ctx)
+            trainer.step(1)
+            print("epoch {}: loss {}".format(epoch, l))
+
+
+# Press the green button in the gutter to run the script.
+if __name__ == '__main__':
+    weight = nd.array([[0, 3, 4, 3, 3, 2, 3, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13],
+                       [1, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14, 55, 66, 47, 28, 120, 121, 112, 213]])
+    weights = [weight]
+    initlist = [0, 0, 0, 0, 135, 255, 0, 0, 0, 135, 0, 255, 135, 0, 0, 0, 0, 0, 0]
+
+    def shuf(seq):
+        # Shuffle a copy of the row template to build dummy board data
+        random.seed(10)
+        s = copy.deepcopy(seq)
+        random.shuffle(s)
+        return s
+
+    chess_board = nd.array(list(map(lambda index: shuf(initlist), range(19))))
+    chess_board = [chess_board]
+    # Sparse target distribution over the 361 intersections
+    poss = nd.zeros(shape=(361,), dtype="float32")
+    poss[0] = 0.0065
+    poss[1] = 0.0025
+    poss[25] = 0.00013
+    poss[34] = 0.0012
+    poss[44] = 0.0011
+    poss[90] = 0.001023
+    poss[100] = 0.00023
+    possibilities = [poss]
+    TrainingModel(chess_board, weights, possibilities)
+# See PyCharm help at https://www.jetbrains.com/help/pycharm/
diff --git a/themodeldesign.png b/themodeldesign.png
new file mode 100644
index 0000000..dcbece5
Binary files /dev/null and b/themodeldesign.png differ
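main.py clips gradients by their global L2 norm before each optimizer step: when the combined norm of all gradients exceeds theta, every gradient is scaled by theta / norm, so the clipped global norm is exactly theta. A standalone check of grad_clipping, assuming MXNet 1.x (the 3-4-5 gradient values are made up so the global norm comes out to exactly 5):

    from mxnet import nd
    from main import grad_clipping

    params = [nd.zeros(3), nd.zeros(2)]
    for p in params:
        p.attach_grad()
    params[0].grad[:] = nd.array([3.0, 0.0, 0.0])
    params[1].grad[:] = nd.array([0.0, 4.0])  # global norm = 5.0
    grad_clipping(params, 1e-2, None)
    norm = sum((p.grad ** 2).sum() for p in params).sqrt()
    print(norm)  # ~= 0.01 after clipping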