Skip to content

Commit 8167a1b

Browse files
committed
update
1 parent 8a604e9 commit 8167a1b

File tree

121 files changed

+13088
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

121 files changed

+13088
-0
lines changed
2.63 KB
Binary file not shown.
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#include <cstdio>
2+
#include <cstdlib>
3+
#include <cstring>
4+
#include <iostream>
5+
#include <algorithm>
6+
#include <functional>
7+
#include <array>
8+
using namespace std;
9+
const int dim = 2;
10+
using vec = array<double, dim>;
11+
/*
12+
* grad: 计算梯度的函数
13+
* pos: 搜索的起始点
14+
* lr: 学习率(移动的步长)
15+
* decay: 学习率的衰减指数
16+
* exit: 当学习率小于exit时算法结束
17+
* 注意该算法求出的是 函数的极**小**值点
18+
*/
19+
vec Adam(function<vec(vec)> grad, vec pos, double lr, double decay, double exit,
20+
const double beta1 = 0.9, const double beta2 = 0.9) {
21+
double pw1 = 1, pw2 = 1;
22+
vec m = {}, v = {};
23+
for (int t = 1; lr >= exit; ++t) {
24+
vec g = grad(pos);
25+
pw1 *= beta1;
26+
pw2 *= beta2;
27+
for (int i = 0; i < dim; ++i) {
28+
m[i] = m[i] * beta1 + g[i] * (1 - beta1);
29+
v[i] = v[i] * beta2 + g[i] * g[i] * (1 - beta2);
30+
double update = m[i] / (1 - pw1);
31+
double factor = lr / (sqrt(v[i] / (1 - pw2)) + 1e-8);
32+
pos[i] -= update * factor;
33+
}
34+
lr *= decay;
35+
}
36+
return pos;
37+
}
38+
39+
int main() {
40+
auto grad = [](vec A) {
41+
double x = A[0], y = A[1];
42+
return vec{ 2 * y + -4 / (x * x), 2 * x + -4 / (y * y) };
43+
};
44+
auto ans = Adam(grad, vec{ 1, 1 }, 0.01, 0.999, 1e-8);
45+
printf("%.10f %.10f\n", ans[0], ans[1]); //1.2599210498948732
46+
return 0;
47+
}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#include <cstdio>
2+
#include <cstdlib>
3+
#include <cstring>
4+
#include <iostream>
5+
#include <algorithm>
6+
#include <random>
7+
#include <tuple>
8+
#include <vector>
9+
#include <map>
10+
using namespace std;
11+
const int nrow = 10, ncol = 10, max_state = nrow * ncol, max_action = 4;
12+
const int dr[4] = { -1, 1, 0, 0 };
13+
const int dc[4] = { 0, 0, -1, 1 };
14+
struct CliffWalkingEnv {
15+
char s[11][11];
16+
int x, y;
17+
vector<pair<int, int>> path;
18+
CliffWalkingEnv() {
19+
reset();
20+
}
21+
int reset() {
22+
x = 0, y = 0;
23+
path.clear();
24+
path.emplace_back(x, y);
25+
memset(s, 0, sizeof(s));
26+
for (int i = 0; i < nrow; ++i) {
27+
for (int j = 0; j < ncol; ++j) {
28+
s[i][j] = '.';
29+
}
30+
}
31+
for (int i = 0; i < 5; ++i) {
32+
for (int j = 1; j < ncol - 1; ++j) {
33+
s[i][j] = '#';
34+
}
35+
}
36+
return x * ncol + y;
37+
}
38+
void print() {
39+
char t[11][11];
40+
memcpy(t, s, sizeof(s));
41+
for (auto [x, y] : path) {
42+
if (t[x][y] >= '1' && t[x][y] < '9')
43+
t[x][y] += 1;
44+
else
45+
t[x][y] = '1';
46+
}
47+
for (int i = 0; i < nrow; ++i)
48+
printf("%s\n", t[i]);
49+
printf("\n");
50+
}
51+
tuple<int, int, int> step(int number) {
52+
int nx = x + dr[number], ny = y + dc[number];
53+
if (nx >= 0 && nx < nrow && ny >= 0 && ny < ncol)
54+
x = nx, y = ny;
55+
int reward = -1, done = false;
56+
if (s[x][y] == '#') {
57+
reward = -100;
58+
done = true;
59+
}
60+
if (x == 0 && y == ncol - 1)
61+
done = true;
62+
path.emplace_back(x, y);
63+
return make_tuple(x * ncol + y, reward, done);
64+
}
65+
} env;
66+
67+
const double alpha = 1e-1; /*学习率*/
68+
const double gamma = 0.9; /*折扣因子*/
69+
const double epsilon = 1e-2; /*epsilon-greedy*/
70+
const int N = 10; //Q-Planning的次数
71+
const int max_episode = 200; //训练多少轮
72+
uniform_real_distribution<double> p;
73+
uniform_int_distribution<int> d;
74+
default_random_engine e;
75+
double Q[max_state][max_action];
76+
map<pair<int, int>, int> id;
77+
vector<tuple<int, int, int, int>> model;
78+
int epsilon_greedy(int state) { /*基于epsilon-greedy选择动作*/
79+
if (p(e) < epsilon)
80+
return d(e) % max_action;
81+
return max_element(Q[state], Q[state] + max_action) - Q[state];
82+
}
83+
void learn(int s0, int a0, int r, int s1) {
84+
auto td_error = r + gamma * *max_element(Q[s1], Q[s1] + max_action) - Q[s0][a0];
85+
Q[s0][a0] += alpha * td_error;
86+
}
87+
void update(int s0, int a0, int r, int s1) {
88+
learn(s0, a0, r, s1);
89+
pair<int, int> pr(s0, a0);
90+
if (!id.count(pr)) {
91+
id[pr] = model.size();
92+
model.emplace_back(s0, a0, r, s1);
93+
}
94+
else {
95+
model[id[pr]] = make_tuple(s0, a0, r, s1);
96+
}
97+
for (int i = 0; i < N; ++i) {
98+
int idx = d(e) % model.size();
99+
auto [s0, a0, r, s1] = model[idx];
100+
learn(s0, a0, r, s1);
101+
}
102+
}
103+
void DynaQ() {
104+
for (int i = 0; i < max_episode; ++i) {
105+
int state = env.reset();
106+
for (;;) {
107+
int action = epsilon_greedy(state);
108+
auto [next_state, reward, done] = env.step(action);
109+
update(state, action, reward, next_state);
110+
state = next_state;
111+
if (done)
112+
break;
113+
}
114+
env.print();
115+
}
116+
}
117+
int main() {
118+
DynaQ();
119+
return 0;
120+
}
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
#include <cstdio>
2+
#include <cstdlib>
3+
#include <cstring>
4+
#include <iostream>
5+
#include <algorithm>
6+
#include <vector>
7+
#include <array>
8+
#include <cmath>
9+
#include <ctime>
10+
using namespace std;
11+
const int maxn = 210000;
12+
const int maxdim = 1001;
13+
const double eps = 1e-8;
14+
using matrix = double[maxdim][maxdim];
15+
using vec = array<double, maxdim>;
16+
using pair_t = pair<double, vec>;
17+
struct PCA {
18+
matrix A, V;
19+
int column[maxdim], n;
20+
void update(int r, int c, double v) {
21+
A[r][c] = v;
22+
if (column[r] == c || fabs(A[r][c]) > fabs(A[r][column[r]])) {
23+
for (int i = 0; i < n; ++i) if (i != r)
24+
if (fabs(A[r][i]) > fabs(A[r][column[r]]))
25+
column[r] = i;
26+
}
27+
}
28+
void Jacobi() {
29+
for (int i = 0; i < n; ++i) {
30+
for (int j = 0; j < n; ++j)
31+
V[i][j] = 0;
32+
V[i][i] = 1;
33+
column[i] = (i == 0 ? 1 : 0);
34+
}
35+
for (int i = 0; i < n; ++i)
36+
for (int j = 0; j < n; ++j)
37+
if (j != i && fabs(A[i][j]) > fabs(A[i][column[i]]))
38+
column[i] = j;
39+
for (int T = 0; ; ++T) { //迭代次数限制
40+
int x, y;
41+
double val = 0;
42+
for (int i = 0; i < n; ++i)
43+
if (fabs(A[i][column[i]]) > val)
44+
val = fabs(A[i][column[i]]), x = i, y = column[i];
45+
if (val < eps) //精度限制
46+
break;
47+
double phi = atan2(-2 * A[x][y], A[y][y] - A[x][x]) / 2;
48+
double sinp = sin(phi), cosp = cos(phi);
49+
for (int i = 0; i < n; ++i) if (i != x && i != y) {
50+
double a = A[x][i] * cosp + A[y][i] * sinp;
51+
double b = A[x][i] * -sinp + A[y][i] * cosp;
52+
update(x, i, a);
53+
update(y, i, b);
54+
}
55+
for (int i = 0; i < n; ++i) if (i != x && i != y) {
56+
double a = A[i][x] * cosp + A[i][y] * sinp;
57+
double b = A[i][x] * -sinp + A[i][y] * cosp;
58+
update(i, x, a);
59+
update(i, y, b);
60+
}
61+
for (int i = 0; i < n; ++i) {
62+
double a = V[i][x] * cosp + V[i][y] * sinp;
63+
double b = V[i][x] * -sinp + V[i][y] * cosp;
64+
V[i][x] = a, V[i][y] = b;
65+
}
66+
double a = A[x][x] * cosp * cosp + A[y][y] * sinp * sinp + 2 * A[x][y] * cosp * sinp;
67+
double b = A[x][x] * sinp * sinp + A[y][y] * cosp * cosp - 2 * A[x][y] * cosp * sinp;
68+
double tmp = (A[y][y] - A[x][x]) * sin(2 * phi) / 2 + A[x][y] * cos(2 * phi);
69+
update(x, y, tmp);
70+
update(y, x, tmp);
71+
A[x][x] = a, A[y][y] = b;
72+
}
73+
}
74+
//a 为输入向量组
75+
//n 为向量的维数
76+
//center 指针用来保存输入向量组的中心点
77+
//返回特征值和特征向量的pair,按照特征值从大到小排序
78+
//特征值是各个点在对应特征向量方向的坐标平方和,除以(a.size() - 1)为方差。
79+
auto solve(vector<vec> a, int n, vec* center = nullptr) {
80+
this->n = n;
81+
vec s = {};
82+
for (int i = 0; i < a.size(); ++i)
83+
for (int j = 0; j < n; ++j)
84+
s[j] += a[i][j];
85+
for (int j = 0; j < n; ++j)
86+
s[j] /= a.size();
87+
for (int i = 0; i < a.size(); ++i)
88+
for (int j = 0; j < n; ++j)
89+
a[i][j] -= s[j];
90+
if (center) *center = s;
91+
for (int i = 0; i < n; ++i) {
92+
for (int j = 0; j < n; ++j) {
93+
A[i][j] = 0;
94+
for (int k = 0; k < a.size(); ++k)
95+
A[i][j] += a[k][i] * a[k][j];
96+
}
97+
}
98+
Jacobi();
99+
vector<pair_t> result;
100+
for (int i = 0; i < n; ++i)
101+
result.emplace_back(A[i][i], vec());
102+
for (int i = 0; i < n; ++i)
103+
for (int j = 0; j < n; ++j)
104+
result[i].second[j] = V[j][i];
105+
sort(result.begin(), result.end(), greater<pair_t>());
106+
return result;
107+
}
108+
}pca;
109+
int main() { //1329070654.526
110+
freopen("in.txt", "r", stdin);
111+
vector<vec> a;
112+
int n, m;
113+
scanf("%d %d", &n, &m);
114+
for (int i = 0; i < n; ++i) {
115+
vec v = {};
116+
for (int j = 0; j < m; ++j)
117+
scanf("%lf", &v[j]);
118+
a.push_back(v);
119+
}
120+
auto now = clock();
121+
auto result = pca.solve(a, m);
122+
for (int i = 0; i < m; ++i)
123+
printf("%.3f\n", result[i].first);
124+
printf("time: %f\n", double(clock() - now) / CLOCKS_PER_SEC);
125+
return 0;
126+
}

0 commit comments

Comments
 (0)