Skip to content

Commit 2b55cda

Browse files
burmanm and emerkle826 committed
Add notification details to the repair process (#338)
* Add repair job version which follows notifications
* Add new getStatusChanges and message
* Fix formatting
* Add tests for the async endpoint in v1 also
* Change setStatusChange
* Fix test indentation
* Rename RpcParam
* Fix rebase
* Add back the payload in the IT test to the request
* Update openapi.json, make special case for return id 0 to the Rpc call - should fix the test also which can not be run otherwise on single node cluster
* Disable the test, throw exception if Cassandra refuses to repair
* Fix repair test to run repair for a keyspace with RF 2 (#351)
* Update openapi.json after rebase

---------

Co-authored-by: Erik Merkle <erik.merkle@datastax.com>
1 parent 8be160c commit 2b55cda

File tree

9 files changed

+412
-23
lines changed

9 files changed

+412
-23
lines changed

management-api-agent-common/src/main/java/com/datastax/mgmtapi/NodeOpsProvider.java

Lines changed: 74 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.util.concurrent.CompletableFuture;
3939
import java.util.concurrent.ExecutionException;
4040
import java.util.stream.Collectors;
41+
import javax.management.NotificationFilter;
4142
import javax.management.openmbean.CompositeDataSupport;
4243
import javax.management.openmbean.TabularData;
4344
import org.apache.cassandra.auth.AuthenticatedUser;
@@ -52,6 +53,7 @@
5253
import org.apache.cassandra.repair.messages.RepairOption;
5354
import org.apache.cassandra.service.StorageProxy;
5455
import org.apache.cassandra.utils.Pair;
56+
import org.apache.cassandra.utils.progress.ProgressEventType;
5557
import org.slf4j.Logger;
5658
import org.slf4j.LoggerFactory;
5759

@@ -733,10 +735,11 @@ public void clearSnapshots(
733735
}
734736

735737
@Rpc(name = "repair")
736-
public void repair(
738+
public String repair(
737739
@RpcParam(name = "keyspaceName") String keyspace,
738740
@RpcParam(name = "tables") List<String> tables,
739-
@RpcParam(name = "full") Boolean full)
741+
@RpcParam(name = "full") Boolean full,
742+
@RpcParam(name = "notifications") boolean notifications)
740743
throws IOException {
741744
// At least one keyspace is required
742745
if (keyspace != null) {
@@ -756,8 +759,76 @@ public void repair(
756759
// incremental repairs will fail if parallelism is not set
757760
repairSpec.put(RepairOption.PARALLELISM_KEY, RepairParallelism.PARALLEL.getName());
758761
}
759-
ShimLoader.instance.get().getStorageService().repairAsync(keyspace, repairSpec);
762+
763+
// Cassandra's repair API is already asynchronous, so we don't need our executor interface for
764+
// this.
765+
final int repairJobId =
766+
ShimLoader.instance.get().getStorageService().repairAsync(keyspace, repairSpec);
767+
768+
if (!notifications) {
769+
return Integer.valueOf(repairJobId).toString();
770+
}
771+
772+
String jobId = String.format("repair-%d", repairJobId);
773+
final Job job = service.createJob("repair", jobId);
774+
775+
if (repairJobId == 0) {
776+
// Job is done and won't continue
777+
job.setStatusChange(ProgressEventType.COMPLETE, "");
778+
job.setStatus(Job.JobStatus.COMPLETED);
779+
job.setFinishedTime(System.currentTimeMillis());
780+
service.updateJob(job);
781+
return job.getJobId();
782+
}
783+
784+
ShimLoader.instance
785+
.get()
786+
.getStorageService()
787+
.addNotificationListener(
788+
(notification, handback) -> {
789+
if (notification.getType().equals("progress")) {
790+
Map<String, Integer> data = (Map<String, Integer>) notification.getUserData();
791+
ProgressEventType progress = ProgressEventType.values()[data.get("type")];
792+
793+
switch (progress) {
794+
case START:
795+
job.setStatusChange(progress, notification.getMessage());
796+
job.setStartTime(System.currentTimeMillis());
797+
break;
798+
case NOTIFICATION:
799+
case PROGRESS:
800+
break;
801+
case ERROR:
802+
case ABORT:
803+
job.setError(new RuntimeException(notification.getMessage()));
804+
job.setStatus(Job.JobStatus.ERROR);
805+
job.setFinishedTime(System.currentTimeMillis());
806+
break;
807+
case SUCCESS:
808+
job.setStatusChange(progress, notification.getMessage());
809+
// SUCCESS / ERROR does not mean the job has completed yet (COMPLETE is that)
810+
break;
811+
case COMPLETE:
812+
job.setStatusChange(progress, notification.getMessage());
813+
job.setStatus(Job.JobStatus.COMPLETED);
814+
job.setFinishedTime(System.currentTimeMillis());
815+
break;
816+
}
817+
service.updateJob(job);
818+
}
819+
},
820+
(NotificationFilter)
821+
notification -> {
822+
final int repairNo =
823+
Integer.parseInt(((String) notification.getSource()).split(":")[1]);
824+
return repairNo == repairJobId;
825+
},
826+
null);
827+
828+
return job.getJobId();
760829
}
830+
831+
throw new RuntimeException("At least one keyspace must be defined");
761832
}
762833

763834
@Rpc(name = "move")

management-api-agent-common/src/main/java/com/datastax/mgmtapi/util/Job.java

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
package com.datastax.mgmtapi.util;
77

88
import com.google.common.annotations.VisibleForTesting;
9-
import java.util.UUID;
9+
import java.util.ArrayList;
10+
import java.util.List;
11+
import org.apache.cassandra.utils.progress.ProgressEventType;
1012

1113
public class Job {
1214
public enum JobStatus {
@@ -19,14 +21,43 @@ public enum JobStatus {
1921
private String jobType;
2022
private JobStatus status;
2123
private long submitTime;
24+
private long startTime;
2225
private long finishedTime;
2326
private Throwable error;
2427

25-
public Job(String jobType) {
28+
public class StatusChange {
29+
ProgressEventType status;
30+
long changeTime;
31+
32+
String message;
33+
34+
public StatusChange(ProgressEventType type, String message) {
35+
changeTime = System.currentTimeMillis();
36+
status = type;
37+
this.message = message;
38+
}
39+
40+
public ProgressEventType getStatus() {
41+
return status;
42+
}
43+
44+
public long getChangeTime() {
45+
return changeTime;
46+
}
47+
48+
public String getMessage() {
49+
return message;
50+
}
51+
}
52+
53+
private List<StatusChange> statusChanges;
54+
55+
/**
 * Creates a job with a caller-supplied identifier. The job starts in {@code WAITING} status with
 * an empty status-change history, and its submit time is set to the current time.
 *
 * @param jobType the type label of the job
 * @param jobId the identifier to assign to the job
 */
public Job(String jobType, String jobId) {
  this.jobId = jobId;
  this.jobType = jobType;
  this.status = JobStatus.WAITING;
  this.statusChanges = new ArrayList<>();
  this.submitTime = System.currentTimeMillis();
}
3162

3263
@VisibleForTesting
@@ -51,6 +82,14 @@ public void setStatus(JobStatus status) {
5182
this.status = status;
5283
}
5384

85+
public void setStatusChange(ProgressEventType type, String message) {
86+
statusChanges.add(new StatusChange(type, message));
87+
}
88+
89+
public List<StatusChange> getStatusChanges() {
90+
return statusChanges;
91+
}
92+
5493
public long getSubmitTime() {
5594
return submitTime;
5695
}
@@ -70,4 +109,8 @@ public Throwable getError() {
70109
public void setError(Throwable error) {
71110
this.error = error;
72111
}
112+
113+
public void setStartTime(long startTime) {
114+
this.startTime = startTime;
115+
}
73116
}

management-api-agent-common/src/main/java/com/datastax/mgmtapi/util/JobExecutor.java

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import com.google.common.cache.Cache;
99
import com.google.common.cache.CacheBuilder;
10+
import java.util.UUID;
1011
import java.util.concurrent.CompletableFuture;
1112
import java.util.concurrent.ExecutorService;
1213
import java.util.concurrent.Executors;
@@ -20,29 +21,40 @@ public class JobExecutor {
2021
public Pair<String, CompletableFuture<Void>> submit(String jobType, Runnable runnable) {
2122
// Where do I create the job details? Here? Add it to the Cache first?
2223
// Update the status on the callbacks and do nothing else?
23-
final Job job = new Job(jobType);
24-
jobCache.put(job.getJobId(), job);
24+
25+
String jobId = UUID.randomUUID().toString();
26+
final Job job = createJob(jobType, jobId);
2527

2628
CompletableFuture<Void> submittedJob =
2729
CompletableFuture.runAsync(runnable, executorService)
2830
.thenAccept(
2931
empty -> {
3032
job.setStatus(Job.JobStatus.COMPLETED);
3133
job.setFinishedTime(System.currentTimeMillis());
32-
jobCache.put(job.getJobId(), job);
34+
updateJob(job);
3335
})
3436
.exceptionally(
3537
t -> {
3638
job.setStatus(Job.JobStatus.ERROR);
3739
job.setError(t);
3840
job.setFinishedTime(System.currentTimeMillis());
39-
jobCache.put(job.getJobId(), job);
41+
updateJob(job);
4042
return null;
4143
});
4244

4345
return Pair.create(job.getJobId(), submittedJob);
4446
}
4547

48+
public Job createJob(String jobType, String jobId) {
49+
final Job job = new Job(jobType, jobId);
50+
jobCache.put(jobId, job);
51+
return job;
52+
}
53+
54+
public void updateJob(Job job) {
55+
jobCache.put(job.getJobId(), job);
56+
}
57+
4658
public Job getJobWithId(String jobId) {
4759
return jobCache.getIfPresent(jobId);
4860
}

management-api-server/doc/openapi.json

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1520,6 +1520,45 @@
15201520
"summary" : "Rebuild data by streaming data from other nodes. This operation returns immediately with a job id."
15211521
}
15221522
},
1523+
"/api/v1/ops/node/repair" : {
1524+
"post" : {
1525+
"operationId" : "repair_1",
1526+
"requestBody" : {
1527+
"content" : {
1528+
"*/*" : {
1529+
"schema" : {
1530+
"$ref" : "#/components/schemas/RepairRequest"
1531+
}
1532+
}
1533+
}
1534+
},
1535+
"responses" : {
1536+
"202" : {
1537+
"content" : {
1538+
"text/plain" : {
1539+
"example" : "repair-1234567",
1540+
"schema" : {
1541+
"type" : "string"
1542+
}
1543+
}
1544+
},
1545+
"description" : "Job ID for successfully scheduled Cassandra repair request"
1546+
},
1547+
"400" : {
1548+
"content" : {
1549+
"text/plain" : {
1550+
"example" : "keyspaceName must be specified",
1551+
"schema" : {
1552+
"type" : "string"
1553+
}
1554+
}
1555+
},
1556+
"description" : "Repair request missing Keyspace name"
1557+
}
1558+
},
1559+
"summary" : "Execute a nodetool repair operation"
1560+
}
1561+
},
15231562
"/api/v1/ops/node/schema/versions" : {
15241563
"get" : {
15251564
"operationId" : "getSchemaVersions",
@@ -1805,6 +1844,12 @@
18051844
"type" : "string",
18061845
"enum" : [ "ERROR", "COMPLETED", "WAITING" ]
18071846
},
1847+
"status_changes" : {
1848+
"type" : "array",
1849+
"items" : {
1850+
"$ref" : "#/components/schemas/StatusChange"
1851+
}
1852+
},
18081853
"submit_time" : {
18091854
"type" : "integer",
18101855
"format" : "int64"
@@ -1951,6 +1996,21 @@
19511996
},
19521997
"required" : [ "entity" ]
19531998
},
1999+
"StatusChange" : {
2000+
"type" : "object",
2001+
"properties" : {
2002+
"change_time" : {
2003+
"type" : "integer",
2004+
"format" : "int64"
2005+
},
2006+
"message" : {
2007+
"type" : "string"
2008+
},
2009+
"status" : {
2010+
"type" : "string"
2011+
}
2012+
}
2013+
},
19542014
"StreamingInfo" : {
19552015
"type" : "object",
19562016
"properties" : {

management-api-server/src/main/java/com/datastax/mgmtapi/resources/NodeOpsResources.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -503,10 +503,11 @@ public Response repair(RepairRequest repairRequest) {
503503
}
504504
app.cqlService.executePreparedStatement(
505505
app.dbUnixSocketFile,
506-
"CALL NodeOps.repair(?, ?, ?)",
506+
"CALL NodeOps.repair(?, ?, ?, ?)",
507507
repairRequest.keyspaceName,
508508
repairRequest.tables,
509-
repairRequest.full);
509+
repairRequest.full,
510+
false);
510511

511512
return Response.ok("OK").build();
512513
});

0 commit comments

Comments
 (0)