55 "fmt"
66 "net/http"
77 "os"
8+ "reflect"
89 "strings"
910 "time"
1011
@@ -13,21 +14,20 @@ import (
1314 v1 "k8s.io/api/core/v1"
1415 k8serrors "k8s.io/apimachinery/pkg/api/errors"
1516 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
17+ "k8s.io/apimachinery/pkg/runtime/schema"
18+ "k8s.io/apimachinery/pkg/util/wait"
19+ "k8s.io/client-go/discovery"
20+ "k8s.io/client-go/tools/clientcmd"
1621
1722 configv1 "github.com/openshift/api/config/v1"
1823 configv1client "github.com/openshift/client-go/config/clientset/versioned/typed/config/v1"
19- clusteroperatorv1helpers "github.com/openshift/library-go/pkg/config/clusteroperator/v1helpers"
20- operatorv1helpers "github.com/openshift/library-go/pkg/operator/v1helpers"
2124 "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client"
2225 "github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install"
2326 "github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/olm"
2427 "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient"
2528 "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/signals"
2629 "github.com/operator-framework/operator-lifecycle-manager/pkg/metrics"
2730 olmversion "github.com/operator-framework/operator-lifecycle-manager/pkg/version"
28- "k8s.io/apimachinery/pkg/runtime/schema"
29- "k8s.io/client-go/discovery"
30- "k8s.io/client-go/tools/clientcmd"
3131)
3232
3333const (
@@ -128,98 +128,198 @@ func main() {
128128 http .Handle ("/metrics" , promhttp .Handler ())
129129 go http .ListenAndServe (":8081" , nil )
130130
131- ready , done := operator .Run (stopCh )
131+ ready , done , sync := operator .Run (stopCh )
132132 <- ready
133133
134134 if * writeStatusName != "" {
135- opStatusGV := schema.GroupVersion {
136- Group : "config.openshift.io" ,
137- Version : "v1" ,
135+ monitorClusterStatus (sync , stopCh , opClient , configClient )
136+ }
137+
138+ <- done
139+ }
140+
141+ func monitorClusterStatus (syncCh chan error , stopCh <- chan struct {}, opClient operatorclient.ClientInterface , configClient configv1client.ConfigV1Interface ) {
142+ var (
143+ syncs int
144+ successfulSyncs int
145+ hasClusterOperator bool
146+ )
147+ go wait .Until (func () {
148+ // slow poll until we see a cluster operator API, which could be never
149+ if ! hasClusterOperator {
150+ opStatusGV := schema.GroupVersion {
151+ Group : "config.openshift.io" ,
152+ Version : "v1" ,
153+ }
154+ err := discovery .ServerSupportsVersion (opClient .KubernetesInterface ().Discovery (), opStatusGV )
155+ if err != nil {
156+ log .Infof ("ClusterOperator api not present, skipping update (%v)" , err )
157+ time .Sleep (time .Minute )
158+ return
159+ }
160+ hasClusterOperator = true
138161 }
139- err := discovery .ServerSupportsVersion (opClient .KubernetesInterface ().Discovery (), opStatusGV )
140- if err != nil {
141- log .Infof ("ClusterOperator api not present, skipping update (%v)" , err )
142- } else {
143- existing , err := configClient .ClusterOperators ().Get (* writeStatusName , metav1.GetOptions {})
144- if k8serrors .IsNotFound (err ) {
145- log .Info ("Existing operator status not found, creating" )
146- created , err := configClient .ClusterOperators ().Create (& configv1.ClusterOperator {
147- ObjectMeta : metav1.ObjectMeta {
148- Name : * writeStatusName ,
149- },
150- })
151- if err != nil {
152- log .Fatalf ("ClusterOperator create failed: %v\n " , err )
162+
163+ // Sample the sync channel and see whether we're successfully retiring syncs as a
164+ // proxy for "working" (we can't know when we hit level, but we can at least verify
165+ // we are seeing some syncs succeeding). Once we observe at least one successful
166+ // sync we can begin reporting available and level.
167+ select {
168+ case err , ok := <- syncCh :
169+ if ! ok {
170+ // syncCh should only close if the Run() loop exits
171+ time .Sleep (5 * time .Second )
172+ log .Fatalf ("Status sync channel closed but process did not exit in time" )
173+ }
174+ syncs ++
175+ if err == nil {
176+ successfulSyncs ++
177+ }
178+ // grab any other sync events that have accumulated
179+ for len (syncCh ) > 0 {
180+ if err := <- syncCh ; err == nil {
181+ successfulSyncs ++
153182 }
183+ syncs ++
184+ }
185+ // if we haven't yet accumulated enough syncs, wait longer
186+ // TODO: replace these magic numbers with a better measure of syncs across all queueInformers
187+ if successfulSyncs < 5 || syncs < 10 {
188+ log .Printf ("Waiting to observe more successful syncs" )
189+ return
190+ }
191+ }
154192
155- created .Status = configv1.ClusterOperatorStatus {
193+ // create the cluster operator in an initial state if it does not exist
194+ existing , err := configClient .ClusterOperators ().Get (* writeStatusName , metav1.GetOptions {})
195+ if k8serrors .IsNotFound (err ) {
196+ log .Info ("Existing operator status not found, creating" )
197+ created , createErr := configClient .ClusterOperators ().Create (& configv1.ClusterOperator {
198+ ObjectMeta : metav1.ObjectMeta {
199+ Name : * writeStatusName ,
200+ },
201+ Status : configv1.ClusterOperatorStatus {
156202 Conditions : []configv1.ClusterOperatorStatusCondition {
157203 configv1.ClusterOperatorStatusCondition {
158204 Type : configv1 .OperatorProgressing ,
159- Status : configv1 .ConditionFalse ,
160- Message : fmt .Sprintf ("Done deploying %s. " , olmversion .OLMVersion ),
205+ Status : configv1 .ConditionTrue ,
206+ Message : fmt .Sprintf ("Installing %s " , olmversion .OLMVersion ),
161207 LastTransitionTime : metav1 .Now (),
162208 },
163209 configv1.ClusterOperatorStatusCondition {
164210 Type : configv1 .OperatorFailing ,
165211 Status : configv1 .ConditionFalse ,
166- Message : fmt .Sprintf ("Done deploying %s." , olmversion .OLMVersion ),
167212 LastTransitionTime : metav1 .Now (),
168213 },
169214 configv1.ClusterOperatorStatusCondition {
170215 Type : configv1 .OperatorAvailable ,
171- Status : configv1 .ConditionTrue ,
172- Message : fmt .Sprintf ("Done deploying %s." , olmversion .OLMVersion ),
216+ Status : configv1 .ConditionFalse ,
173217 LastTransitionTime : metav1 .Now (),
174218 },
175219 },
176- Versions : []configv1.OperandVersion {{
220+ },
221+ })
222+ if createErr != nil {
223+ log .Errorf ("Failed to create cluster operator: %v\n " , createErr )
224+ return
225+ }
226+ existing = created
227+ err = nil
228+ }
229+ if err != nil {
230+ log .Errorf ("Unable to retrieve cluster operator: %v" , err )
231+ return
232+ }
233+
234+ // update the status with the appropriate state
235+ previousStatus := existing .Status .DeepCopy ()
236+ switch {
237+ case successfulSyncs > 0 :
238+ setOperatorStatusCondition (& existing .Status .Conditions , configv1.ClusterOperatorStatusCondition {
239+ Type : configv1 .OperatorFailing ,
240+ Status : configv1 .ConditionFalse ,
241+ })
242+ setOperatorStatusCondition (& existing .Status .Conditions , configv1.ClusterOperatorStatusCondition {
243+ Type : configv1 .OperatorProgressing ,
244+ Status : configv1 .ConditionFalse ,
245+ Message : fmt .Sprintf ("Deployed %s" , olmversion .OLMVersion ),
246+ })
247+ setOperatorStatusCondition (& existing .Status .Conditions , configv1.ClusterOperatorStatusCondition {
248+ Type : configv1 .OperatorAvailable ,
249+ Status : configv1 .ConditionTrue ,
250+ })
251+ // we set the versions array when all the latest code is deployed and running - in this case,
252+ // the sync method is responsible for guaranteeing that happens before it returns nil
253+ if version := os .Getenv ("RELEASE_VERSION" ); len (version ) > 0 {
254+ existing .Status .Versions = []configv1.OperandVersion {
255+ {
177256 Name : "operator" ,
178- Version : olmversion . Full () ,
179- }} ,
180- }
181- _ , err = configClient . ClusterOperators (). UpdateStatus ( created )
182- if err != nil {
183- log . Fatalf ( "ClusterOperator update status failed: %v" , err )
257+ Version : version ,
258+ },
259+ {
260+ Name : "operator-lifecycle-manager" ,
261+ Version : olmversion . OLMVersion ,
262+ },
184263 }
185- } else if err != nil {
186- log .Fatalf ("ClusterOperators get failed: %v" , err )
187264 } else {
188- clusteroperatorv1helpers .SetStatusCondition (& existing .Status .Conditions , configv1.ClusterOperatorStatusCondition {
189- Type : configv1 .OperatorProgressing ,
190- Status : configv1 .ConditionFalse ,
191- Message : fmt .Sprintf ("Done deploying %s." , olmversion .OLMVersion ),
192- LastTransitionTime : metav1 .Now (),
193- })
194- clusteroperatorv1helpers .SetStatusCondition (& existing .Status .Conditions , configv1.ClusterOperatorStatusCondition {
195- Type : configv1 .OperatorFailing ,
196- Status : configv1 .ConditionFalse ,
197- Message : fmt .Sprintf ("Done deploying %s." , olmversion .OLMVersion ),
198- LastTransitionTime : metav1 .Now (),
199- })
200- clusteroperatorv1helpers .SetStatusCondition (& existing .Status .Conditions , configv1.ClusterOperatorStatusCondition {
201- Type : configv1 .OperatorAvailable ,
202- Status : configv1 .ConditionTrue ,
203- Message : fmt .Sprintf ("Done deploying %s." , olmversion .OLMVersion ),
204- LastTransitionTime : metav1 .Now (),
205- })
206-
207- olmOperandVersion := configv1.OperandVersion {Name : "operator" , Version : olmversion .Full ()}
208- // look for operator version, even though in OLM's case should only be one
209- for _ , item := range existing .Status .Versions {
210- if item .Name == "operator" && item != olmOperandVersion {
211- // if a cluster wide upgrade has occurred, hopefully any existing operator statuses have been deleted
212- log .Infof ("Updating version from %v to %v\n " , item .Version , olmversion .Full ())
213- }
214- }
215- operatorv1helpers .SetOperandVersion (& existing .Status .Versions , olmOperandVersion )
216- _ , err = configClient .ClusterOperators ().UpdateStatus (existing )
217- if err != nil {
218- log .Fatalf ("ClusterOperator update status failed: %v" , err )
219- }
265+ existing .Status .Versions = nil
266+ }
267+ default :
268+ setOperatorStatusCondition (& existing .Status .Conditions , configv1.ClusterOperatorStatusCondition {
269+ Type : configv1 .OperatorFailing ,
270+ Status : configv1 .ConditionTrue ,
271+ Message : "Waiting for updates to take effect" ,
272+ })
273+ setOperatorStatusCondition (& existing .Status .Conditions , configv1.ClusterOperatorStatusCondition {
274+ Type : configv1 .OperatorProgressing ,
275+ Status : configv1 .ConditionFalse ,
276+ Message : fmt .Sprintf ("Waiting to see update %s succeed" , olmversion .OLMVersion ),
277+ })
278+ // TODO: use % errors within a window to report available
279+ }
280+
281+ // update the status
282+ if ! reflect .DeepEqual (previousStatus , & existing .Status ) {
283+ if _ , err := configClient .ClusterOperators ().UpdateStatus (existing ); err != nil {
284+ log .Errorf ("Unable to update cluster operator status: %v" , err )
220285 }
221286 }
287+
288+ // if we've reported success, we can sleep longer, otherwise we want to keep watching for
289+ // successful
290+ if successfulSyncs > 0 {
291+ time .Sleep (5 * time .Minute )
292+ }
293+
294+ }, 5 * time .Second , stopCh )
295+ }
296+
297+ func setOperatorStatusCondition (conditions * []configv1.ClusterOperatorStatusCondition , newCondition configv1.ClusterOperatorStatusCondition ) {
298+ if conditions == nil {
299+ conditions = & []configv1.ClusterOperatorStatusCondition {}
300+ }
301+ existingCondition := findOperatorStatusCondition (* conditions , newCondition .Type )
302+ if existingCondition == nil {
303+ newCondition .LastTransitionTime = metav1 .NewTime (time .Now ())
304+ * conditions = append (* conditions , newCondition )
305+ return
222306 }
223307
224- <- done
308+ if existingCondition .Status != newCondition .Status {
309+ existingCondition .Status = newCondition .Status
310+ existingCondition .LastTransitionTime = newCondition .LastTransitionTime
311+ }
312+
313+ existingCondition .Reason = newCondition .Reason
314+ existingCondition .Message = newCondition .Message
315+ }
316+
317+ func findOperatorStatusCondition (conditions []configv1.ClusterOperatorStatusCondition , conditionType configv1.ClusterStatusConditionType ) * configv1.ClusterOperatorStatusCondition {
318+ for i := range conditions {
319+ if conditions [i ].Type == conditionType {
320+ return & conditions [i ]
321+ }
322+ }
323+
324+ return nil
225325}
0 commit comments