import os
import django
import sys
import re

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'file_invalidation_server.settings')
django.setup()
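
# Post-processes completed file-invalidation Kubernetes Jobs: reads each Job's
# pod logs, extracts the DIDs invalidated on Rucio and DBS, and records the
# outcome in the FileInvalidationRequests table.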

from kubernetes import client, config
from fi_manager.models import FileInvalidationRequests
import logging

logging.basicConfig(level=logging.INFO, format='(%(asctime)s) [%(name)s] %(levelname)s: %(message)s')
logger = logging.getLogger(__name__)

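# A minimal sketch for running this script outside the cluster (a local
# development assumption, not part of the deployed flow): swap the in-cluster
# config for a local kubeconfig before calling fetch_and_process().
#
#     from kubernetes import config
#     config.load_kube_config()        # reads ~/.kube/config
#     # config.load_incluster_config() # service-account token inside a pod
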
def fetch_and_process():
    config.load_incluster_config()
    batch_v1 = client.BatchV1Api()
    core_v1 = client.CoreV1Api()

    namespace = 'file-invalidation-tool'
    jobs = batch_v1.list_namespaced_job(namespace=namespace)

    for job in jobs.items:
        job_name = job.metadata.name
        if not job.status.conditions:
            continue

        # list_namespaced_job only returns Job objects, so job.kind is never
        # "CronJob"; check the owner references instead, and skip the
        # log-processor's own jobs by name
        owner_kinds = {ref.kind for ref in (job.metadata.owner_references or [])}
        if 'CronJob' in owner_kinds or 'jobs-log-processor' in job_name:
            continue

        condition_types = {cond.type: cond.status for cond in job.status.conditions}
        if condition_types.get("Failed") == "True":
            logger.warning(f"Job {job_name} failed")
        elif condition_types.get("Complete") == "True":
            # Get the pods created by this job
            pods = core_v1.list_namespaced_pod(
                namespace=namespace,
                label_selector=f"job-name={job_name}"
            )

            # Use the most recent pod
            try:
                latest_pod = sorted(
                    pods.items,
                    key=lambda pod: pod.status.start_time or pod.metadata.creation_timestamp,
                    reverse=True
                )[0]
            except IndexError:
                logger.error(f"There are no pods under the {job_name} job name.")
                continue

            pod_name = latest_pod.metadata.name
            logger.info(f"Pod name: {pod_name}")
            logs = core_v1.read_namespaced_pod_log(pod_name, namespace=namespace)
            try:
                rucio_invalidated_dids, dbs_invalidated_dids, dry_run = parse_job_logs(logs)
                logger.info(logs)
                logger.info(f"Job {job_name} has invalidated {len(rucio_invalidated_dids)} DIDs on Rucio and {len(dbs_invalidated_dids)} DIDs on DBS.")
                logger.info(f"Job {job_name} has invalidated the following DIDs on Rucio: {rucio_invalidated_dids}")
                logger.info(f"Job {job_name} has invalidated the following DIDs on DBS: {dbs_invalidated_dids}")
                if rucio_invalidated_dids or dbs_invalidated_dids:
                    update_database(job_name, rucio_invalidated_dids, dbs_invalidated_dids, dry_run)
                    logger.info(f"Job {job_name} has completed and the DIDs have been updated.")
                else:
                    raise Exception(f"Job {job_name} did not invalidate any DIDs on Rucio or DBS.")
            except Exception as e:
                logger.error(f"Job {pod_name} has failed with error: {e}")
                update_database_for_failed_job(pod_name, f'Job {pod_name} has failed with error: {str(e)}\n{logs}')

            delete_opts = client.V1DeleteOptions(propagation_policy='Foreground')

            # Deletion is disabled while developing; re-enable to clean up
            # processed jobs:
            # batch_v1.delete_namespaced_job(
            #     name=job_name,
            #     namespace=namespace,
            #     body=delete_opts)
            logger.info(f"Job {job_name} would be deleted but is being kept for dev purposes.")

def parse_job_logs(logs: str):
    if "Error running shell script" in logs:
        raise Exception("Job has failed with error: Error running shell script")
    # A dry run only reports what *would* be declared, so detect it from the wording
    dry_run = 'Would declare file' in logs
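
    # Illustrative examples of the log lines the patterns below target (the
    # paths are made up; the fixed text comes from the patterns themselves):
    #   Would declare file /store/data/Run2023C/Muon/v1/0000/file-name.root as bad at ...
    #   Declared file /store/data/Run2023C/Muon/v1/0000/file-name.root as bad at ...
    #   Invalidation OK for file: /store/data/Run2023C/Muon/v1/0000/file-name.root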

    if dry_run:
        rucio_invalidated_files = re.findall(pattern=r'(?:Would declare file) (/[\w/\-]+\.root) as bad at',
                                             string=logs)

        dbs_invalidated_files = re.findall(pattern=r'(?:Would invalidate file on DBS:) (/[\w/\-]+\.root)\s',
                                           string=logs)
    else:
        rucio_invalidated_files = re.findall(pattern=r'(?:Declared file) (/[\w/\-]+\.root) as bad at',
                                             string=logs)

        dbs_invalidated_files = re.findall(pattern=r'(?:Invalidation OK for file:) (/[\w/\-]+\.root)\s',
                                           string=logs)

        dbs_invalidated_dataset = re.findall(pattern=r'(?:Invalidation OK for dataset:) (/[\w/\-]+\.root)\s',
                                             string=logs)

        if dbs_invalidated_dataset:
            # Assumes that for datasets, DBS dataset invalidation implies Rucio file invalidation.
            # list.append() would nest the list (and return None); extend() merges the entries.
            dbs_invalidated_files.extend(dbs_invalidated_dataset)
            rucio_invalidated_files.extend(dbs_invalidated_dataset)

    return rucio_invalidated_files, dbs_invalidated_files, dry_run

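# A quick sanity check of the parser (hypothetical log excerpt, shown as a
# comment to keep the module side-effect free):
#   rucio, dbs, dry = parse_job_logs("Declared file /store/test/f-1.root as bad at RSE_X")
#   -> rucio == ['/store/test/f-1.root'], dbs == [], dry is False
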
def update_database(job_name, rucio_list, dbs_list, dry_run):
    globally_invalidated_dids = set(rucio_list) & set(dbs_list)
    job_id = re.findall(pattern=r'file-invalidation-job-(\w{8})', string=job_name)[0]

    only_rucio_invalidated = FileInvalidationRequests.objects.filter(job_id=job_id, file_name__in=rucio_list)
    only_rucio_invalidated.update(status='success', mode='rucio_only', dry_run=dry_run)

    only_dbs_invalidated = FileInvalidationRequests.objects.filter(job_id=job_id, file_name__in=dbs_list)
    only_dbs_invalidated.update(status='success', mode='dbs_only', dry_run=dry_run)

    # Updated last so rows invalidated on both systems end up with mode='global',
    # overriding the 'rucio_only'/'dbs_only' values set above
    global_invalidated = FileInvalidationRequests.objects.filter(job_id=job_id, file_name__in=globally_invalidated_dids)
    global_invalidated.update(status='success', mode='global', dry_run=dry_run)

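# Called with the *pod* name (the job name plus the pod's random suffix), which
# is why the pattern below expects characters after the 8-character job id.
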
def update_database_for_failed_job(job_name, logs):
    job_id = re.findall(pattern=r'file-invalidation-job-(\w{8})-\w', string=job_name)[0]
    failed_invalidation = FileInvalidationRequests.objects.filter(job_id=job_id)
    failed_invalidation.update(status='failed', logs=logs)

if __name__ == "__main__":
    fetch_and_process()