linear-b · vim-zz · Jun 5, 2024 · Jun 6, 2024 · Jun 27, 2024
diff --git a/docs/downloads/gitlab/gitstream.cm b/docs/downloads/gitlab/gitstream.cm
@@ -0,0 +1,57 @@
+# -*- mode: yaml -*-
+# This example configuration provides basic automations to get started with gitStream.
+# View the gitStream quickstart for more examples: https://docs.gitstream.cm/examples/
+manifest:
+  version: 1.0
+
+
+automations:
+  # Add a label that indicates how many minutes it will take to review the PR.
+  estimated_time_to_review:
+    if:
+      - true
+    run:
+      - action: add-label@v1
+        args:
+          label: "{{ calc.etr }} min review"
+          color: {{ colors.red if (calc.etr >= 20) else ( colors.yellow if (calc.etr >= 5) else colors.green ) }}
+  # Inform PR authors when they fail to reference Jira tickets in the PR title or description.
+  label_missing_jira_info:
+    if:
+      - {{ not (has.jira_ticket_in_title or has.jira_ticket_in_desc) }}
+    run:
+      - action: add-label@v1
+        args:
+          label: "missing-jira"
+          color: {{ colors.red }}
+      - action: add-comment@v1
+        args:
+          comment: |
+            This PR is missing a Jira ticket reference in the title or description.
+            Please add a Jira ticket reference to the title or description of this PR.
+  # Post a comment that lists the best experts for the files that were modified.
+  explain_code_experts:
+    if:
+      - true
+    run:
+      - action: explain-code-experts@v1
+        args:
+          gt: 10
+
+
+# +----------------------------------------------------------------------------+
+# | Custom Expressions                                                         |
+# | https://docs.gitstream.cm/how-it-works/#custom-expressions                 |
+# +----------------------------------------------------------------------------+
+
+calc:
+  etr: {{ branch | estimatedReviewTime }}
+
+has:
+  jira_ticket_in_title: {{ pr.title | includes(regex=r/\b[A-Za-z]+-\d+\b/) }}
+  jira_ticket_in_desc: {{ pr.description | includes(regex=r/atlassian.net\/browse\/\w{1,}-\d{3,4}/) }}
+
+colors:
+  red: 'b60205'
+  yellow: 'fbca04'
+  green: '0e8a16'
diff --git a/docs/downloads/gitstream.cm b/docs/downloads/gitstream.cm
@@ -8,6 +8,9 @@ manifest:
 automations:
   # Add a label that indicates how many minutes it will take to review the PR.
   estimated_time_to_review:
+    on:
+      - pr_created
+      - commit
     if:
       - true
     run:
@@ -31,12 +34,15 @@ automations:
             Please add a Jira ticket reference to the title or description of this PR.
   # Post a comment that lists the best experts for the files that were modified.
   explain_code_experts:
+    on:
+      - pr_created
+      - commit
     if:
       - true
     run:
-      - action: explain-code-experts@v1 
+      - action: explain-code-experts@v1
         args:
-          gt: 10 
+          gt: 10
 
 
 # +----------------------------------------------------------------------------+

diff --git a/docs/github-installation.md b/docs/github-installation.md
@@ -21,7 +21,7 @@ You can set up gitStream for a single repo or your entire GitHub organization. S
         Here is an example of a gitStream configuration file you can use to setup some basic workflow automations.
 
         ```yaml+jinja
-        --8<-- "docs/downloads/gitstream.cm"
+        --8<-- "docs/downloads/github/gitstream.cm"
         ```
 
         **Github Actions**
@@ -60,7 +60,7 @@ You can set up gitStream for a single repo or your entire GitHub organization. S
         !!! info "Configuration files go in the repo's root directory."
             Unlike the set up instructions for a single repo, your `.cm` files should be placed in the repository's root directory.
         ```yaml+jinja
-        --8<-- "docs/downloads/gitstream.cm"
+        --8<-- "docs/downloads/github/gitstream.cm"
         ```
         **GitHub Actions**
 

diff --git a/docs/gitlab-installation.md b/docs/gitlab-installation.md
@@ -15,11 +15,11 @@ GitLab Installation Overview
 1. Designate a gitStream user account.
 1. Create a CM configuration file.
 1. Create a GitLab pipeline.
-1. Install the gitStream service. 
+1. Install the gitStream service.
 
 ## Designate a gitStream User Account
 
-gitStream automation rules are executed on behalf of the user account configured when you install the gitStream service. This account must have the `maintainer` or `owner` role to the relevant repos. 
+gitStream automation rules are executed on behalf of the user account configured when you install the gitStream service. This account must have the `maintainer` or `owner` role to the relevant repos.
 
 We recommend creating a [dedicated service account](https://docs.gitlab.com/ee/user/profile/service_accounts.html){:target="_blank"} to control access to individual repos easily. You can also use your professional or personal GitLab account for this, which would result in all automations being executed under that account, which might also affect LinearB's metrics.
 
@@ -39,7 +39,7 @@ Create a `cm` project (repository) in your GitLab group, and create a `gitstream
 !!! example "Example Configuration"
 		Here is an example of a gitStream configuration file to set up some basic workflow automations.
 		```yaml+jinja
-		--8<-- "docs/downloads/gitstream.cm"
+		--8<-- "docs/downloads/gitlab/gitstream.cm"
 		```
 
 ## Create a GitLab Pipeline
@@ -49,7 +49,7 @@ Once your gitStream configuration file is set up, you need a GitLab CI configura
 === "GitLab-Hosted runners"
 
     **Gitlab-Hosted Runners**
-    
+
     Use the following `.gitlab-ci.yml`
 
 	``` yaml+jinja
@@ -67,7 +67,7 @@ Once your gitStream configuration file is set up, you need a GitLab CI configura
     ``` yaml+jinja
     --8<-- "docs/downloads/gitlab-shell-ci.yml"
     ```
-    
+
 === "Self-Managed Runners - Kubernetes"
     **Self-Managed Runners**
 
@@ -91,7 +91,7 @@ Once your gitStream configuration file is set up, you need a GitLab CI configura
     - ...
     - docker pull YOUR-REGISTRY-URL/gitstream/rules-engine:latest
 	```
-	The docker image can be pulled to your private repository from [DockerHub](https://hub.docker.com/r/gitstream/rules-engine){:target=_blank}.  
+	The docker image can be pulled to your private repository from [DockerHub](https://hub.docker.com/r/gitstream/rules-engine){:target=_blank}.
 ## Next Step
 If you successfully complete these instructions, gitStream will now do these two things.
 
@@ -117,5 +117,3 @@ The required permissions are:
 | Read/Write API    | To get notified on MR changes and allow gitStream to approve MRs once all conditions are met |
 | Read repository   | To read and check rules over the code changes on monitored repositories                      |
 | Read user profile | Used to identify users                                                                       |
-
-
diff --git a/extract_automations.py b/extract_automations.py
@@ -0,0 +1,217 @@
+import os
+from pathlib import Path
+import re
+import json
+import sys
+from git import Repo
+import csv
+
+popularity = {}
+
+def load_popularity(popularity_csv):
+    """Fill the module-level `popularity` dict from a CSV file.
+
+    The header row is skipped; each later row maps column 0 (key) to column 2,
+    the third column (value, kept as a string). A missing file only prints a message.
+    """
+    try:
+        with open(popularity_csv, mode='r', newline='') as file:
+            reader = csv.reader(file)
+            print("Popularity CSV file found")
+            next(reader, None)  # skip header; the default avoids StopIteration on an empty file
+            for row in reader:
+                if len(row) > 2:  # skip blank/short rows instead of raising IndexError
+                    popularity[row[0]] = row[2]
+    except FileNotFoundError:
+        print("Error: could not find", popularity_csv)
+
+def extract_info(file_path) -> list[dict]:
+    """Parse one automation README and return a list of info dicts.
+
+    The list holds one dict per `=== ` tab line (a single dict if there are none).
+    Returns [] if the file is marked moved or not visible, or if parsing raises.
+    """
+    file_directory = extract_directory(file_path)
+
+    readme_tabs = 1
+    info_tabs = []
+    info = {
+        # True when every condition line of the CM file's first `if:` block is `- true`
+        'always': False,
+        # the automation name as defined in the cm file
+        'automation_in_cm': "",
+        # list of categories, taken from the README file desc header
+        'category': [],
+        # link to the cm file extracted from the README file body
+        'cm': "",
+        # the config desc extracted from the README file body
+        'configuration_description': "",
+        # the file creation date based on the Git commit
+        'date': find_initial_commit_of_file('.', file_path),
+        # the automation desc, taken from the README file desc header
+        'description': "",
+        # the README file path
+        'file': str(file_path),
+        # link to the PNG file extracted from the README file body
+        'image': "",
+        # reconstruct the HTTPS link to the page
+        'link': linkify(file_directory),
+        # link to the SVG file extracted from the README directory
+        'logo': None,
+        # the title, taken from the README file desc header
+        'name': "",
+        # is quickstart, taken from the README file desc header
+        'quickstart': False,
+    }
+
+    logo = search_file(file_directory, ".svg")
+    if logo:
+        info['logo'] = linkify(logo)
+
+    configuration_description_block = False
+    try:
+        with open(file_path, 'r', encoding='utf-8') as file:
+            for line in file:
+                if 'This file has moved' in line:
+                    return []
+                elif line.startswith('visible:'):
+                    visible = line[len('visible:'):].strip()
+                    if visible.lower() in ['false', '0', 'no']:
+                        return []
+                elif line.startswith('title:'):
+                    title = line[len('title:'):].strip()
+                    info['name'] = title.removeprefix('gitStream').lstrip().removeprefix('Automation - ')
+                elif line.startswith('description:'):
+                    info['description'] = line[len('description:'):].strip()
+                elif line.startswith('category:'):
+                    info['category'] += line[len('category:'):].strip().strip('][').split(', ')
+                elif line.startswith('quickstart:'):
+                    quickstart = line[len('quickstart:'):].strip()
+                    is_quickstart = quickstart.lower() in ['true', '1', 'yes']
+                    info['quickstart'] = is_quickstart
+                elif line.startswith('=== '):
+                    # from the second tab on, store the finished tab and reset its per-tab fields
+                    if readme_tabs > 1:
+                        info_tabs.append(info.copy())
+                        info['always'] = False
+                        info['automation_in_cm'] = ""
+                        info['cm'] = ""
+                        info['configuration_description'] = ""
+                    readme_tabs += 1
+                elif '.png' in line:
+                    match = re.search(r'!\[.*?\]\((.*?)\)', line)
+                    if match:
+                        # Resolve the image target relative to the README (needs file_path to be a Path)
+                        dir_path = file_path.parent
+                        image_path = dir_path / match.group(1)
+                        info['image'] = linkify(str(image_path))
+                # elif '<div class="automationDescription"' in line:
+                elif 'Conditions (all must be true)' in line:
+                    configuration_description_block = True
+                elif configuration_description_block and '</div>' in line:
+                    configuration_description_block = False
+                elif configuration_description_block and '!!!' in line:
+                    configuration_description_block = False
+                elif configuration_description_block:
+                    info['configuration_description'] += line
+                elif '.cm' in line:
+                    match = re.search(r'(docs/downloads\/.*?\.cm)', line)
+                    if match:
+                        # Relative path of the CM file referenced on this line
+                        cm_path = match.group(1)
+                        cm_link = cm_path.removeprefix('docs/')
+                        info['cm'] = linkify(cm_link)
+                        # check the CM code for the conditions
+                        with open(cm_path, 'r', encoding='utf-8') as cm_file:
+                            conditions = []
+                            conditions_block = False
+                            automation_block = False
+                            for cm_line in cm_file:
+                                if 'if:' in cm_line:
+                                    conditions_block = True
+                                elif 'run:' in cm_line:
+                                    conditions_block = False
+                                    break
+                                elif 'automations:' in cm_line:
+                                    automation_block = True
+                                elif conditions_block:
+                                    is_always = '- true' in cm_line
+                                    conditions.append(is_always)
+                                elif automation_block:
+                                    if cm_line.strip().startswith('#'):
+                                       continue
+                                    if cm_line.strip().startswith('{%'):
+                                        continue
+                                    if cm_line.strip() == '':
+                                        continue
+                                    automation_block = False
+                                    automation = cm_line.strip().rstrip(':')
+                                    automation = automation.split('_{{')[0]
+                                    info['automation_in_cm'] = automation
+                                    value = popularity.get(automation)
+                                    info['popularity'] = int(value) if value else None  # key is absent until an automation name is found
+                            # all() is True only if every collected line was `- true` (also True when none were collected)
+                            info['always'] = all(conditions)
+    except Exception as e:
+        eprint(f"Error reading file {file_path}: {e}")
+        return []
+
+    info_tabs.append(info)
+    return info_tabs
+
+def search_file(path, pattern):
+    """Return the sorted-first entry of directory `path` whose name contains `pattern`, joined to `path`; None if no match."""
+    matches = sorted(name for name in os.listdir(path) if pattern in name)
+    return os.path.join(path, matches[0]) if matches else None
+
+def linkify(file_path):
+    """Map a repo path to its https://docs.gitstream.cm/ URL, dropping a leading '/' and a leading 'docs' path component."""
+    url_path = Path(file_path).as_posix().removeprefix('/')
+    if url_path == 'docs' or url_path.startswith('docs/'):  # whole component only, so 'docsify/x' is kept intact
+        url_path = url_path[len('docs'):]
+    return f"https://docs.gitstream.cm/{url_path.removeprefix('/')}"
+
+def extract_directory(file_path):
+    """Return the directory that contains `file_path`, as a string.
+
+    The parent is computed with pathlib, so a bare file name (no directory
+    part) yields '.'.
+    """
+    return str(Path(file_path).parent)
+
+def eprint(*args, **kwargs):  # print() wrapper for error messages
+    print(*args, file=sys.stderr, **kwargs)  # same arguments as print(), but the output goes to stderr
+
+def find_initial_commit_of_file(repo_path, file_path):
+    """Return the ISO-formatted `committed_datetime` of the last commit listed for `file_path`, or None.
+
+    `Repo.iter_commits(paths=...)` is consumed in full and its final item is
+    used (presumably the oldest commit touching the file — confirm ordering).
+    """
+    history = list(Repo(repo_path).iter_commits(paths=file_path))
+    return history[-1].committed_datetime.isoformat() if history else None
+
+def main():
+    """Scan docs/automations for README.md files and print their automation records as JSON.
+
+    Popularity figures are loaded first; every record returned by
+    extract_info() gets a sequential integer 'id' before being printed to stdout.
+    """
+    load_popularity(Path("../gitstream-analysis/automation_usage.csv"))
+    base_path = Path('docs/automations')
+
+    automations = []
+    for root, dirs, files in os.walk(base_path):
+        # os.walk order is filesystem-dependent; sorting in place (dirs steers the
+        # descent) keeps the assigned ids stable from run to run.
+        dirs.sort()
+        if 'README.md' in files:
+            for info in extract_info(Path(root) / 'README.md'):
+                info['id'] = len(automations)
+                automations.append(info)
+
+    # Emit the collected records as pretty-printed JSON on stdout
+    print(json.dumps(automations, indent=4))
+
+if __name__ == "__main__":
+    main()