Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,33 @@ jobs:
name: yellow-ai-vs-ai-proto-logs
path: |
/tmp/tbots/yellow/proto_*

common-commands:
strategy:
# TODO: set fail-fast to true in production.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remember to remove TODO before merging

fail-fast: false
matrix:
commands:
- ../scripts/safe_run.sh bazel run --run_under="xvfb-run" //software/thunderscope:thunderscope_main -- --run_blue --run_diagnostics --interface lo --keyboard_estop --ci_mode
- ../scripts/safe_run.sh bazel run --run_under="xvfb-run" //software/ai/hl/stp/tactic/goalie:goalie_tactic_test -- --enable_thunderscope
- ../scripts/safe_run.sh bazel run --run_under="xvfb-run" //software/thunderscope:thunderscope_main -- --enable_autoref --ci_mode && ../scripts/safe_run.sh bazel run --run_under="xvfb-run" //software/thunderscope:thunderscope_main -- blue_log="$(find /tmp/tbots/blue -maxdepth 1 -type d -name 'proto_*' -printf '/tmp/tbots/blue/%f\n' 2>/dev/null | head -n1)"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what does the second half of this command do?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the delay, I am finally done work.

../scripts/safe_run.sh bazel run --run_under="xvfb-run" //software/thunderscope:thunderscope_main -- --enable_autoref --ci_mode This first half runs thunderscope once

This part tries to find the previous replay log and display the logs:
../scripts/safe_run.sh bazel run --run_under="xvfb-run" //software/thunderscope:thunderscope_main -- blue_log="$(find /tmp/tbots/blue -maxde4pth 1 -type d -name 'proto_*' -printf '/tmp/tbots/blue/%f\n' 2>/dev/null | head -n1)

This is not ideal, I am trying to think of a way to handle this. Maybe I should allow a new argument for our thunderscope, so we can specify the location for the replay log to save, so that we don't need to lookup for it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah being able to specify the runtime dir + folder name would be ideal


name: Sanity Check on Common Commands
runs-on: ubuntu-20.04
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can use the ubuntu-22.04 runner

steps:
- uses: actions/checkout@v4

- name: Environment Setup
run: |
"${GITHUB_WORKSPACE}"/environment_setup/setup_software.sh

- name: Build Thunderscope
run: |
cd src
bazel build //software/thunderscope:thunderscope_main

- name: Check Common Commands
run: |
cd src
${{ matrix.commands }}

104 changes: 104 additions & 0 deletions scripts/ci_runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/opt/tbotspython/bin/python3

"""
ci_runner.py runs a given command for a fix amount of time and
return 0 if there is no Python runtime exception found in the output
otherwise return 1
Because bazel outputs in both stderr and stdout regardless the content,
(https://github.com/bazelbuild/bazel/issues/10496)
only checking stderr is insufficient. Therefore, we are doing a pattern
matching in both stderr and stdout, which tells us if there is any runtime exception.
"""

from typing import List
import subprocess
import sys
import time
import select

# Default timeout is 120 seconds
TIME_LIMIT_S = 30
# Key pattern of python exception
PYTHON_ERROR_PATTERN="Traceback (most recent call last):"
# Delimiter which splits the target command output and this program logs
SECTION_DELIM = "=" * 50


def print_command(command: List[str]) -> None:
"""
Format and print commands
:param command: command to be printed in a list of string
"""
print(" ".join(command))

def read_available_output(proc: subprocess.Popen) -> str:
"""
Safely read available output from process without blocking
:param proc: Running process
:return: string output of stdout
"""
output = ""
if proc.stdout:
rlist, _, _ = select.select([proc.stdout], [], [], 0)
if rlist:
output = proc.stdout.readline()
return output

def test_command(command: List[str]) -> int:
"""
Run a given command and return status code
:param command: command to run and test
:return: 0 if the given command was run successfully and did not
throw any error in the given time limit.
1 if the give command failed to run or threw errors to console.
"""
start_time = time.time()

# Run the command as a subprocess
proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1)

# Keep polling and checking ONLY if
# - did not reach time limit yet AND
# - the process is still running
while time.time() - start_time <= TIME_LIMIT_S and proc.poll() is None:
stdout_data = read_available_output(proc)
if PYTHON_ERROR_PATTERN in stdout_data:
print(SECTION_DELIM)
print("Oops! Error found while running the following command :(")
print_command(command)

proc.kill()
return 1
if stdout_data:
print(stdout_data, end="")

# If the process is still running, send SIGKILL signal
if proc.poll() is None:
# TODO: remove the following print statement
print("killing the proc")
proc.kill()

remaining_output = proc.communicate()[0]
print(remaining_output)
print(SECTION_DELIM)

print("Nice! Test passed! :)")
return 0


if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: command_runner.py <command> [args...]", file=sys.stderr)
sys.exit(1)

command = sys.argv[1:]
Comment on lines +94 to +98
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

better to use the argparse module here, it's more standard for python. it can also let you take in an arbitrary number of inputs.

print(f"Testing the following command:")
print_command(command)

print(SECTION_DELIM)
sys.exit(test_command(sys.argv[1:]))

52 changes: 52 additions & 0 deletions scripts/safe_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/bin/bash
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

safe_run.sh is sort of a weird name that doesn't immediately tell you what this file does

But tbh I don't know a better name... maybe ci_runner.sh is better. At least it tells you that it's only relevant for CI.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a high-level description of this script at the top of this file.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you re-write this script with python, you can use subprocess and get stdout and stderr logs. Then, you can print the error messages. Because right now, there isn't any feedback for the developers and it will be confusing when this CI job suddenly fails.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, sounds good!
I was trying to use 2> and > to redirect stdout and stderr, but it didn't seem to work.


# Timeout in seconds
# When the time is up and no error was shown, this test will pass
TIME_LIMIT=120 # 2 minutes

# Match Python traceback
ERROR_PATTERN="Traceback (most recent call last):"

# Temporary log file
LOG_FILE=$(mktemp)

# Run the command and record the log
"$@" &> "$LOG_FILE" &
CMD_PID=$!

echo "Process Running in Wrapper with Timeout $TIME_LIMIT ..."

# Time the process
SECONDS=0
while kill -0 $CMD_PID 2>/dev/null; do
# Check if time is up
if [ $SECONDS -ge $TIME_LIMIT ]; then
echo "Time limit reached, stopping process: $CMD_PID"
kill -SIGINT $CMD_PID
wait $CMD_PID
exit 0 # Upon time out and no error, returns 0 status code
fi

# Check if the log contains Traceback
if grep -q "$ERROR_PATTERN" "$LOG_FILE"; then
echo "[Error detected] Potential error found in command output!"
kill -SIGINT $CMD_PID
wait $CMD_PID
exit 1
fi

sleep 1 # Run this loop once per second
done

cat $LOG_FILE
# Get the exit code of the process
wait $CMD_PID
EXIT_CODE=$?

# Clean up log file
rm -f "$LOG_FILE"

# Exit with the command status code
exit $EXIT_CODE


16 changes: 16 additions & 0 deletions src/software/logger/proto_logger.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,22 @@ ProtoLogger::ProtoLogger(const std::string& log_path,
log_thread_ = std::thread(&ProtoLogger::logProtobufs, this);
}

ProtoLogger::ProtoLogger(const std::string& log_path,
std::function<double()> time_provider,
const bool friendly_colour_yellow,
const std::string& log_name)
: log_path_(log_path),
time_provider_(time_provider),
friendly_colour_yellow_(friendly_colour_yellow),
stop_logging_(false),
buffer_(PROTOBUF_BUFFER_SIZE, true)
{
start_time_ = time_provider_();
log_folder_ = log_path_ + "/" + log_name + "/";
std::experimental::filesystem::create_directories(log_folder_);
log_thread_ = std::thread(&ProtoLogger::logProtobufs, this);
}

ProtoLogger::~ProtoLogger()
{
flushAndStopLogging();
Expand Down
12 changes: 12 additions & 0 deletions src/software/logger/proto_logger.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,18 @@ class ProtoLogger
std::function<double()> time_provider,
bool friendly_colour_yellow);

/**
* Constructor
* @param log_path The path to the directory where the logs will be saved
* @param time_provider A function that returns the current time in seconds
* @param friendly_colour_yellow Whether the friendly team is yellow or not
* @param log_name The name of the log directory
*/
explicit ProtoLogger(const std::string& log_path,
std::function<double()> time_provider,
const bool friendly_colour_yellow,
const std::string& log_name);

ProtoLogger() = delete;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(
should_restart_on_crash: bool = True,
run_sudo: bool = False,
running_in_realtime: bool = True,
log_name: str = None,
) -> None:
"""Run FullSystem

Expand All @@ -33,6 +34,7 @@ def __init__(
:param should_restart_on_crash: whether or not to restart the program after it has been crashed
:param run_sudo: true if we should run full system under sudo
:param running_in_realtime: True if we are running fullsystem in realtime, else False
:param log_name: Name of the proto log folder
"""
self.full_system_runtime_dir = full_system_runtime_dir
self.debug_full_system = debug_full_system
Expand All @@ -41,6 +43,7 @@ def __init__(
self.should_restart_on_crash = should_restart_on_crash
self.should_run_under_sudo = run_sudo
self.running_in_realtime = running_in_realtime
self.log_name = log_name

self.thread = threading.Thread(target=self.__restart__, daemon=True)

Expand All @@ -60,10 +63,11 @@ def __enter__(self) -> FullSystem:
except:
pass

self.full_system = "software/unix_full_system --runtime_dir={} {} {}".format(
self.full_system = "software/unix_full_system --runtime_dir={} {} {} {}".format(
self.full_system_runtime_dir,
"--friendly_colour_yellow" if self.friendly_colour_yellow else "",
"--ci" if not self.running_in_realtime else "",
f"--log_name={self.log_name}" if self.log_name else "",
)

if self.should_run_under_sudo:
Expand Down
11 changes: 11 additions & 0 deletions src/software/thunderscope/thunderscope_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,14 @@
default="/tmp/tbots/yellow",
)

# Proto log folder name
parser.add_argument(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice work!

"--log_name",
type=str,
help="proto log folder name to save to",
default=None,
)

# Debugging
parser.add_argument(
"--debug_blue_full_system",
Expand Down Expand Up @@ -374,6 +382,7 @@
friendly_colour_yellow=friendly_colour_yellow,
should_restart_on_crash=True,
run_sudo=args.sudo,
log_name=args.log_name
) as full_system:
full_system.setup_proto_unix_io(current_proto_unix_io)

Expand Down Expand Up @@ -447,13 +456,15 @@ def __ticker(tick_rate_ms: int) -> None:
should_restart_on_crash=False,
run_sudo=args.sudo,
running_in_realtime=(not args.ci_mode),
log_name=args.log_name,
) as blue_fs, FullSystem(
full_system_runtime_dir=args.yellow_full_system_runtime_dir,
debug_full_system=args.debug_yellow_full_system,
friendly_colour_yellow=True,
should_restart_on_crash=False,
run_sudo=args.sudo,
running_in_realtime=(not args.ci_mode),
log_name=args.log_name,
) as yellow_fs, Gamecontroller(
suppress_logs=(not args.verbose)
) as gamecontroller, (
Expand Down
16 changes: 15 additions & 1 deletion src/software/unix_full_system_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ int main(int argc, char** argv)
{
bool help = false;
std::string runtime_dir = "/tmp/tbots";
std::string log_name = std::string();
bool friendly_colour_yellow = false;
bool ci = false;
};
Expand All @@ -64,6 +65,9 @@ int main(int argc, char** argv)
desc.add_options()("runtime_dir",
boost::program_options::value<std::string>(&args.runtime_dir),
"The directory to output logs and setup unix sockets.");
desc.add_options()("log_name",
boost::program_options::value<std::string>(&args.log_name),
"The directory name to contain proto logs.");
desc.add_options()("friendly_colour_yellow",
boost::program_options::bool_switch(&args.friendly_colour_yellow),
"If false, friendly colour is blue");
Expand Down Expand Up @@ -107,8 +111,18 @@ int main(int argc, char** argv)
// timestamp once the backend is set up
time_provider = []() { return 0; };
}
proto_logger = std::make_shared<ProtoLogger>(args.runtime_dir, time_provider,

if (args.log_name.empty())
{
proto_logger = std::make_shared<ProtoLogger>(args.runtime_dir, time_provider,
args.friendly_colour_yellow);
}
else
{
proto_logger = std::make_shared<ProtoLogger>(args.runtime_dir, time_provider,
args.friendly_colour_yellow, args.log_name);
}

LoggerSingleton::initializeLogger(args.runtime_dir, proto_logger);
TbotsProto::ThunderbotsConfig tbots_proto;

Expand Down
Loading