{ "cells": [ { "cell_type": "markdown", "id": "61f49e0f", "metadata": {}, "source": [ "# Profiling the effect of input parameters on TARDIS runtime #" ] }, { "cell_type": "code", "execution_count": null, "id": "2c26cee6", "metadata": {}, "outputs": [], "source": [ "import tardis\n", "from tardis import run_tardis\n", "\n", "from tardis.io.config_reader import Configuration\n", "from tardis.io.atom_data.base import AtomData\n", "from tardis.simulation import Simulation\n", "from tardis import run_tardis\n", "from tardis.io.atom_data.util import download_atom_data\n", "download_atom_data('kurucz_cd23_chianti_H_He')\n", "\n", "import time\n", "import numpy as np\n", "from astropy import units as u\n", "import csv\n", "import statistics\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "id": "dac6305a", "metadata": {}, "source": [ "### Functions for building the dictionaries and interacting with files ###" ] }, { "cell_type": "code", "execution_count": null, "id": "a806b64e", "metadata": {}, "outputs": [], "source": [ "def build_dictionary(fp):\n", " \"\"\"\n", " Uses the fp generated from open_file to return a dictionary. Then closes\n", " the file at the end of execution.\n", "\n", " 'testing_param' : dict\n", " 'iteration' : time\n", " \n", " Parameters\n", " ----------\n", " fp : file\n", " \n", " Returns\n", " -------\n", " data_dict : dictionary\n", " \"\"\"\n", " \n", " data_dict = {}\n", " reader = csv.reader(fp)\n", " for line in reader:\n", " testing_param = int(line[0])\n", " \n", " iteration = int(line[1])\n", " \n", " time_of_run = float(line[2])\n", " \n", " if testing_param not in data_dict:\n", " data_dict[testing_param] = {}\n", " if iteration not in data_dict[testing_param]:\n", " data_dict[testing_param][iteration] = time_of_run\n", " fp.close()\n", " return data_dict \n", "\n", "def add_to_dictionary(data_dict, info):\n", " \"\"\"\n", " Adds the data to the dictionary\n", " \n", " Parameters\n", " ----------\n", " data_dict : dictionary\n", " info : list\n", " It is set up as [testing_param, iteration, run_time]\n", " \"\"\"\n", " \n", " testing_param = info[0]\n", " iteration = info[1]\n", " time_of_run = info[2]\n", " if testing_param not in data_dict:\n", " data_dict[testing_param] = {}\n", " data_dict[testing_param][iteration] = time_of_run\n", "\n", "\n", "def check_existance(data_dict, info):\n", " \"\"\"\n", " Checks to see if a data poitn already exists in data_dict.\n", " Returns true if it exists, false if it does not.\n", " \n", " Parameters\n", " ----------\n", " data_dict : dictionary\n", " info : list\n", " [testing_param, iteration]\n", " \n", " Returns\n", " -------\n", " Boolean\n", " \"\"\"\n", " \n", " testing_param = info[0]\n", " iteration = info[1]\n", " #Checks if it thread already exists\n", " if testing_param not in data_dict:\n", " return False\n", " #If it does\n", " elif (testing_param in data_dict):\n", " #Does the iteration exist yet\n", " if iteration not in data_dict[testing_param]:\n", " return False\n", " return True" ] }, { "cell_type": "markdown", "id": "92662ad4", "metadata": {}, "source": [ "## Timing of a specific parameter in TARDIS ##" ] }, { "cell_type": "markdown", "id": "a55336ba", "metadata": {}, "source": [ "### Number of Threads ###" ] }, { "cell_type": "code", "execution_count": null, "id": "411e12a2", "metadata": {}, "outputs": [], "source": [ "def generate_data(thread_count, \n", " iteration_count, \n", " thread_dictionary, \n", " cpu_timing_dictionary, \n", " tardis_file, \n", " thread_file, \n", " cpu_file,):\n", " \"\"\"\n", " This function generates data for running a tardis file on different numbers of threads different times.\n", " It writes the data to two dictionaries and their respecitve files.\n", " \n", " This function can be modified to write to as many or as few dictionaries and files as necessary.\n", " In order to test different parameters, the index into the tardis_config must be adjusted. This must be done \n", " manually in order to select the correct testing parameter.\n", " \n", " Parameters\n", " ----------\n", " thread_count : int\n", " Max number of threads to be tested\n", " iteration_count : int\n", " Amount of testing on each thread\n", " thread_dictionary : dict\n", " cpu_timing_dictionary : dict\n", " tardis_file : str\n", " TARDIS config file\n", " thread_file : str\n", " .csv file name\n", " cpu_file : str\n", " .csv file name\n", " \"\"\"\n", "\n", " tardis_config = Configuration.from_yaml(tardis_file)\n", "\n", " for threads in range(1, thread_count+1):\n", " for i in range(1, iteration_count+1):\n", " #Continue if this data point already exists. If it exists in one it exists in both\n", " if check_existance(thread_dictionary, [threads, i]):\n", " continue\n", "\n", " print(threads, \" : \", i)\n", " tardis_config[\"montecarlo\"][\"nthreads\"] = threads\n", "\n", " sim = Simulation.from_config(tardis_config)\n", " start = time.monotonic()\n", " sim.run()\n", " end = time.monotonic()\n", " run_time = end-start\n", " \n", " add_to_dictionary(thread_dictionary, [threads, i, run_time])\n", " add_to_dictionary(cpu_timing_dictionary, [threads, i, (run_time * threads)])\n", "\n", " with open(thread_file, \"a\") as data_file:\n", " data_file.write(\"{}, {}, {}\\n\".format(threads, i, run_time))\n", " with open(cpu_file, \"a\") as data_file:\n", " data_file.write(\"{}, {}, {}\\n\".format(threads, i, run_time * threads))\n", " \n", " print(\"Completed\")" ] }, { "cell_type": "code", "execution_count": null, "id": "d6966653", "metadata": { "scrolled": true }, "outputs": [], "source": [ "TARDIS_FILE = 'tardis_example.yml'\n", "STORAGE_FILE = 'threads_dict_128_threads_1_node.csv'\n", "CPU_STORAGE_FILE = 'cpu_threads_dict_128_threads_1_node.csv'\n", "\n", "#Compile the numba functions\n", "run_tardis(TARDIS_FILE)\n", "\n", "#Opens file and builds the dictionary\n", "with open(STORAGE_FILE, \"r\") as threads_fp:\n", " threads_dict = build_dictionary(threads_fp)\n", "\n", "with open(CPU_STORAGE_FILE, \"r\") as cpu_fp:\n", " cpu_threads_dict = build_dictionary(cpu_fp)\n", "\n", "#Creates list of main data\n", "threads_numbers = [i for i in range(1, 129, 1)]\n", "cpu_threads_numbers = [i for i in range(1, 129, 1)]\n", "\n", "generate_data(128, 30, threads_dict, cpu_threads_dict, TARDIS_FILE, STORAGE_FILE, CPU_STORAGE_FILE)" ] }, { "cell_type": "code", "execution_count": null, "id": "7056ee6e", "metadata": {}, "outputs": [], "source": [ "def create_stat_dictionaries(main_dict):\n", " \"\"\"\n", " This builds two dictionaries, one that holds the mean of the data and the other that holds the stdev\n", " \n", " Parameters\n", " ----------\n", " main_dict : dict\n", " Data dictionary, format of\n", " testing_param : dictionary\n", " iteration : time\n", " \n", " Returns\n", " -------\n", " main_dict_mean : dict\n", " main_dict_stdev : dict\n", " \"\"\"\n", "\n", " main_dict_list = {}\n", " for thread, iteration_dict in main_dict.items():\n", " main_dict_list[thread] = [v for (k, v) in iteration_dict.items()]\n", "\n", " main_dict_mean = {}\n", " main_dict_stdev = {}\n", " for thread, timing_list in main_dict_list.items():\n", " main_dict_mean[thread] = statistics.mean(timing_list)\n", " main_dict_stdev[thread] = statistics.stdev(timing_list)\n", "\n", " return main_dict_mean, main_dict_stdev\n", "\n", "threads_dict_mean, threads_dict_stdev = create_stat_dictionaries(threads_dict)\n", "cpu_threads_dict_mean, cpu_threads_dict_stdev = create_stat_dictionaries(cpu_threads_dict)" ] }, { "cell_type": "code", "execution_count": null, "id": "ae1e8624", "metadata": { "scrolled": false }, "outputs": [], "source": [ "def graph_data(testing_params, mean_dict, stdev_dict, x_axis_title, y_axis_title, graph_title, text_labels=False):\n", " \"\"\"\n", " Uses pyplot to graph the data given the mean and standard deviation of data.\n", " \n", " Parameters\n", " ----------\n", " testing_params : list\n", " This is a list of the testing parameters, which is used for the axis construction.\n", " mean_dict : dict\n", " stdev_dict : dict\n", " x_axis_title : str\n", " y_axis_title : str\n", " graph_title : str\n", " text_labels : bool\n", " This is a boolean parameter for if the testing parameters are text. If they are text, then\n", " it will set each tick mark on the x-axis to be a label in testing_params.\n", " \"\"\"\n", " \n", " lists = sorted(mean_dict.items())\n", " lists_stdev = sorted(stdev_dict.items())\n", " x, y = zip(*lists)\n", " x_stdev, y_stdev = zip(*lists_stdev)\n", "\n", " fig = plt.figure(figsize=(20,10))\n", " ax = fig.add_subplot(1,1,1)\n", " ax.set_xlabel(x_axis_title)\n", " ax.set_ylabel(y_axis_title)\n", " ax.set_title(graph_title)\n", " ax.grid(True)\n", " \n", " if text_labels:\n", " xtick_spacer = np.arange(0, len(testing_params), 1)\n", " xticks = testing_params\n", " ax.set_xticks(xtick_spacer)\n", " ax.set_xticklabels(xticks, rotation='horizontal', fontsize=18)\n", " elif not text_labels:\n", " #If there are a lot of ticks, it will reduce for visibility.\n", " interval = testing_params[1] - testing_params[0]\n", " if len(testing_params) > 80:\n", " xticks = np.arange(testing_params[0], testing_params[-1]+interval, interval*2)\n", " else:\n", " xticks = np.arange(testing_params[0], testing_params[-1]+interval, interval)\n", " ax.set_xticks(xticks)\n", " \n", " ax.plot(x, y, color='tab:blue', label=\"{} vs {}\".format(x_axis_title, y_axis_title))\n", " plt.errorbar(x, y, yerr=y_stdev, label='stdev')\n", " leg = plt.legend(loc='best', ncol=1, mode='expanded', fancybox=False)\n", " leg.get_frame().set_alpha(0.5)\n", "\n", "graph_data(threads_numbers, threads_dict_mean, threads_dict_stdev, \"threads\", \"time (sec)\", \"threads vs time\")\n", "graph_data(cpu_threads_numbers, cpu_threads_dict_mean, cpu_threads_dict_stdev, \"threads\", \"time (sec)\", \"threads vs cpu time\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 5 }