portfolio_quicktest_local_fs_one script and data

Input files

Accounts

account_idearliest_period_start
ARKK2020-12-31
ARKW2020-12-31
ARKF2020-12-31
ARKQ2020-12-31
ARKG2020-12-31
ARKX2020-12-31

Total 6 rows

Transactions

tsaccount_idtickerqtyprice
2020-10-16ARKKTSLA2466031439.67
2020-10-20ARKKTSLA18992421.94
2020-10-21ARKKTSLA2374422.64
2020-10-22ARKKTSLA7122425.79
2020-10-23ARKKTSLA7122420.63
2020-10-27ARKKTSLA1187424.68
2020-10-28ARKKTSLA4748406.02
2020-10-29ARKKTSLA14244410.83
2020-11-02ARKKTSLA3561400.51
2020-11-03ARKKTSLA-151615423.9

Total 88459 rows

Holdings

account_iddtickerqty
ARKK2020-09-30TSLA0
ARKK2020-12-31TSLA2660439
ARKK2021-03-31TSLA3757949
ARKK2021-06-30TSLA3566520
ARKK2021-09-30TSLA2545118
ARKK2021-12-31TSLA1198876
ARKK2022-03-31TSLA1094098
ARKK2022-06-30TSLA1023093
ARKK2022-09-30TSLA2927507
ARKK2022-12-31TSLA0

Total 4300 rows

1_read_txns

Script node 1_read_txns:

{
  "1_read_txns": {
    "type": "file_table",
    "desc": "Load txns from csv",
    "r": {
      "urls": [
        "{dir_in}/txns.csv"
      ],
      "csv": {
        "hdr_line_idx": 0,
        "first_data_line_idx": 1
      },
      "columns": {
        "col_ts": {
          "csv": {
            "col_hdr": "ts"
          },
          "col_type": "string"
        },
        "col_account_id": {
          "csv": {
            "col_hdr": "account_id"
          },
          "col_type": "string"
        },
        "col_ticker": {
          "csv": {
            "col_hdr": "ticker"
          },
          "col_type": "string"
        },
        "col_qty": {
          "csv": {
            "col_hdr": "qty",
            "col_format": "%d"
          },
          "col_type": "int"
        },
        "col_price": {
          "csv": {
            "col_hdr": "price",
            "col_format": "%f"
          },
          "col_type": "float"
        }
      }
    },
    "w": {
      "name": "txns",
      "having": "w.ts > \"{period_start_eod}\" && w.ts <= \"{period_end_eod}\"",
      "fields": {
        "account_id": {
          "expression": "r.col_account_id",
          "type": "string"
        },
        "ts": {
          "expression": "r.col_ts",
          "type": "string"
        },
        "txn_json": {
          "expression": "fmt.Sprintf(`\"%s|%s|%d|%s\"`, r.col_ts, r.col_ticker, r.col_qty, decimal2(r.col_price))",
          "type": "string"
        }
      },
      "indexes": {
        "idx_txns_account_id": "non_unique(account_id)"
      }
    }
  }
}

Script node 1_read_txns produces Cassandra table txns:

rowidaccount_idbatch_idxtstxn_json
2587346277616661029ARKQ02021-04-09"2021-04-09|IRDM|27117|42.95"
8058523756562434107ARKW02022-06-22"2022-06-22|GBTC|11246|12.96"
5691201099142348053ARKG02022-11-11"2022-11-11|CDXS|55689|6.84"
262360517248037460ARKG02022-04-29"2022-04-29|ACCD|1543713|5.56"
4010264079755232786ARKF02021-07-26"2021-07-26|PATH|92655|62.39"
580357491565213583ARKG02021-11-02"2021-11-02|VERV|6921|52.94"
8031803561601393301ARKK02021-07-21"2021-07-21|TXG|31240|184.06"
84347160782433589ARKF02021-12-07"2021-12-07|STNE|-20408|16.8"
3086360915248135679ARKQ02022-01-24"2022-01-24|BYDDY|-12210|61.7"
6799970392833002444ARKQ02021-07-19"2021-07-19|TCEHY|-1569|69.97"

Total 79487 rows

1_read_accounts

Script node 1_read_accounts:

{
  "1_read_accounts": {
    "type": "file_table",
    "desc": "Load accounts from csv",
    "r": {
      "urls": [
        "{dir_in}/accounts.csv"
      ],
      "csv": {
        "hdr_line_idx": 0,
        "first_data_line_idx": 1
      },
      "columns": {
        "col_account_id": {
          "csv": {
            "col_hdr": "account_id"
          },
          "col_type": "string"
        },
        "col_earliest_period_start": {
          "csv": {
            "col_hdr": "earliest_period_start"
          },
          "col_type": "string"
        }
      }
    },
    "w": {
      "name": "accounts",
      "having": "w.earliest_period_start <= \"{period_start_eod}\"",
      "fields": {
        "account_id": {
          "expression": "r.col_account_id",
          "type": "string"
        },
        "earliest_period_start": {
          "expression": "r.col_earliest_period_start",
          "type": "string"
        }
      }
    }
  }
}

Script node 1_read_accounts produces Cassandra table accounts:

rowidaccount_idbatch_idxearliest_period_start
1844852441546341834ARKK02020-12-31
7842096268861126874ARKQ02020-12-31
1309455181663667797ARKX02020-12-31
6959969552891013918ARKG02020-12-31
7032023660306557735ARKW02020-12-31
2620023871393000445ARKF02020-12-31

Total 6 rows

1_read_period_holdings

Script node 1_read_period_holdings:

{
  "1_read_period_holdings": {
    "type": "file_table",
    "desc": "Load holdings from csv",
    "r": {
      "urls": [
        "{dir_in}/holdings.csv"
      ],
      "csv": {
        "hdr_line_idx": 0,
        "first_data_line_idx": 1
      },
      "columns": {
        "col_eod": {
          "csv": {
            "col_hdr": "d"
          },
          "col_type": "string"
        },
        "col_account_id": {
          "csv": {
            "col_hdr": "account_id"
          },
          "col_type": "string"
        },
        "col_ticker": {
          "csv": {
            "col_hdr": "ticker"
          },
          "col_type": "string"
        },
        "col_qty": {
          "csv": {
            "col_hdr": "qty",
            "col_format": "%d"
          },
          "col_type": "int"
        }
      }
    },
    "w": {
      "name": "period_holdings",
      "having": "\"{period_start_eod}\" <= w.eod && w.eod <= \"{period_end_eod}\"",
      "fields": {
        "account_id": {
          "expression": "r.col_account_id",
          "type": "string"
        },
        "eod": {
          "expression": "r.col_eod",
          "type": "string"
        },
        "holding_json": {
          "expression": "fmt.Sprintf(`\"%s|%s|%d\"`, r.col_eod, r.col_ticker, r.col_qty)",
          "type": "string"
        }
      },
      "indexes": {
        "idx_period_holdings_account_id": "non_unique(account_id)"
      }
    }
  }
}

Script node 1_read_period_holdings produces Cassandra table period_holdings:

rowidaccount_idbatch_idxeodholding_json
120779861358948894ARKX02022-09-30"2022-09-30|KTOS|1741326"
4844171472413275121ARKW02021-12-31"2021-12-31|FSLY|85"
8094531162130969568ARKK02021-12-31"2021-12-31|TRMB|2040897"
5629687996933754035ARKQ02021-03-31"2021-03-31|NIO|0"
8320094651713409914ARKX02021-06-30"2021-06-30|MKFG|0"
403872239716635090ARKK02020-12-31"2020-12-31|TER|0"
1645336250561295416ARKX02021-09-30"2021-09-30|NVDA|7817"
1009997571983556129ARKG02021-03-31"2021-03-31|PACB|14204625"
3596585721417360193ARKG02022-06-30"2022-06-30|GH|794"
2581714236544905773ARKX02021-09-30"2021-09-30|PLTR|283345"

Total 3914 rows

2_account_txns_outer

Script node 2_account_txns_outer:

{
  "2_account_txns_outer": {
    "type": "table_lookup_table",
    "desc": "For each account, merge all txns into single json string",
    "start_policy": "{manual_if_multi}",
    "r": {
      "table": "accounts",
      "expected_batches_total": 10
    },
    "l": {
      "index_name": "idx_txns_account_id",
      "join_on": "r.account_id",
      "group": true,
      "join_type": "left"
    },
    "w": {
      "name": "account_txns",
      "fields": {
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "txns_json": {
          "expression": "string_agg(l.txn_json,\",\")",
          "type": "string"
        }
      }
    }
  }
}

Script node 2_account_txns_outer produces Cassandra table account_txns:

rowidaccount_idbatch_idxtxns_json
133491151350819234ARKX6"2022-05-10|ACHR|-13665|3.34","2021-05-07|DE|-3300|394.22","2022-09-06|MKFG|13016|2.37","2022-03-03| ... total length 105764
8354642669759315914ARKF9"2022-02-02|GLBE|216876|38.17","2022-06-14|PATH|-21861|17.75","2021-09-14|KSPI|-1404|113","2021-02-1 ... total length 281243
5231205311566687863ARKG6"2021-08-23|CLLS|-2099|13.48","2022-02-17|PLTR|-36|11.77","2021-09-14|INCY|-2994|72.24","2022-07-13| ... total length 636188
7522254119129968820ARKK2"2022-08-24|RBLX|45060|41.18","2021-07-26|DOCU|2120|305.77","2021-03-26|MCRB|-82173|18.87","2022-10- ... total length 596281
6858403599919527653ARKQ8"2021-07-14|TCEHY|-1152|71.83","2021-08-11|XONE|-2140|17.28","2021-04-16|AVAV|-7049|108.4","2022-01- ... total length 329102
8392498949058647234ARKW0"2021-08-13|ETSY|-1746|192.08","2022-12-22|NVDA|-452|153.39","2022-01-04|NET|5531|114.96","2022-01-2 ... total length 418834

Total 6 rows

2_account_period_holdings_outer

Script node 2_account_period_holdings_outer:

{
  "2_account_period_holdings_outer": {
    "type": "table_lookup_table",
    "desc": "For each account, merge all holdings into single json string",
    "start_policy": "{manual_if_multi}",
    "r": {
      "table": "accounts",
      "expected_batches_total": 10
    },
    "l": {
      "index_name": "idx_period_holdings_account_id",
      "join_on": "r.account_id",
      "group": true,
      "join_type": "left"
    },
    "w": {
      "name": "account_period_holdings",
      "fields": {
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "holdings_json": {
          "expression": "string_agg(l.holding_json,\",\")",
          "type": "string"
        }
      },
      "indexes": {
        "idx_account_period_holdings_account_id": "unique(account_id)"
      }
    }
  }
}

Script node 2_account_period_holdings_outer produces Cassandra table account_period_holdings:

rowidaccount_idbatch_idxholdings_json
3506489190793690889ARKG6"2022-12-31|NSTG|0","2021-03-31|SRPT|1851195","2022-09-30|DRNA|96","2022-03-31|ONVO|201189","2020-12 ... total length 18350
5521338660495980318ARKW0"2021-03-31|TEAM|284311","2022-12-31|SNOW|0","2021-03-31|Z|1367988","2021-09-30|TWTR|4475167","2022- ... total length 17187
6556059227660797012ARKF9"2021-09-30|COIN|795658","2022-12-31|GLBE|0","2021-09-30|FB|208953","2021-03-31|TCS LI|1332266","202 ... total length 14937
7597486096890187308ARKX6"2022-06-30|3690|203063","2022-09-30|KTOS|1741326","2022-03-31|SPR|191224","2021-12-31|SPR|202648"," ... total length 10055
851009968452317298ARKK2"2022-03-31|TER|475","2020-12-31|PLTR|0","2022-06-30|HUYA|189","2022-09-30|TCEHY|134","2021-03-31|TR ... total length 16870
3849863648629409774ARKQ8"2021-06-30|BZ|425891","2022-12-31|U|0","2021-09-30|EXPC|2683982","2022-09-30|NVDA|115352","2022-12- ... total length 13764

Total 6 rows

3_build_account_period_activity

Script node 3_build_account_period_activity:

{
  "3_build_account_period_activity": {
    "type": "table_lookup_table",
    "desc": "For each account, merge holdings and txns",
    "start_policy": "{manual_if_multi}",
    "r": {
      "table": "account_txns",
      "expected_batches_total": 10
    },
    "l": {
      "index_name": "idx_account_period_holdings_account_id",
      "join_on": "r.account_id",
      "group": false,
      "join_type": "left"
    },
    "w": {
      "name": "account_period_activity",
      "fields": {
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "txns_json": {
          "expression": " \"[\" + r.txns_json + \"]\" ",
          "type": "string"
        },
        "holdings_json": {
          "expression": " \"[\" + l.holdings_json + \"]\" ",
          "type": "string"
        }
      }
    }
  }
}

Script node 3_build_account_period_activity produces Cassandra table account_period_activity:

rowidaccount_idbatch_idxholdings_jsontxns_json
2618211313873703279ARKX0["2022-06-30|3690|203063","2022-09-30|KTOS|1741326","2022-03-31|SPR|191224","2021-12-31|SPR|202648", ... total length 10057["2022-05-10|ACHR|-13665|3.34","2021-05-07|DE|-3300|394.22","2022-09-06|MKFG|13016|2.37","2022-03-03 ... total length 105766
7730360701964962666ARKF4["2021-09-30|COIN|795658","2022-12-31|GLBE|0","2021-09-30|FB|208953","2021-03-31|TCS LI|1332266","20 ... total length 14939["2022-02-02|GLBE|216876|38.17","2022-06-14|PATH|-21861|17.75","2021-09-14|KSPI|-1404|113","2021-02- ... total length 281245
4848226235652605868ARKK1["2022-03-31|TER|475","2020-12-31|PLTR|0","2022-06-30|HUYA|189","2022-09-30|TCEHY|134","2021-03-31|T ... total length 16872["2022-08-24|RBLX|45060|41.18","2021-07-26|DOCU|2120|305.77","2021-03-26|MCRB|-82173|18.87","2022-10 ... total length 596283
7620804993772899826ARKQ7["2021-06-30|BZ|425891","2022-12-31|U|0","2021-09-30|EXPC|2683982","2022-09-30|NVDA|115352","2022-12 ... total length 13766["2021-07-14|TCEHY|-1152|71.83","2021-08-11|XONE|-2140|17.28","2021-04-16|AVAV|-7049|108.4","2022-01 ... total length 329104
6617328317653861530ARKG0["2022-12-31|NSTG|0","2021-03-31|SRPT|1851195","2022-09-30|DRNA|96","2022-03-31|ONVO|201189","2020-1 ... total length 18352["2021-08-23|CLLS|-2099|13.48","2022-02-17|PLTR|-36|11.77","2021-09-14|INCY|-2994|72.24","2022-07-13 ... total length 636190
3227819590137877318ARKW2["2021-03-31|TEAM|284311","2022-12-31|SNOW|0","2021-03-31|Z|1367988","2021-09-30|TWTR|4475167","2022 ... total length 17189["2021-08-13|ETSY|-1746|192.08","2022-12-22|NVDA|-452|153.39","2022-01-04|NET|5531|114.96","2022-01- ... total length 418836

Total 6 rows

4_calc_account_period_perf

Script node 4_calc_account_period_perf:

{
  "4_calc_account_period_perf": {
    "type": "table_custom_tfm_table",
    "custom_proc_type": "py_calc",
    "desc": "Apply Python-based calculations to account holdings and txns",
    "r": {
      "table": "account_period_activity",
      "expected_batches_total": 10
    },
    "p": {
      "python_code_urls": [
        "{dir_py}/portfolio_test_company_info_provider.py",
        "{dir_py}/portfolio_test_eod_price_provider.py",
        "{dir_py}/portfolio_calc.py"
      ],
      "calculated_fields": {
        "perf_json": {
          "expression": "txns_and_holdings_to_twr_cagr_by_sector_year_quarter_json(\"{period_start_eod}\", \"{period_end_eod}\", r.holdings_json, r.txns_json, PortfolioTestEodPriceProvider, PortfolioTestCompanyInfoProvider)",
          "type": "string"
        }
      }
    },
    "w": {
      "name": "account_period_perf",
      "fields": {
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "perf_json": {
          "expression": "p.perf_json",
          "type": "string"
        }
      }
    }
  }
}

Script node 4_calc_account_period_perf produces Cassandra table account_period_perf:

rowidaccount_idbatch_idxperf_json
5940186441348519617ARKX0{"2021": {"All": {"cagr": -0.1055, "twr": -0.1055}, "Communication Services": {"cagr": 0.1719, "twr" ... total length 4544
4849645507883377956ARKF8{"2021": {"All": {"cagr": -0.1912, "twr": -0.1912}, "Communication Services": {"cagr": -0.4067, "twr ... total length 4679
2228990773412516205ARKK1{"2021": {"All": {"cagr": -0.2398, "twr": -0.2398}, "Communication Services": {"cagr": -0.3183, "twr ... total length 4731
2300653478629187302ARKW9{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 4758
4131923837221936723ARKG3{"2021": {"All": {"cagr": -0.3384, "twr": -0.3384}, "Communication Services": {"cagr": 0.1666, "twr" ... total length 4533
3013935535393510462ARKQ6{"2021": {"All": {"cagr": -0.0152, "twr": -0.0152}, "Communication Services": {"cagr": 0.062, "twr": ... total length 4674

Total 6 rows

5_tag_by_period

Script node 5_tag_by_period:

{
  "5_tag_by_period": {
    "type": "table_custom_tfm_table",
    "custom_proc_type": "tag_and_denormalize",
    "desc": "Tag accounts by period name",
    "r": {
      "table": "account_period_perf",
      "expected_batches_total": 10
    },
    "p": {
      "tag_field_name": "period",
      "tag_criteria": {
        "2021": "re.MatchString(`\"2021\":`, r.perf_json)",
        "2021Q1": "re.MatchString(`\"2021Q1\":`, r.perf_json)",
        "2021Q2": "re.MatchString(`\"2021Q2\":`, r.perf_json)",
        "2021Q3": "re.MatchString(`\"2021Q3\":`, r.perf_json)",
        "2021Q4": "re.MatchString(`\"2021Q4\":`, r.perf_json)",
        "2022": "re.MatchString(`\"2022\":`, r.perf_json)",
        "2022Q1": "re.MatchString(`\"2022Q1\":`, r.perf_json)",
        "2022Q2": "re.MatchString(`\"2022Q2\":`, r.perf_json)",
        "2022Q3": "re.MatchString(`\"2022Q3\":`, r.perf_json)",
        "2022Q4": "re.MatchString(`\"2022Q4\":`, r.perf_json)"
      }
    },
    "w": {
      "name": "account_period_perf_by_period",
      "fields": {
        "period": {
          "expression": "p.period",
          "type": "string"
        },
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "perf_json": {
          "expression": "r.perf_json",
          "type": "string"
        }
      }
    }
  }
}

Script node 5_tag_by_period produces Cassandra table account_period_perf_by_period:

rowidaccount_idbatch_idxperf_jsonperiod
16258496403682928ARKW5{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 47582021Q1
6409235141644625130ARKK5{"2021": {"All": {"cagr": -0.2398, "twr": -0.2398}, "Communication Services": {"cagr": -0.3183, "twr ... total length 47312022Q1
485805896765640535ARKQ7{"2021": {"All": {"cagr": -0.0152, "twr": -0.0152}, "Communication Services": {"cagr": 0.062, "twr": ... total length 46742022Q2
6234490550527682285ARKW5{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 47582022Q1
6797782836296021287ARKG0{"2021": {"All": {"cagr": -0.3384, "twr": -0.3384}, "Communication Services": {"cagr": 0.1666, "twr" ... total length 45332021Q3
5525927667461070012ARKW5{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 47582022Q4
4152539243068825420ARKX1{"2021": {"All": {"cagr": -0.1055, "twr": -0.1055}, "Communication Services": {"cagr": 0.1719, "twr" ... total length 45442021Q2
7078341914729386756ARKW5{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 47582021Q4
3921321870495637113ARKW5{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 47582021
579969918966745166ARKG0{"2021": {"All": {"cagr": -0.3384, "twr": -0.3384}, "Communication Services": {"cagr": 0.1666, "twr" ... total length 45332021

Total 60 rows

5_tag_by_sector

Script node 5_tag_by_sector:

{
  "5_tag_by_sector": {
    "type": "table_custom_tfm_table",
    "custom_proc_type": "tag_and_denormalize",
    "desc": "Tag accounts by sector",
    "r": {
      "table": "account_period_perf_by_period",
      "expected_batches_total": 10
    },
    "p": {
      "tag_field_name": "sector",
      "tag_criteria": {
        "All": "re.MatchString(`\"All\":`, r.perf_json)",
        "Communication Services": "re.MatchString(`\"Communication Services\":`, r.perf_json)",
        "Consumer Cyclical": "re.MatchString(`\"Consumer Cyclical\":`, r.perf_json)",
        "Consumer Defensive": "re.MatchString(`\"Consumer Defensive\":`, r.perf_json)",
        "Financial Services": "re.MatchString(`\"Financial Services\":`, r.perf_json)",
        "Healthcare": "re.MatchString(`\"Healthcare\":`, r.perf_json)",
        "Industrials": "re.MatchString(`\"Industrials\":`, r.perf_json)",
        "Real Estate": "re.MatchString(`\"Real Estate\":`, r.perf_json)",
        "Technology": "re.MatchString(`\"Technology\":`, r.perf_json)"
      }
    },
    "w": {
      "name": "account_period_perf_by_period_sector",
      "fields": {
        "period": {
          "expression": "r.period",
          "type": "string"
        },
        "sector": {
          "expression": "p.sector",
          "type": "string"
        },
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "perf_json": {
          "expression": "r.perf_json",
          "type": "string"
        }
      }
    }
  }
}

Script node 5_tag_by_sector produces Cassandra table account_period_perf_by_period_sector:

rowidaccount_idbatch_idxperf_jsonperiodsector
7168965143715713226ARKK2{"2021": {"All": {"cagr": -0.2398, "twr": -0.2398}, "Communication Services": {"cagr": -0.3183, "twr ... total length 47312022Healthcare
7269178601070942003ARKQ5{"2021": {"All": {"cagr": -0.0152, "twr": -0.0152}, "Communication Services": {"cagr": 0.062, "twr": ... total length 46742022Q4Financial Services
5295164466069382562ARKQ3{"2021": {"All": {"cagr": -0.0152, "twr": -0.0152}, "Communication Services": {"cagr": 0.062, "twr": ... total length 46742021Q1Communication Services
9063329390583926200ARKW2{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 47582022Q2Consumer Defensive
5342794677629852851ARKG5{"2021": {"All": {"cagr": -0.3384, "twr": -0.3384}, "Communication Services": {"cagr": 0.1666, "twr" ... total length 45332022Q1Financial Services
7414104404588080305ARKF4{"2021": {"All": {"cagr": -0.1912, "twr": -0.1912}, "Communication Services": {"cagr": -0.4067, "twr ... total length 46792021Q3Communication Services
6115848667962190958ARKX2{"2021": {"All": {"cagr": -0.1055, "twr": -0.1055}, "Communication Services": {"cagr": 0.1719, "twr" ... total length 45442021Q1Communication Services
7658207571250279737ARKK4{"2021": {"All": {"cagr": -0.2398, "twr": -0.2398}, "Communication Services": {"cagr": -0.3183, "twr ... total length 47312021Q3Consumer Cyclical
5851178065865165370ARKK2{"2021": {"All": {"cagr": -0.2398, "twr": -0.2398}, "Communication Services": {"cagr": -0.3183, "twr ... total length 47312022Q4All
6085460532978503518ARKW4{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 47582021Q1All

Total 540 rows

6_perf_json_to_columns

Script node 6_perf_json_to_columns:

{
  "6_perf_json_to_columns": {
    "type": "table_custom_tfm_table",
    "custom_proc_type": "py_calc",
    "desc": "Use Python to read perf json and save stats as columns",
    "r": {
      "table": "account_period_perf_by_period_sector",
      "expected_batches_total": 100
    },
    "p": {
      "python_code_urls": [
        "{dir_py}/json_to_columns.py"
      ],
      "calculated_fields": {
        "twr": {
          "expression": "json_to_twr(r.perf_json, r.period, r.sector)",
          "type": "float"
        },
        "cagr": {
          "expression": "json_to_cagr(r.perf_json, r.period, r.sector)",
          "type": "float"
        }
      }
    },
    "w": {
      "name": "account_period_sector_twr_cagr",
      "fields": {
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "period": {
          "expression": "r.period",
          "type": "string"
        },
        "sector": {
          "expression": "r.sector",
          "type": "string"
        },
        "twr": {
          "expression": "p.twr",
          "type": "float"
        },
        "cagr": {
          "expression": "p.cagr",
          "type": "float"
        }
      }
    }
  }
}

Script node 6_perf_json_to_columns produces Cassandra table account_period_sector_twr_cagr:

rowidaccount_idbatch_idxcagrperiodsectortwr
8137916471552369728ARKF570.362022Q4Real Estate0.09
2235881837030019840ARKX98-30.932021Q4Consumer Cyclical-8.91
1549235067597828388ARKQ34-56.392021Q3Consumer Defensive-18.87
7821394913153981096ARKG8502022Q3Consumer Cyclical0
2115971575274467196ARKW29-33.662021Q3Technology-9.83
69418455958825887ARKK047.32021Q3Consumer Cyclical10.25
2515764555023439805ARKG2602021Q2Industrials0
1568926299271490416ARKW16-36.752022Q4All-10.91
6966961175879963676ARKQ4502021Real Estate0
350162544516462577ARKX4202022Q4Real Estate0

Total 540 rows

7_file_account_period_sector_perf

Script node 7_file_account_period_sector_perf:

{
  "7_file_account_period_sector_perf": {
    "type": "table_file",
    "desc": "Write yearly/quarterly perf results by sector to CSV file",
    "r": {
      "table": "account_period_sector_twr_cagr"
    },
    "w": {
      "top": {
        "order": "account_id,period,sector"
      },
      "url_template": "{dir_out}/account_period_sector_perf.csv",
      "columns": [
        {
          "csv": {
            "header": "ARK fund",
            "format": "%s"
          },
          "name": "account_id",
          "expression": "r.account_id",
          "type": "string"
        },
        {
          "csv": {
            "header": "Period",
            "format": "%s"
          },
          "name": "period",
          "expression": "r.period",
          "type": "string"
        },
        {
          "csv": {
            "header": "Sector",
            "format": "%s"
          },
          "name": "sector",
          "expression": "r.sector",
          "type": "string"
        },
        {
          "csv": {
            "header": "Time-weighted annualized return %",
            "format": "%.2f"
          },
          "name": "cagr",
          "expression": "r.cagr",
          "type": "float"
        }
      ]
    }
  }
}

Script node 7_file_account_period_sector_perf produces data file:

ARK fundPeriodSectorTime-weighted annualized return %
ARKF2021All-19.12
ARKF2021Communication Services-40.67
ARKF2021Consumer Cyclical-22.28
ARKF2021Consumer Defensive0.00
ARKF2021Financial Services16.53
ARKF2021Healthcare-55.78
ARKF2021Industrials-14.41
ARKF2021Real Estate-44.99
ARKF2021Technology-17.33
ARKF2021Q1All176.50

Total 540 rows

7_file_account_year_perf

Script node 7_file_account_year_perf:

{
  "7_file_account_year_perf": {
    "type": "table_file",
    "desc": "Write yearly perf results for all sectors to CSV file",
    "r": {
      "table": "account_period_sector_twr_cagr"
    },
    "w": {
      "top": {
        "order": "account_id,period"
      },
      "having": "len(w.period) == 4 && w.sector == \"All\"",
      "url_template": "{dir_out}/account_year_perf.csv",
      "columns": [
        {
          "csv": {
            "header": "ARK fund",
            "format": "%s"
          },
          "name": "account_id",
          "expression": "r.account_id",
          "type": "string"
        },
        {
          "csv": {
            "header": "Period",
            "format": "%s"
          },
          "name": "period",
          "expression": "r.period",
          "type": "string"
        },
        {
          "csv": {
            "header": "Sector",
            "format": "%s"
          },
          "name": "sector",
          "expression": "r.sector",
          "type": "string"
        },
        {
          "csv": {
            "header": "Time-weighted annualized return %",
            "format": "%.2f"
          },
          "name": "cagr",
          "expression": "r.cagr",
          "type": "float"
        }
      ]
    }
  }
}

Script node 7_file_account_year_perf produces data file:

ARK fundPeriodSectorTime-weighted annualized return %
ARKF2021All-19.12
ARKF2022All-63.35
ARKG2021All-33.84
ARKG2022All-50.87
ARKK2021All-23.98
ARKK2022All-68.07
ARKQ2021All-1.52
ARKQ2022All-61.45
ARKW2021All-19.49
ARKW2022All-65.28

Total 12 rows