portfolio_quicktest script and data

Input files

Accounts

account_idearliest_period_start
ARKK2020-12-31
ARKW2020-12-31
ARKF2020-12-31
ARKQ2020-12-31
ARKG2020-12-31
ARKX2020-12-31

Total 6 rows

Transactions

tsaccount_idtickerqtyprice
2020-10-16ARKKTSLA2466031439.67
2020-10-20ARKKTSLA18992421.94
2020-10-21ARKKTSLA2374422.64
2020-10-22ARKKTSLA7122425.79
2020-10-23ARKKTSLA7122420.63
2020-10-27ARKKTSLA1187424.68
2020-10-28ARKKTSLA4748406.02
2020-10-29ARKKTSLA14244410.83
2020-11-02ARKKTSLA3561400.51
2020-11-03ARKKTSLA-151615423.9

Total 88459 rows

Holdings

account_iddtickerqty
ARKK2020-09-30TSLA0
ARKK2020-12-31TSLA2660439
ARKK2021-03-31TSLA3757949
ARKK2021-06-30TSLA3566520
ARKK2021-09-30TSLA2545118
ARKK2021-12-31TSLA1198876
ARKK2022-03-31TSLA1094098
ARKK2022-06-30TSLA1023093
ARKK2022-09-30TSLA2927507
ARKK2022-12-31TSLA0

Total 4300 rows

1_read_txns

Script node 1_read_txns:

{
  "1_read_txns": {
    "type": "file_table",
    "desc": "Load txns from csv",
    "explicit_run_only": true,
    "r": {
      "urls": [
        "{dir_in}/txns.csv"
      ],
      "csv": {
        "hdr_line_idx": 0,
        "first_data_line_idx": 1
      },
      "columns": {
        "col_ts": {
          "csv": {
            "col_hdr": "ts"
          },
          "col_type": "string"
        },
        "col_account_id": {
          "csv": {
            "col_hdr": "account_id"
          },
          "col_type": "string"
        },
        "col_ticker": {
          "csv": {
            "col_hdr": "ticker"
          },
          "col_type": "string"
        },
        "col_qty": {
          "csv": {
            "col_hdr": "qty",
            "col_format": "%d"
          },
          "col_type": "int"
        },
        "col_price": {
          "csv": {
            "col_hdr": "price",
            "col_format": "%f"
          },
          "col_type": "float"
        }
      }
    },
    "w": {
      "name": "txns",
      "having": "w.ts > \"{period_start_eod}\" && w.ts <= \"{period_end_eod}\"",
      "fields": {
        "account_id": {
          "expression": "r.col_account_id",
          "type": "string"
        },
        "ts": {
          "expression": "r.col_ts",
          "type": "string"
        },
        "txn_json": {
          "expression": "strings.ReplaceAll(fmt.Sprintf(`{'ts':'%s','t':'%s','q':%d,'p':%s}`, r.col_ts, r.col_ticker, r.col_qty, decimal2(r.col_price)), `'`,`\"`)",
          "type": "string"
        }
      },
      "indexes": {
        "idx_txns_account_id": "non_unique(account_id)"
      }
    }
  }
}

Script node 1_read_txns produces Cassandra table txns:

rowidaccount_idbatch_idxtstxn_json
6094664611844062359ARKW02021-04-14{"ts":"2021-04-14","t":"FSLY","q":1532,"p":71.92}
8537883289672171463ARKF02022-02-03{"ts":"2022-02-03","t":"SNAP","q":-933,"p":24.5}
2749076004491469954ARKQ02021-11-12{"ts":"2021-11-12","t":"BIDU","q":-368,"p":170.57}
7445773725017646389ARKK02021-05-07{"ts":"2021-05-07","t":"TWLO","q":37128,"p":307.15}
8993784235489443377ARKK02021-11-12{"ts":"2021-11-12","t":"SKLZ","q":-26172,"p":12.5}
206487046063108640ARKW02022-09-20{"ts":"2022-09-20","t":"NNDM","q":-3,"p":2.45}
3354548830737968360ARKG02022-10-20{"ts":"2022-10-20","t":"ONVO","q":896,"p":1.71}
8851154163572220846ARKQ02022-10-28{"ts":"2022-10-28","t":"AVAV","q":-1166,"p":90.14}
151913958205174664ARKG02021-05-04{"ts":"2021-05-04","t":"GH","q":-7216,"p":148.19}
8571157745347545844ARKK02021-12-29{"ts":"2021-12-29","t":"HOOD","q":-43488,"p":17.11}

Total 79487 rows

1_read_accounts

Script node 1_read_accounts:

{
  "1_read_accounts": {
    "type": "file_table",
    "desc": "Load accounts from csv",
    "explicit_run_only": true,
    "r": {
      "urls": [
        "{dir_in}/accounts.csv"
      ],
      "csv": {
        "hdr_line_idx": 0,
        "first_data_line_idx": 1
      },
      "columns": {
        "col_account_id": {
          "csv": {
            "col_hdr": "account_id"
          },
          "col_type": "string"
        },
        "col_earliest_period_start": {
          "csv": {
            "col_hdr": "earliest_period_start"
          },
          "col_type": "string"
        }
      }
    },
    "w": {
      "name": "accounts",
      "having": "w.earliest_period_start <= \"{period_start_eod}\"",
      "fields": {
        "account_id": {
          "expression": "r.col_account_id",
          "type": "string"
        },
        "earliest_period_start": {
          "expression": "r.col_earliest_period_start",
          "type": "string"
        }
      }
    }
  }
}

Script node 1_read_accounts produces Cassandra table accounts:

rowidaccount_idbatch_idxearliest_period_start
748401470040242660ARKK02020-12-31
6621936461880082804ARKF02020-12-31
1522209384545663127ARKW02020-12-31
2380715082357295080ARKG02020-12-31
5559895917703115577ARKQ02020-12-31
6520205207003275793ARKX02020-12-31

Total 6 rows

1_read_period_holdings

Script node 1_read_period_holdings:

{
  "1_read_period_holdings": {
    "type": "file_table",
    "desc": "Load holdings from csv",
    "explicit_run_only": true,
    "r": {
      "urls": [
        "{dir_in}/holdings.csv"
      ],
      "csv": {
        "hdr_line_idx": 0,
        "first_data_line_idx": 1
      },
      "columns": {
        "col_eod": {
          "csv": {
            "col_hdr": "d"
          },
          "col_type": "string"
        },
        "col_account_id": {
          "csv": {
            "col_hdr": "account_id"
          },
          "col_type": "string"
        },
        "col_ticker": {
          "csv": {
            "col_hdr": "ticker"
          },
          "col_type": "string"
        },
        "col_qty": {
          "csv": {
            "col_hdr": "qty",
            "col_format": "%d"
          },
          "col_type": "int"
        }
      }
    },
    "w": {
      "name": "period_holdings",
      "having": "\"{period_start_eod}\" <= w.eod && w.eod <= \"{period_end_eod}\"",
      "fields": {
        "account_id": {
          "expression": "r.col_account_id",
          "type": "string"
        },
        "eod": {
          "expression": "r.col_eod",
          "type": "string"
        },
        "holding_json": {
          "expression": "fmt.Sprintf(`{\"d\":\"%s\",\"t\":\"%s\",\"q\":%d}`, r.col_eod, r.col_ticker, r.col_qty)",
          "type": "string"
        }
      },
      "indexes": {
        "idx_period_holdings_account_id": "non_unique(account_id)"
      }
    }
  }
}

Script node 1_read_period_holdings produces Cassandra table period_holdings:

rowidaccount_idbatch_idxeodholding_json
523927006809746992ARKW02022-03-31{"d":"2022-03-31","t":"PLTR","q":50}
7964715912605789246ARKW02022-06-30{"d":"2022-06-30","t":"VCYT","q":962271}
7321126894636851304ARKK02022-03-31{"d":"2022-03-31","t":"IRDM","q":913}
159541754259824109ARKK02021-06-30{"d":"2021-06-30","t":"PACB","q":6470871}
3147847123612473758ARKG02021-06-30{"d":"2021-06-30","t":"TXG","q":1114540}
6088195109262281354ARKW02021-03-31{"d":"2021-03-31","t":"SNAP","q":1293117}
3371442508833222747ARKQ02021-06-30{"d":"2021-06-30","t":"ADSK","q":119}
8119438770895972412ARKF02022-12-31{"d":"2022-12-31","t":"3690","q":0}
760021395079726942ARKF02021-12-31{"d":"2021-12-31","t":"6060","q":6081}
464401136269549803ARKG02022-12-31{"d":"2022-12-31","t":"SYRS","q":0}

Total 3914 rows

2_account_txns_outer

Script node 2_account_txns_outer:

{
  "2_account_txns_outer": {
    "type": "table_lookup_table",
    "desc": "For each account, merge all txns into single json string",
    "r": {
      "table": "accounts",
      "expected_batches_total": 10
    },
    "l": {
      "index_name": "idx_txns_account_id",
      "join_on": "r.account_id",
      "group": true,
      "join_type": "left"
    },
    "w": {
      "name": "account_txns",
      "fields": {
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "txns_json": {
          "expression": "string_agg(l.txn_json,\",\")",
          "type": "string"
        }
      }
    }
  }
}

Script node 2_account_txns_outer produces Cassandra table account_txns:

rowidaccount_idbatch_idxtxns_json
7337640464596222089ARKX6{"ts":"2021-04-23","t":"AIR","q":-286,"p":119.13},{"ts":"2021-07-20","t":"AVAV","q":8887,"p":96.85}, ... total length 181595
3719903644314909584ARKF5{"ts":"2022-02-04","t":"Z","q":-2835,"p":48.94},{"ts":"2022-12-31","t":"IPOB","q":-872035,"p":29.5}, ... total length 481037
7773052335557492403ARKG5{"ts":"2022-05-10","t":"PSNL","q":86800,"p":4.4},{"ts":"2022-01-21","t":"IOVA","q":-15248,"p":13.96} ... total length 1084244
2948673979393982941ARKW5{"ts":"2021-05-28","t":"NFLX","q":-465,"p":502.81},{"ts":"2021-04-30","t":"PYPL","q":-34800,"p":262. ... total length 714787
4577965328217475423ARKK0{"ts":"2021-03-04","t":"SQ","q":266809,"p":218.41},{"ts":"2021-03-11","t":"SQ","q":120492,"p":241.72 ... total length 1011871
6410928427592534468ARKQ3{"ts":"2022-06-15","t":"AVAV","q":-38717,"p":83.61},{"ts":"2022-05-11","t":"SNPS","q":-117,"p":260.8 ... total length 563105

Total 6 rows

2_account_period_holdings_outer

Script node 2_account_period_holdings_outer:

{
  "2_account_period_holdings_outer": {
    "type": "table_lookup_table",
    "desc": "For each account, merge all holdings into single json string",
    "r": {
      "table": "accounts",
      "expected_batches_total": 10
    },
    "l": {
      "index_name": "idx_period_holdings_account_id",
      "join_on": "r.account_id",
      "group": true,
      "join_type": "left"
    },
    "w": {
      "name": "account_period_holdings",
      "fields": {
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "holdings_json": {
          "expression": "string_agg(l.holding_json,\",\")",
          "type": "string"
        }
      },
      "indexes": {
        "idx_account_period_holdings_account_id": "unique(account_id)"
      }
    }
  }
}

Script node 2_account_period_holdings_outer produces Cassandra table account_period_holdings:

rowidaccount_idbatch_idxholdings_json
3798263933557717806ARKF5{"d":"2022-06-30","t":"ROKU","q":0},{"d":"2022-06-30","t":"BABA","q":200},{"d":"2021-06-30","t":"HDB ... total length 25305
2297870030956086858ARKK0{"d":"2022-12-31","t":"PATH","q":0},{"d":"2022-12-31","t":"MTLS","q":0},{"d":"2021-03-31","t":"COIN" ... total length 28246
6490459023829990771ARKX6{"d":"2021-06-30","t":"XLNX","q":17088},{"d":"2022-03-31","t":"GOOG","q":2899},{"d":"2021-06-30","t" ... total length 17095
7126461419439144862ARKW5{"d":"2022-03-31","t":"FTCH","q":101},{"d":"2021-06-30","t":"JD","q":1452974},{"d":"2022-03-31","t": ... total length 29139
5291018643877674808ARKQ3{"d":"2021-12-31","t":"ROK","q":1248},{"d":"2022-12-31","t":"WKHS","q":0},{"d":"2022-06-30","t":"ISR ... total length 23268
3073048863807356155ARKG5{"d":"2022-03-31","t":"CLLS","q":606},{"d":"2021-03-31","t":"PRME","q":0},{"d":"2020-12-31","t":"PHR ... total length 30734

Total 6 rows

3_build_account_period_activity

Script node 3_build_account_period_activity:

{
  "3_build_account_period_activity": {
    "type": "table_lookup_table",
    "desc": "For each account, merge holdings and txns",
    "r": {
      "table": "account_txns",
      "expected_batches_total": 10
    },
    "l": {
      "index_name": "idx_account_period_holdings_account_id",
      "join_on": "r.account_id",
      "group": false,
      "join_type": "left"
    },
    "w": {
      "name": "account_period_activity",
      "fields": {
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "txns_json": {
          "expression": " \"[\" + r.txns_json + \"]\" ",
          "type": "string"
        },
        "holdings_json": {
          "expression": " \"[\" + l.holdings_json + \"]\" ",
          "type": "string"
        }
      }
    }
  }
}

Script node 3_build_account_period_activity produces Cassandra table account_period_activity:

rowidaccount_idbatch_idxholdings_jsontxns_json
2832510643465443886ARKW6[{"d":"2022-03-31","t":"FTCH","q":101},{"d":"2021-06-30","t":"JD","q":1452974},{"d":"2022-03-31","t" ... total length 29141[{"ts":"2021-05-28","t":"NFLX","q":-465,"p":502.81},{"ts":"2021-04-30","t":"PYPL","q":-34800,"p":262 ... total length 714789
8724188995369700700ARKQ9[{"d":"2021-12-31","t":"ROK","q":1248},{"d":"2022-12-31","t":"WKHS","q":0},{"d":"2022-06-30","t":"IS ... total length 23270[{"ts":"2022-06-15","t":"AVAV","q":-38717,"p":83.61},{"ts":"2022-05-11","t":"SNPS","q":-117,"p":260. ... total length 563107
3797016840160679147ARKX1[{"d":"2021-06-30","t":"XLNX","q":17088},{"d":"2022-03-31","t":"GOOG","q":2899},{"d":"2021-06-30","t ... total length 17097[{"ts":"2021-04-23","t":"AIR","q":-286,"p":119.13},{"ts":"2021-07-20","t":"AVAV","q":8887,"p":96.85} ... total length 181597
6149240839756633886ARKK4[{"d":"2022-12-31","t":"PATH","q":0},{"d":"2022-12-31","t":"MTLS","q":0},{"d":"2021-03-31","t":"COIN ... total length 28248[{"ts":"2021-03-04","t":"SQ","q":266809,"p":218.41},{"ts":"2021-03-11","t":"SQ","q":120492,"p":241.7 ... total length 1011873
1335953350070866378ARKF2[{"d":"2022-06-30","t":"ROKU","q":0},{"d":"2022-06-30","t":"BABA","q":200},{"d":"2021-06-30","t":"HD ... total length 25307[{"ts":"2022-02-04","t":"Z","q":-2835,"p":48.94},{"ts":"2022-12-31","t":"IPOB","q":-872035,"p":29.5} ... total length 481039
1945457872909272069ARKG8[{"d":"2022-03-31","t":"CLLS","q":606},{"d":"2021-03-31","t":"PRME","q":0},{"d":"2020-12-31","t":"PH ... total length 30736[{"ts":"2022-05-10","t":"PSNL","q":86800,"p":4.4},{"ts":"2022-01-21","t":"IOVA","q":-15248,"p":13.96 ... total length 1084246

Total 6 rows

4_calc_account_period_perf

Script node 4_calc_account_period_perf:

{
  "4_calc_account_period_perf": {
    "type": "table_custom_tfm_table",
    "custom_proc_type": "py_calc",
    "desc": "Apply Python-based calculations to account holdings and txns",
    "r": {
      "table": "account_period_activity",
      "expected_batches_total": 10
    },
    "p": {
      "python_code_urls": [
        "{dir_py}/portfolio_test_company_info_provider.py",
        "{dir_py}/portfolio_test_eod_price_provider.py",
        "{dir_py}/portfolio_calc.py"
      ],
      "calculated_fields": {
        "perf_json": {
          "expression": "txns_and_holdings_to_twr_cagr_by_sector_year_quarter_json(\"{period_start_eod}\", \"{period_end_eod}\", r.holdings_json, r.txns_json, PortfolioTestEodPriceProvider, PortfolioTestCompanyInfoProvider)",
          "type": "string"
        }
      }
    },
    "w": {
      "name": "account_period_perf",
      "fields": {
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "perf_json": {
          "expression": "p.perf_json",
          "type": "string"
        }
      }
    }
  }
}

Script node 4_calc_account_period_perf produces Cassandra table account_period_perf:

rowidaccount_idbatch_idxperf_json
6887364451659257268ARKF6{"2021": {"All": {"cagr": -0.1912, "twr": -0.1912}, "Communication Services": {"cagr": -0.4067, "twr ... total length 4679
5754198081589614465ARKK6{"2021": {"All": {"cagr": -0.2398, "twr": -0.2398}, "Communication Services": {"cagr": -0.3183, "twr ... total length 4731
32049402890064872ARKW3{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 4758
4248855088935145245ARKQ9{"2021": {"All": {"cagr": -0.0152, "twr": -0.0152}, "Communication Services": {"cagr": 0.062, "twr": ... total length 4674
8662263145276984767ARKX9{"2021": {"All": {"cagr": -0.1055, "twr": -0.1055}, "Communication Services": {"cagr": 0.1719, "twr" ... total length 4544
1210483319197080237ARKG5{"2021": {"All": {"cagr": -0.3384, "twr": -0.3384}, "Communication Services": {"cagr": 0.1666, "twr" ... total length 4533

Total 6 rows

5_tag_by_period

Script node 5_tag_by_period:

{
  "5_tag_by_period": {
    "type": "table_custom_tfm_table",
    "custom_proc_type": "tag_and_denormalize",
    "desc": "Tag accounts by period name",
    "r": {
      "table": "account_period_perf",
      "expected_batches_total": 10
    },
    "p": {
      "tag_field_name": "period",
      "tag_criteria": {
        "2021": "re.MatchString(`\"2021\":`, r.perf_json)",
        "2021Q1": "re.MatchString(`\"2021Q1\":`, r.perf_json)",
        "2021Q2": "re.MatchString(`\"2021Q2\":`, r.perf_json)",
        "2021Q3": "re.MatchString(`\"2021Q3\":`, r.perf_json)",
        "2021Q4": "re.MatchString(`\"2021Q4\":`, r.perf_json)",
        "2022": "re.MatchString(`\"2022\":`, r.perf_json)",
        "2022Q1": "re.MatchString(`\"2022Q1\":`, r.perf_json)",
        "2022Q2": "re.MatchString(`\"2022Q2\":`, r.perf_json)",
        "2022Q3": "re.MatchString(`\"2022Q3\":`, r.perf_json)",
        "2022Q4": "re.MatchString(`\"2022Q4\":`, r.perf_json)"
      }
    },
    "w": {
      "name": "account_period_perf_by_period",
      "fields": {
        "period": {
          "expression": "p.period",
          "type": "string"
        },
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "perf_json": {
          "expression": "r.perf_json",
          "type": "string"
        }
      }
    }
  }
}

Script node 5_tag_by_period produces Cassandra table account_period_perf_by_period:

rowidaccount_idbatch_idxperf_jsonperiod
3812613906073144497ARKK7{"2021": {"All": {"cagr": -0.2398, "twr": -0.2398}, "Communication Services": {"cagr": -0.3183, "twr ... total length 47312022Q4
7276369387634914856ARKX4{"2021": {"All": {"cagr": -0.1055, "twr": -0.1055}, "Communication Services": {"cagr": 0.1719, "twr" ... total length 45442021Q3
6738115215161291018ARKW0{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 47582022Q2
7605309852895153944ARKG6{"2021": {"All": {"cagr": -0.3384, "twr": -0.3384}, "Communication Services": {"cagr": 0.1666, "twr" ... total length 45332021Q1
1922480560878025461ARKF2{"2021": {"All": {"cagr": -0.1912, "twr": -0.1912}, "Communication Services": {"cagr": -0.4067, "twr ... total length 46792022Q3
7092756260584728207ARKK7{"2021": {"All": {"cagr": -0.2398, "twr": -0.2398}, "Communication Services": {"cagr": -0.3183, "twr ... total length 47312021
4467122228499360750ARKG6{"2021": {"All": {"cagr": -0.3384, "twr": -0.3384}, "Communication Services": {"cagr": 0.1666, "twr" ... total length 45332021
6162316215288009881ARKX4{"2021": {"All": {"cagr": -0.1055, "twr": -0.1055}, "Communication Services": {"cagr": 0.1719, "twr" ... total length 45442021Q2
1759821806393733334ARKF2{"2021": {"All": {"cagr": -0.1912, "twr": -0.1912}, "Communication Services": {"cagr": -0.4067, "twr ... total length 46792021Q2
3848238608544572988ARKW0{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 47582022Q4

Total 60 rows

5_tag_by_sector

Script node 5_tag_by_sector:

{
  "5_tag_by_sector": {
    "type": "table_custom_tfm_table",
    "custom_proc_type": "tag_and_denormalize",
    "desc": "Tag accounts by sector",
    "r": {
      "table": "account_period_perf_by_period",
      "expected_batches_total": 10
    },
    "p": {
      "tag_field_name": "sector",
      "tag_criteria": {
        "All": "re.MatchString(`\"All\":`, r.perf_json)",
        "Communication Services": "re.MatchString(`\"Communication Services\":`, r.perf_json)",
        "Consumer Cyclical": "re.MatchString(`\"Consumer Cyclical\":`, r.perf_json)",
        "Consumer Defensive": "re.MatchString(`\"Consumer Defensive\":`, r.perf_json)",
        "Financial Services": "re.MatchString(`\"Financial Services\":`, r.perf_json)",
        "Healthcare": "re.MatchString(`\"Healthcare\":`, r.perf_json)",
        "Industrials": "re.MatchString(`\"Industrials\":`, r.perf_json)",
        "Real Estate": "re.MatchString(`\"Real Estate\":`, r.perf_json)",
        "Technology": "re.MatchString(`\"Technology\":`, r.perf_json)"
      }
    },
    "w": {
      "name": "account_period_perf_by_period_sector",
      "fields": {
        "period": {
          "expression": "r.period",
          "type": "string"
        },
        "sector": {
          "expression": "p.sector",
          "type": "string"
        },
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "perf_json": {
          "expression": "r.perf_json",
          "type": "string"
        }
      }
    }
  }
}

Script node 5_tag_by_sector produces Cassandra table account_period_perf_by_period_sector:

rowidaccount_idbatch_idxperf_jsonperiodsector
4880353688536716330ARKX7{"2021": {"All": {"cagr": -0.1055, "twr": -0.1055}, "Communication Services": {"cagr": 0.1719, "twr" ... total length 45442022Q3All
5966907642021688353ARKG2{"2021": {"All": {"cagr": -0.3384, "twr": -0.3384}, "Communication Services": {"cagr": 0.1666, "twr" ... total length 45332021Q2Communication Services
216345607086265348ARKW5{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 47582021Q3Financial Services
444465478572115389ARKW9{"2021": {"All": {"cagr": -0.1949, "twr": -0.1949}, "Communication Services": {"cagr": -0.293, "twr" ... total length 47582022Q1Industrials
4655715813499361957ARKK9{"2021": {"All": {"cagr": -0.2398, "twr": -0.2398}, "Communication Services": {"cagr": -0.3183, "twr ... total length 47312022Q1Financial Services
3105771363861377355ARKQ6{"2021": {"All": {"cagr": -0.0152, "twr": -0.0152}, "Communication Services": {"cagr": 0.062, "twr": ... total length 46742021Q1Consumer Defensive
1473264958443625609ARKK2{"2021": {"All": {"cagr": -0.2398, "twr": -0.2398}, "Communication Services": {"cagr": -0.3183, "twr ... total length 47312022Q4Technology
4441230069590019727ARKX0{"2021": {"All": {"cagr": -0.1055, "twr": -0.1055}, "Communication Services": {"cagr": 0.1719, "twr" ... total length 45442021Q2Communication Services
347198857808667031ARKG3{"2021": {"All": {"cagr": -0.3384, "twr": -0.3384}, "Communication Services": {"cagr": 0.1666, "twr" ... total length 45332022Q3All
5194216653932103424ARKK5{"2021": {"All": {"cagr": -0.2398, "twr": -0.2398}, "Communication Services": {"cagr": -0.3183, "twr ... total length 47312022Q2Healthcare

Total 540 rows

6_perf_json_to_columns

Script node 6_perf_json_to_columns:

{
  "6_perf_json_to_columns": {
    "type": "table_custom_tfm_table",
    "custom_proc_type": "py_calc",
    "desc": "Use Python to read perf json and save stats as columns",
    "r": {
      "table": "account_period_perf_by_period_sector",
      "expected_batches_total": 100
    },
    "p": {
      "python_code_urls": [
        "{dir_py}/json_to_columns.py"
      ],
      "calculated_fields": {
        "twr": {
          "expression": "json_to_twr(r.perf_json, r.period, r.sector)",
          "type": "float"
        },
        "cagr": {
          "expression": "json_to_cagr(r.perf_json, r.period, r.sector)",
          "type": "float"
        }
      }
    },
    "w": {
      "name": "account_period_sector_twr_cagr",
      "fields": {
        "account_id": {
          "expression": "r.account_id",
          "type": "string"
        },
        "period": {
          "expression": "r.period",
          "type": "string"
        },
        "sector": {
          "expression": "r.sector",
          "type": "string"
        },
        "twr": {
          "expression": "p.twr",
          "type": "float"
        },
        "cagr": {
          "expression": "p.cagr",
          "type": "float"
        }
      }
    }
  }
}

Script node 6_perf_json_to_columns produces Cassandra table account_period_sector_twr_cagr:

rowidaccount_idbatch_idxcagrperiodsectortwr
2051940333335580608ARKF30-42.32022Q4Communication Services-12.94
467099065024923780ARKG16-66.062022Q1All-23.39
4793009141456092103ARKK42-85.252022Q2Communication Services-37.95
1668642860725665609ARKG50-2.492022Q2Communication Services-0.63
4172230767046282068ARKF63-69.222021Q4Communication Services-25.69
3320490046305540588ARKQ91-32.862021Q1Healthcare-9.36
6384236349569128686ARKW3722.252022Q4Healthcare5.19
3902626804003825458ARKG32-1.752022Q2Financial Services-0.44
2059833829580478020ARKQ371.732021Technology1.73
4830284743586419631ARKQ12-73.122022Q3Consumer Cyclical-28.19

Total 540 rows

7_file_account_period_sector_perf

Script node 7_file_account_period_sector_perf:

{
  "7_file_account_period_sector_perf": {
    "type": "table_file",
    "desc": "Write yearly/quarterly perf results by sector to CSV file",
    "r": {
      "table": "account_period_sector_twr_cagr"
    },
    "w": {
      "top": {
        "order": "account_id,period,sector"
      },
      "url_template": "{dir_out}/account_period_sector_perf.csv",
      "columns": [
        {
          "csv": {
            "header": "ARK fund",
            "format": "%s"
          },
          "name": "account_id",
          "expression": "r.account_id",
          "type": "string"
        },
        {
          "csv": {
            "header": "Period",
            "format": "%s"
          },
          "name": "period",
          "expression": "r.period",
          "type": "string"
        },
        {
          "csv": {
            "header": "Sector",
            "format": "%s"
          },
          "name": "sector",
          "expression": "r.sector",
          "type": "string"
        },
        {
          "csv": {
            "header": "Time-weighted annualized return %",
            "format": "%.2f"
          },
          "name": "cagr",
          "expression": "r.cagr",
          "type": "float"
        }
      ]
    }
  }
}

Script node 7_file_account_period_sector_perf produces data file:

ARK fundPeriodSectorTime-weighted annualized return %
ARKF2021All-19.12
ARKF2021Communication Services-40.67
ARKF2021Consumer Cyclical-22.28
ARKF2021Consumer Defensive0.00
ARKF2021Financial Services16.53
ARKF2021Healthcare-55.78
ARKF2021Industrials-14.41
ARKF2021Real Estate-44.99
ARKF2021Technology-17.33
ARKF2021Q1All176.50

Total 540 rows

7_file_account_year_perf

Script node 7_file_account_year_perf:

{
  "7_file_account_year_perf": {
    "type": "table_file",
    "desc": "Write yearly perf results for all sectors to CSV file",
    "r": {
      "table": "account_period_sector_twr_cagr"
    },
    "w": {
      "top": {
        "order": "account_id,period"
      },
      "having": "len(w.period) == 4 && w.sector == \"All\"",
      "url_template": "{dir_out}/account_year_perf.csv",
      "columns": [
        {
          "csv": {
            "header": "ARK fund",
            "format": "%s"
          },
          "name": "account_id",
          "expression": "r.account_id",
          "type": "string"
        },
        {
          "csv": {
            "header": "Period",
            "format": "%s"
          },
          "name": "period",
          "expression": "r.period",
          "type": "string"
        },
        {
          "csv": {
            "header": "Sector",
            "format": "%s"
          },
          "name": "sector",
          "expression": "r.sector",
          "type": "string"
        },
        {
          "csv": {
            "header": "Time-weighted annualized return %",
            "format": "%.2f"
          },
          "name": "cagr",
          "expression": "r.cagr",
          "type": "float"
        }
      ]
    }
  }
}

Script node 7_file_account_year_perf produces data file:

ARK fundPeriodSectorTime-weighted annualized return %
ARKF2021All-19.12
ARKF2022All-63.35
ARKG2021All-33.84
ARKG2022All-50.87
ARKK2021All-23.98
ARKK2022All-68.07
ARKQ2021All-1.52
ARKQ2022All-61.45
ARKW2021All-19.49
ARKW2022All-65.28

Total 12 rows