Skip to content

Top Brokerage Market Share

Assignment

Fetch the brokerage market-share data for the latest period from the Ho Chi Minh Stock Exchange (HOSE), then present it as an ordered column chart.

Output:

The output of the assignment

Techstack

Python, with the following packages:

  • httpx: HTTP client to fetch data from the API of the exchange

  • polars: DataFrame library that holds the fetched dataset as a 2D table

  • plotnine: an implementation of the grammar of graphics in Python, based on ggplot2

Diagram

---
title: Flow to develop the market share image
---

flowchart LR

  %% component
  fetch[Fetch dataset using API] --> build_image[Construct chart using plotnine] --> save[Save output into targeted folder]

Step

Step 1: Declare the related required package

requirements.txt

Step 2: Install the dependencies in a virtual environment

python -m venv venv
source venv/Scripts/activate  # Linux/macOS: source venv/bin/activate
python -m pip install -r requirements.txt --default-timeout 100

Step 3: Build the script

  • Declare a function that fetches the data from HOSE; the request URL was obtained by reverse-engineering the calls the website itself makes.
Function: Get the market share dataset
def fetch_market_share() -> pl.DataFrame:
    """Download the top-brokerage market-share table from the HOSE website.

    The request URL was captured from the site's own traffic. It pins every
    report filter (``ReportOption``, ``Year1``..``Year4``, ``Quarter``,
    ``Month``) to a fixed value, so each call asks for the same report.

    Returns:
        A DataFrame with the columns ``id``, ``security_abbr``,
        ``security_name``, ``market_share`` (cast to ``Float64``) and
        ``period``; one row per ``cell`` entry found in the response pages.

    Raises:
        httpx.HTTPStatusError: If any page request comes back with an error
            status.
    """
    url = (
        "https://www.hsx.vn"
        "/Modules/StockMember/Web/Top10SymbolMember"
        "?"
        "pageFieldName1=ReportOption"
        "&pageFieldValue1=1"
        "&pageFieldOperator1=eq"
        "&pageFieldName2=Year1"
        "&pageFieldValue2=2024"
        "&pageFieldOperator2=eq"
        "&pageFieldName3=Year4"
        "&pageFieldValue3=2024"
        "&pageFieldOperator3=eq"
        "&pageFieldName4=Quarter"
        "&pageFieldValue4=3"
        "&pageFieldOperator4=eq"
        "&pageFieldName5=Year2"
        "&pageFieldValue5=2023"
        "&pageFieldOperator5=eq"
        "&pageFieldName6=Month"
        "&pageFieldValue6=1"
        "&pageFieldOperator6=eq"
        "&pageFieldName7=Year3"
        "&pageFieldValue7=2024"
        "&pageFieldOperator7=eq"
        "&pageCriteriaLength=7"
        "&_search=false"
        "&nd=1711300078466"
        "&rows=2147483647"
        "&page=1"
        "&sidx=id"
        "&sord=desc"
    )
    headers = {"Accept": "application/json, text/javascript, */*; q=0.01"}

    # Metadata: column names, in the order the values appear inside each "cell"
    schema = ["id", "security_abbr", "security_name", "market_share", "period"]

    def _cells(payload: dict) -> list:
        # Each entry of "rows" carries the actual record under its "cell" key.
        return [row.get("cell") for row in payload.get("rows") or []]

    # Get
    records = []
    # NOTE: timeout=None disables httpx's timeouts, so a stalled server blocks
    # this call indefinitely.
    with httpx.Client(timeout=None) as sess:
        first_resp = sess.get(url=url, headers=headers)
        first_resp.raise_for_status()
        first_payload: dict = first_resp.json()
        total_page = int(first_payload.get("total"))

        # Page 1 is already in hand: reuse it instead of downloading it again.
        records.extend(_cells(first_payload))

        for page_no in range(2, total_page + 1):
            resp = sess.get(url.replace("&page=1", f"&page={page_no}"), headers=headers)
            # Fail loudly on an error page rather than parsing it as data.
            resp.raise_for_status()
            records.extend(_cells(resp.json()))

    # Cleaning
    # orient="row" states that every record is one row; without it polars has
    # to guess the orientation from the dimensions of the data.
    frame = pl.DataFrame(data=records, schema=schema, orient="row")

    # market_share uses a decimal comma; swap it for a dot with a native string
    # expression (no per-element Python callback) before casting to float.
    return frame.with_columns(
        pl.col("market_share")
        .cast(pl.Utf8)
        .str.replace_all(",", ".", literal=True)
        .cast(pl.Float64)
    )
  • Fetch the dataset

This returns a polars.DataFrame.

Fetch dataset
    # Fetch the market-share table from the exchange as a polars DataFrame
    MARKET_SHARE = fetch_market_share()
>>> with pl.Config(tbl_rows=-1, tbl_cols=-1, fmt_str_lengths=200):
...     print(MARKET_SHARE)
...
# shape: (10, 5)
# ┌─────┬───────────────┬─────────────────────────────────────────────────────────────────────────────┬──────────────┬─────────────┐
# │ id  ┆ security_abbr ┆ security_name                                                               ┆ market_share ┆ period      │
# │ --- ┆ ---           ┆ ---                                                                         ┆ ---          ┆ ---         │
# │ i64 ┆ str           ┆ str                                                                         ┆ f64          ┆ str         │
# ╞═════╪═══════════════╪═════════════════════════════════════════════════════════════════════════════╪══════════════╪═════════════╡
# │ 74  ┆ VPS           ┆ Công ty Cổ phần Chứng khoán VPS                                             ┆ 19.92        ┆ Quý 03.2023 │
# │ 92  ┆ SSI           ┆ Công ty Cổ phần Chứng khoán SSI                                             ┆ 10.59        ┆ Quý 03.2023 │
# │ 76  ┆ VNDS          ┆ Công ty Cổ phần Chứng khoán VNDIRECT                                        ┆ 7.21         ┆ Quý 03.2023 │
# │ 101 ┆ TCBS          ┆ Công ty Cổ phần Chứng khoán Kỹ Thương                                       ┆ 6.8          ┆ Quý 03.2023 │
# │ 90  ┆ MBS           ┆ Công ty Cổ phần Chứng khoán MB                                              ┆ 5.09         ┆ Quý 03.2023 │
# │ 84  ┆ HSC           ┆ Công ty Cổ phần Chứng khoán TP. Hồ Chí Minh                                 ┆ 5.06         ┆ Quý 03.2023 │
# │ 28  ┆ MAS           ┆ Công ty Cổ phần Chứng khoán Mirae Asset (Việt Nam)                          ┆ 4.71         ┆ Quý 03.2023 │
# │ 37  ┆ Vietcap       ┆ Công ty Cổ phần Chứng khoán Vietcap                                         ┆ 4.0          ┆ Quý 03.2023 │
# │ 48  ┆ KIS           ┆ Công ty Cổ phần Chứng khoán KIS Việt Nam                                    ┆ 3.34         ┆ Quý 03.2023 │
# │ 86  ┆ VCBS          ┆ Công ty TNHH Chứng khoán Ngân hàng Thương mại Cổ phần Ngoại Thương Việt Nam ┆ 3.04         ┆ Quý 03.2023 │
# └─────┴───────────────┴─────────────────────────────────────────────────────────────────────────────┴──────────────┴─────────────┘
  • Get the metadata from the dataset
Get the metadata from dataset
    # Reporting period of the dataset.
    # `.item()` only succeeds when the "period" column holds exactly one distinct
    # label (e.g. "Quý 03.2023"); the Vietnamese "Quý" is then swapped for "Quarter".
    UPDATED_PERIOD: str = (
        MARKET_SHARE.get_column("period")
        .unique()
        .item()
        .replace("Quý", "Quarter")
        .strip()
    )
  • Construct Chart component using plotnine
Build chart element
    # Brokers on one axis (ordered by their share), share on the other; the share
    # value doubles as the text of each label.
    axis_mapping = p9.aes(
        "reorder(security_abbr, market_share)",
        "market_share",
        label="market_share",
    )

    # Axis titles, chart title and source caption (stamped with today's date)
    as_of = date.today().strftime('%Y-%m-%d')
    chart_labels = p9.labs(
        x="Security Participant",
        y="Market Share (%)",
        title=f"Top brokerage market share in {UPDATED_PERIOD}",
        caption=f"Source: Ho Chi Minh Stock Exchange. As of {as_of}",
    )

    # Base theme first, then the overrides, so the overrides win
    base_theme = p9.theme_538(base_size=11, base_family="DejaVu Sans")
    theme_overrides = p9.theme(
        plot_title=p9.element_text(weight="bold", ha="left", size=14),
        plot_caption=p9.element_text(ha="left", color="grey"),
    )

    plot = (
        p9.ggplot(MARKET_SHARE)
        + axis_mapping
        + p9.geom_col(size=15, show_legend=True)
        + p9.geom_label(label_size=0.2)
        + p9.scale_y_continuous(limits=(0, 50))
        + p9.coord_flip(expand=True)  # flip the axes so the bars run horizontally
        + chart_labels
        + base_theme
        + theme_overrides
    )
  • Save the output
Save the output to a file
    # Prepare the output folder (created on first run, reused afterwards)
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)

    # The file name carries the period: lower-cased, spaces turned into underscores
    period_slug = UPDATED_PERIOD.lower().replace(' ', '_')

    # Save a 12in x 9in PNG at 200 dpi
    plot.save(
        filename=f"market_share_at_{period_slug}.png",
        path=output_dir,
        format="png",
        width=12,
        height=9,
        units="in",
        dpi=200,
        limitsize=False,
        verbose=True,
    )

Step 4: Run the script

python main.py

This writes the chart image to the output folder.

Output image

Further reading

//

Full script

main.py
#!/bin/python3

# Global
import os
from datetime import date

# External
import httpx
import polars as pl
import plotnine as p9



def fetch_market_share() -> pl.DataFrame:
    """Download the top-brokerage market-share table from the HOSE website.

    The request URL was captured from the site's own traffic. It pins every
    report filter (``ReportOption``, ``Year1``..``Year4``, ``Quarter``,
    ``Month``) to a fixed value, so each call asks for the same report.

    Returns:
        A DataFrame with the columns ``id``, ``security_abbr``,
        ``security_name``, ``market_share`` (cast to ``Float64``) and
        ``period``; one row per ``cell`` entry found in the response pages.

    Raises:
        httpx.HTTPStatusError: If any page request comes back with an error
            status.
    """
    url = (
        "https://www.hsx.vn"
        "/Modules/StockMember/Web/Top10SymbolMember"
        "?"
        "pageFieldName1=ReportOption"
        "&pageFieldValue1=1"
        "&pageFieldOperator1=eq"
        "&pageFieldName2=Year1"
        "&pageFieldValue2=2024"
        "&pageFieldOperator2=eq"
        "&pageFieldName3=Year4"
        "&pageFieldValue3=2024"
        "&pageFieldOperator3=eq"
        "&pageFieldName4=Quarter"
        "&pageFieldValue4=3"
        "&pageFieldOperator4=eq"
        "&pageFieldName5=Year2"
        "&pageFieldValue5=2023"
        "&pageFieldOperator5=eq"
        "&pageFieldName6=Month"
        "&pageFieldValue6=1"
        "&pageFieldOperator6=eq"
        "&pageFieldName7=Year3"
        "&pageFieldValue7=2024"
        "&pageFieldOperator7=eq"
        "&pageCriteriaLength=7"
        "&_search=false"
        "&nd=1711300078466"
        "&rows=2147483647"
        "&page=1"
        "&sidx=id"
        "&sord=desc"
    )
    headers = {"Accept": "application/json, text/javascript, */*; q=0.01"}

    # Metadata: column names, in the order the values appear inside each "cell"
    schema = ["id", "security_abbr", "security_name", "market_share", "period"]

    def _cells(payload: dict) -> list:
        # Each entry of "rows" carries the actual record under its "cell" key.
        return [row.get("cell") for row in payload.get("rows") or []]

    # Get
    records = []
    # NOTE: timeout=None disables httpx's timeouts, so a stalled server blocks
    # this call indefinitely.
    with httpx.Client(timeout=None) as sess:
        first_resp = sess.get(url=url, headers=headers)
        first_resp.raise_for_status()
        first_payload: dict = first_resp.json()
        total_page = int(first_payload.get("total"))

        # Page 1 is already in hand: reuse it instead of downloading it again.
        records.extend(_cells(first_payload))

        for page_no in range(2, total_page + 1):
            resp = sess.get(url.replace("&page=1", f"&page={page_no}"), headers=headers)
            # Fail loudly on an error page rather than parsing it as data.
            resp.raise_for_status()
            records.extend(_cells(resp.json()))

    # Cleaning
    # orient="row" states that every record is one row; without it polars has
    # to guess the orientation from the dimensions of the data.
    frame = pl.DataFrame(data=records, schema=schema, orient="row")

    # market_share uses a decimal comma; swap it for a dot with a native string
    # expression (no per-element Python callback) before casting to float.
    return frame.with_columns(
        pl.col("market_share")
        .cast(pl.Utf8)
        .str.replace_all(",", ".", literal=True)
        .cast(pl.Float64)
    )



if __name__ == "__main__":
    # Fetch the market-share table from the exchange as a polars DataFrame
    MARKET_SHARE = fetch_market_share()

    # Reporting period of the dataset.
    # `.item()` only succeeds when the "period" column holds exactly one distinct
    # label (e.g. "Quý 03.2023"); the Vietnamese "Quý" is then swapped for "Quarter".
    UPDATED_PERIOD: str = (
        MARKET_SHARE.get_column("period")
        .unique()
        .item()
        .replace("Quý", "Quarter")
        .strip()
    )

    # Brokers on one axis (ordered by their share), share on the other; the share
    # value doubles as the text of each label.
    axis_mapping = p9.aes(
        "reorder(security_abbr, market_share)",
        "market_share",
        label="market_share",
    )

    # Axis titles, chart title and source caption (stamped with today's date)
    as_of = date.today().strftime('%Y-%m-%d')
    chart_labels = p9.labs(
        x="Security Participant",
        y="Market Share (%)",
        title=f"Top brokerage market share in {UPDATED_PERIOD}",
        caption=f"Source: Ho Chi Minh Stock Exchange. As of {as_of}",
    )

    # Base theme first, then the overrides, so the overrides win
    base_theme = p9.theme_538(base_size=11, base_family="DejaVu Sans")
    theme_overrides = p9.theme(
        plot_title=p9.element_text(weight="bold", ha="left", size=14),
        plot_caption=p9.element_text(ha="left", color="grey"),
    )

    plot = (
        p9.ggplot(MARKET_SHARE)
        + axis_mapping
        + p9.geom_col(size=15, show_legend=True)
        + p9.geom_label(label_size=0.2)
        + p9.scale_y_continuous(limits=(0, 50))
        + p9.coord_flip(expand=True)  # flip the axes so the bars run horizontally
        + chart_labels
        + base_theme
        + theme_overrides
    )

    # Prepare the output folder (created on first run, reused afterwards)
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)

    # The file name carries the period: lower-cased, spaces turned into underscores
    period_slug = UPDATED_PERIOD.lower().replace(' ', '_')

    # Save a 12in x 9in PNG at 200 dpi
    plot.save(
        filename=f"market_share_at_{period_slug}.png",
        path=output_dir,
        format="png",
        width=12,
        height=9,
        units="in",
        dpi=200,
        limitsize=False,
        verbose=True,
    )