Top Brokerage Market Share¶
Assignment¶
Fetch the brokerage market-share data for the latest period from the Ho Chi Minh Stock Exchange, then present it as an ordered column chart.
Output:
Techstack¶
Python with the following packages:
-
httpx: HTTP client to fetch data from the API of the exchange
-
polars: DataFrame library that holds the fetched dataset
-
plotnine: is an implementation of a grammar of graphics in Python based on ggplot2
Diagram¶
---
title: Flow to develop the market share image
---
flowchart LR
%% component
fetch[Fetch dataset using API] --> build_image[Construct chart using plotnine] --> save[Save output into targeted folder]
Step¶
Step 1: Declare the required packages
Step 2: Install dependencies in a virtual environment
python -m venv venv
source venv/Scripts/activate  # on Linux/macOS: source venv/bin/activate
python -m pip install -r requirements.txt --default-timeout 100
Step 3: Build the script
- Declare a function that fetches the data from HOSE by calling an API endpoint reverse-engineered from the exchange's website.
Function: Get the market share dataset
def fetch_market_share() -> pl.DataFrame:
    """Fetch the brokerage market-share ranking from the HOSE website.

    Requests the ``Top10SymbolMember`` endpoint on www.hsx.vn page by page and
    collects the ``cell`` list of every returned row.

    NOTE(review): the report filters (years, quarter, month) are hard-coded in
    the query string below, so which period is returned depends on them.

    Returns:
        A DataFrame with the columns ``id``, ``security_abbr``,
        ``security_name``, ``market_share`` (cast to Float64) and ``period``.

    Raises:
        httpx.HTTPStatusError: If any page request answers with a 4xx/5xx
            status.
    """
    # ``{page}`` is filled in per request; every other parameter is fixed.
    url_template = (
        "https://www.hsx.vn"
        "/Modules/StockMember/Web/Top10SymbolMember"
        "?"
        "pageFieldName1=ReportOption"
        "&pageFieldValue1=1"
        "&pageFieldOperator1=eq"
        "&pageFieldName2=Year1"
        "&pageFieldValue2=2024"
        "&pageFieldOperator2=eq"
        "&pageFieldName3=Year4"
        "&pageFieldValue3=2024"
        "&pageFieldOperator3=eq"
        "&pageFieldName4=Quarter"
        "&pageFieldValue4=3"
        "&pageFieldOperator4=eq"
        "&pageFieldName5=Year2"
        "&pageFieldValue5=2023"
        "&pageFieldOperator5=eq"
        "&pageFieldName6=Month"
        "&pageFieldValue6=1"
        "&pageFieldOperator6=eq"
        "&pageFieldName7=Year3"
        "&pageFieldValue7=2024"
        "&pageFieldOperator7=eq"
        "&pageCriteriaLength=7"
        "&_search=false"
        "&nd=1711300078466"
        "&rows=2147483647"
        "&page={page}"
        "&sidx=id"
        "&sord=desc"
    )
    headers = {"Accept": "application/json, text/javascript, */*; q=0.01"}
    # Metadata: column names, in the order the values appear in each "cell".
    schema = ["id", "security_abbr", "security_name", "market_share", "period"]

    # Get
    records: list[list] = []
    # NOTE: timeout=None disables httpx timeouts, so a stalled server blocks
    # this call indefinitely.
    with httpx.Client(timeout=None) as sess:
        page, total_pages = 1, 1
        while page <= total_pages:
            resp = sess.get(url_template.format(page=page), headers=headers)
            # Fail loudly on HTTP errors for every page, not only the first.
            resp.raise_for_status()
            payload: dict = resp.json()
            if page == 1:
                # The first response reports the page count; its rows are
                # reused below instead of requesting page 1 a second time.
                total_pages = int(payload.get("total") or 1)
            records.extend(row.get("cell") for row in payload.get("rows") or [])
            page += 1

    # Cleaning: each record is one row, so state the orientation explicitly
    # instead of relying on inference.
    frame = pl.DataFrame(data=records, schema=schema, orient="row")
    # The share uses a decimal comma (the original code replaced "," with ".");
    # normalise it with a native string expression before casting to a float.
    return frame.with_columns(
        pl.col("market_share")
        .cast(pl.Utf8)
        .str.replace_all(",", ".", literal=True)
        .cast(pl.Float64)
    )
- Fetch the dataset
This returns a polars.DataFrame
output
>>> with pl.Config(tbl_rows=-1, tbl_cols=-1, fmt_str_lengths=200):
... print(MARKET_SHARE)
...
# shape: (10, 5)
# ┌─────┬───────────────┬─────────────────────────────────────────────────────────────────────────────┬──────────────┬─────────────┐
# │ id ┆ security_abbr ┆ security_name ┆ market_share ┆ period │
# │ --- ┆ --- ┆ --- ┆ --- ┆ --- │
# │ i64 ┆ str ┆ str ┆ f64 ┆ str │
# ╞═════╪═══════════════╪═════════════════════════════════════════════════════════════════════════════╪══════════════╪═════════════╡
# │ 74 ┆ VPS ┆ Công ty Cổ phần Chứng khoán VPS ┆ 19.92 ┆ Quý 03.2023 │
# │ 92 ┆ SSI ┆ Công ty Cổ phần Chứng khoán SSI ┆ 10.59 ┆ Quý 03.2023 │
# │ 76 ┆ VNDS ┆ Công ty Cổ phần Chứng khoán VNDIRECT ┆ 7.21 ┆ Quý 03.2023 │
# │ 101 ┆ TCBS ┆ Công ty Cổ phần Chứng khoán Kỹ Thương ┆ 6.8 ┆ Quý 03.2023 │
# │ 90 ┆ MBS ┆ Công ty Cổ phần Chứng khoán MB ┆ 5.09 ┆ Quý 03.2023 │
# │ 84 ┆ HSC ┆ Công ty Cổ phần Chứng khoán TP. Hồ Chí Minh ┆ 5.06 ┆ Quý 03.2023 │
# │ 28 ┆ MAS ┆ Công ty Cổ phần Chứng khoán Mirae Asset (Việt Nam) ┆ 4.71 ┆ Quý 03.2023 │
# │ 37 ┆ Vietcap ┆ Công ty Cổ phần Chứng khoán Vietcap ┆ 4.0 ┆ Quý 03.2023 │
# │ 48 ┆ KIS ┆ Công ty Cổ phần Chứng khoán KIS Việt Nam ┆ 3.34 ┆ Quý 03.2023 │
# │ 86 ┆ VCBS ┆ Công ty TNHH Chứng khoán Ngân hàng Thương mại Cổ phần Ngoại Thương Việt Nam ┆ 3.04 ┆ Quý 03.2023 │
# └─────┴───────────────┴─────────────────────────────────────────────────────────────────────────────┴──────────────┴─────────────┘
- Get the metadata from the dataset
Get the metadata from dataset
# Reporting period of the dataset, e.g. "Quý 03.2023" becomes "Quarter 03.2023".
# ``item()`` only succeeds when the column holds exactly one distinct value.
UPDATED_PERIOD: str = (
    MARKET_SHARE.get_column("period")
    .unique()
    .item()
    .replace("Quý", "Quarter")
    .strip()
)
- Construct Chart component using
plotnine
Build chart element
# Brokers (ordered by their share) against the share itself; the share value
# is also used as the text label.
mapping = p9.aes(
    x="reorder(security_abbr, market_share)",
    y="market_share",
    label="market_share",
)
# Axis titles, chart title and a caption stamped with today's date.
as_of = date.today().strftime("%Y-%m-%d")
titles = p9.labs(
    x="Security Participant",
    y="Market Share (%)",
    title=f"Top brokerage market share in {UPDATED_PERIOD}",
    caption=f"Source: Ho Chi Minh Stock Exchange. As of {as_of}",
)
# Tweaks applied on top of the 538 base theme.
text_overrides = p9.theme(
    plot_title=p9.element_text(weight="bold", ha="left", size=14),
    plot_caption=p9.element_text(ha="left", color="grey"),
)
# Components are added in the same order as before: layers, scale,
# coordinates, labels, base theme, then the theme overrides.
plot = (
    p9.ggplot(MARKET_SHARE)
    + mapping
    + p9.geom_col(size=15, show_legend=True)
    + p9.geom_label(label_size=0.2)
    + p9.scale_y_continuous(limits=(0, 50))
    + p9.coord_flip(expand=True)
    + titles
    + p9.theme_538(base_size=11, base_family="DejaVu Sans")
    + text_overrides
)
- Store the output
Save the output to a file
# Prepare the destination folder (no error if it already exists).
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)
# The file name embeds the period, lower-cased with spaces turned into "_".
period_slug = UPDATED_PERIOD.lower().replace(" ", "_")
# Save the chart as a 12x9 inch PNG at 200 dpi.
plot.save(
    filename=f"market_share_at_{period_slug}.png",
    path=output_dir,
    format="png",
    width=12,
    height=9,
    units="in",
    dpi=200,
    limitsize=False,
    verbose=True,
)
Step 4: Run the script
Running the script saves the chart image to the output
folder
Further reading¶
//
Full script¶
main.py
#!/bin/python3
# Global
import os
from datetime import date
# External
import httpx
import polars as pl
import plotnine as p9
def fetch_market_share() -> pl.DataFrame:
    """Fetch the brokerage market-share ranking from the HOSE website.

    Requests the ``Top10SymbolMember`` endpoint on www.hsx.vn page by page and
    collects the ``cell`` list of every returned row.

    NOTE(review): the report filters (years, quarter, month) are hard-coded in
    the query string below, so which period is returned depends on them.

    Returns:
        A DataFrame with the columns ``id``, ``security_abbr``,
        ``security_name``, ``market_share`` (cast to Float64) and ``period``.

    Raises:
        httpx.HTTPStatusError: If any page request answers with a 4xx/5xx
            status.
    """
    # ``{page}`` is filled in per request; every other parameter is fixed.
    url_template = (
        "https://www.hsx.vn"
        "/Modules/StockMember/Web/Top10SymbolMember"
        "?"
        "pageFieldName1=ReportOption"
        "&pageFieldValue1=1"
        "&pageFieldOperator1=eq"
        "&pageFieldName2=Year1"
        "&pageFieldValue2=2024"
        "&pageFieldOperator2=eq"
        "&pageFieldName3=Year4"
        "&pageFieldValue3=2024"
        "&pageFieldOperator3=eq"
        "&pageFieldName4=Quarter"
        "&pageFieldValue4=3"
        "&pageFieldOperator4=eq"
        "&pageFieldName5=Year2"
        "&pageFieldValue5=2023"
        "&pageFieldOperator5=eq"
        "&pageFieldName6=Month"
        "&pageFieldValue6=1"
        "&pageFieldOperator6=eq"
        "&pageFieldName7=Year3"
        "&pageFieldValue7=2024"
        "&pageFieldOperator7=eq"
        "&pageCriteriaLength=7"
        "&_search=false"
        "&nd=1711300078466"
        "&rows=2147483647"
        "&page={page}"
        "&sidx=id"
        "&sord=desc"
    )
    headers = {"Accept": "application/json, text/javascript, */*; q=0.01"}
    # Metadata: column names, in the order the values appear in each "cell".
    schema = ["id", "security_abbr", "security_name", "market_share", "period"]

    # Get
    records: list[list] = []
    # NOTE: timeout=None disables httpx timeouts, so a stalled server blocks
    # this call indefinitely.
    with httpx.Client(timeout=None) as sess:
        page, total_pages = 1, 1
        while page <= total_pages:
            resp = sess.get(url_template.format(page=page), headers=headers)
            # Fail loudly on HTTP errors for every page, not only the first.
            resp.raise_for_status()
            payload: dict = resp.json()
            if page == 1:
                # The first response reports the page count; its rows are
                # reused below instead of requesting page 1 a second time.
                total_pages = int(payload.get("total") or 1)
            records.extend(row.get("cell") for row in payload.get("rows") or [])
            page += 1

    # Cleaning: each record is one row, so state the orientation explicitly
    # instead of relying on inference.
    frame = pl.DataFrame(data=records, schema=schema, orient="row")
    # The share uses a decimal comma (the original code replaced "," with ".");
    # normalise it with a native string expression before casting to a float.
    return frame.with_columns(
        pl.col("market_share")
        .cast(pl.Utf8)
        .str.replace_all(",", ".", literal=True)
        .cast(pl.Float64)
    )
if __name__ == "__main__":
    # Fetch the dataset.
    MARKET_SHARE = fetch_market_share()

    # Reporting period of the dataset, e.g. "Quý 03.2023" becomes
    # "Quarter 03.2023". ``item()`` only succeeds when the column holds
    # exactly one distinct value.
    UPDATED_PERIOD: str = (
        MARKET_SHARE.get_column("period")
        .unique()
        .item()
        .replace("Quý", "Quarter")
        .strip()
    )

    # Brokers (ordered by their share) against the share itself; the share
    # value is also used as the text label.
    mapping = p9.aes(
        x="reorder(security_abbr, market_share)",
        y="market_share",
        label="market_share",
    )
    # Axis titles, chart title and a caption stamped with today's date.
    as_of = date.today().strftime("%Y-%m-%d")
    titles = p9.labs(
        x="Security Participant",
        y="Market Share (%)",
        title=f"Top brokerage market share in {UPDATED_PERIOD}",
        caption=f"Source: Ho Chi Minh Stock Exchange. As of {as_of}",
    )
    # Tweaks applied on top of the 538 base theme.
    text_overrides = p9.theme(
        plot_title=p9.element_text(weight="bold", ha="left", size=14),
        plot_caption=p9.element_text(ha="left", color="grey"),
    )
    # Components are added in the same order as before: layers, scale,
    # coordinates, labels, base theme, then the theme overrides.
    plot = (
        p9.ggplot(MARKET_SHARE)
        + mapping
        + p9.geom_col(size=15, show_legend=True)
        + p9.geom_label(label_size=0.2)
        + p9.scale_y_continuous(limits=(0, 50))
        + p9.coord_flip(expand=True)
        + titles
        + p9.theme_538(base_size=11, base_family="DejaVu Sans")
        + text_overrides
    )

    # Prepare the destination folder (no error if it already exists).
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)
    # The file name embeds the period, lower-cased with spaces turned into "_".
    period_slug = UPDATED_PERIOD.lower().replace(" ", "_")
    # Save the chart as a 12x9 inch PNG at 200 dpi.
    plot.save(
        filename=f"market_share_at_{period_slug}.png",
        path=output_dir,
        format="png",
        width=12,
        height=9,
        units="in",
        dpi=200,
        limitsize=False,
        verbose=True,
    )