Wednesday, 8 September 2021

Data-Extraction from website

This post shows how to extract daily market data that is refreshed at an interval of 5 minutes. I developed the program given below as part of a larger financial software package, and I try to demonstrate it here in a meaningful way.

The website http://www.psx.com.pk is the portal of a stock exchange. The exchange has a page which displays OHLC data and company information formatted in HTML tables. The Java program uses the jsoup API to (a) connect to the page, (b) read the page, and (c) fetch the OHLC data values and the exchange's current status in the following format:-

Symbol  | Open | High | Low | Close | Volume

The exchange does not provide a "symbol-code". Symbol values are determined by querying a data table containing company names and company codes. The following is a screenshot of the web page:-



The data extracted from the above web page is processed and displayed as shown in the screenshot below:-



The <download> button on the above screenshot triggers execution of the following code. 

  1 /*
  2  * To change this license header, choose License Headers in Project Properties.
  3  * To change this template file, choose Tools | Templates
  4  * and open the template in the editor.
  5  */
  6 package cmnew.downloader;
  7 
  8 import java.util.ArrayList;
  9 import java.util.Iterator;
 10 import java.util.Vector;
 11 import net.coolmarch.cmnew.common.DailyData;
 12 import net.coolmarch.cmnew.common.GeneralDB;
 13 import org.jsoup.Jsoup;
 14 import org.jsoup.nodes.Document;
 15 import org.jsoup.nodes.Element;
 16 import org.jsoup.select.Elements;
 17 
 18 /**
 19  *
 20  * @author salam
 21  */
 22 public class CurrentMarketState {
 23 
 24     ArrayList<DailyData> ddlist = new ArrayList<>();
 25 
 26     public CurrentMarketState() {
 27 
 28     }
 29 
 30     public CurrentMarketState(String status) {
 31         System.out.println("......thisfile...");
 32         String line = "";
 33         GeneralDB gdb = new GeneralDB();
 34         String query = "delete from current_data";
 35         String msg = gdb.execute(query);
 36         System.out.println(msg);
 37 
 38         try {
 39             Document doc = Jsoup.connect("https://www.psx.com.pk/market-summary/").get();
 40             String title = doc.title();
 41 
 42             Elements tabs = doc.select("table");
 43             for (Element table : tabs) {
 44                 if (table == tabs.first()) {
 45                     ;//do nothing
 46                 } else {
 47                     Elements rows = table.select("tr");
 48                     int c = 0;
 49                     for (int i = 1; i < rows.size(); i++) { //first row is the col names so skip it.
 50                         String l = "";
 51                         Element row = rows.get(i);
 52                         Elements cols = row.select("td");
 53                         String s_name = cols.get(0).wholeText();
 54                         if (s_name.startsWith("SCRIP") || s_name.contains("DEFAULTER")
 55                                 || s_name.contains("Indus") || s_name.contains("Millat")) {
 56                         } else {
 57                             String s_ldcp = cols.get(1).wholeText();
 58                             String s_open = cols.get(2).wholeText();
 59                             String s_high = cols.get(3).wholeText();
 60                             String s_low = cols.get(4).wholeText();
 61                             String s_close = cols.get(5).wholeText();
 62                             String s_change = cols.get(6).wholeText();
 63                             String s_volume = cols.get(7).wholeText();
 64 
 65                             s_name = s_name.replaceAll(",", "");
 66                             s_open = s_open.replaceAll(",", "");
 67                             s_high = s_high.replaceAll(",", "");
 68                             s_low = s_low.replaceAll(",", "");
 69                             s_close = s_close.replaceAll(",", "");
 70 //                            s_change = s_change.replaceAll(",", "");
 71                             s_volume = s_volume.replaceAll(",", "");
 72                             
 73                             //because the names in the table have slight difference but 
 74                             //usually the first and second word of the company are same.
 75                             String sq = "";
 76                             String[] st = s_name.split(" ");
 77                             if (st.length > 2) {
 78                                 sq = st[0] + " " + st[1];
 79                             } else {
 80                                 sq = s_name;
 81                             }
 82                             
 83                             
 84 
 85                             String q = "select cm_symbol from cm_companies "
 86                                     + "where cm_name like '" + sq + "%'";
 87                             String symbol = new GeneralDB().getSingleColumnData(q);
 88                             if (symbol == null || symbol.compareTo("") == 0) {
 89                                 symbol = s_name;
 90                             }
 91 
 92                             query = "insert into current_data(scrip, pr_open, pr_high,"
 93                                     + "pr_low, pr_close, pr_volume) values("
 94                                     + "'" + symbol + "',"
 95                                     + "" + s_open + ","
 96                                     + "" + s_high + ","
 97                                     + "" + s_low + ","
 98                                     + "" + s_close + ","
 99                                     + "" + s_volume + ""
100                                     + ")";
101                             System.out.println(query);
102                             System.out.println(gdb.execute(query));
103 //
104 
105                         }
106                     }
107 
108                 }
109 
110             }
111 
112         } catch (Exception e) {
113             System.out.println(e.getMessage());
114         }
115     }
116 
117     public ArrayList<DailyData> getDailySavedData() {
118         ArrayList<DailyData> dlist = new ArrayList<>();
119         String query = "select scrip, pr_open, pr_high, pr_low, pr_close, "
120                 + "pr_volume, (pr_volume*pr_close) as pr_mcap, "
121                 + "(pr_close-pr_open) as ch from current_data";
122         ArrayList al = new GeneralDB().searchRecord(query);
123         Iterator i = al.iterator();
124         while (i.hasNext()) {
125             Vector v = (Vector) i.next();
126             String symbol = (String) v.get(0);
127             String str_open = (String) v.get(1);
128             String str_high = (String) v.get(2);
129             String str_low = (String) v.get(3);
130             String str_close = (String) v.get(4);
131             String str_volume = (String) v.get(5);
132             String str_mcap = (String) v.get(6);
133             String str_ch = (String) v.get(7);
134             
135            
136 
137             DailyData dd = new DailyData();
138             dd.setSymbol(symbol);
139             dd.setOpen(Double.parseDouble(str_open));
140             dd.setHigh(Double.parseDouble(str_high));
141             dd.setLow(Double.parseDouble(str_low));
142             dd.setClose(Double.parseDouble(str_close));
143             dd.setVolume(Double.parseDouble(str_volume));
144             dd.setChange(Double.parseDouble(str_ch));
145             dd.setMcap(Double.parseDouble(str_mcap));
146             dlist.add(dd);
147             
148             System.out.println(symbol+" "+str_close+"  "+str_ch);
149         }
150 
151         return dlist;
152     }
153 
154     public ArrayList<DailyData> getDailyData() {
155         return ddlist;
156     }
157 
158     public static void main(String[] args) {
159         new CurrentMarketState("");
160     }
161 
162 }
163 

No comments:

Post a Comment

File: ESL4U

  m