Hello all, Recently I was writing a Perl script to get the foreign exchange rates from the webpage below. (Sorry about the Chinese, but I only need the numbers.) https://wwwfile.megabank.com.tw/rates/M001/viewF.asp If we take a look at the page source, the table cells are dynamically generated inside the following tbody tag. <tbody id="contentTbody"></tbody> I guess the numbers are created by the very long JavaScript eval function at the bottom of the page source. <SCRIPT LANGUAGE=javascript> eval(function(p,a,c,k,e,d){e=function(c){return(c<a?"":e(parseInt(c/a)))+((c=c%a)>35?String.fromCharCode(c+29):c.toString(36))};if.......... </SCRIPT> I just want to access the numbers on this page. It would be nice if these numbers came from another webpage. My question is: how do I find out where the numbers (or the contents of the id "contentTbody") come from? Or is there a JavaScript eval tool that lets me see the result of the long JavaScript eval line? Any help will be appreciated. Thank you so much.
http://api.finance.xaviermedia.com/api/latest.xml - XML with the latest exchange rates, updated daily. Much nicer to work with - see http://www.xavierforum.com/xavier-finance-currency-exchange-rate-api-t10979.html for full documentation. I just use a daily cron job which does: wget -O /www/latest.xml http://api.finance.xaviermedia.com/api/latest.xml and then parse that with a PHP script and store the values I am interested in in my DB.
Thanks, dimitar. However, I need particular rates from the webpage. Can anyone tell me what the following JavaScript code does, or how I can evaluate/decipher it? Thanks.
this actually is: jq = { hover: function (id, start) { var tb = document.getElementById(id); var tr = tb.getElementsByTagName('tr'); for (i = start; i < tr.length; i++) { if (start) { if (i % 2 == 0) { tr[i].className += "tbcolor1"; tr[i].onmouseout = function () { this.className = 'tbcolor1' } } else { tr[i].className += "tbcolor2"; tr[i].onmouseout = function () { this.className = 'tbcolor2' } } }; tr[i].onmouseover = function () { this.className = 'tbcolor3' } } } }; function createTimer(nextRequest) { try { timerID = self.setTimeout('doRefresh()', nextRequest) } catch (e) {} }; function doRefresh() { var xhttp; var ran_number = Math.random() * 4; var url = '../D001/_@V_.asp?random=' + ran_number; try { xhttp = new ActiveXObject("Msxml2.XMLHTTP") } catch (e) { try { xhttp = new ActiveXObject("Microsoft.XMLHTTP") } catch (e) { xhttp = new XMLHttpRequest(); xhttp.timeout = 5000 } }; try { xhttp.open('GET', url, true); xhttp.onreadystatechange = function () { if (xhttp.readyState == 4) { if (xhttp.status == 200) { onLoad(xhttp.responseText, xhttp.status) } } }; xhttp.send(null) } catch (e) {} }; function onLoad(text, status) { var nums1 = String.fromCharCode(49, 56, 48, 48, 48, 48); var nums2 = String.fromCharCode(54, 48, 48, 48, 48, 48); document.getElementById("dataDate").innerHTML = text.split("|")[0]; document.getElementById("dataTime").innerHTML = text.split("|")[1]; var rateArr = text.split("|")[2].split("#"); var tbodyRemove = document.getElementById("contentTbody"); var table = document.getElementById("contentTable"); table.removeChild(tbodyRemove); var tbody = table.appendChild(document.createElement("tbody")); tbody.setAttribute("id", "contentTbody"); for (var i = 0; i < rateArr.length; i++) { var tr = tbody.appendChild(document.createElement("tr")); var td0 = tr.appendChild(document.createElement("td")); td0.className = "con_td td_left"; td0.innerHTML = getKeyValuePairValue(rateArr[i], "col0"); var td1 = tr.appendChild(document.createElement("td")); 
td1.className = "con_td money_td"; td1.innerHTML = getKeyValuePairValue(rateArr[i], "col1"); var td2 = tr.appendChild(document.createElement("td")); td2.className = "con_td money_td"; td2.innerHTML = getKeyValuePairValue(rateArr[i], "col2"); var td3 = tr.appendChild(document.createElement("td")); td3.className = "con_td money_td"; td3.innerHTML = getKeyValuePairValue(rateArr[i], "col3"); var td4 = tr.appendChild(document.createElement("td")); td4.className = "con_td money_td"; td4.innerHTML = getKeyValuePairValue(rateArr[i], "col4"); var td5 = tr.appendChild(document.createElement("td")); td5.className = "con_td money_td"; td5.innerHTML = " " }; jq.hover('contentTable', 1); var i_timeout = 0; if (!isNaN(text.split("|")[3])) { i_timeout = 1 * text.split("|")[3] } else { i_timeout = parseInt(nums1) }; if (parseInt(i_timeout) != parseInt(nums1)) { var ran_number = 0; do { ran_number = Math.round(Math.random() * parseInt(nums2)) } while (parseInt(ran_number) > parseInt(nums2)) i_timeout = i_timeout + ran_number; createTimer("" + i_timeout) } else { createTimer(parseInt(nums1)) } }; doRefresh(); Code (javascript):
Incidentally, the data is fetched via Ajax from this URL: https://wwwfile.megabank.com.tw/rates/D001/_@V_.asp?random=1.4473183106113154 - it will be far easier to parse that directly.
Thank you, dimitar!! I found an unpacker which can convert the JavaScript code: http://www.strictly-software.com/unpacker But now 'wget' doesn't work for the URL. When I tried it in console mode, I got a blank file back. wget -O test.html https://wwwfile.megabank.com.tw/rates/D001/_@V_.asp?random=1.4473183106113154 Do you think it is due to the ActiveX stuff? Thanks a ton for your help!