1. Advertising
    y u no do it?

    Advertising (learn more)

    Advertise virtually anything here, with CPM banner ads, CPM email ads and CPC contextual links. You can target relevant areas of the site and show ads based on geographical location of the user if you wish.

    Starts at just $1 per CPM or $0.10 per CPC.

Free : asynchronous walker for deep recursive objects, with callbacks.

Discussion in 'JavaScript' started by seductiveapps.com, Nov 21, 2014.

  1. #1
    See http://jsfiddle.net/or23aft0/12/

    
    <!-- Minimal host page for the walker demo: output is appended to the #log div. -->
    <html>
        <head>
            <title>WalkObject - an Asynchronous recursive Object walker</title>
            <!-- jQuery is required by sa.m.log(), which writes into #log below -->
            <script type="text/javascript" src="https://code.jquery.com/jquery-2.1.1.js"></script>
        </head>
        <body>
            <!-- sa.m.log() appends one line per message here -->
            <div id="log">
            </div>
        </body> 
    </html>
    
    Code (markup):
    
    var sa = { m : {
        /**
         * Entry point: walks the deeply nested object `cmd.val` asynchronously.
         * The single `cmd` object carries both configuration and all walker state.
         * Recognized callbacks on cmd: callbackScan, callbackKey, callbackValue,
         * callbackProcessUpdate, callbackProcessDone. Work is chunked through
         * setTimeout() so the browser never shows a "page is not responding" dialog.
         */
        walkObject : function (cmd) {
            /*
                walkObject.*() is COPYLEFTED - PERPETUALLY FREE FOR ALL TYPES OF USAGE
                DISCLAIMER : NO WARRANTIES EXPRESSED OR IMPLIED, USE ONLY AT YOUR OWN RISK
                (c) and (r) 2014-Nov by Rene Veerman <info@seductiveapps.com>
                this code has been tested on large arrays (>2MB in size, <100MB in size)
            */
            cmd.scanPointer = cmd.val;
            if (!cmd.origin) {
                cmd.origin = '[unknown origin]';
            }
            if (!cmd.levelsAtOnce) {
                cmd.levelsAtOnce = 1;
            }

            // Defer the scan so walkObject() itself returns immediately;
            // processing (walkObject_processList) runs after scanning completes.
            setTimeout (function () {
                sa.m.walkObject_scan (cmd, sa.m.walkObject_processList);
            }, 50);
        },

        /**
         * Optional scan-progress helper: updates a progress bar element.
         * BUG FIX: the parameter used to be named `pvCmd` while the body
         * referenced the undeclared identifier `cmd`, so any invocation threw
         * a ReferenceError. The parameter is now named `cmd` to match the body.
         * NOTE(review): sa.pbar.setPercentage() is not defined in this file —
         * confirm it exists before assigning this as cmd.callbackScan.
         */
        walkObject_callbackScan : function (cmd) {
            if (cmd.progressbarHTMLid) {
                var el = document.getElementById(cmd.progressbarHTMLid);
                sa.pbar.setPercentage (el, cmd.scanIdx, cmd.scanResults.length);
            }
        },

        /**
         * Breadth-limited scan loop. Builds cmd.scanResults, a flat list of
         * {level, path, d} entries (children spliced in right after their
         * parent), then hands off to `callback` (stored as cmd.scanCallback
         * on the first call). Returns true when scanning is complete, false
         * when it has re-scheduled itself via setTimeout.
         */
        walkObject_scan : function (cmd, callback) {
            if (!cmd.scanResults) {
                // First call: seed the result list with the root object.
                cmd.scanResults = [{level:1, path:'', d:cmd.scanPointer}];
                cmd.scanIdx = 0;
                cmd.scanCount = 0;
                cmd.lastPause = 0;
                cmd.scanCallback = callback;
            }

            sa.m.walkObject_scanItem (cmd);

            // Per-chunk user notification (not once per key).
            if (typeof cmd.callbackScan=='function') {
                cmd.callbackScan (cmd);
            }

            if (cmd.scanIdx==cmd.scanResults.length) {
                if (typeof cmd.scanCallback=='function') {
                    setTimeout (function () {
                        cmd.scanCallback (cmd);
                    }, 300);
                }
                return true; // scanning done!
            }

            // NOTE(review): cmd.lastPause is compared against scanIdx here but
            // against scanCount/43 inside walkObject_scanItem() — the two pause
            // mechanisms share one field with different units; confirm intended.
            var pauseFactor = cmd.scanIdx;
            if (pauseFactor > cmd.lastPause + 50) {
                sa.m.log (1211, 'sa.m.walkObject_scan(): pausing for 500 milliseconds');
                setTimeout (function () {
                    cmd.lastPause = pauseFactor;
                    sa.m.walkObject_scan(cmd);
                }, 500);
                return false;
            } else {
                // Keep scanning synchronously until a pause threshold is hit.
                return sa.m.walkObject_scan(cmd);
            }
        },

        /**
         * Scans the entry at cmd.scanIdx. Objects get their keys enumerated
         * (reversed, so splicing preserves original key order in scanResults)
         * in batches of at most 20; scalars just advance the index.
         */
        walkObject_scanItem : function (cmd, path) {
            var it = cmd.scanResults[cmd.scanIdx];

            var tit = typeof it.d;
            if (tit=='object') {
                if (!it.keys) {
                    it.keys = Object.keys (it.d);
                    it.keys.reverse();
                    it.keyIdx = 0;
                }
            }
            if (it.keys) {
                if (it.keyIdx<it.keys.length) {
                    var doUntil = it.keyIdx+20;
                    while (it.keyIdx<doUntil && it.keyIdx<it.keys.length) {
                        var r = sa.m.walkObject_scanKey (cmd, it.path+'/'+it.keys[it.keyIdx]);
                        it.keyIdx++;
                        cmd.scanCount++;

                        // Yield roughly every 43 scanned keys (see NOTE in
                        // walkObject_scan about the shared lastPause field).
                        var pauseFactor = cmd.scanCount / 43;
                        if (pauseFactor > cmd.lastPause + 1) {
                            cmd.lastPause = pauseFactor;
                            break;
                        }
                    }
                }
                // Only move on once every key of this object has been queued.
                if (it.keyIdx===it.keys.length) {
                    cmd.scanIdx++;
                }
            } else {
                // Scalar value: nothing to expand.
                cmd.scanIdx++;
                cmd.scanCount++;
            }
        },

        /**
         * Queues one child of the current entry: inserts a new
         * {level, path, d} record directly after its parent in scanResults.
         */
        walkObject_scanKey : function (cmd, path) {
            var
            it = cmd.scanResults[cmd.scanIdx],
            val = it.d[it.keys[it.keyIdx]];

            cmd.scanResults.splice(cmd.scanIdx+1, 0, {level:it.level+1, path:path, d:val});
        },

        /**
         * Second phase: iterates over the completed cmd.scanResults list
         * (starting at index 1, i.e. skipping the root entry) and fires the
         * key/value callbacks. Chunked via setTimeout like the scan phase.
         * Returns true when processing is complete, false when re-scheduled.
         */
        walkObject_processList : function (cmd) {
            if (!cmd.processIdx) {
                cmd.processIdx = 1;
                cmd.lastPause = 0;
            }

            //jQuery('#'+cmd.progressbarHTMLid).fadeOut (sa.hms.globals.hideProgressbarSpeed).slideUp(sa.hms.globals.hideProgressbarSpeed);

            if (cmd.processIdx==cmd.scanResults.length) {
                if (typeof cmd.callbackProcessDone=='function') {
                    cmd.callbackProcessDone (cmd);
                }
                return true; // task completed!!
            }        

            sa.m.walkObject_processItem (cmd);

            if (typeof cmd.callbackProcessUpdate=='function') {
                cmd.callbackProcessUpdate (cmd);
            }

            var pauseFactor = cmd.processIdx;
            if (pauseFactor > cmd.lastPause + 1) {
                setTimeout (function () {
                    cmd.lastPause = pauseFactor;
                    sa.m.walkObject_processList (cmd);
                },200);
                return false;

            } else {
                return sa.m.walkObject_processList (cmd);
            }
        },

        /** Processes one scanResults entry, then advances processIdx. */
        walkObject_processItem : function (cmd) {
            var it = cmd.scanResults[cmd.processIdx];
            sa.m.walkObject_processKeyValuePair (cmd, it);
            cmd.processIdx++;
        },

        /** Fires the key callback, then the value callback, for one entry. */
        walkObject_processKeyValuePair : function (cmd, item ) {
            sa.m.walkObject_processKeyOrValue (cmd, item, 'key');
            sa.m.walkObject_processKeyOrValue (cmd, item, 'data');
        },

        /**
         * Dispatches one entry to the user callbacks: callbackKey fires for
         * every entry; callbackValue only for scalar (non-object) values.
         */
        walkObject_processKeyOrValue : function (cmd, item, type) {
            switch (type) {
                case 'key':
                    if (typeof cmd.callbackKey==='function') {
                        cmd.callbackKey (cmd, item);
                    }
                    break;
                case 'data':
                    var
                    data = item.d;

                    if (typeof data!=='object') {
                        if (typeof cmd.callbackValue==='function') {
                            cmd.callbackValue(cmd, item);
                        }
                    }
                    break;
            }
        },

        /**
         * Logs to the console and appends to the #log div.
         * NOTE(review): requires jQuery and a #log element (see the demo page).
         */
        log : function (level, msg) {
            console.log (msg, arguments);
            jQuery('#log')[0].innerHTML += msg + '<br/>';
        }
    }};
    
    /* USAGE - TEST CODE */
    
    // Demo data: a small nested object for the walker to traverse.
    var test = {
        a : 'b',
        c : 'd',
        e : {
            f : 'g',
            h : 'i',
            j : 'k'
        }
    };

    // Build the command object up front, then attach the callbacks one by one.
    var walkCommand = {
        val : test,                       // the recursive object to walk
        progressbarHTMLid : 'someHTMLid'  // see walkObject_callbackScan()
    };

    // Fired for each chunk of keys found during scanning.
    // sa.m.log() can be replaced with console.log()
    walkCommand.callbackScan = function (cmd) {
        sa.m.log (1, 'sa.m.walkObject.callbackScan found '+cmd.scanCount+' items', cmd);
    };

    // Fired periodically during processing (not for every item).
    walkCommand.callbackProcessUpdate = function (cmd) {
        sa.m.log (1, 'sa.m.walkObject.callbackBuildUpdate', cmd);
    };

    // Fired once for each key that is processed.
    walkCommand.callbackKey = function (cmd, item) {
        sa.m.log (1, 'sa.m.walkObject.callbackKey : '+item.path, cmd, item);
    };

    // Fired once for each scalar value that is processed.
    walkCommand.callbackValue = function (cmd, item) {
        sa.m.log (1, 'sa.m.walkObject.callbackValue : '+item.path, cmd, item);
    };

    // Fired when everything's done.
    walkCommand.callbackProcessDone = function (cmd) {
        sa.m.log (1, 'sa.m.walkObject.callbackBuildDone', cmd);
    };

    sa.m.walkObject (walkCommand);
    Code (markup):

     
    Last edited: Nov 21, 2014
    seductiveapps.com, Nov 21, 2014 IP
  2. PoPSiCLe

    PoPSiCLe Illustrious Member

    Messages:
    4,623
    Likes Received:
    725
    Best Answers:
    152
    Trophy Points:
    470
    #2
    Ran the fiddle - a little curious, isn't the log/returnValue callbackValue meant to represent the actual value in the key/value-pair? Right now it just repeats the key-value from the line before?
     
    PoPSiCLe, Nov 22, 2014 IP
  3. seductiveapps.com

    seductiveapps.com Active Member

    Messages:
    200
    Likes Received:
    6
    Best Answers:
    0
    Trophy Points:
    60
    #3
    You can do that yourself, by listing item.d in the sa.m.log() call for callbackValue

    There's a difference between getting a hit for a key (item.d === key_name) and a value (item.d === value)
     
    seductiveapps.com, Nov 22, 2014 IP
  4. PoPSiCLe

    PoPSiCLe Illustrious Member

    Messages:
    4,623
    Likes Received:
    725
    Best Answers:
    152
    Trophy Points:
    470
    #4
    Ah, right. Again, you need to present the example a little better (documentation) - if I have to read through the code to find out how to use it (the actual code, not comments) then it's probably not gonna be used - as for the comment on the callbackValue, it says // called for each value that is processed. - which it isn't, because the item.path isn't the correct variable to use. This creates confusion, and will probably lead to less usage.
    As for the counter for keys, that doesn't really seem to be working as it should either - although I might misunderstand something about the "key"-variable here - adding a few entries to the array, it counts up to 24 - which is the total amount of key/value-pairs (well, not really, since it doesn't differentiate between key/value-pairs and key/array/key/value-pairs) - not so helpful (well, sure, it can be, but since it clearly states in the comments that it counts 'keys', not values, a bit misleading). Otherwise it seems to be working okay. (I'm not sure about the callbackProcessUpdate / callbackBuildUpdate-returnvalue though - what's that for?)
     
    PoPSiCLe, Nov 22, 2014 IP
  5. seductiveapps.com

    seductiveapps.com Active Member

    Messages:
    200
    Likes Received:
    6
    Best Answers:
    0
    Trophy Points:
    60
    #5
    see also https://forums.digitalpoint.com/thr...objects-arrays-asynchronously-or-not.2739024/

    the example is obviously at the bottom of the javascript..

    and i'm not documenting this anymore than i already have, there's no need for that imo.

    those return value statements in some of the functions can now be deleted, they're remnants of the original object these functions came out of.
     
    Last edited by a moderator: Nov 23, 2014
    seductiveapps.com, Nov 22, 2014 IP
  6. deathshadow

    deathshadow Acclaimed Member

    Messages:
    9,732
    Likes Received:
    1,998
    Best Answers:
    253
    Trophy Points:
    515
    #6
    This might be a silly question, but what is this even supposed to do? I don't get what the point of this code is, much less the application... which after some three and a half decades of writing code is a bit of a surprise for me. I was thinking it was just an object walker, but I fail to see how/why/what all that code is even for...

    ... though I think that might be that you're hand-tracking and passing on the stack while not leveraging recursive calls... in which case it's easily two or three times the code needed to do what it's doing. What's with the timeouts for nothing?

    I might take a stab at this -- it's just supposed to be a object walker with processing callbacks, right? Also love how people seem to be throwing the words "synchronous" and "asynchronous" at things that have NOTHING to do with either or applications that need neither...

    Though, is that to release execution because you might be throwing objects at it that are so massive they have no blasted business being processed using JavaScript in the first place? THEN ASYNC might make sense, except it would make more sense not to be doing **** in JS that has no business being done client-side.

    ... but from what I've seen of your other scripting, that does seem to be what you're trying to do with all this code bloat.
     
    Last edited: Dec 1, 2014
    deathshadow, Dec 1, 2014 IP
  7. seductiveapps.com

    seductiveapps.com Active Member

    Messages:
    200
    Likes Received:
    6
    Best Answers:
    0
    Trophy Points:
    60
    #7
    questions are never silly in my opinion, but talking like you know it all on subjects that you avoid like the plague, IS.

    both of the routines I posted are for dealing with > 1MB of JSON data in javascript. I believe javascript clientside processing of data certainly has a place in today's world. And I leave you all the choice of doing that or not, unlike you deathshadow who insists the whole world code as he does.

    the timeouts are very necessary to prevent the browser from pausing execution for a "page is not responding" dialogue and OK from the end-user to continue processing.

    recursive processing of such data IS done in several spots in both routines, but has to receive plenty of setTimeout()s to prevent that "page is not responding" dialogue from appearing. And I also found when I tried that simple recursive processing of yours, that you run into the stack size limitation fairly quickly.
     
    seductiveapps.com, Dec 2, 2014 IP
  8. PoPSiCLe

    PoPSiCLe Illustrious Member

    Messages:
    4,623
    Likes Received:
    725
    Best Answers:
    152
    Trophy Points:
    470
    #8
    Just because I'm curious - what in the world would cause you to fetch over 1MB of JSON data? Real world examples would be appreciated. I can't really see any real world application for it.
     
    PoPSiCLe, Dec 2, 2014 IP
  9. seductiveapps.com

    seductiveapps.com Active Member

    Messages:
    200
    Likes Received:
    6
    Best Answers:
    0
    Trophy Points:
    60
    #9
    I have a photo collection for my site, and i'm filling in proper descriptive filenames for all pictures that are not female model shots or fantasy art pictures (those remain as numbered files). For about 5 thousand pictures that have descriptive filenames, the JSON of just the filenames is at 1.6mb and growing (about 1400 pictures still to rename).
    And I do need the filenames at the javascript end, to let people search on description the photocollection for background images to use (or to download the picture).

    Then there's my servicelog, which in developer view transmits quite a lot of details per request.

    I can imagine that the browser in the future will do "infrequent" computations on large JSON datasets. Why tie up the server with complex JSON db queries if only 1 person wants to view some specific computation result? The result can even be sent back to be cached at the server for consecutive views of said computation. I'm thinking from a fiber-hosted single-server at home business perspective.
     
    seductiveapps.com, Dec 2, 2014 IP
  10. PoPSiCLe

    PoPSiCLe Illustrious Member

    Messages:
    4,623
    Likes Received:
    725
    Best Answers:
    152
    Trophy Points:
    470
    #10
    Uhm. Why push all that via JSON, when you could just as well do all that server side, and just update specifics? Would greatly reduce the cost of the JSON parsing? I do understand that you're trying to build the future, but that doesn't mean that avoiding well tested tech is a good idea.
     
    PoPSiCLe, Dec 2, 2014 IP
  11. seductiveapps.com

    seductiveapps.com Active Member

    Messages:
    200
    Likes Received:
    6
    Best Answers:
    0
    Trophy Points:
    60
    #11
    eh, i already answered that question didnt i?
     
    seductiveapps.com, Dec 2, 2014 IP
  12. ketting00

    ketting00 Well-Known Member

    Messages:
    772
    Likes Received:
    27
    Best Answers:
    3
    Trophy Points:
    128
    #12
    Could it be used to pre-load audio and video playlist. I always provide links to those stuffs in a JSON format and load them one by one on click or when the play ended.
    I'm thinking about preloading them with the worker, but I'm concerned about the battery life of a mobile device.

    It's interesting you're calling a function this way:
    
    sa.m.walkObject ({
        val : test, // the recursive object to walk
        progressbarHTMLid : 'someHTMLid', // see walkObject_callbackScan()
        BLAH BLAH BLAH...
    });
    
    Code (markup):
    I've never seen it before.
     
    ketting00, Dec 2, 2014 IP
  13. seductiveapps.com

    seductiveapps.com Active Member

    Messages:
    200
    Likes Received:
    6
    Best Answers:
    0
    Trophy Points:
    60
    #13
    Yep, that's certainly an option.

    I get it from the jQuery.com framework, and other frameworks, who've been using it for years.
    It's pretty standard these days. Gives people the ability to name their parameters (and multi-layer their params), which saves lookups of "what the hell do these params do?'...
     
    Last edited: Dec 2, 2014
    seductiveapps.com, Dec 2, 2014 IP
  14. deathshadow

    deathshadow Acclaimed Member

    Messages:
    9,732
    Likes Received:
    1,998
    Best Answers:
    253
    Trophy Points:
    515
    #14
    Failing to take into the account the limitations of the medium is far more silly, and that seems to be what you are doing. What you are trying to do is just as flawed as what killed XHTML 1.1+ and XML applications.

    Which is the point I'd put a bullet in the developer before EVER allowing that much bloat to be deployed. A meg of JSON? When's your planned deployment date, 2064?

    Thankfully I'll probably be pushing up daisies before that's even viable on a real website; what with the impending bandwidth crunch, increase in the number of places with bandwidth caps and overage charges (ask our friends in Canada and Australia about that one), that there are places where 56k dialup is a good day like Coos county NH and large swaths of Utah and the Dakota's...

    Which is really our thinking two different worlds, since you're basically talking 100 times or more the amount of DATA I would EVER allow to be sent via JSON as one package for at LEAST the next twenty years. Not unless you want websites so painfully and agonizingly slow that it not only drives users away, it would likely kill the hosting it's on as well! In fact it would probably use MORE of the hosts capabilities than processing it server side since things like SQL indexes are designed to reduce the amount of data read from the REAL bottleneck in a modern system, the hard drive. Say hello to 100% IOWAIT.

    But again, I'm not rocking a 25mbps connection so I can visit websites that are slower and less useful than they were 20 years ago on 33.6k dialup.

    Of course @ketting00 brings up the best point of all -- mobile battery life? With what you are talking about, the question would be "what battery life?"

    Laughably most such JSON bull, particularly when mated to AJAX seems designed as "let's do EVERYTHING on the client" seems carefully crafted to screw over anything running on a battery... and for what, some sort of "pageloads are evil" paranoid bull?
     
    Last edited: Dec 2, 2014
    deathshadow, Dec 2, 2014 IP
  15. seductiveapps.com

    seductiveapps.com Active Member

    Messages:
    200
    Likes Received:
    6
    Best Answers:
    0
    Trophy Points:
    60
    #15
    deathshadow, i dont give a rats ass about your bs rants. i really dont. in fact, you're so stupidly annoying i'm gonna put you on forum-wide ignore dude :p good luck with your anger issues rofl :)
     
    seductiveapps.com, Dec 3, 2014 IP
  16. PoPSiCLe

    PoPSiCLe Illustrious Member

    Messages:
    4,623
    Likes Received:
    725
    Best Answers:
    152
    Trophy Points:
    470
    #16
    @seductiveapps.com you seem to be adamant to avoid any critique of your code - regardless of how many or how specific it gets. Numerous users on this forum (and, I would imagine, others) have pointed out repeatedly that most of what you're doing could be achieved by other means, often in a fraction of the time needed to execute the enormous amounts of javascript needed to run your code. I don't understand why you're not at all open to see other's way of thinking. And before you say "right back at ya" - we're open to it, but not when the implementation doesn't provide any extra value over existing methods - and basically bases its usefullness on a vague future speed improvement (it be javascript renderers, bandwidth, or other means of improving speed).
    By all means - there are breakthroughs being made every week, or month, pertaining to better performance, but there's also a cost-perspective which you seem to fail to consider. Have a look at the most expensive computers you can get today (for the private market), especially laptops. Do they blast the past generations out of the water when it comes to raw processing power? No, they don't - the improvement is in size, screen-real-estate, weight, and wireless networking capabilities. The speed of the latest version of Intel's laptop-CPUs are lower than the previous generation, but they consume less power, thus improving battery life.
    The same goes for bandwidth and Internet-speeds. I live in Norway, and even though most of the people here have broadband, the average speed of any private broadband connection is approx 6-10Mb/s. Even the countries with the best private broadband connections have less than 50% of the country wired up with speeds faster than 20Mb/s. The US (one of the world's most connected countries) have a way lower average speed than that (about 2Mb/s, IIRC).
    The point isn't that we want to see you fail, but maybe you should consider other options - why create something that isn't gonna be practically useful for the next 20 years? You need to remember that the cost of hooking everyone up with fiber, for instance, is INSANE - and that's even WITH the price-drop given that fiber gets to be the new ADSL. For Norway, for instace, they've estimated that just getting the 4-5 major cities up to fiber-standards, that means providing fiber to more or less every household, changing phonecables and other issues, will cost about 4 BILLION dollars. 4 BILLION! It's pretty self-explanatory that this is not a real priority anytime soon.
     
    PoPSiCLe, Dec 3, 2014 IP
  17. seductiveapps.com

    seductiveapps.com Active Member

    Messages:
    200
    Likes Received:
    6
    Best Answers:
    0
    Trophy Points:
    60
    #17
    Ok, this is politely worded so I'll be polite as well.

    First of all, only my own home needs fiber (and a server with SSD and a non-antique CPU ;), the rest of the world is fine on DSL speeds you indicated exist today ;)

    Second, I already gave you some usecases in use today where my JSON walker and copyer are in use and in a useful way.

    So this is not code for use in 20 years, both routines are useful in today's world.
     
    seductiveapps.com, Dec 3, 2014 IP
  18. deathshadow

    deathshadow Acclaimed Member

    Messages:
    9,732
    Likes Received:
    1,998
    Best Answers:
    253
    Trophy Points:
    515
    #18
    I was gonna let this go, but...

    ... and I was going to initially go "WHERE?!?", but apparently I missed this post:

    ... and it occurred to me... 1.6 megabytes in ~5 thousand records is a batshit insane amount of data to send client side; but is BUPKIS for a properly maintained SQL engine to handle -- why? INDEXES.

    What you described should be a non-issue for a properly structured relational database with proper indexes to handle for little to no real server load. 6400 records? Call me when you break a million. If searches are taking a while, set up a word index table or add a proper search extension like sphinxDB. After all, it works just FINE for forums.

    What's it take to brute-force that in your little client-side proggy while doing a text search? 2 minutes? After all, you'd be walking the entire thing AND doing regex with no indexes. (you would be doing a regex on all those descriptions, right?)

    Why would you be using JSON server side? Why would 1 specific result query count enough in a real database engine to be so much of an impact you'd screw the server AND the browser sending all this stuff client side?

    It just seems like you're creating solutions to problems that don't exist -- almost as if you don't actually know how to use a database. Are you intentionally just avoiding SQL engines server side for some strange reason? You can't actually think this grossly inefficient transmission of all the data in a grossly inefficeint file format and then brute-force walking it in a language that has grossly inefficient variable typing (thanks to the lack of strict typecasting) is going to be magically any better... but that seems to be your train of thought.

    Hell, even if I WERE to do something like this client side, I'd probably be looking at dumping your data into something like indexedDB or implementing a real database engine with indexes instead of screwing around with trying to process what is for all intents and purposes a grossly inefficient transmission system. (I'm still pissed that web sql is defunct because some freetards got their panties in a wad over it using sqLite) -- at least then you'd have, well... indexes. The only purpose your walker might serve then is to shove that data on first load into it -- at which point a better data transmission method than JSON might be in order.

    I mean, if you insist on client side processing, have you looked at the Indexed Database API at all?

    http://www.w3.org/TR/IndexedDB/
    https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API/Using_IndexedDB

    Setting aside the overhead of sending that massive amount of data, it would STILL be way more efficient at doing what you described as an application since again, INDEXES. You could also try to keep it as localstorage and if you had server-side timestamps only send updates to return visits instead of the entire data set. (which can happen with cache since again, cache is NOT a bottomless well)

    Have you ever considered something more efficient for same-status organized data? CSV? How about good old-fashioned ASCII control codes? Weed out some of that overhead and reduce the whole thing to a simple array of arrays instead of complex and slow objects? NOT that JS arrays are much better on speed, thanks lack of strict typecasting! -- or even better just split each record and dump it into the local indexedDB!

    I mean if every single record in your data set has the same fields, using \0x1E between records and \0x1F between cells would be WAY more efficient than the bloated slow mess that is JSON. (or the other bloated dead-end tech, XML). If they are in categories you could use \0x01 for SOH and \0x02 for SOD, and divide multiple categories with \0x1D as GS. Easy string splits that would be way faster than most JSON parsing. (especially since it's usually a fraction the file size) -- you could even use \0x17 ETB and \0x04 EOT to break it into transmission blocks so you can process and recieve simultaneously with AJAX. In other words, real lock-step ASYNC.

    JSON and XML are 'cute' for moving between systems as a human interpretable data, but as a storage and processing medium they are so grossly inefficient it's ridiculous. They only shine at two things; human legibility and when every record contains DIFFERENT fields. If there's a lack of uniformity in the data sets, THEN they start to make SOME sense... kinda... I guess. I'm a man, and I can change... If I have to... I guess...

    Hell, for massive runs of "like data" CSV is more efficient -- and that's actually pretty pathetic if you think about it. But to be fair, I've still got that machine language attitude towards data processing. If your not storing real numbers as 80 bit floats or arbitrary precision BCD, or integers as actual words, dwords and qwords, you're probably doing something wrong. Plaintext is just so much /FAIL/ for anything other than text.

    Also, is that 1.6mb before or after server-side gzip compression? :D I'd actually be kind of interested in seeing your data set from that 'example' scenario -- it might explain a lot of your "why", and would give a means of showing you "how" the rest of us would handle that and "why" we have a problem with how you are going about things... though dimes to dollars you could probably strip away at least a third that filesize by ditching JSON -- well, unless your property names are uselessly tiny and cryptic. It's often a laugh that most people's JSON is two or three times as large as is needed for their data -- and XML is even worse.

    You would be averaging... 335 bytes per record? What are your size limits for filename and description? What's the average size of the filenames and descriptions NOT counting the json overhead and what other data is being tracked? Even just one sample record would go a long ways towards dialing in something better than how you're trying to do it because... well... just wow man.

    Unless... no, that would be nuts.. are you actually maintaining a 1.6 meg static JSON file and editing it directly as your storage medium?!? No... nobody would actually do that, would they? No, wait, someone might, why not it's thinking like that which killed XML applications before they ever had a chance...

    Just some things to consider. If you insist on client side processing, it could be that JSON and object walking might just be the wrong tool for the job. I know JSON (being for all intents and purposes the "new XML") is hot and trendy, but much like a number of file formats before it, well... it sucks for large uniform data processing.
     
    deathshadow, Dec 3, 2014 IP
  19. seductiveapps.com

    seductiveapps.com Active Member

    Messages:
    200
    Likes Received:
    6
    Best Answers:
    0
    Trophy Points:
    60
    #19
    Well, your attitude is improving. Thanks. :)

    So, SQL for photo album collections..
    (1) I dont wanna have the server seek through data that the client can search through. My aim is to spend as little on server hardware as possible.
    (2) The photo album is not going to grow to millions of images; it'll grow very slowly from about 10k images to maybe 25k images in about 5 years.
    (3) The javascript that you claim is so utterly slow in seeking through this data performs near-instantly on my core-i5 for about 12k images. There's hardly a delay if I use my menu to request a "frog" background or a "scenic landscape" background.
    (3.1) why use regexp when .indexOf (substring position search) is much more efficient?
    (4) I *would* (and have) use(d) SQL for other types of datasets where JSON *would* fail (for the reasons you think my photoalbum approach is lame, btw).
    (5) There's hardly *any* overhead in JSON for the filenames of a photocollection.
    (6) The 1.6mb is before gzipping. However, gzipping (even on a core-i5, is *slower* in delivery-time-to-the-client than not gzipping). I have yet to work out how to cache gzipped content in a file that can be PHP:readfile()ed. It's not the easiest thing to do.
    (7) Ofcourse I'm not putting filenames in a JSON file by hand.. I have a PHP routine that gets a recursive and filtered directory listing, and that gets put into a JSON file and I have a menu-item that deletes JSON cache files like that so that the next page view repopulates these cache JSON db files (and btw the photoalbum JSON gets loaded these days after the rest of my seductiveapps framework has fully loaded and displayed - i *have* taken your bitching about my site's load speed to heart and editor)
     
    seductiveapps.com, Dec 3, 2014 IP
  20. PoPSiCLe

    PoPSiCLe Illustrious Member

    Messages:
    4,623
    Likes Received:
    725
    Best Answers:
    152
    Trophy Points:
    470
    #20
    1. Why? I mean, seriously, why? Servers are servers for a reason? And usually way, WAY more powerful than what most people have at home, when it comes to actually parsing data (more RAM, better CPUs).
    2. 25k images (ie, 25k rows in a database) should take about 0.1 sec to search through
    3. Near instantly on a localhost. No overhead, direct file-access... have you tested this on a remote server at all, via an actual web-connection?
    3.1 I haven't done comparisons, but that sounds decent enough
    4. SQL, or any other type of database, is exactly that - a database meant to cope with data - datasets, relations etc. Indexes is one of the benefits.
    5. Why do you want to push all the filenames - I get it if the user searches for a specific filename, but wouldn't it be better to have a decent meta-tag system instead? So that the filename is irrelevant, but the tags aren't?
    6. I'm not gonna go into this bit here. Not my strong point.
    7. I've seen online javascript-based galleries that have visual search capabilities with thumbs etc. based on whatever input the user provides. I'm pretty sure neither of those base their approach on JSON. Could you provide a couple lines of the JSON, as @deathshadow asked for?
     
    PoPSiCLe, Dec 3, 2014 IP