draft.aux

\relax 
\citation{HastieBook:SL}
\citation{white2012hadoop}
\citation{zaharia2010spark}
\citation{borthakur2008hdfs}
\citation{dean2008mapreduce}
\citation{zhang2004solving}
\citation{peng2012sublinear}
\citation{white2012hadoop}
\citation{dean2008mapreduce}
\citation{zaharia2010spark}
\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}}
\newlabel{sec:int}{{I}{1}}
\@writefile{toc}{\contentsline {section}{\numberline {II}Related Work}{1}}
\newlabel{sec:rew}{{II}{1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-A}}Computing Platforms}{1}}
\newlabel{sec:platform}{{\unhbox \voidb@x \hbox {II-A}}{1}}
\citation{mahoutscalable}
\citation{clarkson2010sublinear}
\citation{hazanbeating}
\citation{cotter2012kernelized}
\citation{hazan2011optimal}
\citation{garberapproximating}
\citation{peng2012sublinear}
\citation{chang2011psvm}
\citation{Liu:2011:PPL:1961189.1961198}
\citation{Chen5444877}
\citation{Li:2008:PPF:1454008.1454027}
\citation{chang2011foundations}
\citation{Bekkerman2012}
\citation{peng2012sublinear}
\citation{peng2012sublinear}
\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces Platform Comparison: Hadoop vs. Spark}}{2}}
\newlabel{tab:systemcmp}{{I}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-B}}Sublinear Methods}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-C}}Other Related Work}{2}}
\@writefile{toc}{\contentsline {section}{\numberline {III}Logistic Regression Model and Sequential Sublinear Algorithm}{2}}
\newlabel{sec:plr}{{III}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {III-A}}Logistic Regression Model}{2}}
\newlabel{sec:def}{{\unhbox \voidb@x \hbox {III-A}}{2}}
\newlabel{eqn:1}{{1}{2}}
\newlabel{eqn:2}{{2}{2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {III-B}}Sequential Sublinear Algorithm}{2}}
\newlabel{alg:1}{{\unhbox \voidb@x \hbox {III-B}}{2}}
\@writefile{toc}{\contentsline {section}{\numberline {IV}Parallel Sublinear Logistic Regression}{3}}
\newlabel{sec:framework}{{IV}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-A}}Parallel Sublinear Algorithms on Hadoop}{3}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Parallel implementation flow chart for PSUBPLR-MR}}{3}}
\newlabel{fig:frame}{{1}{3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-B}}Parallel Sublinear algorithms on Spark}{4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-C}}Parallel Gradient Descent in Spark}{4}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-D}}Online Stochastic Gradient Descent in Mahout}{4}}
\citation{sarwar2001item}
\citation{guyon2004result}
\citation{DelanyKBS05}
\citation{ma2009identifying}
\citation{fan2008liblinear}
\citation{chang2011foundations}
\citation{Bekkerman2012}
\@writefile{toc}{\contentsline {section}{\numberline {V}Experimental Setup}{5}}
\newlabel{sec:setup}{{V}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {V-A}}Dataset Information}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {V-B}}Testing Environment}{5}}
\@writefile{lot}{\contentsline {table}{\numberline {III}{\ignorespaces Cluster Information}}{5}}
\newlabel{tab:table2}{{III}{5}}
\@writefile{toc}{\contentsline {section}{\numberline {VI}Experimental Results}{5}}
\newlabel{sec:experiment}{{VI}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-A}}Results on Precision}{5}}
\newlabel{sec:precision}{{\unhbox \voidb@x \hbox {VI-A}}{5}}
\@writefile{lot}{\contentsline {table}{\numberline {IV}{\ignorespaces Accuracy Results. The meanings of abbreviations are as follows: 20-N-G, 20 News Group; URL-R, URL-Reputation.}}{5}}
\newlabel{tab:table3}{{IV}{5}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-B}}Results on Running Time}{5}}
\newlabel{sec:time}{{\unhbox \voidb@x \hbox {VI-B}}{5}}
\@writefile{lot}{\contentsline {table}{\numberline {II}{\ignorespaces Datasets}}{6}}
\newlabel{tab:table1}{{II}{6}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Test error, as a function of iteration number.}}{6}}
\newlabel{fig:accuracy}{{2}{6}}
\@writefile{lot}{\contentsline {table}{\numberline {V}{\ignorespaces Running Time. The meanings of abbreviations are as follows: 20-N-G, 20 News Group; URL-R, URL-Reputation.}}{6}}
\newlabel{tab:table4}{{V}{6}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Running time.}}{6}}
\newlabel{fig:08}{{3}{6}}
\citation{chang2011psvm}
\bibstyle{plain}
\bibdata{mlpaper}
\bibcite{Bekkerman2012}{1}
\bibcite{borthakur2008hdfs}{2}
\bibcite{chang2011foundations}{3}
\bibcite{chang2011psvm}{4}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-C}}Results on Cluster Size}{7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {VI-D}}Fault Tolerance}{7}}
\@writefile{lot}{\contentsline {table}{\numberline {VI}{\ignorespaces Fault Tolerance Analysis}}{7}}
\newlabel{tab:table5}{{VI}{7}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Iteration time, as a function of percentage of failed maps, on \textbf  {URL-Reputation} Dataset, run on 6 nodes}}{7}}
\newlabel{fig:14}{{5}{7}}
\@writefile{toc}{\contentsline {section}{\numberline {VII}Conclusion}{7}}
\newlabel{sec:concl}{{VII}{7}}
\@writefile{toc}{\contentsline {section}{References}{7}}
\bibcite{Chen5444877}{5}
\bibcite{clarkson2010sublinear}{6}
\bibcite{cotter2012kernelized}{7}
\bibcite{dean2008mapreduce}{8}
\bibcite{DelanyKBS05}{9}
\bibcite{fan2008liblinear}{10}
\bibcite{garberapproximating}{11}
\bibcite{guyon2004result}{12}
\bibcite{HastieBook:SL}{13}
\bibcite{hazan2011optimal}{14}
\bibcite{hazanbeating}{15}
\bibcite{Li:2008:PPF:1454008.1454027}{16}
\bibcite{Liu:2011:PPL:1961189.1961198}{17}
\bibcite{ma2009identifying}{18}
\bibcite{mahoutscalable}{19}
\bibcite{peng2012sublinear}{20}
\bibcite{sarwar2001item}{21}
\bibcite{white2012hadoop}{22}
\bibcite{zaharia2010spark}{23}
\bibcite{zhang2004solving}{24}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Running time, as a function of used node number.}}{8}}
\newlabel{fig:time}{{4}{8}}