diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..311b729 --- /dev/null +++ b/.gitignore @@ -0,0 +1,25 @@ +/2022-03-replication/Makefile +/2022-03-replication/_minted-replication/ +/2022-03-replication/a.csv +/2022-03-replication/all.csv +/2022-03-replication/article.aux +/2022-03-replication/article.log +/2022-03-replication/article.pdf +/2022-03-replication/article.tex +/2022-03-replication/b.csv +/2022-03-replication/c.csv +/2022-03-replication/comb.py +/2022-03-replication/gen.py +/2022-03-replication/graph.png +/2022-03-replication/graph.py +/2022-03-replication/maketable.py +/2022-03-replication/org/.eg.org.~undo-tree~ +/2022-03-replication/org/*.bbl +/2022-03-replication/org/eg.tex +/2022-03-replication/org/obipy-resources/ +/2022-03-replication/replication.bbl +/2022-03-replication/replication.tex +/2022-03-replication/result.tex +/2022-03-replication/rs/.Rhistory +/2022-03-replication/org/.ob-jupyter/ +/2022-03-replication/test/ diff --git a/2022-03-replication/org/eg.org b/2022-03-replication/org/eg.org index e2cabf1..1e2f9ce 100644 --- a/2022-03-replication/org/eg.org +++ b/2022-03-replication/org/eg.org @@ -1,7 +1,7 @@ #+title: Example research paper written with org mode #+author: James Brusey #+options: toc:nil -#+property: header-args:ipython :session eg-sess :results output raw drawer :exports results +#+property: header-args:jupyter-python :session eg-sess :results output raw drawer :exports results #+latex_class: IEEEtran #+latex_header: \input{header} #+begin_abstract @@ -15,7 +15,7 @@ The introduction should not just repeat the abstract but should properly introdu Towards the end of the introduction, there should be a statement about what the contribution of the paper. In computer science these days, it is common to use a bullet pointed list with forward references to sections where that contribution can be found. * Related work -When it comes to citing references, org-ref is pretty cool. For example, I can cite a work [[cite:&kitchinExamplesEffectiveData2015]], and this will then show up in the references [[cite:&wilkinsModellingUncontrolledSolar2018]]. +When it comes to citing references, org-ref is pretty cool. For example, I can cite a work [[cite:&kitchinExamplesEffectiveData2015]], and this will then show up in the references [[cite:&wilkinsModellingUncontrolledSolar2018]]. * Methods and materials In the method section, you might have a diagram. @@ -37,7 +37,17 @@ While you can manually enter tables in org-mode, such as in autoref:table1. A similar table can be produced using code, as in autoref:table2. -#+BEGIN_SRC ipython +#+begin_src shell :results raw drawer +echo "hello" +#+end_src + +#+RESULTS: +:results: +hello +:end: + + +#+BEGIN_SRC jupyter-python :results raw drawer import pandas as pd import numpy as np import tabulate @@ -54,18 +64,17 @@ print( #+END_SRC #+RESULTS: -:results: +: +: #+caption: This table was produced from the all.csv data-set. label:table2 +: #+attr_latex: :align lSS +: | Class | \protect{mean} | \protect{std} | +: |---------+------------------+-----------------| +: | a | 4.3 | 3.16394 | +: | b | -0.114247 | 1.00812 | +: | c | 4.72661 | 3.85745 | -#+caption: This table was produced from the all.csv data-set. label:table2 -#+attr_latex: :align lSS -| Class | \protect{mean} | \protect{std} | -|---------+------------------+-----------------| -| a | 4.3 | 3.16394 | -| b | -0.114247 | 1.00812 | -| c | 4.72661 | 3.85745 | -:end: -#+BEGIN_SRC ipython +#+BEGIN_SRC jupyter-python %matplotlib inline import pandas as pd import numpy as np @@ -74,18 +83,10 @@ df = pd.read_csv("../all.csv") plt.figure() plt.plot(df['a'], df['b'], '.') plt.show() - #+END_SRC #+RESULTS: -:results: -# Out [2]: -# text/plain -:
- -# image/png -[[file:obipy-resources/2abc833ac46639e8c3c3db1b162ed52c45494cc7/2b10c27d7fec897b3ebf8364b6da6e8b674ac4e0.png]] -:end: +[[file:./.ob-jupyter/2b10c27d7fec897b3ebf8364b6da6e8b674ac4e0.png]] * Conclusions @@ -95,7 +96,7 @@ bibliography:~/Documents/zotero-export.bib bibliographystyle:plain * Needed for adding IEEEtran class :noexport: -#+BEGIN_SRC emacs-lisp +#+BEGIN_SRC emacs-lisp :results silent (add-to-list 'org-latex-classes '("IEEEtran" "\\documentclass\[10pt\]\{IEEEtran\}" @@ -103,18 +104,8 @@ bibliographystyle:plain ("\\subsection\{%s\}" . "\\subsection*\{%s\}") ("\\subsubsection\{%s\}" . "\\subsubsection*\{%s\}") ("\\paragraph\{%s\}" . "\\paragraph*\{%s\}") - )) + )) #+END_SRC -#+RESULTS: -| IEEEtran | \documentclass[10pt]{IEEEtran} | (\section{%s} . \section*{%s}) | (\subsection{%s} . \subsection*{%s}) | (\subsubsection{%s} . \subsubsection*{%s}) | (\paragraph{%s} . \paragraph*{%s}) | | -| scrartcl | \documentclass[10pt]{scrartcl} | (\section{%s} . \section*{%s}) | (\subsection{%s} . \subsection*{%s}) | (\subsubsection{%s} . \subsubsection*{%s}) | (\paragraph{%s} . \paragraph*{%s}) | | -| scrreprt | \documentclass[10pt,DIV=11]{scrreprt} | (\chapter{%s} . \chapter*{%s}) | (\section{%s} . \section*{%s}) | (\subsection{%s} . \subsection*{%s}) | (\subsubsection{%s} . \subsubsection*{%s}) | (\paragraph{%s} . \paragraph*{%s}) | -| beamer | \documentclass[presentation]{beamer} | (\section{%s} . \section*{%s}) | (\subsection{%s} . \subsection*{%s}) | (\subsubsection{%s} . \subsubsection*{%s}) | | | -| default-koma-letter | \documentclass[11pt]{scrlttr2} | | | | | | -| article | \documentclass[11pt]{article} | (\section{%s} . \section*{%s}) | (\subsection{%s} . \subsection*{%s}) | (\subsubsection{%s} . \subsubsection*{%s}) | (\paragraph{%s} . \paragraph*{%s}) | (\subparagraph{%s} . \subparagraph*{%s}) | -| report | \documentclass[11pt]{report} | (\part{%s} . \part*{%s}) | (\chapter{%s} . \chapter*{%s}) | (\section{%s} . \section*{%s}) | (\subsection{%s} . \subsection*{%s}) | (\subsubsection{%s} . \subsubsection*{%s}) | -| book | \documentclass[11pt]{book} | (\part{%s} . \part*{%s}) | (\chapter{%s} . \chapter*{%s}) | (\section{%s} . \section*{%s}) | (\subsection{%s} . \subsection*{%s}) | (\subsubsection{%s} . \subsubsection*{%s}) | - [[elisp:(org-latex-export-to-pdf)]] diff --git a/2022-03-replication/org/eg.pdf b/2022-03-replication/org/eg.pdf new file mode 100644 index 0000000..463b7be Binary files /dev/null and b/2022-03-replication/org/eg.pdf differ diff --git a/2022-03-replication/replication.org b/2022-03-replication/replication.org index 6bddf31..bfceaf0 100644 --- a/2022-03-replication/replication.org +++ b/2022-03-replication/replication.org @@ -18,7 +18,7 @@ + There are few incentives to publish *better* papers + Penalties for negligence, bias, and fraud are minor even if offenders are found out -#+attr_latex: :height 3cm +#+attr_latex: :height 3cm [[file:science-fictions-book.jpg]] * Our institutions are implicated @@ -42,7 +42,7 @@ These ideas are focused on the /analysis/ rather than the experimental work itse - but why were they using Excel? + An analysis of genomics research shows that many studies have fallen prey to MARCH1 gene being altered by autocorrect in Excel + There are many reasons why you should not use Word but the number one reason is that it will stop you from automating parts of your research---you will tend to be relying on cutting and pasting in figures and tables rather than auto-generating them. /Convert away before it is too late./ - + * Use the command line and GNU Make + Analysis ends up having several steps - combining multiple data-sets into one @@ -51,7 +51,7 @@ These ideas are focused on the /analysis/ rather than the experimental work itse - summarising data to produce a table - producing a graph -* Method for using Make +* Method for using Make + Each step should be performed with a command or script (e.g., gnuplot) + Form multiple steps into a pipeline with GNU Make + Alongside much on-line sources, also see Data Science at the Command Line https://datascienceatthecommandline.com/ @@ -61,7 +61,7 @@ These ideas are focused on the /analysis/ rather than the experimental work itse * Example---generating data For example, say we have a script to generate some data =a.csv=, =b.csv=, =c.csv= called =gen.py= -#+BEGIN_SRC ipython :exports code +#+BEGIN_SRC python :tangle gen.py import pandas as pd import numpy as np @@ -75,12 +75,9 @@ df = pd.DataFrame(np.random.normal(5, 3, size=SZ), columns=["value"]) df.to_csv("c.csv", index=False) #+END_SRC -#+RESULTS: -:results: -:end: * Example---combine data We might then have another script =comb.py= to combine them. -#+BEGIN_SRC ipython +#+BEGIN_SRC python :tangle comb.py import pandas as pd import numpy as np @@ -97,7 +94,7 @@ df.to_csv("all.csv", index=False) :end: * Example---table We can produce a table using python tabulate in a script called =maketable.py= -#+BEGIN_SRC ipython +#+BEGIN_SRC python :tangle maketable.py import pandas as pd import numpy as np import tabulate @@ -112,13 +109,10 @@ with open("result.tex", "w") as f: ) #+END_SRC -#+RESULTS: -:results: -:end: * Example---graph Finally, we might use =graph.py= to plot =a= versus =b= (ok, this is not a very meaningful graph!) -#+BEGIN_SRC ipython +#+BEGIN_SRC python :tangle graph.py import pandas as pd import numpy as np import matplotlib.pyplot as plt @@ -127,15 +121,12 @@ df.plot(x="a", y="b", kind="scatter") plt.savefig("graph.png") #+END_SRC -#+RESULTS: -:results: -:end: #+attr_latex: :height 3cm [[file:graph.png]] * Example---LaTeX doc Naturally, we need a LaTeX document: -#+BEGIN_SRC latex :exports code :file article.tex +#+BEGIN_SRC latex :exports code :tangle article.tex \documentclass{article} \usepackage{siunitx} \usepackage{graphicx} @@ -154,36 +145,33 @@ Blah blah blah. \end{document} #+END_SRC -#+RESULTS: -#+begin_export latex -#+end_export * Example---Makefile Finally, we tie everything together with a =Makefile= -#+BEGIN_SRC makefile +#+BEGIN_SRC makefile :tangle Makefile article.pdf: article.tex result.tex graph.png - pdflatex article.tex + pdflatex article.tex graph.png: all.csv graph.py - python graph.py + python graph.py result.tex: all.csv maketable.py - python maketable.py + python maketable.py all.csv: a.csv b.csv c.csv comb.py - python comb.py + python comb.py a.csv: gen.py - python gen.py + python gen.py #+END_SRC - + * Using RStudio + RStudio allows you to put all the steps into a notebook form + The result can be exported to a LaTeX document + Best for R but difficult to format for a paper + A great resource for R and the tidyverse is R for Data Science https://r4ds.had.co.nz/ + You can also use Pandoc separately from RStudio -* RStudio example :noexport: +* RStudio example :noexport: [[file:rs/rs-eg.Rmd]] * RStudio example #+include: rs/rs-eg.Rmd src markdown @@ -209,7 +197,7 @@ a.csv: gen.py ("\\subsection\{%s\}" . "\\subsection*\{%s\}") ("\\subsubsection\{%s\}" . "\\subsubsection*\{%s\}") ("\\paragraph\{%s\}" . "\\paragraph*\{%s\}") - )) + )) #+END_SRC * Formatting numbers @@ -221,7 +209,9 @@ a.csv: gen.py 2. John Kitchin has a nice article on embedding data into PDFs. [[cite:&kitchinExamplesEffectiveData2015]] 3. He also has a youtube describing org mode for research https://youtu.be/1-dUkyn_fZA -bibliographystyle:plainnat + + +bibliographystyle:plainnat bibliography:~/Documents/zotero-export.bib * build :noexport: [[elisp:(org-beamer-export-to-pdf)]] diff --git a/2022-03-replication/replication.pdf b/2022-03-replication/replication.pdf new file mode 100644 index 0000000..a471389 Binary files /dev/null and b/2022-03-replication/replication.pdf differ diff --git a/2022-03-replication/rs/rs-eg.pdf b/2022-03-replication/rs/rs-eg.pdf new file mode 100644 index 0000000..f71df23 Binary files /dev/null and b/2022-03-replication/rs/rs-eg.pdf differ