The following listing shows the ARM linker script used in the LegUp ARM hybrid system.
1 /* Linker Script Parameters
2 Use --defsym <parameter>=<value> in the linker command to define values.
3
10 __data_memory should be specified if you wish to put the data sections 11 in a specific memory location. i.e. use
12 --defsym __data_memory=0xC0000000 in the linker command 13 __text_lma allow user to specify the logical memory address to be
14 used for the .text section
15 __vma_offset allow user to specify a virtual memory offset from the
16 logical memory address
17 __stack_top allow user to specify the top of the stack (probably the
18 end of memory)
24 /* Bring in the interrupt routines & vector */
25 EXTERN (__cs3_reset)
37 /* These symbols are defined through linker options in Monitor Program.[
ArmA9AsmProgram.java]
38 EXTERN(arm_program_mem arm_available_mem_size) 39
40 EXTERN(__cs3_start_c main __cs3_stack __cs3_heap_end) 41
42 /* Provide fall-back values */
108
Appendix B. ARM Linker Script 109
43 PROVIDE(__cs3_heap_start = _end);
44 PROVIDE(__cs3_heap_end = __cs3_region_start_ram + __cs3_region_size_ram);
45 PROVIDE(__cs3_region_num = (__cs3_regions_end - __cs3_regions) / 20);
46 PROVIDE(__cs3_stack = __cs3_region_start_ram + __cs3_region_size_ram);
47
48 PROVIDE(__startup_lma = 0x00100000);
49 PROVIDE(__text_lma = 0x00200000);
50 PROVIDE(__vma_offset = 0x00000000);
51 __text_vma = __text_lma + __vma_offset;
52
53 PROVIDE(__stack_top = __vma_offset + 0x40000000);
54
55 INPUT(arm_startup.o) 56
57 /* __legup_init_arm is the entry point the preloader should branch to */
58 ENTRY(__legup_init_arm) 59
60 SECTIONS 61 {
62 /* The startup section should be first
63 This section has the same LMA and VMA, and will reside in the 1GB of DDR on 64 the DE1-SoC. */
65 .startup __startup_lma :
66 {
67 /* LegUp ARM startup code - setup MMU, Page Tables, Caches, etc */
68 arm_startup.o(.startup) 69 *(.startup)
70
71 /* Put the translation table here */
72 . = ALIGN(0x4000);
79 /* The .text section has an LMA in the first 1GB of the address space, and a 80 VMA in the second GB of the address space, eg. starting at 0x40000000. */
81 .text __text_vma : 82 AT(__text_lma)
83 {
84 __text_start = .;
85 /* the .text section of arm_startup should be the first thing to run after our 86 startup code */
107 __data_start_vma = DEFINED(__data_memory) ? __data_memory : . ;
108 __data_start_lma = DEFINED(__data_memory) ? __data_memory : . - __vma_offset ; 109
110 .data __data_start_vma : 111 AT(__data_start_lma) 112 ALIGN (0x10)
113 {
114 KEEP(*(.jcr)) 115 *(.got.plt) *(.got) 116 *(.shdata)
117 *(.data .data.* .gnu.linkonce.d.*) 118 . = ALIGN (8);
119 *(.ram)
120 . = ALIGN (8);
121 _edata = .;
122 }
123 .bss : ALIGN (0x10)
124 {
125 *(.shbss)
126 *(.bss .bss.* .gnu.linkonce.b.*) 127 *(COMMON)
128 . = ALIGN (8);
129 *(.ram.b .bss.ram) 130 . = ALIGN (8);
131 _end = .;
132 __end = .;
133 }
134 135 }
Bibliography
[1] Achronix Semiconductor. Introducing the Speedcore eFPGA, 2019. https://www.achronix.com/product/speedcore/.
[2] Laksono Adhianto, Sinchan Banerjee, Mike Fagan, Mark Krentel, Gabriel Marin, John Mellor-Crummey, and Nathan R Tallent. Hpctoolkit: Tools for performance analysis of optimized parallel programs. Concurrency and Computation: Practice and Experience, 22(6):685–701, 2010.
[3] Altera, Corp. Altera SDK for OpenCL, 2014.
[4] Altera, Corp. Cyclone V SoC hard processor system, 2014.
[5] ARM. ARM Architecture Reference Manual, 2016.
[6] ARM. ARM Cortex-A9 MPCore Technical Reference Manual, 2016.
[7] ARM. ARM Cortex-A9 Technical Reference Manual, 2016.
[8] ARM. CoreLink Level 2 Cache Controller Technical Reference Manual, 2016.
[9] Fabrice Bellard. Qemu, a fast and portable dynamic translator. In USENIX Annual Technical Conference, FREENIX Track, volume 41, page 46, 2005.
[10] N. Calagar, J. Anderson, and S. Brown. Source-level debugging for FPGA high-level synthesis. In International Conference on Field-Programmable Logic and Applications, 2014.
[11] A. Canis, J. Choi, et al. LegUp: high-level synthesis for FPGA-based processor/accelerator systems. In ACM/SIGDA FPGA, pages 33–36, 2011.
111
[12] Andrew Canis, Jason H Anderson, and Stephen D Brown. Multi-pumping for resource reduction in fpga high-level synthesis. In Design, Automation & Test in Europe Conference
& Exhibition (DATE), 2013, pages 194–197. IEEE, 2013.
[13] Andrew Canis, Stephen D Brown, and Jason H Anderson. Modulo sdc scheduling with recurrence minimization in high-level synthesis. In Field Programmable Logic and Applications (FPL), 2014 24th International Conference on, pages 1–8. IEEE, 2014.
[14] Andrew Canis, Jongsok Choi, Blair Fort, Ruolong Lian, Qijing Huang, Nazanin Calagar, Marcel Gort, Jia Jun Qin, Mark Aldham, Tomasz Czajkowski, et al. From software to accelerators with LegUp high-level synthesis. In Compilers, Architecture and Synthesis for Embedded Systems (CASES), 2013 International Conference on, pages 1–9. IEEE, 2013.
[15] N Cha. Fatfs generic fat file system module, 2019.
[16] J. Choi, J. Anderson, and S. Brown. From software threads to parallel hardware in FPGA high-level synthesis. In IEEE International Conference on Field-Programmable Technology, pages 270–279, 2013.
[17] Jongsok Choi, Stephen D Brown, and Jason H Anderson. From pthreads to multicore hardware systems in legup high-level synthesis for fpgas. IEEE Transactions on Very Large Scale Integration (VLSI) Systems, 25(10):2867–2880, 2017.
[18] Jongsok Choi, Ruo Long Lian, Stephen Brown, and Jason Anderson. A unified software approach to specify pipeline and spatial parallelism in FPGA hardware. In Application-specific Systems, Architectures and Processors (ASAP), 2016 IEEE 27th International Conference on, pages 75–82. IEEE, 2016.
[19] Jongsok Choi, Kevin Nam, Andrew Canis, Jason Anderson, Stephen Brown, and Tomasz Czajkowski. Impact of cache architecture and interface on performance and area of FPGA-based processor/parallel-accelerator systems. In Field-Programmable Custom Computing Machines (FCCM), 2012 IEEE 20th Annual International Symposium on, pages 17–24.
IEEE, 2012.
[20] Arnaldo Carvalho De Melo. The new linux ‘perf’ tools. In Slides from Linux Kongress, volume 18, 2010.
[21] Stephane Eranian. Perfmon2: a flexible performance monitoring interface for linux. In Proc. of the 2006 Ottawa Linux Symposium, pages 269–288, 2006.
[22] Tom Feist. Vivado design suite. White Paper, 5, 2012.
[23] Blair Fort, Andrew Canis, Jongsok Choi, Nazanin Calagar, Ruolong Lian, Stefan Hadjis, Yu Ting Chen, Mathew Hall, Bain Syrowik, Tomasz Czajkowski, et al. Automating the design of processor/accelerator embedded systems with LegUp high-level synthesis. In Embedded and Ubiquitous Computing (EUC), 2014 12th IEEE International Conference on, pages 120–129. IEEE, 2014.
[24] Stefan Hadjis, Andrew Canis, Ryoya Sobue, Yuko Hara-Azumi, Hiroyuki Tomiyama, and Jason Anderson. Profiling-driven multi-cycling in fpga high-level synthesis. In Proceedings of the 2015 Design, Automation & Test in Europe Conference & Exhibition, pages 31–36.
EDA Consortium, 2015.
[25] Y. Hara, H. Tomiyama, S. Honda, and H. Takada. Proposal and quantitative analysis of the CHStone benchmark program suite for practical C-based high-level synthesis. Jour. of Information Processing, 17:242 – 254, 2009.
[26] Jens Huthmann, Björn Liebig, Julian Oppermann, and Andreas Koch. Hardware/software co-compilation with the nymble system. In Reconfigurable and Communication-Centric Systems-on-Chip (ReCoSoC), 2013 8th International Workshop on, pages 1–8. IEEE, 2013.
[27] Intel Corp. Intel HLS Compiler, 2018.
[28] J Johnston and T Fitzsimmons. The newlib homepage. URL http://sourceware.org/newlib, 2011.
[29] Vinod Kathail, James Hwang, Welson Sun, Yogesh Chobe, Tom Shui, and Jorge Carrillo.
Sdsoc: A higher-level programming environment for zynq soc and ultrascale+ mpsoc. In Proceedings of the 2016 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays, pages 4–4. ACM, 2016.
[30] Ana Klimovic and Jason H Anderson. Bitwidth-optimized hardware accelerators with software fallback. In Field-Programmable Technology (FPT), 2013 International Conference on, pages 136–143. IEEE, 2013.
[31] David Koeplinger et al. Automatic generation of efficient accelerators for reconfigurable hardware. In ACM/IEEE Int. Symp. on Computer Architecture (ISCA). IEEE, 2016.
[32] Rick Kufrin. Perfsuite: An accessible, open source performance analysis environment for linux. In 6th International Conference on Linux Clusters: The HPC Revolution, volume 151, page 05, 2005.
[33] I. Kuon and J. Rose. Measuring the gap between FPGAs and ASICs. IEEE Trans. On CAD, 26(2):203–215, February 2007.
[34] LegUp High-Level Synthesis. LegUp 4.0 Documentation, 2018. http://legup.eecg.utoronto.ca/docs/4.0/index.html.
[35] John Levon, Philippe Elie, et al. Oprofile, a system-wide profiler for linux systems. 2019.
http://oprofile.sourceforge.net.
[36] Critical Link. Important Note about FPGA/HPS SDRAM Bridge, 2013. https://support.criticallink.com/redmine/projects/mityarm-5cs/wiki/Important_Note_about_FPGAHPS_SDRAM_Bridge.
[37] LLVM Compiler Project. LLVM Users, 2018. https://llvm.org/Users.html.
[38] Roel Meeuws, Carlo Galuzzi, and Koen Bertels. High level quantitative hardware prediction modeling using statistical methods. In Embedded Computer Systems (SAMOS), 2011 International Conference on, pages 140–149. IEEE, 2011.
[39] Mentor Graphics. Catapult high-level synthesis, 2016. https://www.mentor.com/hls-lp/catapult-high-level-synthesis/.
[40] Microsemi Corporation. SmartFusion2 SoC FPGAs, 2019. https://www.microsemi.com/product-directory/soc-fpgas/1692-smartfusion2.
[41] Sparsh Mittal and Jeffrey S Vetter. A survey of CPU-GPU heterogeneous computing techniques. ACM Computing Surveys (CSUR), 47(4):69, 2015.
[42] Philip J. Mucci, Shirley Browne, Christine Deane, and George Ho. PAPI: A portable interface to hardware performance counters. In Proceedings of the Department of Defense HPCMP Users Group Conference, pages 7–10, 1999.
[43] Razvan Nane, Vlad-Mihai Sima, Christian Pilato, Jongsok Choi, Blair Fort, Andrew Canis, Yu Ting Chen, Hsuan Hsiao, Stephen Brown, Fabrizio Ferrandi, et al. A survey and evaluation of FPGA high-level synthesis tools. IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems, 35(10):1591–1604, 2016.
[44] Razvan Nane, Vlad Mihai Sima, Cuong Pham Quoc, Fernando Goncalves, and Koen Bertels. High-level synthesis in the delft workbench hardware/software co-design tool-chain. In Embedded and Ubiquitous Computing (EUC), 2014 12th IEEE International Conference on, pages 138–145. IEEE, 2014.
[45] Julian Oppermann and Andreas Koch. Detecting kernels suitable for C-based high-level hardware synthesis. In IEEE Conf. on Ubiquitous Intelligence & Computing, Advanced and Trusted Computing, Scalable Computing and Communications, Cloud and Big Data Computing, Internet of People, and Smart World Congress, pages 1157–1164. IEEE, 2016.
[46] S Arash Ostadzadeh, Roel J Meeuws, Carlo Galuzzi, and Koen Bertels. Quad–a memory access pattern analyser. In International Symposium on Applied Reconfigurable Computing, pages 269–281. Springer, 2010.
[47] Luca Piccolboni et al. Cosmos: Coordination of high-level synthesis and memory optimization for hardware accelerators. ACM Transactions on Embedded Computing Systems (TECS), 16(5s):150, 2017.
[48] Andrew Putnam, Adrian M Caulfield, Eric S Chung, Derek Chiou, Kypros Constantinides, John Demme, Hadi Esmaeilzadeh, Jeremy Fowers, Gopi Prashanth Gopal, Jan Gray, et al.
A reconfigurable fabric for accelerating large-scale datacenter services. ACM SIGARCH Computer Architecture News, 42(3):13–24, 2014.
[49] RotateRight. rotateright, 2019. https://www.rotateright.com/.
[50] B Carrion Schafer and Kazutoshi Wakabayashi. Machine learning predictive modelling high-level synthesis design space exploration. IET Computers & Digital Techniques, 6(3):153–159, 2012.
[51] Martin Schulz, Jim Galarowicz, Don Maghrak, William Hachfeld, David Montoya, and Scott Cranford. Open|SpeedShop: An open source infrastructure for parallel performance analysis. Scientific Programming, 16(2-3):105–121, 2008.
[52] Yakun Sophia Shao et al. Aladdin: A pre-rtl, power-performance accelerator simulator enabling large design space exploration of customized architectures. In ACM SIGARCH Computer Architecture News, volume 42, pages 97–108. IEEE, 2014.
[53] Sameer S Shende and Allen D Malony. The TAU parallel performance system. The International Journal of High Performance Computing Applications, 20(2):287–311, 2006.
[54] Bain Syrowik, Blair Fort, and Stephen Brown. Use of CPU performance counters for accelerator selection in HLS-generated CPU-accelerator systems. In International Symposium on Highly-Efficient Accelerators and Reconfigurable Technology, 2018.
[55] Chris Tofallis. Least squares percentage regression. Journal of Modern Applied Statistical Methods, 2009.
[56] Kuen Hung Tsoi and Wayne Luk. Axel: a heterogeneous cluster with fpgas and gpus.
In Proceedings of the 18th annual ACM/SIGDA international symposium on Field programmable gate arrays, pages 115–124. ACM, 2010.
[57] University of Cambridge. The Tiger "MIPS" processor, 2010. http://www.cl.cam.ac.uk/teaching/0910/ECAD+Arch/mips.html.
[58] University of Tennessee. papiex, 2019. http://icl.cs.utk.edu/~mucci/papiex/.
[59] Markus Vogt, Gerald Hempel, Jerónimo Castrillón, and Christian Hochberger. GCC-plugin for automated accelerator generation and integration on hybrid FPGA-SoCs. CoRR, abs/1509.00025, 2015.
[60] Wikipedia contributors. Compiler — Wikipedia, the free encyclopedia, 2018. [Online;
accessed 22-August-2018].
[61] Loring Wirbel. Xilinx SDAccel: a unified development environment for tomorrow’s data center. Technical report, Technical Report, The Linley Group Inc, 2014.
[62] Jieru Zhao et al. Comba: A comprehensive model-based analysis framework for high level synthesis of real applications. In IEEE/ACM Int. Conf. on Computer-Aided Design (ICCAD), pages 430–437. IEEE, 2017.
[63] Guanwen Zhong, Alok Prakash, Yun Liang, Tulika Mitra, and Smail Niar. Lin-analyzer: a high-level performance analysis tool for FPGA-based accelerators. In ACM/EDAC/IEEE Design Automation Conf. (DAC), page 136. ACM, 2016.